sml2h3 / dddd_trainer

ddddocr训练工具

Geek Repo:Geek Repo

Github PK Tool:Github PK Tool

学习素材怎么准备

26038874 opened this issue · comments

这些是原始素材,我有两个问题:一、这些素材的答案(标签)怎么准备,只能手动标注吗?二、这是点选类验证码,应该怎么训练,把全部素材放在一起训练就可以了吗?
image

还有一个疑问:使用默认模型时识别率很低。如果重新训练,是只能使用自己之后打算识别的这类素材,还是也可以下载其他素材来训练?

一个简单的示例:生成10万张验证码图片,每张图片的文本长度为4-6个字符,由数字和大小写字母组成

from captcha.image import ImageCaptcha
import numpy as np
from PIL import Image
import random
import os
import hashlib

# Character pools the captcha text is drawn from: digits, lowercase
# letters and uppercase letters, each stored as a list of 1-char strings.
number = list('0123456789')
alphabet = list('abcdefghijklmnopqrstuvwxyz')
ALPHABET = list('ABCDEFGHIJKLMNOPQRSTUVWXYZ')


def random_captcha_text(char_set=None, captcha_size=None):
    """Pick random captcha characters.

    Args:
        char_set: Sequence of characters to sample from (with replacement).
            When None, digits plus lowercase and uppercase letters are used.
        captcha_size: Number of characters to pick. When None, a random
            length between 4 and 6 (inclusive) is chosen.

    Returns:
        A list of ``captcha_size`` characters sampled from ``char_set``.
    """
    pool = number + alphabet + ALPHABET if char_set is None else char_set
    # The length is drawn before the characters, so the order of calls into
    # the random generator stays the same as a two-step if/assign version.
    length = random.randint(4, 6) if captcha_size is None else captcha_size
    return random.choices(pool, k=length)


def gen_captcha_text_and_image():
    """Generate one random captcha, save it under ``dist/`` and return it.

    The image width (100-200) and height (50-100) are randomised per call.
    The file is written as ``dist/<text>_<sha256 of image bytes>.jpg``.

    Returns:
        A ``(captcha_text, captcha_image)`` tuple: the label as a ``str`` and
        the decoded image as a numpy array.
    """
    # Randomise the difficulty parameters, i.e. the image width and height.
    width, height = random.randint(100, 200), random.randint(50, 100)
    image = ImageCaptcha(width=width, height=height)

    captcha_text = ''.join(random_captcha_text())

    # generate() encodes as PNG by default; request JPEG so the bytes on
    # disk actually match the ".jpg" extension used below.
    captcha = image.generate(captcha_text, format='jpeg')
    image_bytes = captcha.getvalue()

    # Hash the encoded image rather than the label: a hash of the label is
    # identical for every sample with the same text, so a repeated label
    # would overwrite the earlier file. Byte-identical images still map to
    # the same name, which is harmless.
    hash_hex = hashlib.sha256(image_bytes).hexdigest()
    filename = f"{captcha_text}_{hash_hex}.jpg"

    # Make sure the output directory exists even when this function is
    # called without going through the script entry point.
    os.makedirs("dist", exist_ok=True)
    file_path = os.path.join("dist", filename)

    # Write the encoded image to disk.
    with open(file_path, 'wb') as f:
        f.write(image_bytes)

    captcha_image = np.array(Image.open(captcha))
    return captcha_text, captcha_image


if __name__ == '__main__':
    # Create the output directory up front; exist_ok avoids the
    # check-then-create race of testing os.path.exists() first.
    os.makedirs('dist', exist_ok=True)

    # Generate 100,000 captcha samples; the return values are not needed
    # here because each call writes its image to disk.
    for _ in range(100000):
        gen_captcha_text_and_image()

一个简单的示例:生成10万张验证码图片,每张图片的文本长度为4-6个字符,由数字和大小写字母组成

from captcha.image import ImageCaptcha
import numpy as np
from PIL import Image
import random
import os
import hashlib

# Character pools the captcha text is drawn from: digits, lowercase
# letters and uppercase letters, each stored as a list of 1-char strings.
number = list('0123456789')
alphabet = list('abcdefghijklmnopqrstuvwxyz')
ALPHABET = list('ABCDEFGHIJKLMNOPQRSTUVWXYZ')


def random_captcha_text(char_set=None, captcha_size=None):
    """Pick random captcha characters.

    Args:
        char_set: Sequence of characters to sample from (with replacement).
            When None, digits plus lowercase and uppercase letters are used.
        captcha_size: Number of characters to pick. When None, a random
            length between 4 and 6 (inclusive) is chosen.

    Returns:
        A list of ``captcha_size`` characters sampled from ``char_set``.
    """
    pool = number + alphabet + ALPHABET if char_set is None else char_set
    # The length is drawn before the characters, so the order of calls into
    # the random generator stays the same as a two-step if/assign version.
    length = random.randint(4, 6) if captcha_size is None else captcha_size
    return random.choices(pool, k=length)


def gen_captcha_text_and_image():
    """Generate one random captcha, save it under ``dist/`` and return it.

    The image width (100-200) and height (50-100) are randomised per call.
    The file is written as ``dist/<text>_<sha256 of image bytes>.jpg``.

    Returns:
        A ``(captcha_text, captcha_image)`` tuple: the label as a ``str`` and
        the decoded image as a numpy array.
    """
    # Randomise the difficulty parameters, i.e. the image width and height.
    width, height = random.randint(100, 200), random.randint(50, 100)
    image = ImageCaptcha(width=width, height=height)

    captcha_text = ''.join(random_captcha_text())

    # generate() encodes as PNG by default; request JPEG so the bytes on
    # disk actually match the ".jpg" extension used below.
    captcha = image.generate(captcha_text, format='jpeg')
    image_bytes = captcha.getvalue()

    # Hash the encoded image rather than the label: a hash of the label is
    # identical for every sample with the same text, so a repeated label
    # would overwrite the earlier file. Byte-identical images still map to
    # the same name, which is harmless.
    hash_hex = hashlib.sha256(image_bytes).hexdigest()
    filename = f"{captcha_text}_{hash_hex}.jpg"

    # Make sure the output directory exists even when this function is
    # called without going through the script entry point.
    os.makedirs("dist", exist_ok=True)
    file_path = os.path.join("dist", filename)

    # Write the encoded image to disk.
    with open(file_path, 'wb') as f:
        f.write(image_bytes)

    captcha_image = np.array(Image.open(captcha))
    return captcha_text, captcha_image


if __name__ == '__main__':
    # Create the output directory up front; exist_ok avoids the
    # check-then-create race of testing os.path.exists() first.
    os.makedirs('dist', exist_ok=True)

    # Generate 100,000 captcha samples; the return values are not needed
    # here because each call writes its image to disk.
    for _ in range(100000):
        gen_captcha_text_and_image()

这个能生成那种空心字体的形式么? 比如这种:
1