学习素材怎么准备

Question

学习素材怎么准备

26038874 opened this issue a year ago · comments

miracle commented a year ago

这些是原始素材，有两个问题：一、这些素材的答案怎么准备，只能手动准备吗？二、这个是点选类验证码，应该怎么训练，全部放在一起训练就可以了吗？

miracle · Answer 1 · Wed Aug 23 2023 00:12:11 GMT+0800 (China Standard Time)

还有一个疑问：使用默认的模型识别率很低，如果重新训练，是只能用自己后面打算识别的这类素材，还是也可以下载其他素材来训练？

SXueckShen · Answer 2 · Wed Dec 13 2023 01:27:18 GMT+0800 (China Standard Time)

一个简单的示例：生成10万张验证码图片，验证码文本长度为4-6，由数字和大小写字母组成。

from captcha.image import ImageCaptcha
import numpy as np
from PIL import Image
import random
import os
import hashlib

# Character pools that captcha labels are drawn from.
number = list('0123456789')  # decimal digits
alphabet = list('abcdefghijklmnopqrstuvwxyz')  # lowercase ASCII letters
ALPHABET = list('ABCDEFGHIJKLMNOPQRSTUVWXYZ')  # uppercase ASCII letters


def random_captcha_text(char_set=None, captcha_size=None):
    """Return a list of randomly chosen captcha characters.

    Args:
        char_set: Sequence of characters to sample from (with replacement).
            When ``None``, digits plus lowercase and uppercase letters are used.
        captcha_size: Number of characters to draw. When ``None``, a random
            length between 4 and 6 (inclusive) is picked.

    Returns:
        list: The sampled characters; the caller joins them into a string.
    """
    pool = number + alphabet + ALPHABET if char_set is None else char_set
    # Random length from 4 to 6 when the caller does not fix one.
    length = random.randint(4, 6) if captcha_size is None else captcha_size
    return random.choices(pool, k=length)


def gen_captcha_text_and_image():
    """Generate one random captcha, save it under ``dist/``, and return it.

    The captcha is rendered at a random size (width 100-200, height 50-100),
    encoded as JPEG so the bytes match the ``.jpg`` extension, and written to
    ``dist/<text>_<hex digest>.jpg``. The digest is salted with random bytes,
    so two captchas with the same text do not overwrite each other.

    Returns:
        tuple: ``(captcha_text, captcha_image)`` where ``captcha_text`` is the
        label string and ``captcha_image`` is the saved image decoded into a
        NumPy array.
    """
    # Randomize the image width and height to vary the difficulty.
    width, height = random.randint(100, 200), random.randint(50, 100)
    image = ImageCaptcha(width=width, height=height)

    captcha_text = ''.join(random_captcha_text())

    # Hashing the label alone would give the same file name every time the
    # same text is drawn; the random salt keeps the "<label>_<hash>.jpg"
    # naming pattern while making each file name unique.
    salted = captcha_text.encode('utf-8') + os.urandom(16)
    hash_hex = hashlib.sha256(salted).hexdigest()
    filename = f"{captcha_text}_{hash_hex}.jpg"

    # Make the function usable on its own, without relying on the caller
    # to have created the output directory first.
    os.makedirs("dist", exist_ok=True)
    file_path = os.path.join("dist", filename)

    # generate() encodes as PNG by default; ask for JPEG explicitly so the
    # file content agrees with its ".jpg" extension.
    captcha = image.generate(captcha_text, format='jpeg')
    # Write the encoded image to disk.
    with open(file_path, 'wb') as f:
        f.write(captcha.getbuffer())

    captcha_image = np.array(Image.open(captcha))
    return captcha_text, captcha_image


if __name__ == '__main__':
    # Create the output directory up front; exist_ok avoids the
    # check-then-create race of a separate os.path.exists() test.
    os.makedirs('dist', exist_ok=True)

    # Generate 100,000 samples; each call writes one image into dist/.
    for _ in range(100000):
        gen_captcha_text_and_image()

cynen · Answer 3 · Wed Jul 24 2024 16:50:40 GMT+0800 (China Standard Time)

一个简单的示例：生成10万张验证码图片，验证码文本长度为4-6，由数字和大小写字母组成。

from captcha.image import ImageCaptcha
import numpy as np
from PIL import Image
import random
import os
import hashlib

# Character pools that captcha labels are drawn from.
number = list('0123456789')  # decimal digits
alphabet = list('abcdefghijklmnopqrstuvwxyz')  # lowercase ASCII letters
ALPHABET = list('ABCDEFGHIJKLMNOPQRSTUVWXYZ')  # uppercase ASCII letters


def random_captcha_text(char_set=None, captcha_size=None):
    """Return a list of randomly chosen captcha characters.

    Args:
        char_set: Sequence of characters to sample from (with replacement).
            When ``None``, digits plus lowercase and uppercase letters are used.
        captcha_size: Number of characters to draw. When ``None``, a random
            length between 4 and 6 (inclusive) is picked.

    Returns:
        list: The sampled characters; the caller joins them into a string.
    """
    pool = number + alphabet + ALPHABET if char_set is None else char_set
    # Random length from 4 to 6 when the caller does not fix one.
    length = random.randint(4, 6) if captcha_size is None else captcha_size
    return random.choices(pool, k=length)


def gen_captcha_text_and_image():
    """Generate one random captcha, save it under ``dist/``, and return it.

    The captcha is rendered at a random size (width 100-200, height 50-100),
    encoded as JPEG so the bytes match the ``.jpg`` extension, and written to
    ``dist/<text>_<hex digest>.jpg``. The digest is salted with random bytes,
    so two captchas with the same text do not overwrite each other.

    Returns:
        tuple: ``(captcha_text, captcha_image)`` where ``captcha_text`` is the
        label string and ``captcha_image`` is the saved image decoded into a
        NumPy array.
    """
    # Randomize the image width and height to vary the difficulty.
    width, height = random.randint(100, 200), random.randint(50, 100)
    image = ImageCaptcha(width=width, height=height)

    captcha_text = ''.join(random_captcha_text())

    # Hashing the label alone would give the same file name every time the
    # same text is drawn; the random salt keeps the "<label>_<hash>.jpg"
    # naming pattern while making each file name unique.
    salted = captcha_text.encode('utf-8') + os.urandom(16)
    hash_hex = hashlib.sha256(salted).hexdigest()
    filename = f"{captcha_text}_{hash_hex}.jpg"

    # Make the function usable on its own, without relying on the caller
    # to have created the output directory first.
    os.makedirs("dist", exist_ok=True)
    file_path = os.path.join("dist", filename)

    # generate() encodes as PNG by default; ask for JPEG explicitly so the
    # file content agrees with its ".jpg" extension.
    captcha = image.generate(captcha_text, format='jpeg')
    # Write the encoded image to disk.
    with open(file_path, 'wb') as f:
        f.write(captcha.getbuffer())

    captcha_image = np.array(Image.open(captcha))
    return captcha_text, captcha_image


if __name__ == '__main__':
    # Create the output directory up front; exist_ok avoids the
    # check-then-create race of a separate os.path.exists() test.
    os.makedirs('dist', exist_ok=True)

    # Generate 100,000 samples; each call writes one image into dist/.
    for _ in range(100000):
        gen_captcha_text_and_image()

这个能生成那种空心字体的形式么? 比如这种: