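# captcha_input.py: reads the captcha image files and their target labels and saves them to a tfrecords file (each image must correspond one-to-one with its label).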
import tensorflow as tf
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2' # set the TensorFlow C++ minimum log level
# This script reads captcha images together with their target labels and stores
# them in a tfrecords file (each captcha image corresponds to exactly one label).
# Custom command-line flags; the values are read elsewhere through FLAGS.<name>.
FLAGS = tf.app.flags.FLAGS
# Path of the captcha tfrecords file.
tf.app.flags.DEFINE_string("tfrecords_dir", "./tfrecords/captcha.tfrecords", "验证码tfrecords文件")
# Directory that holds the captcha images.
tf.app.flags.DEFINE_string("captcha_dir", "../data/Genpics/", "验证码图片路径")
# Set of characters that may appear in a captcha; a character's position in this
# string is used as its numeric label.
tf.app.flags.DEFINE_string("letter", "ABCDEFGHIJKLMNOPQRSTUVWXYZ", "验证码字符的种类")
def dealwithlabel(label_str):
    """Convert a batch of captcha label strings into a tensor of letter indices.

    Every character of every label is replaced by its position in
    ``FLAGS.letter``. With the alphabet ``"ABC...Z"``, for example,
    ``[b'NZPP', b'WKHK']`` becomes ``[[13, 25, 15, 15], [22, 10, 7, 10]]``.

    :param label_str: iterable of labels; each item is either UTF-8 encoded
        ``bytes`` or an already-decoded ``str``
    :return: the result of ``tf.constant`` applied to the nested index list
    :raises KeyError: if a label contains a character missing from
        ``FLAGS.letter``
    """
    # Character -> index lookup table: {'A': 0, 'B': 1, ...}
    letter_num = {letter: index for index, letter in enumerate(FLAGS.letter)}
    print(letter_num)

    # One list of indices per label, e.g. 'NZPP' -> [13, 25, 15, 15].
    # bytes items are decoded first; str items are used as they are.
    array = [
        [
            letter_num[letter]
            for letter in (
                string.decode('utf-8') if isinstance(string, bytes) else string
            )
        ]
        for string in label_str
    ]
    print(array)

    # Hand the nested Python list to TensorFlow as a constant tensor.
    label = tf.constant(array)
    return label
def get_captcha_image():
"""
获取验证码图片数据
:param file_list: 路径+文件名列表
:return: image
"""
# 构造文件名
filename = []
for i in range(6000):
string = str(i) + ".jpg"
filename.append(string)
# 构造路径+文件
file_list = [os.path.join(FLAGS.captcha_dir, file) for file