Python CAPTCHA Recognition with a CNN: Using a Convolutional Neural Network to Recognize CAPTCHAs

gen_sample_by_captcha.py — generate CAPTCHA images

# -*- coding: UTF-8 -*-
"""
Generate CAPTCHA images with the captcha library (prerequisite: pip install captcha)
"""
from captcha.image import ImageCaptcha
import os
import random
import time


def gen_special_img(text, file_path):
    # generate one image file
    generator = ImageCaptcha(width=width, height=height)  # specify the size
    img = generator.generate_image(text)  # generate the image
    img.save(file_path)  # save the image


if __name__ == '__main__':
    # configuration
    root_dir = "../sample/origin/"  # directory where the images are stored
    image_suffix = "png"            # image file suffix
    characters = "0123456789"       # character set drawn on the images
    # characters = "0123456789abcdefghijklmnopqrstuvwxyz"
    count = 10000                   # number of samples to generate
    char_count = 4                  # number of characters per image
    # image width and height
    width = 100
    height = 60
    # create the output directory if it does not exist
    if not os.path.exists(root_dir):
        os.mkdir(root_dir)
    for i in range(count):
        text = ""
        for j in range(char_count):
            text += random.choice(characters)
        timec = str(time.time()).replace(".", "")
        p = os.path.join(root_dir, "{}_{}.{}".format(text, timec, image_suffix))
        gen_special_img(text, p)

sample.py — configuration file

from easydict import EasyDict
import os
import json

# EasyDict lets you access dictionary values as attributes
sample_conf = EasyDict()

# image directories
sample_conf.origin_image_dir = "./sample/origin/"
sample_conf.train_image_dir = "./sample/train/"
sample_conf.test_image_dir = "./sample/test/"
sample_conf.api_image_dir = "./sample/api/"
sample_conf.online_image_dir = "./sample/online/"
sample_conf.local_image_dir = "./sample/local/"

# model directory
sample_conf.model_save_dir = "./model/"

# image parameters
sample_conf.image_width = 100
sample_conf.image_height = 60
sample_conf.max_captcha = 4
sample_conf.image_suffix = "png"

# CAPTCHA character set
sample_conf.char_set = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i',
                        'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z']
# char_set = ['a','b','c','d','e','f','g','h','i','j','k','l','m','n','o','p','q','r','s','t','u','v','w','x','y','z']
# char_set = ['A','B','C','D','E','F','G','H','I','J','K','L','M','N','O','P','Q','R','S','T','U','V','W','X','Y','Z']

use_labels_json_file = False
if use_labels_json_file:
    if os.path.exists("gen_image/labels.json"):
        with open("gen_image/labels.json", "r") as f:
            content = f.read()
            if content:
                sample_conf.char_set = json.loads(content)
            else:
                pass
    else:
        pass

sample_conf.remote_url = "https://www.xxxxx.com/getImg"
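To illustrate why EasyDict is used for the configuration object, here is a minimal sketch (not part of the project files, assuming easydict is installed): the same value can be read either as an attribute or as a dictionary key, which is exactly how the later scripts consume sample_conf.

# a minimal sketch of EasyDict access
from easydict import EasyDict

conf = EasyDict()
conf.image_width = 100           # set as an attribute
print(conf.image_width)          # 100, attribute-style access
print(conf["image_width"])       # 100, dict-style access, as used in verify_and_split_data.py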

verify_and_split_data.py — verify image sizes and split the data into training and test sets

"""

验证图片尺寸和划分测试集(5%)和训练集(95%)

"""

from PIL import Image

import random

import os

import shutil

from sample import sample_conf

def verify(origin_dir, real_width, real_height, image_suffix):

"""

校验图片大小

:return:

"""

print("开始校验原始图片集")

# 图片真实尺寸

real_size = (real_width, real_height)

# 图片名称列表和数量

img_list = os.listdir(origin_dir)

total_count = len(img_list)

print("原始集共有图片: {}张".format(total_count))

# 无效图片列表

bad_img = []

# 遍历所有图片进行验证

for index, img_name in enumerate(img_list):

file_path = os.path.join(origin_dir, img_name)

# 过滤图片不正确的后缀

if not img_name.endswith(image_suffix):

bad_img.append((index, img_name, "文件后缀不正确"))

continue

# 过滤图片标签不标准的情况

prefix, posfix = img_name.split("_")

if prefix == "" or posfix == "":

bad_img.append((index, img_name, "图片标签异常"))

continue

# 图片无法正常打开

try:

img = Image.open(file_path)

except OSError:

bad_img.append((index, img_name, "图片无法正常打开"))

continue

# 图片尺寸有异常

if real_size == img.size:

print("{} pass".format(index), end='\r')

else:

bad_img.append((index, img_name, "图片尺寸异常为:{}".format(img.size)))

print("====以下{}张图片有异常====".format(len(bad_img)))

if bad_img:

for b in bad_img:

print("[第{}张图片] [{}] [{}]".format(b[0], b[1], b[2]))

else:

print("未发现异常(共 {} 张图片)".format(len(img_list)))

print("========end\n")

return bad_img

def split(origin_dir, train_dir, test_dir, bad_imgs):

"""

分离训练集和测试集

:return:

"""

print("开始分离原始图片集为:测试集(5%)和训练集(95%)")

# 图片名称列表和数量

img_list = os.listdir(origin_dir)

for img in bad_imgs:

img_list.remove(img)

total_count = len(img_list)

print("共分配{}张图片到训练集和测试集,其中{}张为异常留在原始目录".format(total_count, len(bad_imgs)))

# 创建文件夹

if not os.path.exists(train_dir):

os.mkdir(train_dir)

if not os.path.exists(test_dir):

os.mkdir(test_dir)

# 测试集

test_count = int(total_count * 0.05)

test_set = set()

for i in range(test_count):

while True:

file_name = random.choice(img_list)

if file_name in test_set:

pass

else:

test_set.add(file_name)

img_list.remove(file_name)

break

test_list = list(test_set)

print("测试集数量为:{}".format(len(test_list)))

for file_name in test_list:

src = os.path.join(origin_dir, file_name)

dst = os.path.join(test_dir, file_name)

shutil.move(src, dst)

# 训练集

train_list = img_list

print("训练集数量为:{}".format(len(train_list)))

for file_name in train_list:

src = os.path.join(origin_dir, file_name)

dst = os.path.join(train_dir, file_name)

shutil.move(src, dst)

if os.listdir(origin_dir) == 0:

print("migration done")

def main():

# 图片路径

origin_dir = sample_conf["origin_image_dir"]

train_dir = sample_conf["train_image_dir"]

test_dir = sample_conf["test_image_dir"]

# 图片尺寸

real_width = sample_conf["image_width"]

real_height = sample_conf["image_height"]

# 图片后缀

image_suffix = sample_conf["image_suffix"]

bad_images_info = verify(origin_dir, real_width, real_height, image_suffix)

bad_imgs = []

for info in bad_images_info:

bad_imgs.append(info[1])

split(origin_dir, train_dir, test_dir, bad_imgs)

if __name__ == '__main__':

main()

train_model_v2.py — train the model, printing both training-set and validation-set accuracy during training

# -*- coding: utf-8 -*-
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import time
from PIL import Image
import random
import os
from sample import sample_conf
from tensorflow.python.framework.errors_impl import NotFoundError

# set the following environment variables to train on the CPU only
# os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
# os.environ["CUDA_VISIBLE_DEVICES"] = "-1"


class TrainError(Exception):
    pass


class TrainModel(object):
    def __init__(self, train_img_path, verify_img_path, char_set, model_save_dir, verify=False):
        # model path
        self.model_save_dir = model_save_dir
        # shuffle file order + verify image format
        self.train_img_path = train_img_path
        self.train_images_list = os.listdir(train_img_path)
        # verify the format
        if verify:
            self.confirm_image_suffix()
        # shuffle the file order
        random.seed(time.time())
        random.shuffle(self.train_images_list)
        # validation set files
        self.verify_img_path = verify_img_path
        self.verify_images_list = os.listdir(verify_img_path)
        # read image width/height and label length from one sample
        label, captcha_array = self.gen_captcha_text_image(train_img_path, self.train_images_list[0])
        captcha_shape = captcha_array.shape
        captcha_shape_len = len(captcha_shape)
        if captcha_shape_len == 3:
            image_height, image_width, channel = captcha_shape
            self.channel = channel
        elif captcha_shape_len == 2:
            image_height, image_width = captcha_shape
        else:
            raise TrainError("Error converting the image to a matrix, please check the image format")
        # initialize variables
        # image size
        self.image_height = image_height
        self.image_width = image_width
        # CAPTCHA length (number of characters)
        self.max_captcha = len(label)
        # CAPTCHA character classes
        self.char_set = char_set
        self.char_set_len = len(char_set)
        # print basic information
        print("-->Image size: {} X {}".format(image_height, image_width))
        print("-->CAPTCHA length: {}".format(self.max_captcha))
        print("-->{} character classes: {}".format(self.char_set_len, char_set))
        print("-->Training set: {}".format(train_img_path))
        print("-->Validation set: {}".format(verify_img_path))
        # tf placeholders
        self.X = tf.placeholder(tf.float32, [None, image_height * image_width])  # feature vector
        self.Y = tf.placeholder(tf.float32, [None, self.max_captcha * self.char_set_len])  # labels
        self.keep_prob = tf.placeholder(tf.float32)  # dropout keep probability
        self.w_alpha = 0.01
        self.b_alpha = 0.1
        # test model input and output
        print(">>> Start model test")
        batch_x, batch_y = self.get_batch(0, size=100)
        print(">>> input batch images shape: {}".format(batch_x.shape))
        print(">>> input batch labels shape: {}".format(batch_y.shape))

    @staticmethod
    def gen_captcha_text_image(img_path, img_name):
        """
        Return one CAPTCHA as an array together with its string label
        :return: tuple (str, numpy.array)
        """
        # label
        label = img_name.split("_")[0]
        # file
        img_file = os.path.join(img_path, img_name)
        captcha_image = Image.open(img_file)
        captcha_array = np.array(captcha_image)  # to array
        return label, captcha_array

    @staticmethod
    def convert2gray(img):
        """
        Convert the image to grayscale: compute it for a 3-channel image, return a single-channel image unchanged
        :param img:
        :return:
        """
        if len(img.shape) > 2:
            r, g, b = img[:, :, 0], img[:, :, 1], img[:, :, 2]
            gray = 0.2989 * r + 0.5870 * g + 0.1140 * b
            return gray
        else:
            return img

    def text2vec(self, text):
        """
        Convert a label string to a one-hot vector
        :param text: str
        :return: numpy.array
        """
        text_len = len(text)
        if text_len > self.max_captcha:
            raise ValueError('CAPTCHA labels can be at most {} characters long'.format(self.max_captcha))
        vector = np.zeros(self.max_captcha * self.char_set_len)
        for i, ch in enumerate(text):
            idx = i * self.char_set_len + self.char_set.index(ch)
            vector[idx] = 1
        return vector

    def get_batch(self, n, size=128):
        batch_x = np.zeros([size, self.image_height * self.image_width])  # initialize
        batch_y = np.zeros([size, self.max_captcha * self.char_set_len])  # initialize
        max_batch = int(len(self.train_images_list) / size)
        # print(max_batch)
        if max_batch - 1 < 0:
            raise TrainError("The training set must contain more images than one training batch")
        if n > max_batch - 1:
            n = n % max_batch
        s = n * size
        e = (n + 1) * size
        this_batch = self.train_images_list[s:e]
        # print("{}:{}".format(s, e))
        for i, img_name in enumerate(this_batch):
            label, image_array = self.gen_captcha_text_image(self.train_img_path, img_name)
            image_array = self.convert2gray(image_array)  # grayscale
            batch_x[i, :] = image_array.flatten() / 255  # flatten to 1-D
            batch_y[i, :] = self.text2vec(label)  # one-hot label
        return batch_x, batch_y

    def get_verify_batch(self, size=100):
        batch_x = np.zeros([size, self.image_height * self.image_width])  # initialize
        batch_y = np.zeros([size, self.max_captcha * self.char_set_len])  # initialize
        verify_images = []
        for i in range(size):
            verify_images.append(random.choice(self.verify_images_list))
        for i, img_name in enumerate(verify_images):
            label, image_array = self.gen_captcha_text_image(self.verify_img_path, img_name)
            image_array = self.convert2gray(image_array)  # grayscale
            batch_x[i, :] = image_array.flatten() / 255  # flatten to 1-D
            batch_y[i, :] = self.text2vec(label)  # one-hot label
        return batch_x, batch_y

    def confirm_image_suffix(self):
        # verify all file suffixes before training
        print("Start checking all image suffixes")
        for index, img_name in enumerate(self.train_images_list):
            print("{} image pass".format(index), end='\r')
            if not img_name.endswith(sample_conf['image_suffix']):
                raise TrainError('confirm images suffix:you request [.{}] file but get file [{}]'
                                 .format(sample_conf['image_suffix'], img_name))
        print("All image suffixes passed the check")

    def model(self):
        x = tf.reshape(self.X, shape=[-1, self.image_height, self.image_width, 1])
        print(">>> input x: {}".format(x))
        # convolutional layer 1
        wc1 = tf.get_variable(name='wc1', shape=[3, 3, 1, 32], dtype=tf.float32,
                              initializer=tf.contrib.layers.xavier_initializer())
        bc1 = tf.Variable(self.b_alpha * tf.random_normal([32]))
        conv1 = tf.nn.relu(tf.nn.bias_add(tf.nn.conv2d(x, wc1, strides=[1, 1, 1, 1], padding='SAME'), bc1))
        conv1 = tf.nn.max_pool(conv1, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
        conv1 = tf.nn.dropout(conv1, self.keep_prob)
        # convolutional layer 2
        wc2 = tf.get_variable(name='wc2', shape=[3, 3, 32, 64], dtype=tf.float32,
                              initializer=tf.contrib.layers.xavier_initializer())
        bc2 = tf.Variable(self.b_alpha * tf.random_normal([64]))
        conv2 = tf.nn.relu(tf.nn.bias_add(tf.nn.conv2d(conv1, wc2, strides=[1, 1, 1, 1], padding='SAME'), bc2))
        conv2 = tf.nn.max_pool(conv2, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
        conv2 = tf.nn.dropout(conv2, self.keep_prob)
        # convolutional layer 3
        wc3 = tf.get_variable(name='wc3', shape=[3, 3, 64, 128], dtype=tf.float32,
                              initializer=tf.contrib.layers.xavier_initializer())
        bc3 = tf.Variable(self.b_alpha * tf.random_normal([128]))
        conv3 = tf.nn.relu(tf.nn.bias_add(tf.nn.conv2d(conv2, wc3, strides=[1, 1, 1, 1], padding='SAME'), bc3))
        conv3 = tf.nn.max_pool(conv3, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
        conv3 = tf.nn.dropout(conv3, self.keep_prob)
        print(">>> convolution 3: ", conv3.shape)
        next_shape = conv3.shape[1] * conv3.shape[2] * conv3.shape[3]
        # fully connected layer 1
        wd1 = tf.get_variable(name='wd1', shape=[next_shape, 1024], dtype=tf.float32,
                              initializer=tf.contrib.layers.xavier_initializer())
        bd1 = tf.Variable(self.b_alpha * tf.random_normal([1024]))
        dense = tf.reshape(conv3, [-1, wd1.get_shape().as_list()[0]])
        dense = tf.nn.relu(tf.add(tf.matmul(dense, wd1), bd1))
        dense = tf.nn.dropout(dense, self.keep_prob)
        # fully connected layer 2 (output layer)
        wout = tf.get_variable('name', shape=[1024, self.max_captcha * self.char_set_len], dtype=tf.float32,
                               initializer=tf.contrib.layers.xavier_initializer())
        bout = tf.Variable(self.b_alpha * tf.random_normal([self.max_captcha * self.char_set_len]))
        y_predict = tf.add(tf.matmul(dense, wout), bout)
        return y_predict

    def train_cnn(self):
        y_predict = self.model()
        print(">>> input batch predict shape: {}".format(y_predict.shape))
        print(">>> End model test")
        # loss
        cost = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=y_predict, labels=self.Y))
        # gradient descent
        optimizer = tf.train.AdamOptimizer(learning_rate=0.0001).minimize(cost)
        # accuracy
        predict = tf.reshape(y_predict, [-1, self.max_captcha, self.char_set_len])  # predictions
        max_idx_p = tf.argmax(predict, 2)  # predicted characters
        max_idx_l = tf.argmax(tf.reshape(self.Y, [-1, self.max_captcha, self.char_set_len]), 2)  # labels
        correct_pred = tf.equal(max_idx_p, max_idx_l)
        accuracy_char_count = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
        accuracy_image_count = tf.reduce_mean(tf.reduce_min(tf.cast(correct_pred, tf.float32), axis=1))
        # model saver
        saver = tf.train.Saver()
        with tf.Session() as sess:
            init = tf.global_variables_initializer()
            sess.run(init)
            # restore the model
            if os.path.exists(self.model_save_dir):
                try:
                    saver.restore(sess, self.model_save_dir)
                # catch the error raised when the model directory contains no model files
                except ValueError:
                    print("The model directory is empty, a new model will be created")
            else:
                pass
            step = 1
            for i in range(3000):
                batch_x, batch_y = self.get_batch(i, size=128)
                # gradient descent step
                _, cost_ = sess.run([optimizer, cost], feed_dict={self.X: batch_x, self.Y: batch_y, self.keep_prob: 0.75})
                if step % 10 == 0:
                    # evaluation on the training set
                    batch_x_test, batch_y_test = self.get_batch(i, size=100)
                    acc_char = sess.run(accuracy_char_count, feed_dict={self.X: batch_x_test, self.Y: batch_y_test, self.keep_prob: 1.})
                    acc_image = sess.run(accuracy_image_count, feed_dict={self.X: batch_x_test, self.Y: batch_y_test, self.keep_prob: 1.})
                    print("Training step {} >>> ".format(step))
                    print("[train] character accuracy {:.5f} image accuracy {:.5f} >>> loss {:.10f}".format(acc_char, acc_image, cost_))
                    # evaluation on the validation set
                    batch_x_verify, batch_y_verify = self.get_verify_batch(size=100)
                    acc_char = sess.run(accuracy_char_count, feed_dict={self.X: batch_x_verify, self.Y: batch_y_verify, self.keep_prob: 1.})
                    acc_image = sess.run(accuracy_image_count, feed_dict={self.X: batch_x_verify, self.Y: batch_y_verify, self.keep_prob: 1.})
                    print("[validation] character accuracy {:.5f} image accuracy {:.5f} >>> loss {:.10f}".format(acc_char, acc_image, cost_))
                    # save and stop once validation accuracy reaches 99%
                    if acc_image > 0.99:
                        saver.save(sess, self.model_save_dir)
                        print("Validation accuracy reached 99%, model saved")
                        break
                # save a checkpoint every 500 iterations
                if i % 500 == 0:
                    saver.save(sess, self.model_save_dir)
                    print("Model checkpoint saved")
                step += 1
            saver.save(sess, self.model_save_dir)


def main():
    train_image_dir = sample_conf["train_image_dir"]
    verify_image_dir = sample_conf["test_image_dir"]
    char_set = sample_conf["char_set"]
    model_save_dir = sample_conf["model_save_dir"]
    tm = TrainModel(train_image_dir, verify_image_dir, char_set, model_save_dir, verify=False)
    tm.train_cnn()  # start training


if __name__ == '__main__':
    main()
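To make the label encoding performed by text2vec concrete, here is a small worked sketch (hypothetical values, using the digit-only character set from gen_sample_by_captcha.py rather than the full 36-character set in sample.py): each of the 4 character positions gets a block of len(char_set) slots, and the slot of the actual character is set to 1.

# a minimal sketch of the one-hot label encoding used by text2vec
import numpy as np

char_set = list("0123456789")
char_set_len = len(char_set)   # 10
max_captcha = 4

def text2vec(text):
    vector = np.zeros(max_captcha * char_set_len)
    for i, ch in enumerate(text):
        vector[i * char_set_len + char_set.index(ch)] = 1
    return vector

vec = text2vec("0420")
print(vec.shape)                       # (40,) -> 4 positions x 10 classes
print(np.where(vec == 1)[0].tolist())  # [0, 14, 22, 30]: index i*10 + digit for each position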

Training results

Training step 2960 >>>
[train] character accuracy 0.87500 image accuracy 0.61000 >>> loss 0.0337208398
[validation] character accuracy 0.81500 image accuracy 0.45000 >>> loss 0.0337208398
Training step 2970 >>>
[train] character accuracy 0.88500 image accuracy 0.62000 >>> loss 0.0343154743
[validation] character accuracy 0.80750 image accuracy 0.39000 >>> loss 0.0343154743
Training step 2980 >>>
[train] character accuracy 0.89250 image accuracy 0.65000 >>> loss 0.0298477933
[validation] character accuracy 0.80000 image accuracy 0.38000 >>> loss 0.0298477933
Training step 2990 >>>
[train] character accuracy 0.90250 image accuracy 0.71000 >>> loss 0.0316790938
[validation] character accuracy 0.83500 image accuracy 0.48000 >>> loss 0.0316790938
Training step 3000 >>>
[train] character accuracy 0.89000 image accuracy 0.69000 >>> loss 0.0330378339
[validation] character accuracy 0.83750 image accuracy 0.53000 >>> loss 0.0330378339

test_batch.py — batch evaluation

# -*- coding: utf-8 -*-
import tensorflow as tf
import numpy as np
import time
from PIL import Image
import random
import os
from sample import sample_conf


class TestError(Exception):
    pass


class TestBatch(object):
    def __init__(self, img_path, char_set, model_save_dir, total):
        # model path
        self.model_save_dir = model_save_dir
        # shuffle the file order
        self.img_path = img_path
        self.img_list = os.listdir(img_path)
        random.seed(time.time())
        random.shuffle(self.img_list)
        # read image width/height and label length from one sample
        label, captcha_array = self.gen_captcha_text_image()
        captcha_shape = captcha_array.shape
        captcha_shape_len = len(captcha_shape)
        if captcha_shape_len == 3:
            image_height, image_width, channel = captcha_shape
            self.channel = channel
        elif captcha_shape_len == 2:
            image_height, image_width = captcha_shape
        else:
            raise TestError("Error converting the image to a matrix, please check the image format")
        # initialize variables
        # image size
        self.image_height = image_height
        self.image_width = image_width
        # CAPTCHA length (number of characters)
        self.max_captcha = len(label)
        # CAPTCHA character classes
        self.char_set = char_set
        self.char_set_len = len(char_set)
        # number of test samples
        self.total = total
        # print basic information
        print("-->Image size: {} X {}".format(image_height, image_width))
        print("-->CAPTCHA length: {}".format(self.max_captcha))
        print("-->{} character classes: {}".format(self.char_set_len, char_set))
        print("-->Test set: {}".format(img_path))
        # tf placeholders
        self.X = tf.placeholder(tf.float32, [None, image_height * image_width])  # feature vector
        self.Y = tf.placeholder(tf.float32, [None, self.max_captcha * self.char_set_len])  # labels
        self.keep_prob = tf.placeholder(tf.float32)  # dropout keep probability
        self.w_alpha = 0.01
        self.b_alpha = 0.1

    def gen_captcha_text_image(self):
        """
        Return one CAPTCHA as an array together with its string label
        :return: tuple (str, numpy.array)
        """
        img_name = random.choice(self.img_list)
        # label
        label = img_name.split("_")[0]
        # file
        img_file = os.path.join(self.img_path, img_name)
        captcha_image = Image.open(img_file)
        captcha_array = np.array(captcha_image)  # to array
        return label, captcha_array

    @staticmethod
    def convert2gray(img):
        """
        Convert the image to grayscale: compute it for a 3-channel image, return a single-channel image unchanged
        :param img:
        :return:
        """
        if len(img.shape) > 2:
            r, g, b = img[:, :, 0], img[:, :, 1], img[:, :, 2]
            gray = 0.2989 * r + 0.5870 * g + 0.1140 * b
            return gray
        else:
            return img

    def text2vec(self, text):
        """
        Convert a label string to a one-hot vector
        :param text: str
        :return: numpy.array
        """
        text_len = len(text)
        if text_len > self.max_captcha:
            raise ValueError('CAPTCHA labels can be at most {} characters long'.format(self.max_captcha))
        vector = np.zeros(self.max_captcha * self.char_set_len)
        for i, ch in enumerate(text):
            idx = i * self.char_set_len + self.char_set.index(ch)
            vector[idx] = 1
        return vector

    def model(self):
        x = tf.reshape(self.X, shape=[-1, self.image_height, self.image_width, 1])
        print(">>> input x: {}".format(x))
        # convolutional layer 1
        wc1 = tf.get_variable(name='wc1', shape=[3, 3, 1, 32], dtype=tf.float32,
                              initializer=tf.contrib.layers.xavier_initializer())
        bc1 = tf.Variable(self.b_alpha * tf.random_normal([32]))
        conv1 = tf.nn.relu(tf.nn.bias_add(tf.nn.conv2d(x, wc1, strides=[1, 1, 1, 1], padding='SAME'), bc1))
        conv1 = tf.nn.max_pool(conv1, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
        conv1 = tf.nn.dropout(conv1, self.keep_prob)
        # convolutional layer 2
        wc2 = tf.get_variable(name='wc2', shape=[3, 3, 32, 64], dtype=tf.float32,
                              initializer=tf.contrib.layers.xavier_initializer())
        bc2 = tf.Variable(self.b_alpha * tf.random_normal([64]))
        conv2 = tf.nn.relu(tf.nn.bias_add(tf.nn.conv2d(conv1, wc2, strides=[1, 1, 1, 1], padding='SAME'), bc2))
        conv2 = tf.nn.max_pool(conv2, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
        conv2 = tf.nn.dropout(conv2, self.keep_prob)
        # convolutional layer 3
        wc3 = tf.get_variable(name='wc3', shape=[3, 3, 64, 128], dtype=tf.float32,
                              initializer=tf.contrib.layers.xavier_initializer())
        bc3 = tf.Variable(self.b_alpha * tf.random_normal([128]))
        conv3 = tf.nn.relu(tf.nn.bias_add(tf.nn.conv2d(conv2, wc3, strides=[1, 1, 1, 1], padding='SAME'), bc3))
        conv3 = tf.nn.max_pool(conv3, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
        conv3 = tf.nn.dropout(conv3, self.keep_prob)
        print(">>> convolution 3: ", conv3.shape)
        next_shape = conv3.shape[1] * conv3.shape[2] * conv3.shape[3]
        # fully connected layer 1
        wd1 = tf.get_variable(name='wd1', shape=[next_shape, 1024], dtype=tf.float32,
                              initializer=tf.contrib.layers.xavier_initializer())
        bd1 = tf.Variable(self.b_alpha * tf.random_normal([1024]))
        dense = tf.reshape(conv3, [-1, wd1.get_shape().as_list()[0]])
        dense = tf.nn.relu(tf.add(tf.matmul(dense, wd1), bd1))
        dense = tf.nn.dropout(dense, self.keep_prob)
        # fully connected layer 2 (output layer)
        wout = tf.get_variable('name', shape=[1024, self.max_captcha * self.char_set_len], dtype=tf.float32,
                               initializer=tf.contrib.layers.xavier_initializer())
        bout = tf.Variable(self.b_alpha * tf.random_normal([self.max_captcha * self.char_set_len]))
        y_predict = tf.add(tf.matmul(dense, wout), bout)
        return y_predict

    def test_batch(self):
        y_predict = self.model()
        total = self.total
        right = 0
        saver = tf.train.Saver()
        with tf.Session() as sess:
            saver.restore(sess, self.model_save_dir)
            s = time.time()
            for i in range(total):
                # test_text, test_image = gen_special_num_image(i)
                test_text, test_image = self.gen_captcha_text_image()  # random sample
                test_image = self.convert2gray(test_image)
                test_image = test_image.flatten() / 255
                # note: building this op inside the loop keeps growing the graph; the Recognizer class below builds it once
                predict = tf.argmax(tf.reshape(y_predict, [-1, self.max_captcha, self.char_set_len]), 2)
                text_list = sess.run(predict, feed_dict={self.X: [test_image], self.keep_prob: 1.})
                predict_text = text_list[0].tolist()
                p_text = ""
                for p in predict_text:
                    p_text += str(self.char_set[p])
                print("origin: {} predict: {}".format(test_text, p_text))
                if test_text == p_text:
                    right += 1
                else:
                    pass
            e = time.time()
            rate = str(right / total) + "%"  # note: right/total is a fraction, so the printed "0.46%" actually means 46%
            print("Test result: {}/{}".format(right, total))
            print("Recognized {} samples in {} seconds, accuracy {}".format(total, e - s, rate))


def main():
    test_image_dir = sample_conf["test_image_dir"]
    model_save_dir = sample_conf["model_save_dir"]
    char_set = sample_conf["char_set"]
    total = 100
    tb = TestBatch(test_image_dir, char_set, model_save_dir, total)
    tb.test_batch()


if __name__ == '__main__':
    main()

Program output

origin: 4958 predict: 4958

origin: 0409 predict: 0409

origin: 1328 predict: 1228

origin: 6181 predict: 6181

origin: 7017 predict: 7002

origin: 5355 predict: 5355

origin: 1780 predict: 7180

origin: 4122 predict: 4122

Test result: 46/100
Recognized 100 samples in 3.113262891769409 seconds, accuracy 0.46%

recognition_object.py — wrapping recognition in a reusable class

# -*- coding: utf-8 -*-
"""
Image recognition class; to run many recognitions quickly, call its method like this:
R = Recognizer(image_height, image_width, max_captcha)
for i in range(10):
    r_img = Image.open(str(i) + ".jpg")
    t = R.rec_image(r_img)
For simple images this basically reaches millisecond-level recognition speed per image
"""
import tensorflow as tf
import numpy as np
from PIL import Image
from sample import sample_conf


class Recognizer(object):
    def __init__(self, image_height, image_width, max_captcha, char_set, model_save_dir):
        self.w_alpha = 0.01
        self.b_alpha = 0.1
        self.image_height = image_height
        self.image_width = image_width
        self.max_captcha = max_captcha
        self.char_set = char_set
        self.char_set_len = len(self.char_set)
        self.model_save_dir = model_save_dir
        # create a dedicated graph and session
        self.g = tf.Graph()
        self.sess = tf.Session(graph=self.g)
        # use the dedicated graph and session
        with self.g.as_default():
            # build every tensor expression before the recognition loop; building them inside the
            # loop leaks memory and slows recognition down
            # tf placeholders
            self.X = tf.placeholder(tf.float32, [None, self.image_height * self.image_width])  # feature vector
            self.Y = tf.placeholder(tf.float32, [None, self.max_captcha * self.char_set_len])  # labels
            self.keep_prob = tf.placeholder(tf.float32)  # dropout keep probability
            # build the network and load the model parameters
            self.y_predict = self.model()
            self.predict = tf.argmax(tf.reshape(self.y_predict, [-1, self.max_captcha, self.char_set_len]), 2)
            saver = tf.train.Saver()
            with self.sess.as_default() as sess:
                saver.restore(sess, self.model_save_dir)

    # def __del__(self):
    #     self.sess.close()
    #     print("session close")

    @staticmethod
    def convert2gray(img):
        """
        Convert the image to grayscale: compute it for a 3-channel image, return a single-channel image unchanged
        :param img:
        :return:
        """
        if len(img.shape) > 2:
            r, g, b = img[:, :, 0], img[:, :, 1], img[:, :, 2]
            gray = 0.2989 * r + 0.5870 * g + 0.1140 * b
            return gray
        else:
            return img

    def text2vec(self, text):
        """
        Convert a label string to a one-hot vector
        :param text: str
        :return: numpy.array
        """
        text_len = len(text)
        if text_len > self.max_captcha:
            raise ValueError('CAPTCHA labels can be at most {} characters long'.format(self.max_captcha))
        vector = np.zeros(self.max_captcha * self.char_set_len)
        for i, ch in enumerate(text):
            idx = i * self.char_set_len + self.char_set.index(ch)
            vector[idx] = 1
        return vector

    def model(self):
        x = tf.reshape(self.X, shape=[-1, self.image_height, self.image_width, 1])
        print(">>> input x: {}".format(x))
        # convolutional layer 1
        wc1 = tf.get_variable(name='wc1', shape=[3, 3, 1, 32], dtype=tf.float32,
                              initializer=tf.contrib.layers.xavier_initializer())
        bc1 = tf.Variable(self.b_alpha * tf.random_normal([32]))
        conv1 = tf.nn.relu(tf.nn.bias_add(tf.nn.conv2d(x, wc1, strides=[1, 1, 1, 1], padding='SAME'), bc1))
        conv1 = tf.nn.max_pool(conv1, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
        conv1 = tf.nn.dropout(conv1, self.keep_prob)
        # convolutional layer 2
        wc2 = tf.get_variable(name='wc2', shape=[3, 3, 32, 64], dtype=tf.float32,
                              initializer=tf.contrib.layers.xavier_initializer())
        bc2 = tf.Variable(self.b_alpha * tf.random_normal([64]))
        conv2 = tf.nn.relu(tf.nn.bias_add(tf.nn.conv2d(conv1, wc2, strides=[1, 1, 1, 1], padding='SAME'), bc2))
        conv2 = tf.nn.max_pool(conv2, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
        conv2 = tf.nn.dropout(conv2, self.keep_prob)
        # convolutional layer 3
        wc3 = tf.get_variable(name='wc3', shape=[3, 3, 64, 128], dtype=tf.float32,
                              initializer=tf.contrib.layers.xavier_initializer())
        bc3 = tf.Variable(self.b_alpha * tf.random_normal([128]))
        conv3 = tf.nn.relu(tf.nn.bias_add(tf.nn.conv2d(conv2, wc3, strides=[1, 1, 1, 1], padding='SAME'), bc3))
        conv3 = tf.nn.max_pool(conv3, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
        conv3 = tf.nn.dropout(conv3, self.keep_prob)
        print(">>> convolution 3: ", conv3.shape)
        next_shape = conv3.shape[1] * conv3.shape[2] * conv3.shape[3]
        # fully connected layer 1
        wd1 = tf.get_variable(name='wd1', shape=[next_shape, 1024], dtype=tf.float32,
                              initializer=tf.contrib.layers.xavier_initializer())
        bd1 = tf.Variable(self.b_alpha * tf.random_normal([1024]))
        dense = tf.reshape(conv3, [-1, wd1.get_shape().as_list()[0]])
        dense = tf.nn.relu(tf.add(tf.matmul(dense, wd1), bd1))
        dense = tf.nn.dropout(dense, self.keep_prob)
        # fully connected layer 2 (output layer)
        wout = tf.get_variable('name', shape=[1024, self.max_captcha * self.char_set_len], dtype=tf.float32,
                               initializer=tf.contrib.layers.xavier_initializer())
        bout = tf.Variable(self.b_alpha * tf.random_normal([self.max_captcha * self.char_set_len]))
        y_predict = tf.add(tf.matmul(dense, wout), bout)
        return y_predict

    def rec_image(self, img):
        # read the image
        img_array = np.array(img)
        test_image = self.convert2gray(img_array)
        test_image = test_image.flatten() / 255
        # use the dedicated graph and session
        with self.g.as_default():
            with self.sess.as_default() as sess:
                text_list = sess.run(self.predict, feed_dict={self.X: [test_image], self.keep_prob: 1.})
        # decode the result
        predict_text = text_list[0].tolist()
        p_text = ""
        for p in predict_text:
            p_text += str(self.char_set[p])
        # return the recognized text
        return p_text


def main():
    image_height = sample_conf["image_height"]
    image_width = sample_conf["image_width"]
    max_captcha = sample_conf["max_captcha"]
    char_set = sample_conf["char_set"]
    model_save_dir = sample_conf["model_save_dir"]
    R = Recognizer(image_height, image_width, max_captcha, char_set, model_save_dir)
    r_img = Image.open("./sample/test/0059_15553933348531582.png")
    t = R.rec_image(r_img)
    print(t)


if __name__ == '__main__':
    main()

A Flask API that provides online recognition

# -*- coding: UTF-8 -*-
"""
Build the Flask API service
It receives files={'image_file': ('captcha.jpg', BytesIO(bytes), 'application')} and recognizes the CAPTCHA
Parameters that need to be configured:
image_height = 40
image_width = 80
max_captcha = 4
"""
import json
from io import BytesIO
import os
from recognition_object import Recognizer
import time
from flask import Flask, request, jsonify, Response
from PIL import Image
from sample import sample_conf

# use the CPU by default
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"] = "-1"

# configuration parameters
image_height = sample_conf["image_height"]
image_width = sample_conf["image_width"]
max_captcha = sample_conf["max_captcha"]
api_image_dir = sample_conf["api_image_dir"]
model_save_dir = sample_conf["model_save_dir"]
image_suffix = sample_conf["image_suffix"]  # file suffix
char_set = sample_conf["char_set"]

# Flask app
app = Flask(__name__)
basedir = os.path.abspath(os.path.dirname(__file__))

# create the recognizer object with the configured parameters
R = Recognizer(image_height, image_width, max_captcha, char_set, model_save_dir)
# if you need several models, follow this example to configure routes and write the logic
# Q = Recognizer(image_height, image_width, max_captcha, char_set, model_save_dir)


def response_headers(content):
    resp = Response(content)
    resp.headers['Access-Control-Allow-Origin'] = '*'
    return resp


@app.route('/b', methods=['POST'])
def up_image():
    if request.method == 'POST' and request.files.get('image_file'):
        timec = str(time.time()).replace(".", "")
        file = request.files.get('image_file')
        img = file.read()
        img = BytesIO(img)
        img = Image.open(img, mode="r")
        # username = request.form.get("name")
        print("Received image of size: {}".format(img.size))
        s = time.time()
        value = R.rec_image(img)
        e = time.time()
        print("Recognition result: {}".format(value))
        # save the image
        print("Saving image: {}{}_{}.{}".format(api_image_dir, value, timec, image_suffix))
        file_name = "{}_{}.{}".format(value, timec, image_suffix)
        file_path = os.path.join(api_image_dir, file_name)  # was os.path.join(api_image_dir + file_name)
        img.save(file_path)
        result = {
            'time': timec,                        # timestamp
            'value': value,                       # predicted result
            'speed_time(ms)': int((e - s) * 1000) # recognition time
        }
        img.close()
        return jsonify(result)
    else:
        content = json.dumps({"error_code": "1001"})
        resp = response_headers(content)
        return resp


if __name__ == '__main__':
    app.run(debug=True, port=9999)

To check that this API works correctly, we still need a page that submits an image to it.

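The upload page itself is not reproduced here; as an alternative, a minimal sketch of a test client is shown below. It assumes the Flask service is running locally on port 9999 and reuses the sample image path from the Recognizer example above; the field name and tuple format follow the docstring of the Flask service.

# a minimal sketch of a test client for the /b endpoint (assumes `requests` is installed
# and the service is running on localhost:9999)
import requests

with open("./sample/test/0059_15553933348531582.png", "rb") as f:
    img_bytes = f.read()

files = {'image_file': ('captcha.png', img_bytes, 'application')}
resp = requests.post("http://127.0.0.1:9999/b", files=files)
print(resp.json())  # e.g. {"speed_time(ms)": ..., "time": "...", "value": "...."}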

Test result

{
    "speed_time(ms)": 13,
    "time": "15553999504148507",
    "value": "0069"
}
