识别四位纯数字的卷积神经网络(精确率可达100%)

#模型保存
import tensorflow as tf
import matplotlib.pyplot as plt
import numpy as np
import cv2
import os
import random
import datetime

#图片的文件夹路径
images_dir=r"captcha/images3/"
images_files=os.listdir(images_dir)

#划分训练集和测试集
train_files=images_files[:8000]
test_files=images_files[8000:]

#返回one_hot编码
def one_hot_encode(text):
    """Encode a digit string as a flat one-hot vector, 10 slots per character.

    For the 4-digit captcha labels used in this script the result has
    shape (40,).  Generalized from the original hard-coded length 40 so any
    digit-string length works.

    Raises:
        ValueError: if any character is not an ASCII digit '0'-'9'.
    """
    zeros = np.zeros([10 * len(text)])
    for i, ch in enumerate(text):
        if not ("0" <= ch <= "9"):
            raise ValueError("字符不匹配")
        # Slot i*10+digit is set for the digit at position i.
        zeros[i * 10 + (ord(ch) - 48)] = 1
    return zeros

#返回一对图片和one_hot标签
def return_a_couple_image_label(images_dir, images_files):
    """Pick one random captcha image; return (normalized pixels, one-hot label).

    The first four characters of the filename are the captcha digits.
    """
    chosen = random.choice(images_files)
    path = os.path.join(images_dir, chosen)
    gray = cv2.imread(path, cv2.IMREAD_GRAYSCALE)  # load directly as grayscale
    gray = np.reshape(gray, [24, 72])
    # Destination buffer must be float32 to match cv2.CV_32F used below,
    # otherwise the dtypes mismatch and normalize misbehaves.
    result = np.zeros([24, 72], dtype=np.float32)
    # Min-max normalize pixels into [0, 1]. This step matters a lot: it
    # greatly shortens training time and improves accuracy compared with
    # feeding raw 0-255 pixel values.
    cv2.normalize(gray, result, 0, 1, cv2.NORM_MINMAX, dtype=cv2.CV_32F)
    label = one_hot_encode(chosen[0:4])
    return result, label

#返回一个批次的图片和标签
def return_batch_image_label(batch_size, images_dir, images_files):
    """Sample `batch_size` random (image, label) pairs and stack them as arrays."""
    pairs = [return_a_couple_image_label(images_dir, images_files)
             for _ in range(batch_size)]
    images = np.array([img for img, _ in pairs])
    labels = np.array([lbl for _, lbl in pairs])
    return images, labels
    
#将一个批次的one_hot标签转文本
def return_text(one_hot_label_list):
    """Decode a batch of flat one-hot (or score) vectors into 4-digit strings."""
    texts = []
    for encoded in one_hot_label_list:
        # Reshape to (4 positions, 10 digit classes) and take the argmax per row.
        digits = np.argmax(np.reshape(encoded, [4, 10]), axis=1)
        texts.append("".join(str(d) for d in digits))
    return texts

# ---------------------------------------------------------------------------
# Network graph: 3 conv/pool layers -> FC(1024) -> 40 logits (4 digits x 10).
# ---------------------------------------------------------------------------
x = tf.placeholder(dtype=tf.float32, shape=[None, 24, 72])
y = tf.placeholder(dtype=tf.float32, shape=[None, 40])
# Dropout keep probability. Defaults to 1.0 (dropout OFF) so accuracy is
# evaluated on the deterministic network; training feeds 0.5 explicitly.
# (The original hard-coded 0.5, so dropout stayed active during evaluation
# and corrupted the reported accuracies.)
keep_prob = tf.placeholder_with_default(1.0, shape=[])

# Conv input format: [batch_size, height, width, channels].
X = tf.reshape(x, [-1, 24, 72, 1])

# Conv layer 1: 24x72x1 -> 2x2 max-pool -> 12x36x32.
w1 = tf.Variable(tf.random_normal(shape=[3, 3, 1, 32], stddev=0.1))
b1 = tf.Variable(tf.random_normal(shape=[32], stddev=0.1))
# Bias is added before the ReLU (the original declared b1-b3 but never used them).
conv1 = tf.nn.relu(tf.nn.conv2d(input=X, filter=w1, strides=[1, 1, 1, 1], padding="SAME") + b1)
conv1 = tf.nn.max_pool(value=conv1, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding="SAME")

# Conv layer 2: 12x36x32 -> pool -> 6x18x64.
w2 = tf.Variable(tf.random_normal(shape=[3, 3, 32, 64], stddev=0.1))
b2 = tf.Variable(tf.random_normal(shape=[64], stddev=0.1))
conv2 = tf.nn.relu(tf.nn.conv2d(input=conv1, filter=w2, strides=[1, 1, 1, 1], padding="SAME") + b2)
conv2 = tf.nn.max_pool(value=conv2, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding="SAME")

# Conv layer 3: 6x18x64 -> pool -> 3x9x64.
w3 = tf.Variable(tf.random_normal(shape=[3, 3, 64, 64], stddev=0.1))
b3 = tf.Variable(tf.random_normal(shape=[64], stddev=0.1))
conv3 = tf.nn.relu(tf.nn.conv2d(input=conv2, filter=w3, strides=[1, 1, 1, 1], padding="SAME") + b3)
conv3 = tf.nn.max_pool(value=conv3, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding="SAME")

# Flatten for the fully connected layers.
conv3 = tf.reshape(conv3, [-1, 3 * 9 * 64])

# Fully connected layer 1 (with dropout controlled by keep_prob).
fw1 = tf.Variable(tf.random_normal(shape=[3 * 9 * 64, 1024]))
fb1 = tf.Variable(tf.random_normal(shape=[1024]))
f1 = tf.matmul(conv3, fw1) + fb1
f1 = tf.nn.dropout(f1, keep_prob=keep_prob)

# Output layer: 40 logits = 4 character positions x 10 digit classes.
fw2 = tf.Variable(tf.random_normal(shape=[1024, 40]))
fb2 = tf.Variable(tf.random_normal(shape=[40]))
f2 = tf.matmul(f1, fw2) + fb2

# Loss: per-bit sigmoid cross-entropy plus L2 weight regularization
# (the scalar L2 terms broadcast; reduce_mean leaves them unchanged).
loss = tf.reduce_mean(
    tf.nn.sigmoid_cross_entropy_with_logits(labels=y, logits=f2) +
    (1e-3) * tf.nn.l2_loss(w1) +
    (1e-3) * tf.nn.l2_loss(w2) +
    (1e-3) * tf.nn.l2_loss(w3) +
    (1e-3) * tf.nn.l2_loss(fw1) +
    (1e-3) * tf.nn.l2_loss(fw2)
)

train_step = tf.train.AdamOptimizer(1e-3).minimize(loss)

# Per-character accuracy: argmax over each group of 10 digit scores.
Y = tf.reshape(y, [-1, 4, 10])
predict = tf.reshape(f2, [-1, 4, 10])
accuracy = tf.reduce_mean(
    tf.cast(tf.equal(tf.argmax(Y, 2), tf.argmax(predict, 2)), tf.float32))

# Saver for checkpointing the trained variables.
saver = tf.train.Saver()

with tf.Session() as sess:
    train_acc_list = []
    test_acc_list = []
    Iter_list = []
    sess.run(tf.global_variables_initializer())
    for i in range(5000):
        # One random training batch per step; dropout active (keep_prob=0.5).
        x_train_batch, y_train_batch = return_batch_image_label(500, images_dir, train_files)
        loss_, _ = sess.run([loss, train_step],
                            feed_dict={x: x_train_batch, y: y_train_batch, keep_prob: 0.5})

        if (i + 1) % 100 == 0:
            # Only sample a test batch when it is actually evaluated
            # (the original sampled 100 test images every single step).
            x_test_batch, y_test_batch = return_batch_image_label(100, images_dir, test_files)
            time = str(datetime.datetime.now()).split(" ")[-1].split(".")[0]
            # keep_prob defaults to 1.0 here, so dropout is off for evaluation.
            acc_train = sess.run(accuracy, feed_dict={x: x_train_batch, y: y_train_batch})
            acc_test = sess.run(accuracy, feed_dict={x: x_test_batch, y: y_test_batch})

            train_acc_list.append(acc_train)
            test_acc_list.append(acc_test)
            Iter_list.append(i + 1)

            print(f"Iter{i+1} ,loss:{loss_} ,train_acc:{acc_train} ,test_acc:{acc_test},时间{time}")

            # Stop early once a training batch is classified perfectly.
            if int(acc_train) == 1:
                break
    # Save with a relative path; use \\ separators on Windows.
    saver.save(sess, ".\\Model\\model.ckpt")
#Iter100 ,loss:840.1920776367188 ,train_acc:0.12349999696016312 ,test_acc:0.09000000357627869,时间22:27:40
#Iter200 ,loss:760.3424072265625 ,train_acc:0.16899999976158142 ,test_acc:0.11749999970197678,时间22:28:59
#Iter300 ,loss:684.1693115234375 ,train_acc:0.2815000116825104 ,test_acc:0.20499999821186066,时间22:30:19
#Iter400 ,loss:613.4215698242188 ,train_acc:0.460999995470047 ,test_acc:0.3199999928474426,时间22:31:38
#Iter500 ,loss:548.9471435546875 ,train_acc:0.5855000019073486 ,test_acc:0.39250001311302185,时间22:32:58
#Iter600 ,loss:490.3861999511719 ,train_acc:0.6545000076293945 ,test_acc:0.4449999928474426,时间22:34:17
#Iter700 ,loss:437.27880859375 ,train_acc:0.6779999732971191 ,test_acc:0.4325000047683716,时间22:35:36
#Iter800 ,loss:389.3033752441406 ,train_acc:0.6924999952316284 ,test_acc:0.5525000095367432,时间22:36:55
#Iter900 ,loss:346.01190185546875 ,train_acc:0.7534999847412109 ,test_acc:0.5475000143051147,时间22:38:15
#Iter1000 ,loss:307.07818603515625 ,train_acc:0.7730000019073486 ,test_acc:0.5450000166893005,时间22:39:34
#Iter1100 ,loss:272.1109924316406 ,train_acc:0.8075000047683716 ,test_acc:0.5824999809265137,时间22:40:54
#Iter1200 ,loss:240.7642822265625 ,train_acc:0.8270000219345093 ,test_acc:0.6100000143051147,时间22:42:13
#Iter1300 ,loss:212.68809509277344 ,train_acc:0.8659999966621399 ,test_acc:0.625,时间22:43:33
#Iter1400 ,loss:187.6605224609375 ,train_acc:0.887499988079071 ,test_acc:0.6474999785423279,时间22:44:53
#Iter1500 ,loss:165.28846740722656 ,train_acc:0.9024999737739563 ,test_acc:0.6725000143051147,时间22:46:13
#Iter1600 ,loss:145.39093017578125 ,train_acc:0.9114999771118164 ,test_acc:0.6775000095367432,时间22:47:39
#Iter1700 ,loss:127.68877410888672 ,train_acc:0.940500020980835 ,test_acc:0.6800000071525574,时间22:49:04
#Iter1800 ,loss:111.97460174560547 ,train_acc:0.9434999823570251 ,test_acc:0.699999988079071,时间22:50:29
#Iter1900 ,loss:98.05056762695312 ,train_acc:0.9679999947547913 ,test_acc:0.7074999809265137,时间22:51:53
#Iter2000 ,loss:85.72267150878906 ,train_acc:0.9695000052452087 ,test_acc:0.7074999809265137,时间22:53:17
#Iter2100 ,loss:74.83501434326172 ,train_acc:0.9735000133514404 ,test_acc:0.7174999713897705,时间22:54:41
#Iter2200 ,loss:65.24179077148438 ,train_acc:0.9729999899864197 ,test_acc:0.7250000238418579,时间22:56:05
#Iter2300 ,loss:56.78237533569336 ,train_acc:0.9894999861717224 ,test_acc:0.75,时间22:57:29
#Iter2400 ,loss:49.353450775146484 ,train_acc:0.984499990940094 ,test_acc:0.737500011920929,时间22:58:52
#Iter2500 ,loss:42.83298110961914 ,train_acc:0.9919999837875366 ,test_acc:0.7450000047683716,时间23:00:18
#Iter2600 ,loss:37.114097595214844 ,train_acc:0.9919999837875366 ,test_acc:0.7450000047683716,时间23:01:42
#Iter2700 ,loss:32.11655044555664 ,train_acc:0.9959999918937683 ,test_acc:0.7400000095367432,时间23:03:05
#Iter2800 ,loss:27.755178451538086 ,train_acc:0.9980000257492065 ,test_acc:0.75,时间23:04:29
#Iter2900 ,loss:23.953184127807617 ,train_acc:0.9980000257492065 ,test_acc:0.7400000095367432,时间23:05:54
#Iter3000 ,loss:20.645999908447266 ,train_acc:0.9975000023841858 ,test_acc:0.7475000023841858,时间23:07:17
#Iter3100 ,loss:17.774030685424805 ,train_acc:0.9984999895095825 ,test_acc:0.7475000023841858,时间23:09:47
#Iter3200 ,loss:15.283675193786621 ,train_acc:0.9990000128746033 ,test_acc:0.75,时间23:12:24
#Iter3300 ,loss:13.129268646240234 ,train_acc:0.9984999895095825 ,test_acc:0.7475000023841858,时间23:14:56
#Iter3400 ,loss:11.267122268676758 ,train_acc:0.9994999766349792 ,test_acc:0.7475000023841858,时间23:17:36
#Iter3500 ,loss:9.66270637512207 ,train_acc:0.9994999766349792 ,test_acc:0.75,时间23:20:23
#Iter3600 ,loss:8.279025077819824 ,train_acc:0.9994999766349792 ,test_acc:0.7524999976158142,时间23:23:14
#Iter3700 ,loss:7.090017318725586 ,train_acc:0.9994999766349792 ,test_acc:0.75,时间23:26:13
#Iter3800 ,loss:6.067821502685547 ,train_acc:1.0 ,test_acc:0.75,时间23:29:22

结论:模型出现明显过拟合——训练集精度已达 100%,而测试集精度停留在 75% 左右。

#模型导入和使用
import tensorflow as tf
import matplotlib.pyplot as plt
import numpy as np
import cv2
import os
import random
import datetime

#图片的文件夹路径
images_dir=r"captcha/images3/"
images_files=os.listdir(images_dir)

#划分训练集和测试集
train_files=images_files[:8000]
test_files=images_files[8000:]

#返回one_hot编码
def one_hot_encode(text):
    """Map a 4-digit string to a length-40 one-hot vector (10 slots per char).

    Raises ValueError when a character is not an ASCII digit 0-9.
    """
    encoded = np.zeros([40])
    for pos, ch in enumerate(text):
        code = ord(ch)  # decimal code point of the character
        if not (48 <= code <= 57):
            raise ValueError("字符不匹配")
        encoded[pos * 10 + (code - 48)] = 1
    return encoded

#返回一对图片和one_hot标签
def return_a_couple_image_label(images_dir, images_files):
    """Randomly select one captcha image; return (normalized pixels, one-hot label)."""
    filename = random.choice(images_files)
    # Read the file straight into a grayscale array.
    raw = cv2.imread(os.path.join(images_dir, filename), cv2.IMREAD_GRAYSCALE)
    raw = np.reshape(raw, [24, 72])
    # float32 output buffer — its dtype must agree with cv2.CV_32F below,
    # otherwise normalize hits a type mismatch.
    normalized = np.zeros([24, 72], dtype=np.float32)
    # Min-max scale pixels into [0, 1]; this normalization is critical for
    # fast, accurate training compared with raw 0-255 values.
    cv2.normalize(raw, normalized, 0, 1, cv2.NORM_MINMAX, dtype=cv2.CV_32F)
    # The first four filename characters are the captcha digits.
    return normalized, one_hot_encode(filename[0:4])

#返回一个批次的图片和标签
def return_batch_image_label(batch_size, images_dir, images_files):
    """Draw `batch_size` random samples and return stacked (images, labels) arrays."""
    samples = (return_a_couple_image_label(images_dir, images_files)
               for _ in range(batch_size))
    images, labels = zip(*samples)
    return np.array(images), np.array(labels)
    
#将一个批次的one_hot标签转文本
def return_text(one_hot_label_list):
    """Convert a batch of one-hot (or score) vectors to their 4-digit strings."""
    decoded = []
    for vec in one_hot_label_list:
        # View each vector as 4 rows of 10 digit scores; argmax picks the digit.
        grid = np.reshape(vec, [4, 10])
        decoded.append("".join(map(str, np.argmax(grid, 1))))
    return decoded

# ---------------------------------------------------------------------------
# Restore-and-predict script. The graph below must define the same variables
# (names, shapes) as the training script so the checkpoint restores cleanly.
# NOTE(review): biases b1-b3 are now applied in the conv layers and dropout is
# disabled, matching the corrected training script — retrain before restoring
# so the computation here matches what was saved.
# ---------------------------------------------------------------------------
x = tf.placeholder(dtype=tf.float32, shape=[None, 24, 72])

# Conv input format: [batch_size, height, width, channels].
X = tf.reshape(x, [-1, 24, 72, 1])

# Conv layer 1: 24x72x1 -> 2x2 max-pool -> 12x36x32.
w1 = tf.Variable(tf.random_normal(shape=[3, 3, 1, 32], stddev=0.1))
b1 = tf.Variable(tf.random_normal(shape=[32], stddev=0.1))
conv1 = tf.nn.relu(tf.nn.conv2d(input=X, filter=w1, strides=[1, 1, 1, 1], padding="SAME") + b1)
conv1 = tf.nn.max_pool(value=conv1, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding="SAME")

# Conv layer 2: 12x36x32 -> pool -> 6x18x64.
w2 = tf.Variable(tf.random_normal(shape=[3, 3, 32, 64], stddev=0.1))
b2 = tf.Variable(tf.random_normal(shape=[64], stddev=0.1))
conv2 = tf.nn.relu(tf.nn.conv2d(input=conv1, filter=w2, strides=[1, 1, 1, 1], padding="SAME") + b2)
conv2 = tf.nn.max_pool(value=conv2, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding="SAME")

# Conv layer 3: 6x18x64 -> pool -> 3x9x64.
w3 = tf.Variable(tf.random_normal(shape=[3, 3, 64, 64], stddev=0.1))
b3 = tf.Variable(tf.random_normal(shape=[64], stddev=0.1))
conv3 = tf.nn.relu(tf.nn.conv2d(input=conv2, filter=w3, strides=[1, 1, 1, 1], padding="SAME") + b3)
conv3 = tf.nn.max_pool(value=conv3, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding="SAME")

conv3 = tf.reshape(conv3, [-1, 3 * 9 * 64])

# Fully connected layer 1. Dropout is intentionally OMITTED: the original
# applied keep_prob=0.5 at prediction time, randomly zeroing half the
# activations and making restored-model predictions noisy/non-deterministic.
fw1 = tf.Variable(tf.random_normal(shape=[3 * 9 * 64, 1024]))
fb1 = tf.Variable(tf.random_normal(shape=[1024]))
f1 = tf.matmul(conv3, fw1) + fb1

# Output layer: 40 logits = 4 character positions x 10 digit classes.
fw2 = tf.Variable(tf.random_normal(shape=[1024, 40]))
fb2 = tf.Variable(tf.random_normal(shape=[40]))
f2 = tf.matmul(f1, fw2) + fb2

# Per-position class scores for decoding.
predict = tf.reshape(f2, [-1, 4, 10])

# No loss/optimizer here — this script only restores and predicts. Extra
# variables present in the checkpoint (e.g. Adam slots) are simply not
# restored by this Saver.
saver = tf.train.Saver()
with tf.Session() as sess:
    # Restore with a relative path; use \\ separators on Windows.
    saver.restore(sess, ".\\Model\\model.ckpt")
    # Grab a batch of 10 training images to visualize predictions on.
    x_train_batch, y_train_batch = return_batch_image_label(10, images_dir, train_files)

    predict_ = sess.run(predict, feed_dict={x: x_train_batch})
    predict_ = return_text(predict_)

    for i in range(10):
        print(predict_[i])
        plt.figure()
        plt.imshow(x_train_batch[i])
        plt.show()

效果图

  • 3
    点赞
  • 3
    收藏
    觉得还不错? 一键收藏
  • 6
    评论
评论 6
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值