代码仓库
1.验证码数据集的生成
利用python的captcha库,可以帮助我们生成许多验证码,我们要先创建一个list,里面包括0-9的数字和26个英文字母,然后随机从里面生成验证码图片。我们可以直接把验证码图片的对应字符作为图片的命名,图片文件命名加上time后缀是为了防止重复文件。程序设置每个验证码图片包含4个字符,字符都来自我们设置的list,程序中加入了进度条方便查看进度。
import random
import time
from captcha.image import ImageCaptcha
from tqdm import tqdm
# Alphabet the captcha characters are drawn from: digits followed by lowercase letters.
captcha_array = list("0123456789abcdefghijklmnopqrstuvwxyz")
# Number of characters rendered into every captcha image.
captcha_size = 4


def random_captcha_text(length=captcha_size):
    """Return a random captcha string made of ``length`` characters.

    Each character is drawn independently from ``captcha_array``, so the same
    character may occur more than once (for example ``"aa1b"``). Sampling
    without replacement would never produce such captchas, and a model trained
    on that data would never see a repeated character.
    """
    return "".join(random.choices(captcha_array, k=length))


if __name__ == '__main__':
    import os

    output_dir = "./data/train"
    # Create the target folder up front so writing the first image cannot fail
    # on a missing directory.
    os.makedirs(output_dir, exist_ok=True)

    image = ImageCaptcha()
    num = 100000
    with tqdm(total=num, desc='making progress') as pbar:
        for _ in range(num):
            image_val = random_captcha_text()
            # File name is "<text>_<unix seconds>.png"; the text before the
            # underscore is the label. The suffix has one-second resolution,
            # so an identical text generated within the same second reuses
            # the same file name.
            image_name = "{}/{}_{}.png".format(output_dir, image_val, int(time.time()))
            image.write(image_val, image_name)
            pbar.update(1)
整个深度学习项目的文件组织架构如下图
除了生成训练集到train文件夹下,我们还可以生成一个测试集到test文件夹中
2.数据集加载和编码
我们要从数据集中每个图片文件中提取对应的验证码结果,同时还要对于模型的输出进行向量的编码转换,才能得到结果,下面是用到的对应函数和加载数据集的方式
# Every character a captcha may contain; a character's position in this list
# is the class index used when encoding and decoding labels.
captcha_array = [*"0123456789abcdefghijklmnopqrstuvwxyz"]
# Number of characters in one captcha.
captcha_size = 4
def texttovec(text):
    """One-hot encode a captcha string.

    Returns a zero-initialised tensor of shape
    ``(captcha_size, len(captcha_array))`` in which row ``i`` holds a single 1
    at the index of ``text[i]`` inside ``captcha_array``.
    """
    one_hot = torch.zeros((captcha_size, len(captcha_array)))
    for position, char in enumerate(text):
        one_hot[position, captcha_array.index(char)] = 1
    return one_hot
def vectotext(vec):
    """Decode a 2-D score/one-hot tensor into text.

    Takes the argmax along dim 1 of ``vec`` (one row per character) and maps
    each resulting index to its character in ``captcha_array``.
    """
    indices = torch.argmax(vec, dim=1)
    return "".join(captcha_array[idx] for idx in indices)
class datasets(Dataset):
    """Captcha image dataset backed by a folder of ``<text>_<suffix>.png`` files.

    Each item is ``(image_tensor, label_vector)``: the image resized to 60x160
    and converted to a grayscale tensor, and the flattened one-hot encoding of
    the text stored in the file name.
    """

    def __init__(self, root_dir):
        """Collect the path of every entry found in ``root_dir``."""
        super(datasets, self).__init__()
        self.list_image_path = [
            os.path.join(root_dir, image_name) for image_name in os.listdir(root_dir)
        ]
        self.transforms = transforms.Compose([
            transforms.Resize((60, 160)),
            transforms.ToTensor(),
            transforms.Grayscale(),
        ])

    @staticmethod
    def _label_from_path(image_path):
        """Return the captcha text encoded in the file name of ``image_path``.

        ``os.path.basename`` is used instead of splitting on a backslash so the
        label is extracted correctly with the path separator of the current
        platform, not only on Windows.
        """
        file_name = os.path.basename(image_path)
        return file_name.split("_")[0]

    def __getitem__(self, index):
        """Load image ``index`` and return ``(image_tensor, flat_one_hot_label)``."""
        image_path = self.list_image_path[index]
        # The transform pipeline reads the pixel data inside the ``with`` body,
        # so the file handle can be released as soon as the tensor exists.
        with Image.open(image_path) as img_:
            img_tensor = self.transforms(img_)
        label_text = self._label_from_path(image_path)
        # Flatten the (captcha_size, n_classes) one-hot matrix into one vector.
        img_label = texttovec(label_text).view(-1)
        return img_tensor, img_label

    def __len__(self):
        """Return the number of image paths collected in ``__init__``."""
        return len(self.list_image_path)
3.模型构建
卷积神经网络是一种深度学习模型,核心思想是局部感知和权值共享,这使得它能够有效地捕捉图像的局部特征,同时减少了模型的参数量,降低了过拟合的风险。我们通过卷积神经网络,把一个60*160的图片经过多次卷积和池化之后,展平为一个维度为15360的张量,然后我们再由全连接层(15360→4096→144)把它降到4×36=144维(4个字符位置,每个位置对应36个候选字符),实现对应字符的检测输出
具体流程如下图
模型代码如下
class model(nn.Module):
    """CNN captcha classifier: four conv/ReLU/max-pool stages and a two-layer head.

    The shape comments below assume a 1x60x160 input, which is what the
    hard-coded 15360 input features of the first linear layer correspond to.
    The output has ``captcha_size * len(captcha_array)`` scores per sample.
    """

    def __init__(self):
        super().__init__()
        self.layer1 = self._conv_stage(1, 64)     # [64, 30, 80]
        self.layer2 = self._conv_stage(64, 128)   # [128, 15, 40]
        self.layer3 = self._conv_stage(128, 256)  # [256, 7, 20]
        self.layer4 = self._conv_stage(256, 512)  # [512, 3, 10]
        n_outputs = captcha_size * len(captcha_array)
        self.layer5 = nn.Sequential(
            nn.Flatten(),  # [15360]
            nn.Linear(in_features=15360, out_features=4096),
            nn.Dropout(0.2),
            nn.ReLU(),
            nn.Linear(in_features=4096, out_features=n_outputs),
        )

    @staticmethod
    def _conv_stage(in_channels, out_channels):
        """Build one 3x3 conv (padding 1) -> ReLU -> 2x2 max-pool stage."""
        return nn.Sequential(
            nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2),
        )

    def forward(self, x):
        """Run ``x`` through layer1..layer5 in order and return the raw scores."""
        for stage in (self.layer1, self.layer2, self.layer3, self.layer4, self.layer5):
            x = stage(x)
        return x
4.训练思路
既然有独立的训练集和测试集,我们可以每进行完一轮训练之后就进行一次测试,测试模型准确率,同时对于准确率和每一轮训练中的loss,都进行保留,最后绘制图像观察最优。我们还可以加入提前结束条件,如准确率达到99%就保存模型然后结束
if __name__ == '__main__':
    import os

    # Use the GPU when one is available, otherwise fall back to the CPU.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    train_datas = datasets("./data/train")
    train_dataloader = DataLoader(train_datas, batch_size=128, shuffle=True)
    test_data = datasets("./data/test")
    # batch_size=1 so that each test sample decodes to exactly one captcha string.
    test_dataloader = DataLoader(test_data, batch_size=1, shuffle=False)
    test_length = len(test_data)

    m = model().to(device)
    lossfunc = nn.MultiLabelSoftMarginLoss().to(device)
    optimizer = torch.optim.Adam(m.parameters(), lr=0.001)

    totalstep = 0
    modelpath = "./model/1.pth"
    epochnum = 20
    loss_history = []
    accuracy_history = []
    start_time = time.time()

    for epoch in range(epochnum):
        print("epoch:{}".format(epoch+1))

        # Training mode: Dropout is active while the weights are updated.
        m.train()
        for imgs, targets in train_dataloader:
            imgs = imgs.to(device)
            targets = targets.to(device)
            outputs = m(imgs)
            loss = lossfunc(outputs, targets)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            totalstep += 1
            if totalstep % 100 == 0:
                print("times:{} loss:{:.6f}".format(totalstep, loss.item()))

        # Evaluation mode: Dropout must be disabled, otherwise the measured
        # accuracy is computed on randomly perturbed activations.
        m.eval()
        correct = 0
        with torch.no_grad():
            for imgs, lables in test_dataloader:
                imgs = imgs.to(device)
                lables = lables.to(device)
                # Reshape the flat vectors to one row per character before decoding.
                lables = lables.view(-1, len(captcha_array))
                lables_text = vectotext(lables)
                predict_outputs = m(imgs)
                predict_outputs = predict_outputs.view(-1, len(captcha_array))
                predict_labels = vectotext(predict_outputs)
                # A sample only counts when every character matches.
                if predict_labels == lables_text:
                    correct += 1
        # Guard against an empty test folder instead of dividing by zero.
        accuracy = correct / test_length if test_length else 0.0
        print("test data size:{} | test accuracy: {:.2%}".format(test_length, accuracy))

        # The recorded loss is the loss of the last training batch of the epoch.
        loss_history.append(loss.item())
        accuracy_history.append(accuracy)
        # Early stopping once the loss or the accuracy is good enough.
        if loss.item() < 0.001 or accuracy > 0.99:
            break

    end_time = time.time()
    training_time = end_time - start_time

    # Make sure the checkpoint folder exists; the whole module object is saved
    # (not just its state_dict) so it can be restored with a plain torch.load.
    os.makedirs(os.path.dirname(modelpath), exist_ok=True)
    torch.save(m, modelpath)

    if epoch+1 == epochnum:
        print("\nend training | spend time: {:.3f} seconds".format(training_time))
        print("loss:{:.6f} accuracy:{:.2%}".format(loss.item(), accuracy))
    else:
        print("\nfinish training | spend time: {:.3f} seconds".format(training_time))
        print("epoch:{} loss:{:.6f} accuracy:{:.2%}".format(epoch+1, loss.item(), accuracy))
    print("model save to "+modelpath)

    plt.figure()
    plt.plot(loss_history)
    plt.xlabel('epoch')
    plt.ylabel('loss')
    plt.title('training loss')
    plt.show()

    plt.figure()
    plt.plot(accuracy_history)
    plt.xlabel('epoch')
    plt.ylabel('accuracy')
    plt.title('test accuracy')
    plt.show()
5.单图片检测
对于训练好的模型,可以单独写一个程序,单个图片检测并且查看检测效果,利用matplotlib库绘制图像并且打上识别标签
import torch
from torch import nn
from torch.utils.data import DataLoader
import os
from PIL import Image
from torch.utils.data import Dataset
from torchvision import transforms
import matplotlib.pyplot as plt
# Every character a captcha may contain; a character's position in this list
# is the class index used when encoding and decoding labels.
captcha_array = [*"0123456789abcdefghijklmnopqrstuvwxyz"]
# Number of characters in one captcha.
captcha_size = 4
def texttovec(text):
    """One-hot encode a captcha string.

    Returns a zero-initialised tensor of shape
    ``(captcha_size, len(captcha_array))`` in which row ``i`` holds a single 1
    at the index of ``text[i]`` inside ``captcha_array``.
    """
    one_hot = torch.zeros((captcha_size, len(captcha_array)))
    for position, char in enumerate(text):
        one_hot[position, captcha_array.index(char)] = 1
    return one_hot
def vectotext(vec):
    """Decode a 2-D score/one-hot tensor into text.

    Takes the argmax along dim 1 of ``vec`` (one row per character) and maps
    each resulting index to its character in ``captcha_array``.
    """
    indices = torch.argmax(vec, dim=1)
    return "".join(captcha_array[idx] for idx in indices)
class datasets(Dataset):
    """Captcha image dataset backed by a folder of ``<text>_<suffix>.png`` files.

    Each item is ``(image_tensor, label_vector)``: the image resized to 60x160
    and converted to a grayscale tensor, and the flattened one-hot encoding of
    the text stored in the file name.
    """

    def __init__(self, root_dir):
        """Collect the path of every entry found in ``root_dir``."""
        super(datasets, self).__init__()
        self.list_image_path = [
            os.path.join(root_dir, image_name) for image_name in os.listdir(root_dir)
        ]
        self.transforms = transforms.Compose([
            transforms.Resize((60, 160)),
            transforms.ToTensor(),
            transforms.Grayscale(),
        ])

    @staticmethod
    def _label_from_path(image_path):
        """Return the captcha text encoded in the file name of ``image_path``.

        ``os.path.basename`` is used instead of splitting on a backslash so the
        label is extracted correctly with the path separator of the current
        platform, not only on Windows.
        """
        file_name = os.path.basename(image_path)
        return file_name.split("_")[0]

    def __getitem__(self, index):
        """Load image ``index`` and return ``(image_tensor, flat_one_hot_label)``."""
        image_path = self.list_image_path[index]
        # The transform pipeline reads the pixel data inside the ``with`` body,
        # so the file handle can be released as soon as the tensor exists.
        with Image.open(image_path) as img_:
            img_tensor = self.transforms(img_)
        label_text = self._label_from_path(image_path)
        # Flatten the (captcha_size, n_classes) one-hot matrix into one vector.
        img_label = texttovec(label_text).view(-1)
        return img_tensor, img_label

    def __len__(self):
        """Return the number of image paths collected in ``__init__``."""
        return len(self.list_image_path)
class model(nn.Module):
    """CNN captcha classifier: four conv/ReLU/max-pool stages and a two-layer head.

    The first linear layer expects 15360 flattened features, which matches a
    1x60x160 input after four 2x2 poolings (512 x 3 x 10). The output has
    ``captcha_size * len(captcha_array)`` scores per sample.
    """

    def __init__(self):
        super().__init__()
        self.layer1 = self._conv_stage(1, 64)
        self.layer2 = self._conv_stage(64, 128)
        self.layer3 = self._conv_stage(128, 256)
        self.layer4 = self._conv_stage(256, 512)
        n_outputs = captcha_size * len(captcha_array)
        self.layer5 = nn.Sequential(
            nn.Flatten(),
            nn.Linear(in_features=15360, out_features=4096),
            nn.Dropout(0.2),
            nn.ReLU(),
            nn.Linear(in_features=4096, out_features=n_outputs),
        )

    @staticmethod
    def _conv_stage(in_channels, out_channels):
        """Build one 3x3 conv (padding 1) -> ReLU -> 2x2 max-pool stage."""
        return nn.Sequential(
            nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2),
        )

    def forward(self, x):
        """Run ``x`` through layer1..layer5 in order and return the raw scores."""
        for stage in (self.layer1, self.layer2, self.layer3, self.layer4, self.layer5):
            x = stage(x)
        return x
def picture(pic_path, model_path=None):
    """Predict the text of one captcha image and display it with the prediction.

    Args:
        pic_path: Path of the captcha image to recognise.
        model_path: Path of the saved model. When omitted, the module-level
            ``modelpath`` variable is used, as before.

    Returns:
        The predicted captcha text (also shown as the plot title).
    """
    if model_path is None:
        # Backward compatible default: the global set by the script entry point.
        model_path = modelpath
    # Use the GPU when one is available, otherwise fall back to the CPU.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    img = Image.open(pic_path)
    plt.imshow(img)
    tersor_img = transforms.Compose([
        transforms.Grayscale(),
        transforms.Resize((60, 160)),
        transforms.ToTensor(),
    ])
    img = tersor_img(img).to(device)
    # Add the batch dimension expected by the network: (1, 1, 60, 160).
    img = torch.reshape(img, (-1, 1, 60, 160))

    # NOTE(review): torch.load unpickles a whole module object here, so only
    # load checkpoints from a trusted source. Newer PyTorch releases default to
    # weights_only=True, in which case this call needs weights_only=False.
    m = torch.load(model_path, map_location=device).to(device)
    # Inference mode: disable Dropout so the prediction is deterministic, and
    # skip gradient tracking because no backward pass is needed.
    m.eval()
    with torch.no_grad():
        outputs = m(img)
    # One row of class scores per character.
    outputs = outputs.view(-1, len(captcha_array))
    outputs_lable = vectotext(outputs)

    plt.title("model predict:{}".format(outputs_lable))
    plt.axis('off')
    plt.show()
    return outputs_lable
if __name__ == "__main__":
    # picture() reads the checkpoint location from this module-level name.
    modelpath = "./model/4.pth"
    sample_image = "./data/show/0dna_1692430779.png"
    picture(sample_image)
效果展示: 模型在10w数据50轮训练的情况下准确率接近99%