文章目录
- 前言
- 效果预览
- 数据集介绍
- 模型介绍(ResNet18)
- 读取数据
- 训练模型代码
- 源代码下载
手写汉字识别
b站地址:https://www.bilibili.com/video/BV1384y1P76m/?vd_source=65a01bd1c4223f2aede873e40c0cdb3e
前言
本次实验的任务是汉字识别。使用pytorch深度学习框架和opencv在HWDB手写汉字数据集上进行实验。由于数据集过于庞大,这里只选取了前1311个类作为实验类别。
效果预览
数据集介绍
HWDB是一个手写汉字数据集,该数据集来自于中科院自动化研究所,一共有三个版本,分别为HWDB1.0、HWDB1.1和HWDB1.2。
本文使用的数据集共有1311种汉字,大概共有几十万张图片,其中20%的图片用于验证,80%的图片用于训练。图片的格式为png,下图为部分数据集图片。
模型介绍(ResNet18)
resnet18的结构图如下所示:
pytorch内部自带resnet18模型,不过原始的模型最后的分类数为1000,而本文的汉字类别数为1311,所以需要修改模型的最后一层全连接层,代码如下所示:
# Run on the first GPU when available, otherwise fall back to the CPU.
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
# ResNet-18 backbone, randomly initialized (no pretrained weights).
net = models.resnet18(pretrained=False)
# First conv takes a single grayscale channel instead of 3-channel RGB.
net.conv1 = nn.Conv2d(in_channels=1, out_channels=64, kernel_size=7,
                      stride=2, padding=3, bias=False)
# Swap the 1000-way ImageNet head for the 1311 Chinese-character classes.
net.fc = nn.Linear(512, 1311)
net = net.to(device)
读取数据
使用dataset读取数据代码如下:
from torch.utils.data import DataLoader,Dataset
import cv2
import numpy as np
import torch
import imgaug.augmenters as iaa
import random
#读取训练图片类
class Mydataset(Dataset):
    """Dataset of handwritten-character samples listed as "<image_path> <label>" lines.

    ``__getitem__`` returns ``(img, label)``: ``img`` is a float32 array of
    shape (1, 64, 64) scaled to [0, 1] — grayscale, Otsu-binarized and
    inverted so strokes are the bright pixels; ``label`` is the integer
    class index parsed from the line.
    """

    def __init__(self, lines, train=True):
        super(Mydataset, self).__init__()
        # All "path label" description lines for the samples.
        self.lines = lines
        # When True, apply random augmentation in __getitem__;
        # otherwise only resize to 64x64.
        self.train = train
        # Augmentation pipeline, built lazily on first use so one pipeline
        # is reused instead of being rebuilt for every sample (the original
        # constructed a fresh iaa.Sequential on each get_random_data call).
        self._seq = None

    def __getitem__(self, item):
        """Read one image, preprocess it and return (image, label)."""
        # Parse the line once instead of splitting it twice.
        parts = self.lines[item].strip().split()
        img_path = parts[0]
        img_lab = int(parts[1])
        img = cv2.imread(img_path)
        if img is None:
            # cv2.imread silently returns None on a missing/unreadable
            # file; fail with a usable error instead of a cryptic
            # TypeError from the slicing below.
            raise FileNotFoundError('cannot read image: %s' % img_path)
        # BGR (OpenCV default) -> RGB.
        img = img[..., ::-1]
        if self.train:
            img = self.get_random_data(img)
        else:
            img = cv2.resize(img, (64, 64))
        # Grayscale, Otsu binarization, then invert so strokes are white.
        img = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
        _, img = cv2.threshold(img, 0, 255, cv2.THRESH_OTSU)
        img = 255 - img
        # Normalize to [0, 1] and add the channel dimension.
        img = np.expand_dims(img, axis=0) / 255
        img = img.astype('float32')
        return img, img_lab

    def __len__(self):
        # Number of training samples.
        return len(self.lines)

    def get_random_data(self, img):
        """Randomly augment `img` and resize it to 64x64."""
        if self._seq is None:
            self._seq = iaa.Sequential([
                iaa.Multiply((0.8, 1.5)),          # random brightness
                iaa.GaussianBlur(sigma=(0, 1.0)),  # blur, sigma in [0, 1]
                iaa.Crop(percent=(0, 0.06)),
                iaa.Grayscale(alpha=(0, 1)),
                iaa.Affine(
                    scale=(0.9, 1.),   # random scaling
                    rotate=(-20, 20),  # random rotation in degrees
                    cval=250,          # fill value for exposed pixels
                    mode='constant'),
                iaa.Resize(64),
            ])
        return self._seq.augment(image=img)
if __name__ == '__main__':
    # Smoke test: load a few batches and print their shapes and labels.
    # Use a context manager so the file handle is closed deterministically
    # (the original open(...).readlines() leaked it).
    with open('data.txt', 'r') as f:
        lines = f.readlines()
    mydata = Mydataset(lines=lines)
    myloader = DataLoader(mydata, batch_size=3, shuffle=True)
    for imgs, labs in myloader:
        print(imgs.shape, labs)
训练模型代码
import torch.nn as nn
import torchvision.models as models
import torch
import random
import torch.optim as optim
from dataset import Mydataset
from torch.utils.data import DataLoader
from tqdm import tqdm
#获取学习率函数
def get_lr(optimizer):
    """Return the current learning rate of `optimizer`'s first param group.

    torch optimizers refuse an empty parameter list at construction, so
    `param_groups` always has at least one entry; indexing directly is
    clearer than the original loop-and-return-on-first-iteration idiom.
    """
    return optimizer.param_groups[0]['lr']
#计算准确率函数
def metric_func(pred, lab):
    """Top-1 accuracy: fraction of rows of `pred` whose argmax equals `lab`.

    Returns a scalar float tensor.
    """
    predicted = pred.argmax(dim=-1)
    return (predicted == lab).float().mean()
# Use the first GPU when available, otherwise the CPU.
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
# Load a ResNet-18 with random weights.
net = models.resnet18(pretrained=False)
# Accept single-channel (grayscale) input instead of 3-channel RGB.
net.conv1 = nn.Conv2d(1, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
# Replace the 1000-class ImageNet head with the 1311 character classes.
net.fc = nn.Linear(in_features=512, out_features=1311, bias=True)
net = net.to(device)

# Fraction of samples held out for validation.
rate = 0.2
# Read the "path label" sample list and split into train/validation sets.
# A context manager closes the handle (the original leaked it).
with open('data.txt', 'r') as f:
    lines = f.readlines()[:2618]
val_lines = random.sample(lines, k=int(len(lines) * rate))
train_lines = list(set(lines) - set(val_lines))

# Initial learning rate.
lr = 2e-3
# Mini-batch size.
batch_size = 40
num_train = len(train_lines)
# Optimizer steps per epoch.
epoch_step = num_train // batch_size
# Classification loss.
loss_fun = nn.CrossEntropyLoss()
optimizer = optim.Adam(net.parameters(), lr=lr, betas=(0.5, 0.999))
# Multiply the learning rate by 0.99 after every epoch.
lr_scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=1, gamma=0.99)

# Datasets and loaders: augmentation only on the training split.
train_data = Mydataset(train_lines, train=True)
val_data = Mydataset(val_lines, train=False)
train_loader = DataLoader(dataset=train_data, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(dataset=val_data, batch_size=batch_size, shuffle=False)
if __name__ == '__main__':
    # Number of training epochs.
    Epoch = 50
    epoch_step = num_train // batch_size
    for epoch in range(1, Epoch + 1):
        net.train()
        # Accumulate the loss as a plain float. The original's
        # `total_loss += loss` kept the loss *tensor*, retaining every
        # batch's autograd graph for the whole epoch — a memory leak.
        total_loss = 0.0
        with tqdm(total=epoch_step, desc=f'Epoch {epoch}/{Epoch}', postfix=dict, mininterval=0.3) as pbar:
            for step, (features, labels) in enumerate(train_loader, 1):
                features = features.to(device)
                labels = labels.to(device)
                optimizer.zero_grad()
                out = net(features)
                loss = loss_fun(out, labels)
                loss.backward()
                optimizer.step()
                total_loss += loss.item()
                pbar.set_postfix(**{'loss': total_loss / step,
                                    'lr': get_lr(optimizer)})
                pbar.update(1)
        # Validation pass — no gradients needed.
        net.eval()
        acc_sum = 0
        for val_step, (features, labels) in enumerate(val_loader, 1):
            with torch.no_grad():
                features = features.to(device)
                labels = labels.to(device)
                predictions = net(features)
                val_metric = metric_func(predictions, labels)
                acc_sum += val_metric.item()
        print('val_acc=%.4f' % (acc_sum / val_step))
        # Save a checkpoint after every epoch (the original guarded this
        # with the always-true `if epoch % 1 == 0`).
        torch.save(net.state_dict(), 'logs/Epoch%d-Loss%.4f_.pth' % (
            epoch, total_loss / (epoch_step + 1)))
        lr_scheduler.step()
训练日志如下所示,验证集准确率可以达到0.95以上
源代码下载
项目目录如下所示:
整体项目下载:https://gitee.com/mqwdasddqw/project-download-address