(图像处理)课上小作业05
1.软件效果
2.模式识别模型
(1)LeNet结构
(2)代码实现
import torch.nn as nn
import torch.nn.functional as F
class LeNet(nn.Module):
    """LeNet-style CNN for 3-channel 32x32 images.

    Two convolution + max-pooling stages followed by three fully connected
    layers. Attribute names are kept stable so previously saved state dicts
    still load.

    Args:
        num_classes: Length of the output vector (number of classes).
            Defaults to 26, matching the original hard-coded value.
    """

    def __init__(self, num_classes=26):
        super().__init__()
        # Conv2d arguments: in_channels, out_channels, kernel_size.
        self.conv1 = nn.Conv2d(3, 16, 5)
        # MaxPool2d arguments: kernel_size, stride.
        self.pool1 = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(16, 32, 5)
        self.pool2 = nn.MaxPool2d(2, 2)
        # A fully connected layer takes a flat vector, so its input size is
        # the flattened result of the last pooling stage (32 * 5 * 5).
        self.fc1 = nn.Linear(32 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, num_classes)

    def forward(self, x):
        """Run the forward pass.

        Args:
            x: Tensor laid out as [batch, channel, height, width]; the layer
                sizes assume 3 x 32 x 32 per sample.

        Returns:
            Tensor of shape [batch, num_classes] with unnormalized scores.
        """
        x = F.relu(self.conv1(x))  # (3, 32, 32) -> (16, 28, 28)
        x = self.pool1(x)          # -> (16, 14, 14); pooling keeps the depth
        x = F.relu(self.conv2(x))  # -> (32, 10, 10)
        x = self.pool2(x)          # -> (32, 5, 5)
        # Flatten per sample and keep the batch dimension fixed. With
        # view(-1, 32*5*5) a wrong input size could be silently folded into
        # the batch dimension; here it fails at fc1 instead.
        x = x.flatten(1)           # -> (32 * 5 * 5)
        x = F.relu(self.fc1(x))    # -> (120)
        x = F.relu(self.fc2(x))    # -> (84)
        x = self.fc3(x)            # -> (num_classes)
        return x
3.数据准备
(1)制作训练数据集
- 将图片按类别分开,并以类别命名文件夹
- 将软件采集到的图像保存到相应文件夹中
(2)为数据集制作label
import os
# Root folders of the training and validation splits.
train_dir = os.path.join("dataset", "train")
valid_dir = os.path.join("dataset", "val")
# Each split's label file lives inside that split's own folder.
train_txt_path = os.path.join(train_dir, "train.txt")
valid_txt_path = os.path.join(valid_dir, "test.txt")
# Maps each uppercase letter 'A'..'Z' to its class index 0..25.
letter_index = {chr(ord('A') + idx): idx for idx in range(26)}
def gen_txt(txt_path, img_dir, label_map=None):
    """Write a label file listing the PNG images found under img_dir.

    Every sub-directory reached while walking img_dir is scanned. For each
    file whose name ends in 'png', one line "<image path> <class index>" is
    written. The class is the part of the file name before the first '_'.

    Args:
        txt_path: Path of the label file to create (overwritten if present).
        img_dir: Root directory containing the class sub-directories.
        label_map: Mapping from label string to class index. Defaults to
            the module-level letter_index.

    Raises:
        KeyError: If a file name's label prefix is not in the mapping.
    """
    if label_map is None:
        label_map = letter_index
    # The with-block closes the file even if a label lookup raises.
    with open(txt_path, 'w') as f:
        for root, s_dirs, _ in os.walk(img_dir, topdown=True):
            # Sorting in place (allowed with topdown=True) makes the
            # traversal order, and so the output, reproducible.
            s_dirs.sort()
            for sub_dir in s_dirs:
                i_dir = os.path.join(root, sub_dir)  # path of one class folder
                for img_name in sorted(os.listdir(i_dir)):
                    if not img_name.endswith('png'):  # skip non-PNG files
                        continue
                    label = img_name.split('_')[0]
                    img_path = os.path.join(i_dir, img_name)
                    f.write(img_path + ' ' + str(label_map[label]) + '\n')
if __name__ == '__main__':
    # Generate the label file for the training split, then the validation split.
    for txt_path, img_dir in (
        (train_txt_path, train_dir),
        (valid_txt_path, valid_dir),
    ):
        gen_txt(txt_path, img_dir)
4.训练
加载训练集和验证集并开始训练
关键代码
# Training-time preprocessing. Resize runs on the image BEFORE ToTensor so
# the order matches the prediction-time transform (Resize -> ToTensor ->
# Normalize). Resizing a tensor and resizing a PIL image can interpolate
# differently, which would make training and prediction inputs disagree.
# NOTE(review): assumes MyDataset hands PIL images to the transform - confirm.
# NOTE(review): mirrored letters are generally not valid letters; confirm
# that horizontal flipping is a wanted augmentation for this task.
train_transform = transforms.Compose(
    [
        transforms.RandomHorizontalFlip(),  # random horizontal flip (mirror), not a rotation
        transforms.Resize((32, 32)),  # resize the image to 32x32
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
    ])
# Validation preprocessing: same as training, without the random flip.
val_transform = transforms.Compose(
    [
        transforms.Resize((32, 32)),  # resize the image to 32x32
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
    ])
train_data = MyDataset('./dataset/train/train.txt', transform=train_transform)
test_data = MyDataset('./dataset/val/test.txt', transform=val_transform)
# shuffle controls whether the samples are reordered every epoch.
train_loader = torch.utils.data.DataLoader(train_data, batch_size=8,
                                           shuffle=True, num_workers=0)
val_loader = torch.utils.data.DataLoader(test_data, batch_size=4,
                                         shuffle=False, num_workers=0)
5.预测
关键代码
- (1)加载模型
# Load the trained weights. map_location='cpu' lets a checkpoint saved from
# a GPU load on a CPU-only machine; the classify handler converts outputs
# with .numpy(), which needs CPU tensors.
self.net = LeNet()
self.net.load_state_dict(torch.load('letters_Lenet.pth', map_location='cpu'))
# Inference only: put the network in evaluation mode.
self.net.eval()
# Prediction-time preprocessing: resize to 32x32, convert to a tensor, then
# normalize each of the three channels with mean 0.5 and std 0.5.
self.transform = transforms.Compose(
    [transforms.Resize((32, 32)),
     transforms.ToTensor(),
     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
- (2)获取画板上的图像并给出分类结果
def on_btn_Classify_Clicked(self):
    """Classify the current paint-board drawing and display the result.

    Reads the paint-board content, preprocesses it with self.transform,
    runs self.net on it, and writes the looked-up result to the label
    widget. Errors raised during inference or display are printed, not
    propagated.
    """
    # 1. Read the image from the paint board.
    qimage = self.__paintBoard.GetContentAsQImage()
    # Force three channels: a QImage with an alpha channel converts to an
    # RGBA PIL image, which would not fit the 3-channel Normalize/conv1.
    image = ImageQt.fromqimage(qimage).convert("RGB")
    im = self.transform(image)  # [C, H, W]
    im = torch.unsqueeze(im, dim=0)  # add the batch dimension
    # 2. Feed the model and report the classification result.
    try:
        with torch.no_grad():
            outputs = self.net(im)
            # Index of the highest score for each sample in the batch.
            predict = torch.max(outputs, dim=1)[1].numpy()
        print("final: ", predict)
        # NOTE(review): assumes self.letter_index maps a class index to
        # its display text - confirm against the enclosing class.
        letter = self.letter_index[predict[0]]
        print(letter)
        self.__lb_label.setText(letter)
    except Exception as e:
        # GUI slot boundary: report the error instead of raising out of it.
        print(e)