目录
今天我想和大家分享一些关于深度学习入门的知识。深度学习是人工智能领域的一个重要分支,目前已经被广泛应用于图像识别、自然语言处理、语音识别等领域。如果你对深度学习感兴趣,那么本文可能会对你有所帮助。
在这篇文章中,我们将一起学习如何构建一个简单的深度学习模型,并了解如何使用常用的深度学习框架(PyTorch)来编写和运行代码。我们将从规范的代码结构入手,以便初学者可以更好地学习和使用深度学习框架。
希望通过这篇文章,你可以更好地了解深度学习的基本概念和编程技巧,并开始实践编写自己的深度学习模型。
一、导入包以及设置随机种子
使实验结果可以复现
import numpy as np
from tqdm import tqdm
import os
import time
import torch
from torch import nn
from torchvision import models
import torch.nn.functional as F
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms
from PIL import Image
import random
# Seed every random number generator used in this script (Python's
# `random`, NumPy and PyTorch) with the same value so runs are repeatable.
seed = 42
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)
二、以类的方式定义超参数
统一设置超参数,方便调参
class argparse():
    """Container that keeps all hyperparameters in one place.

    Attributes (all assigned in ``__init__``):
        captcha_size: number of characters in one captcha.
        captcha_array: the 36-character alphabet captcha characters come from.
        batch_size: number of samples per batch.
        lr: learning rate.
        epochs: number of passes over the training data.
        device: ``cuda:0`` when CUDA is available, otherwise ``cpu``.
    """

    def __init__(self) -> None:
        digits_and_lowercase = "0123456789abcdefghijklmnopqrstuvwxyz"
        device_name = "cuda:0" if torch.cuda.is_available() else "cpu"

        # Task description.
        self.captcha_size = 4
        self.captcha_array = digits_and_lowercase

        # Optimisation settings.
        self.batch_size = 512
        self.lr = 0.001
        self.epochs = 20

        # Use the GPU when one is available, otherwise fall back to the CPU.
        self.device = torch.device(device_name)
# Module-level hyperparameter object; the models, helpers and training code
# in this file read it through the global name `args`.
args = argparse() # instantiate the hyperparameter class
三、定义自己的模型
继承自 nn.Module , 定义自己的模型
class Mymodel(nn.Module):
    """Plain CNN captcha classifier.

    ``seq`` is four Conv2d(3x3, padding=1) -> ReLU -> MaxPool2d(2) stages
    (1 -> 64 -> 128 -> 256 -> 512 channels) followed by ``Flatten``.
    ``layer`` is a two-layer fully connected head that emits
    ``args.captcha_size * len(args.captcha_array)`` scores per image.
    """

    def __init__(self):
        super().__init__()

        # (in_channels, out_channels) of each convolutional stage.
        channel_plan = [(1, 64), (64, 128), (128, 256), (256, 512)]
        stages = []
        for c_in, c_out in channel_plan:
            stages += [
                nn.Conv2d(in_channels=c_in, out_channels=c_out, kernel_size=3, padding=1),
                nn.ReLU(),
                nn.MaxPool2d(kernel_size=2),  # halves height and width
            ]
        self.seq = nn.Sequential(*stages, nn.Flatten())

        n_outputs = args.captcha_size * len(args.captcha_array)
        self.layer = nn.Sequential(
            # 15360 = 512 channels * 3 * 10, i.e. the flattened feature size
            # for a 1x60x160 input after four 2x poolings.
            nn.Linear(in_features=15360, out_features=4096),
            nn.Dropout(0.2),  # drop 20% of the neurons
            nn.ReLU(),
            nn.Linear(in_features=4096, out_features=n_outputs),
        )

    def forward(self, x):
        """Run the convolutional stack, then the fully connected head."""
        features = self.seq(x)
        return self.layer(features)
也可以直接使用 PyTorch(torchvision)自带的模型,并修改网络的输入层和输出层以适应目标任务
class myResNet(nn.Module):
    """torchvision ResNet-50 adapted to the captcha task.

    The first convolution is replaced so the network accepts 1-channel
    input, and the final fully connected layer is replaced so it emits
    ``args.captcha_size * len(args.captcha_array)`` scores per image.
    """

    def __init__(self) -> None:
        super().__init__()
        # Randomly initialised backbone. Called without arguments on purpose:
        # "no pretrained weights" is the default in every torchvision release,
        # whereas the explicit `pretrained=False` flag is deprecated since
        # torchvision 0.13 and triggers warnings.
        self.model = models.resnet50()
        # Same geometry as the stock first convolution, but 1 input channel.
        self.model.conv1 = nn.Conv2d(1, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
        # New head: one score per (character position, alphabet symbol) pair.
        self.model.fc = nn.Linear(
            in_features=self.model.fc.in_features,  # 2048 for ResNet-50
            out_features=args.captcha_size * len(args.captcha_array),
            bias=True,
        )

    def forward(self, x):
        """Return the raw scores produced by the wrapped ResNet."""
        return self.model(x)
五、定义自己的数据集Dataset,DataLoader
以类的形式放置工具函数,独热编码(one-hot)实现文本转向量与向量转文本
class Tools():
    """Helpers converting captcha text to and from one-hot tensors.

    All methods read the captcha length and alphabet from the module-level
    ``args`` object.
    """

    def text2vec(self, text):
        """One-hot encode ``text``.

        Returns a float tensor with ``args.captcha_size`` rows and
        ``len(args.captcha_array)`` columns (4 x 36 with the settings in this
        file); row ``i`` holds a single 1 in the column of ``text[i]``.

        Raises:
            ValueError: if a character is not in ``args.captcha_array``.
            IndexError: if ``text`` is longer than ``args.captcha_size``.
        """
        vec = torch.zeros((args.captcha_size, len(args.captcha_array)))
        for row, char in enumerate(text):
            vec[row, args.captcha_array.index(char)] = 1
        return vec

    def vec2text(self, vec):
        """Decode a 2-D tensor of one-hot rows or scores back into text.

        Each row contributes the alphabet character at its argmax column.
        """
        indices = torch.argmax(vec, dim=1).tolist()
        return "".join(args.captcha_array[i] for i in indices)

    def accuracy(self, y_hat, y):
        """Count the correctly predicted captchas.

        ``y_hat`` (scores) and ``y`` (one-hot labels) are reshaped to
        (-1, captcha_size, alphabet_size); a sample counts as correct only
        when the argmax matches at every character position.

        Returns:
            The number of fully correct samples as an ``int``.
        """
        n_chars = len(args.captcha_array)
        predicted = y_hat.reshape(-1, args.captcha_size, n_chars).argmax(dim=2)
        expected = y.reshape(-1, args.captcha_size, n_chars).argmax(dim=2)
        return (predicted == expected).all(dim=1).sum().item()
测试工具函数
# Shared Tools instance; the dataset and prediction code in this file use it
# through the global name `tls`.
tls = Tools()
# Round trip: one-hot encode 'aab1' and decode it back to text.
tls.vec2text(tls.text2vec('aab1'))
Output:
'aab1'
定义读取数据集的类,继承自 Dataset,主要实现 __init__()、__getitem__() 和 __len__() 三个方法
class My_datasets(Dataset):
    """Captcha image dataset read from a single directory.

    Every entry of ``root_dir`` is treated as an image. Its label is the
    part of the file name before the first ``_``.
    """

    def __init__(self, root_dir):
        """Collect the image paths under ``root_dir`` and build the transform."""
        super().__init__()
        # os.listdir returns entries in arbitrary order; sorting keeps the
        # index -> file mapping identical across runs and filesystems.
        self.list_image_path = [
            os.path.join(root_dir, image_name)
            for image_name in sorted(os.listdir(root_dir))
        ]
        # Resize to 60x160, convert to a single grey channel, then to a tensor.
        self.transforms = transforms.Compose(
            [
                transforms.Resize((60, 160)),
                transforms.Grayscale(),
                transforms.ToTensor(),
            ]
        )

    def __getitem__(self, index):
        """Return ``(image_tensor, label_vector)`` for the sample at ``index``.

        ``label_vector`` is the one-hot encoding from ``tls.text2vec``
        flattened to one dimension.
        """
        image_path = self.list_image_path[index]
        # basename works with any path separator, unlike split('/').
        image_name = os.path.basename(image_path)
        # The context manager closes the underlying file once the pixels
        # have been turned into a tensor.
        with Image.open(image_path) as img_:
            img_tensor = self.transforms(img_)
        label_text = image_name.split('_')[0]
        img_label = tls.text2vec(label_text).view(-1)
        return img_tensor, img_label

    def __len__(self):
        """Number of images found in the directory."""
        return len(self.list_image_path)
六、定义训练函数
def train_original():
    """Train ``myResNet`` on ``./dataset/train/`` and checkpoint it periodically.

    Uses ``MultiLabelSoftMarginLoss`` and Adam with the hyperparameters in
    ``args``. The state dict is written to ``afterResNetmodel.pth`` whenever
    the global step count is divisible by ``save_step``. ``save_step`` starts
    at 100 and shrinks by 20 after each save, down to a floor of 10. After
    every epoch the smallest batch loss seen in that epoch is printed.
    """
    train_path = r"./dataset/train/"
    train_dataset = My_datasets(train_path)
    train_dataloader = DataLoader(dataset=train_dataset, batch_size=args.batch_size, shuffle=True)

    loss_fn = nn.MultiLabelSoftMarginLoss().to(args.device)
    model = myResNet().to(args.device)
    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)

    total_step = 0
    save_step = 100
    model.train()
    for epoch in range(args.epochs):
        # Kept as a plain float so no tensor is held across iterations and
        # the epoch summary also works when no batch was processed.
        min_loss = float("inf")
        for imgs, targets in train_dataloader:
            imgs = imgs.to(args.device)
            targets = targets.to(args.device)

            outputs = model(imgs)
            loss = loss_fn(outputs, targets)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            loss_value = loss.item()
            if loss_value < min_loss:
                min_loss = loss_value

            total_step += 1
            if total_step % save_step == 0:
                save_step = max(save_step - 20, 10)
                print("save model {}".format(total_step))
                torch.save(model.state_dict(), "afterResNetmodel.pth")
        print("epoch{}, loss:{}".format(epoch, min_loss))
# Run the training loop; this writes the checkpoint afterResNetmodel.pth.
train_original()
七、测试保存的最优模型的识别正确率,以及对单张验证码图片进行识别
定义测试函数和单张预测函数
def test_pred():
    """Evaluate the saved checkpoint on ``./dataset/test/``.

    Loads ``afterResNetmodel.pth`` into a fresh ``myResNet``, predicts every
    test image one at a time, prints each mismatch, and finally prints the
    percentage of captchas whose decoded text matches the label exactly.
    """
    m = myResNet()
    # NOTE(review): strict=False ignores missing/unexpected keys, so a
    # mismatching checkpoint loads without an error. Confirm this leniency
    # is intended.
    m.load_state_dict(torch.load("afterResNetmodel.pth", map_location=args.device), strict=False)
    m.to(args.device)
    m.eval()

    test_data = My_datasets("./dataset/test/")
    test_dataloader = DataLoader(test_data, batch_size=1, shuffle=False)
    test_length = len(test_data)
    if test_length == 0:
        # Nothing to evaluate; avoid dividing by zero below.
        print("测试集为空,无法计算正确率")
        return

    n_chars = len(args.captcha_array)
    correct = 0
    # Inference only: skip autograd bookkeeping.
    with torch.no_grad():
        for imgs, labels in test_dataloader:
            imgs = imgs.to(args.device)
            labels = labels.to(args.device)
            # One row per character position.
            labels_text = tls.vec2text(labels.view(-1, n_chars))
            predict_outputs = m(imgs).view(-1, n_chars)
            predict_labels = tls.vec2text(predict_outputs)
            if predict_labels == labels_text:
                correct += 1
            else:
                print("预测失败:正确值:{},预测值:{}".format(labels_text, predict_labels))
    print("正确率{}".format(correct / test_length * 100))
def pred_pic(pic_path):
    """Predict the text of a single captcha image.

    Loads ``afterResNetmodel.pth`` into a fresh ``myResNet``, runs the image
    at ``pic_path`` through it, prints the decoded text and returns it.

    Args:
        pic_path: path of the image file to recognise.

    Returns:
        The predicted captcha text.
    """
    # Same steps, in the same order, as the transform in My_datasets so the
    # model sees inputs prepared exactly like its training data.
    preprocess = transforms.Compose([
        transforms.Resize((60, 160)),
        transforms.Grayscale(),
        transforms.ToTensor(),
    ])
    # The context manager closes the file once the tensor has been built.
    with Image.open(pic_path) as picture:
        img = preprocess(picture)
    # Add the batch dimension: (1, 60, 160) -> (1, 1, 60, 160).
    img = torch.reshape(img.to(args.device), (-1, 1, 60, 160))

    m = myResNet()
    # NOTE(review): strict=False ignores missing/unexpected keys, so a
    # mismatching checkpoint loads without an error. Confirm this leniency
    # is intended.
    m.load_state_dict(torch.load("afterResNetmodel.pth", map_location=args.device), strict=False)
    m.to(args.device)
    m.eval()

    # Inference only: skip autograd bookkeeping.
    with torch.no_grad():
        outputs = m(img)
    # One row per character position.
    outputs = outputs.view(-1, len(args.captcha_array))
    outputs_lable = tls.vec2text(outputs)
    print(outputs_lable)
    return outputs_lable
测试
# Evaluate the saved checkpoint on the test set and print the accuracy.
test_pred()
Output:
预测失败:正确值:jbgl,预测值:ibgl
预测失败:正确值:w164,预测值:wz64
预测失败:正确值:g80u,预测值:880u
预测失败:正确值:t9eo,预测值:t980
预测失败:正确值:jln3,预测值:jln9
预测失败:正确值:k0g5,预测值:kog5
预测失败:正确值:etni,预测值:etnd
预测失败:正确值:oqhx,预测值:oqhh
预测失败:正确值:q045,预测值:qo45
预测失败:正确值:pdhb,预测值:pdbb
预测失败:正确值:17ig,预测值:12ig
预测失败:正确值:sz4c,预测值:5z4c
正确率94.0
今天的教程学习已经全部结束,我很高兴能够与大家分享这些内容。我希望能够为大家提供有用的信息和帮助,以便大家更好地理解深度学习和神经网络的知识。
如果您有任何疑问或建议,请随时与我联系。