本文着重于pytorch代码构建网络,理论叙述暂不涉及。
在深度学习发展早期,有一些研究者,他们认为特征本身也应该由学习得来。他们还相信,为了表征足够复杂的输入,特征本身应该分级表示。持这一想法的研究者相信,多层神经网络可能可以学得数据的多级表征,并逐级表示越来越抽象的概念或模式。以图像分类为例,并回忆物体边缘检测的例子。在多层神经网络中,图像的第一级的表示可以是在特定的位置和⻆度是否出现边缘;而第二级的表示说不定能够将这些边缘组合出有趣的模式,如花纹;在第三级的表示中,也许上一级的花纹能进一步汇合成对应物体特定部位的模式。这样逐级表示下去,最终,模型能够较容易根据最后一级的表示完成分类任务。需要强调的是,输入的逐级表示由多层模型中的参数决定,而这些参数都是学出来的
环境信息:
windows11
torch 2.1.0
CUDA 12.1
python 3.9
数据集下载
开始搭建网络前先下载数据。
本网络使用花分类数据集进行训练,下载地址: https://pan.baidu.com/s/1pBh6tqnp7qtdd1WfjViy-Q 提取码: dj8v
解压后会得到如下文件:
-flower_photos
-----daisy
-----dandelion
-----roses
-----sunflowers
-----tulips
网络结构
AlexNet网络结构如下图:
nn.Conv2d
展示一下nn.Conv2d是如何计算的:
首先,它需要输入一个四维的张量,在具体的数据中表现为(batch, channel, h, w),下面构建一个数值为0~48的4维张量来进行测试,kernel设置为3*3的全1矩阵,并把它拉伸成4维
import torch.nn as nn
import torch

# Demo: how nn.Conv2d computes its output.
# Input must be 4-D: (batch, channel, height, width). Values run 0..48.
img = torch.arange(49, dtype=torch.float32).view(1, 1, 7, 7)
conv = nn.Conv2d(in_channels=1, out_channels=1, kernel_size=3)
# All-ones 3x3 kernel, shaped (out_channels, in_channels, kh, kw).
k = torch.tensor([[[[1, 1, 1], [1, 1, 1], [1, 1, 1]]]], dtype=torch.float32)
conv.weight.data = k
# Zero the (randomly initialized) bias so the output is exactly each 3x3
# window sum -- otherwise every run prints different fractional values.
conv.bias.data.zero_()
img_2 = conv(img)
print(img)
print(img_2)
print(conv.weight)
下面是打印结果:
tensor([[[[ 0., 1., 2., 3., 4., 5., 6.],
[ 7., 8., 9., 10., 11., 12., 13.],
[14., 15., 16., 17., 18., 19., 20.],
[21., 22., 23., 24., 25., 26., 27.],
[28., 29., 30., 31., 32., 33., 34.],
[35., 36., 37., 38., 39., 40., 41.],
[42., 43., 44., 45., 46., 47., 48.]]]])
tensor([[[[ 71.9927, 80.9927, 89.9927, 98.9927, 107.9927],
[134.9927, 143.9927, 152.9927, 161.9927, 170.9927],
[197.9927, 206.9927, 215.9927, 224.9927, 233.9927],
[260.9927, 269.9927, 278.9927, 287.9927, 296.9927],
[323.9927, 332.9927, 341.9927, 350.9927, 359.9927]]]],
grad_fn=<ConvolutionBackward0>)
Parameter containing:
tensor([[[[1., 1., 1.],
[1., 1., 1.],
[1., 1., 1.]]]], requires_grad=True)
代码构建
代码分为四个部分:
-
make_dataset.py——制作数据集代码
-
model.py——网络结构文件
-
train.py——训练代码
-
predict.py——检测代码
make_dataset.py
主要用来划分训练集和验证集, 注意修改main函数中的data_root路径,指向自己保存花分类数据的路径。运行后会在data_root目录下生成train和val两个文件夹,分别保存训练集和验证集。
import os
from shutil import copy, rmtree
import random
def mk_file(file_path: str):
    """Create *file_path* as a fresh, empty directory.

    Any pre-existing directory at that path is removed first, so stale
    files from a previous run cannot linger in the new split.
    """
    already_there = os.path.exists(file_path)
    if already_there:
        rmtree(file_path)
    os.makedirs(file_path)
def main(data_root=r'D:\Codes\AlexNet\cls_datas', split_rate=0.1):
    """Split the flower_photos dataset into train/val folders.

    Args:
        data_root: directory containing the ``flower_photos`` folder;
            ``train`` and ``val`` folders are created directly under it.
            Defaults to the original hard-coded path for compatibility.
        split_rate: fraction of each class sampled into the validation set.
    """
    # Fixed seed so the train/val split is reproducible across runs.
    random.seed(0)
    origin_flower_path = os.path.join(data_root, "flower_photos")  # source images
    assert os.path.exists(origin_flower_path), "path '{}' does not exist.".format(origin_flower_path)
    # Each sub-directory of flower_photos is one class.
    flower_class = [cla for cla in os.listdir(origin_flower_path)
                    if os.path.isdir(os.path.join(origin_flower_path, cla))]
    # Create (or reset) the train/val roots, with one sub-folder per class.
    train_root = os.path.join(data_root, "train")
    mk_file(train_root)
    val_root = os.path.join(data_root, "val")
    mk_file(val_root)
    for cla in flower_class:
        mk_file(os.path.join(train_root, cla))
        mk_file(os.path.join(val_root, cla))
    for cla in flower_class:
        cla_path = os.path.join(origin_flower_path, cla)
        images = os.listdir(cla_path)
        num = len(images)
        # Randomly pick this class's validation images; a set makes the
        # per-image membership test O(1) instead of O(n).
        eval_index = set(random.sample(images, k=int(num * split_rate)))
        for index, image in enumerate(images):
            # Copy each file into the split it was assigned to.
            dst_root = val_root if image in eval_index else train_root
            copy(os.path.join(cla_path, image), os.path.join(dst_root, cla))
            print("\r[{}] processing [{}/{}]".format(cla, index + 1, num), end="")  # processing bar
        print()
    print("processing done!")


if __name__ == '__main__':
    main()
model.py
import torch
import torch.nn as nn
class AlexNet(nn.Module):
    """AlexNet-style CNN for image classification (expects 3x224x224 input).

    Args:
        num_classes: width of the final classification layer.
    """

    def __init__(self, num_classes=10):
        super(AlexNet, self).__init__()
        # Convolutional feature extractor; shape comments are [H, W, C].
        self.backbone = nn.Sequential(
            nn.Conv2d(3, 96, kernel_size=11, stride=4, padding=2),  # [55, 55, 96]
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),                  # [27, 27, 96]
            nn.Conv2d(96, 256, kernel_size=5, padding=2),           # [27, 27, 256]
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),                  # [13, 13, 256]
            nn.Conv2d(256, 384, kernel_size=3, padding=1),          # [13, 13, 384]
            nn.ReLU(inplace=True),
            nn.Conv2d(384, 384, kernel_size=3, padding=1),          # [13, 13, 384]
            nn.ReLU(inplace=True),
            nn.Conv2d(384, 256, kernel_size=3, padding=1),          # [13, 13, 256]
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),                  # [6, 6, 256]
        )
        # Classifier head. The ReLU between the Linear layers is required:
        # without a nonlinearity, stacked Linears collapse into a single
        # affine map and the hidden layers add no capacity.
        self.fn = nn.Sequential(
            nn.Linear(256 * 6 * 6, 2048),
            nn.ReLU(inplace=True),
            nn.Dropout(0.5),
            nn.Linear(2048, 2048),
            nn.ReLU(inplace=True),
            nn.Dropout(0.5),
            nn.Linear(2048, num_classes),
        )

    def forward(self, x):
        """Map a (N, 3, 224, 224) batch to (N, num_classes) raw logits."""
        x = self.backbone(x)
        x = torch.flatten(x, start_dim=1)  # keep batch dim, flatten C*H*W
        x = self.fn(x)
        return x
if __name__ == '__main__':
    # Smoke test: forward one random 224x224 RGB image through the network.
    dummy = torch.randn((1, 3, 224, 224))
    model = AlexNet()
    print(model(dummy))
其中,inplace=True表示在原张量上进行运算,效果相当于是C++的引用,可以节约内存。
运行model.py,输出为:(每次结果不一样,因为权重初始化是随机的)
tensor([[ 0.0631, 0.0111, 0.0092, 0.0011, 0.0013, 0.0274, 0.0301, -0.0282,
0.0143, -0.0019]], grad_fn=<AddmmBackward0>)
train.py
from model import AlexNet
from torchvision import transforms, datasets, utils
from tqdm import tqdm
import torch.nn as nn
import torch
import argparse
import sys
def run(opt):
    """Train AlexNet on the flower dataset, keeping the best checkpoint.

    Args:
        opt: parsed options with attributes ``data`` (dataset root holding
            ``train``/``val`` folders), ``device``, ``batch_size``, ``epochs``.
    """
    # Training uses random crop/flip augmentation. Validation must be
    # deterministic, so it only resizes: the original RandomResizedCrop here
    # made the reported accuracy vary between evaluation runs, and Resize
    # matches the transform predict.py applies at inference time.
    data_transform = {
        'train': transforms.Compose([transforms.RandomResizedCrop(224),
                                     transforms.RandomHorizontalFlip(),
                                     transforms.ToTensor(),
                                     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))]),
        'val': transforms.Compose([transforms.Resize((224, 224)),
                                   transforms.ToTensor(),
                                   transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
    }
    # ImageFolder infers class labels from the sub-folder names.
    train_dataset = datasets.ImageFolder(root=opt.data + '/train', transform=data_transform['train'])
    val_dataset = datasets.ImageFolder(root=opt.data + '/val', transform=data_transform['val'])
    train_data_num = len(train_dataset)
    val_data_num = len(val_dataset)
    train_data_loader = torch.utils.data.DataLoader(train_dataset,
                                                    batch_size=opt.batch_size,
                                                    shuffle=True,
                                                    num_workers=4)
    val_data_loader = torch.utils.data.DataLoader(val_dataset,
                                                  batch_size=opt.batch_size,
                                                  shuffle=False,
                                                  num_workers=4)
    print(f'==> train data number: {train_data_num}, value data number: {val_data_num}')
    # Build the network: 5 output classes for the flower dataset.
    device = opt.device
    model = AlexNet(num_classes=5)
    model.to(device)
    print(f'==> use {device}')
    save_path = './alexnet.pt'
    loss_function = nn.CrossEntropyLoss()
    best_acc = 0.0
    optimizer = torch.optim.Adam(model.parameters(), lr=0.0002)
    for epoch in range(opt.epochs):
        # ---- train one epoch ----
        model.train()
        running_loss = 0.0  # reset per epoch so we can report a per-epoch average
        train_bar = tqdm(train_data_loader, file=sys.stdout)
        for step, (images, labels) in enumerate(train_bar):
            optimizer.zero_grad()
            outputs = model(images.to(device))
            loss = loss_function(outputs, labels.to(device))
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
            train_bar.desc = "train epoch[{}/{}] loss:{:.3f}".format(epoch + 1, opt.epochs, loss)
        # ---- validate ----
        model.eval()
        acc = 0.0  # count of correctly classified validation images
        with torch.no_grad():
            val_bar = tqdm(val_data_loader, file=sys.stdout)
            for val_data in val_bar:
                val_imgs, val_labels = val_data
                outputs = model(val_imgs.to(device))
                predict_y = torch.max(outputs, dim=1)[1]
                acc += torch.eq(predict_y, val_labels.to(device)).sum().item()
        val_accurate = acc / val_data_num
        print('[epoch %d] train_loss: %.3f val_accuracy: %.3f'
              % (epoch + 1, running_loss / len(train_data_loader), val_accurate))
        # Save only when validation accuracy improves.
        if val_accurate > best_acc:
            best_acc = val_accurate
            torch.save(model.state_dict(), save_path)
    print('==> Finished Training!')
def get_opt():
    """Parse the training command-line options and return the namespace."""
    parser = argparse.ArgumentParser()
    parser.add_argument('--data', type=str, default='../datas/flower_data', help='train data source')
    parser.add_argument('--device', type=str, default='cuda:0', help='training device')
    parser.add_argument('--batch-size', dest='batch_size', type=int, default=32)
    parser.add_argument('--epochs', type=int, default=10, help='total training epochs')
    return parser.parse_args()
if __name__ == '__main__':
    # Parse CLI options and start training.
    run(get_opt())
打开终端运行 python train.py --data ../datas/flower_data --epochs 20 --batch-size 64即可运行,默认是使用gpu,如果没有gpu,运行命令需要改成 python train.py --data ../datas/flower_data --epochs 20 --batch-size 64 --device cpu
最后,可以看到在运行目录下,生成了模型文件alexnet.pt。
predict.py
import torch
from torchvision import transforms
import argparse
from PIL import Image
import matplotlib.pyplot as plt
from model import AlexNet
def run(opt):
    """Classify one image with a trained AlexNet and display the result.

    Args:
        opt: parsed options with attributes ``img`` (image path), ``device``
            and ``weights`` (state-dict file produced by train.py).
    """
    device = opt.device
    # Same normalization as training; resize straight to the 224x224 input.
    data_transform = transforms.Compose(
        [transforms.Resize((224, 224)),
         transforms.ToTensor(),
         transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
    # convert('RGB') guards against grayscale/RGBA files, which would
    # otherwise break the 3-channel Normalize.
    img0 = Image.open(opt.img).convert('RGB')
    img = data_transform(img0)
    img = torch.unsqueeze(img, dim=0).to(device)  # add the batch dimension
    # Load the model. map_location lets a GPU-trained checkpoint load when
    # running with --device cpu (torch.load would fail without it).
    model = AlexNet(num_classes=5).to(device)
    model.load_state_dict(torch.load(opt.weights, map_location=device))
    model.eval()  # inference mode: forward pass only, dropout disabled
    with torch.no_grad():
        output = torch.squeeze(model(img)).cpu()
        output = torch.softmax(output, dim=0)  # logits -> probabilities
        conf = torch.max(output).numpy()
        cls = torch.argmax(output).numpy()
    # Order matches ImageFolder's alphabetical class-folder ordering.
    classes = ['daisy', 'dandelion', 'roses', 'sunflowers', 'tulips']
    print_res = 'class: {} confidence: {}'.format(classes[cls], conf)
    print(print_res)
    plt.imshow(img0)
    plt.title(print_res)
    plt.show()
def get_opt():
    """Parse the prediction command-line options and return the namespace."""
    parser = argparse.ArgumentParser()
    # Help strings fixed: the originals were copy-pasted from train.py and
    # described --img as "train data source" and --weights as a device.
    parser.add_argument('--img', type=str, default=r'D:\Codes\DLNet\sunflower1.jpg', help='image to classify')
    parser.add_argument('--device', type=str, default='cuda:0', help='inference device')
    parser.add_argument('--weights', type=str, default='alexnet.pt', help='model weights file')
    opt = parser.parse_args()
    return opt
if __name__ == '__main__':
    # Parse CLI options and run single-image prediction.
    run(get_opt())