代码中注释了部分语法的用途;亲手把代码敲一遍(手搓)有助于提升能力。
1.model.py
import torch
import torch.nn as nn
class AlexNet(nn.Module):
    """AlexNet-style image classifier with half the channel widths of the paper.

    The convolutional stack is sized so that a 3x224x224 input reaches the
    classifier as a 128x6x6 feature map, matching ``nn.Linear(128*6*6, 2048)``.

    Args:
        num_classes: Number of output logits produced by the last linear layer.
        init_weight: When true, initialize Conv2d/Linear parameters explicitly
            via ``_initialize_weights`` instead of keeping PyTorch's defaults.
        init_weights: Alias of ``init_weight`` (plural spelling); the explicit
            initialization runs when either flag is true.
    """

    def __init__(self, num_classes=1000, init_weight=False, init_weights=False):
        super().__init__()
        # nn.Sequential bundles the layers into one module so forward() stays short.
        self.features = nn.Sequential(
            nn.Conv2d(3, 48, kernel_size=11, stride=4, padding=2),   # [3, 224, 224] -> [48, 55, 55]
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),                   # -> [48, 27, 27]
            nn.Conv2d(48, 128, kernel_size=5, padding=2),            # -> [128, 27, 27]
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),                   # -> [128, 13, 13]
            nn.Conv2d(128, 192, kernel_size=3, padding=1),           # -> [192, 13, 13]
            nn.ReLU(inplace=True),
            nn.Conv2d(192, 192, kernel_size=3, padding=1),           # -> [192, 13, 13]
            nn.ReLU(inplace=True),
            nn.Conv2d(192, 128, kernel_size=3, padding=1),           # -> [128, 13, 13]
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),                   # -> [128, 6, 6]
        )
        # NOTE: the attribute name (a misspelling of "classifier") is kept
        # because it determines the parameter keys in the state dict.
        self.classfiter = nn.Sequential(
            nn.Dropout(p=0.5),
            nn.Linear(128 * 6 * 6, 2048),
            nn.ReLU(inplace=True),
            nn.Dropout(p=0.5),
            nn.Linear(2048, 2048),
            nn.ReLU(inplace=True),
            nn.Linear(2048, num_classes),
        )
        if init_weight or init_weights:
            self._initialize_weights()

    def forward(self, x):
        """Return class logits of shape [N, num_classes] for a batch of images."""
        x = self.features(x)
        # Keep the batch dimension and flatten channels/height/width together.
        x = torch.flatten(x, start_dim=1)
        x = self.classfiter(x)
        return x

    def _initialize_weights(self):
        """Apply Kaiming-normal init to conv layers and N(0, 0.01) to linear layers."""
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                # Kaiming initialization suited to the ReLU activations that follow.
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.Linear):
                # Zero-mean normal with standard deviation 0.01 for the weights.
                nn.init.normal_(m.weight, 0, 0.01)
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)
用 nn.Sequential() 将多个网络层打包成一个模块,可以精简代码,这是这个 model 中值得学习的写法;这类写法见得多了,以后自然就会用了。
2.train.py
import os # 导入操作系统模块
import sys # 导入系统模块
import json # 导入JSON模块,用于处理JSON数据
import torch # 导入PyTorch深度学习框架
import torch.nn as nn # 导入PyTorch中的神经网络模块
from torchvision import transforms, datasets, utils # 导入PyTorch中的计算机视觉工具
import matplotlib.pyplot as plt # 导入Matplotlib库用于绘图
import numpy as np # 导入NumPy库,用于数值计算
import torch.optim as optim # 导入PyTorch中的优化器模块
from tqdm import tqdm # 导入tqdm库,用于在循环中显示进度条
from model import AlexNet # 导入自定义的AlexNet模型
def main():
    """Train AlexNet on the flower dataset and save the best-validating weights.

    Reads images from ``<cwd>/../../data_set/flower_data/{train,val}``, writes
    the index-to-class mapping to ``class_indices.json``, trains for a fixed
    number of epochs and stores the state dict in ``./AlexNet.pth`` every time
    the validation accuracy improves.

    Raises:
        AssertionError: If the dataset directory does not exist.
    """
    # Use the first GPU when one is available, otherwise fall back to the CPU.
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print("正在使用 {} 设备.".format(device))

    # Pre-processing pipelines: random augmentation for training, plain resize for validation.
    data_transform = {
        "train": transforms.Compose([
            transforms.RandomResizedCrop(224),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
        ]),
        # The model's classifier is sized for 224x224 inputs, so validation images are resized to match.
        "val": transforms.Compose([
            transforms.Resize((224, 224)),
            transforms.ToTensor(),
            transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
        ]),
    }

    # Locate the dataset two directories above the current working directory.
    data_root = os.path.abspath(os.path.join(os.getcwd(), "../.."))
    image_path = os.path.join(data_root, "data_set", "flower_data")
    assert os.path.exists(image_path), "{} 路径不存在.".format(image_path)

    train_dataset = datasets.ImageFolder(root=os.path.join(image_path, "train"),
                                         transform=data_transform["train"])
    train_num = len(train_dataset)

    # Invert class_to_idx so predictions (indices) can be mapped back to class names.
    flower_list = train_dataset.class_to_idx
    cla_dict = {val: key for key, val in flower_list.items()}
    json_str = json.dumps(cla_dict, indent=4)
    with open('class_indices.json', 'w') as json_file:
        json_file.write(json_str)

    batch_size = 32
    # os.cpu_count() may return None when the count cannot be determined; treat that as 1.
    cpu_total = os.cpu_count() or 1
    nw = min([cpu_total, batch_size if batch_size > 1 else 0, 8])
    print('每个进程使用 {} 个数据加载器工作线程'.format(nw))

    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=batch_size, shuffle=True,
                                               num_workers=nw)
    validate_dataset = datasets.ImageFolder(root=os.path.join(image_path, "val"),
                                            transform=data_transform["val"])
    val_num = len(validate_dataset)
    validate_loader = torch.utils.data.DataLoader(validate_dataset,
                                                  batch_size=4, shuffle=False,
                                                  num_workers=nw)
    print("使用 {} 张图片进行训练,{} 张图片进行验证.".format(train_num, val_num))

    # AlexNet.__init__ declares the flag as `init_weight`; the plural spelling
    # would raise TypeError on that signature.
    net = AlexNet(num_classes=5, init_weight=True)
    net.to(device)

    loss_function = nn.CrossEntropyLoss()
    optimizer = optim.Adam(net.parameters(), lr=0.0002)

    epochs = 10
    save_path = './AlexNet.pth'
    best_acc = 0.0
    train_steps = len(train_loader)

    for epoch in range(epochs):
        # ---- training phase ----
        net.train()
        running_loss = 0.0
        train_bar = tqdm(train_loader, file=sys.stdout)
        for step, data in enumerate(train_bar):
            images, labels = data
            optimizer.zero_grad()
            outputs = net(images.to(device))
            loss = loss_function(outputs, labels.to(device))
            loss.backward()
            optimizer.step()

            # Convert once to a Python float for both the running total and the progress bar.
            loss_value = loss.item()
            running_loss += loss_value
            train_bar.desc = "训练中... 第[{}/{}] 轮,损失: {:.3f}".format(epoch + 1, epochs, loss_value)

        # ---- validation phase ----
        net.eval()
        acc = 0.0  # number of correct predictions in this epoch
        with torch.no_grad():
            val_bar = tqdm(validate_loader, file=sys.stdout)
            for val_data in val_bar:
                val_images, val_labels = val_data
                outputs = net(val_images.to(device))
                # Index of the largest logit is the predicted class.
                predict_y = torch.max(outputs, dim=1)[1]
                acc += torch.eq(predict_y, val_labels.to(device)).sum().item()

        val_accurate = acc / val_num
        print('[第 %d 轮] 训练损失: %.3f 验证集准确率: %.3f' %
              (epoch + 1, running_loss / train_steps, val_accurate))

        # Keep only the weights with the best validation accuracy so far.
        if val_accurate > best_acc:
            best_acc = val_accurate
            torch.save(net.state_dict(), save_path)

    print('训练完成')


if __name__ == '__main__':
    main()
3.predict.py
import os # 导入操作系统模块
import json # 导入JSON模块,用于处理JSON数据
import torch # 导入PyTorch深度学习框架
from PIL import Image # 导入PIL库,用于图像处理
from torchvision import transforms # 导入PyTorch中的计算机视觉工具
import matplotlib.pyplot as plt # 导入Matplotlib库用于绘图
from model import AlexNet # 导入自定义的AlexNet模型
def main():
    """Classify a single image with the trained AlexNet and plot the result.

    Loads ``../tulip.jpg``, the class mapping ``./class_indices.json`` and the
    weights ``./AlexNet.pth``, prints the probability of every class and shows
    the image titled with the most likely class.

    Raises:
        AssertionError: If the image, the mapping file or the weights file is missing.
    """
    # Use the first GPU when one is available, otherwise fall back to the CPU.
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    data_transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
    ])

    img_path = "../tulip.jpg"
    assert os.path.exists(img_path), "文件: '{}' 不存在.".format(img_path)
    # Force three channels: Normalize above is configured for exactly 3 channels,
    # so grayscale or RGBA files would otherwise fail.
    img = Image.open(img_path).convert("RGB")
    plt.imshow(img)

    # [C, H, W] after the transform, then add the batch dimension -> [N, C, H, W].
    img = data_transform(img)
    img = torch.unsqueeze(img, dim=0)

    json_path = './class_indices.json'
    assert os.path.exists(json_path), "文件: '{}' 不存在.".format(json_path)
    with open(json_path, "r") as f:
        class_indict = json.load(f)

    model = AlexNet(num_classes=5).to(device)

    weights_path = "./AlexNet.pth"
    assert os.path.exists(weights_path), "文件: '{}' 不存在.".format(weights_path)
    # map_location lets a checkpoint saved on a GPU load on a CPU-only machine.
    model.load_state_dict(torch.load(weights_path, map_location=device))

    model.eval()
    with torch.no_grad():
        # Drop the batch dimension and bring the logits back to the CPU.
        output = torch.squeeze(model(img.to(device))).cpu()
        predict = torch.softmax(output, dim=0)
        predict_cla = int(torch.argmax(predict))

    # JSON object keys are strings, so the class index is looked up as str.
    print_res = "类别: {} 概率: {:.3}".format(class_indict[str(predict_cla)],
                                           predict[predict_cla].item())
    plt.title(print_res)
    for idx, prob in enumerate(predict):
        print("类别: {:10} 概率: {:.3}".format(class_indict[str(idx)],
                                            prob.item()))
    plt.show()


if __name__ == '__main__':
    main()