最近在深度学习的入门阶段,在读 ResNet 经典论文的同时看不懂逐句代码的含义,于是几乎逐句添加了注释(没有注释的部分为重复结构)。如有错误,欢迎大家指出纠正!
#model
import torch.nn as nn#导入torch框架中的nn(神经网络包),用于构造自定义模型
import torch#导入pytorch框架
class BasicBlock(nn.Module):
    """Residual building block with two 3x3 convolutions (ResNet-18/34).

    ``expansion`` is 1: the block's output channel count equals
    ``out_channel`` (Bottleneck, by contrast, expands by 4).
    """

    expansion = 1

    def __init__(self, in_channel, out_channel, stride=1, downsample=None, **kwargs):
        """
        Args:
            in_channel: channels of the input tensor.
            out_channel: channels produced by both convolutions.
            stride: stride of the first conv (2 halves the spatial size).
            downsample: optional module applied to the shortcut so the
                identity tensor matches the main branch's shape; None means
                the input is added unchanged.
            **kwargs: ignored; keeps the signature interchangeable with
                other block types.
        """
        super(BasicBlock, self).__init__()
        # bias=False because a BatchNorm layer directly follows each conv,
        # which would cancel any bias anyway.
        self.conv1 = nn.Conv2d(in_channels=in_channel, out_channels=out_channel,
                               kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(out_channel)
        # inplace=True modifies the tensor in place to save memory.
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = nn.Conv2d(in_channels=out_channel, out_channels=out_channel,
                               kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(out_channel)
        self.downsample = downsample

    def forward(self, x):
        """Return relu(F(x) + shortcut(x)), F = conv-bn-relu-conv-bn."""
        shortcut = x if self.downsample is None else self.downsample(x)

        out = self.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))

        # The residual addition that defines a ResNet block.
        out += shortcut
        return self.relu(out)
class Bottleneck(nn.Module):
    """Residual bottleneck block (1x1 -> 3x3 -> 1x1) for ResNet-50/101/152.

    The final 1x1 conv expands the channel count by ``expansion`` (4).

    Note: in the original paper, the stride-2 conv of a downsampling block
    is the first 1x1 conv; the official PyTorch implementation (and this
    one) puts stride 2 on the 3x3 conv instead ("ResNet v1.5"), which
    improves top-1 accuracy by roughly 0.5%.
    See https://ngc.nvidia.com/catalog/model-scripts/nvidia:resnet_50_v1_5_for_pytorch
    """

    expansion = 4

    def __init__(self, in_channel, out_channel, stride=1, downsample=None):
        """
        Args:
            in_channel: channels of the input tensor.
            out_channel: channels of the two inner convs; the block's
                output has ``out_channel * expansion`` channels.
            stride: stride of the 3x3 conv (ResNet v1.5 placement).
            downsample: optional shortcut projection; None adds the input
                unchanged (requires in_channel == out_channel * expansion).
        """
        super(Bottleneck, self).__init__()
        # bias=False throughout: each conv is immediately followed by BN.
        self.conv1 = nn.Conv2d(in_channels=in_channel, out_channels=out_channel,
                               kernel_size=1, stride=1, bias=False)
        self.bn1 = nn.BatchNorm2d(out_channel)
        # -----------------------------------------
        # BUGFIX: the middle conv of a bottleneck must be 3x3 with
        # padding 1 (it was mistakenly declared as 1x1 with no padding),
        # matching the reference torchvision implementation.
        self.conv2 = nn.Conv2d(in_channels=out_channel, out_channels=out_channel,
                               kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(out_channel)
        # -----------------------------------------
        self.conv3 = nn.Conv2d(in_channels=out_channel, out_channels=out_channel*self.expansion,
                               kernel_size=1, stride=1, bias=False)
        self.bn3 = nn.BatchNorm2d(out_channel*self.expansion)
        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample

    def forward(self, x):
        """Return relu(F(x) + shortcut(x)) for the three-conv main branch."""
        identity = x
        if self.downsample is not None:
            # Project the shortcut so it matches the main branch's shape.
            identity = self.downsample(x)
        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)
        out = self.conv2(out)
        out = self.bn2(out)
        out = self.relu(out)
        out = self.conv3(out)
        out = self.bn3(out)
        out += identity  # residual addition
        out = self.relu(out)
        return out
class ResNet(nn.Module):
    """Configurable ResNet backbone built from a given residual block type.

    The network is a 7x7/2 stem, a 3x3/2 max-pool, four residual stages,
    and (optionally) global average pooling plus a linear classifier.
    """

    def __init__(self, block, blocks_num, num_classes=1000, include_top=True):
        """
        Args:
            block: residual block class (e.g. BasicBlock or Bottleneck);
                must expose an ``expansion`` class attribute.
            blocks_num: sequence of four ints — blocks per stage.
            num_classes: output size of the classifier head.
            include_top: when True, append avg-pool + fully-connected head;
                when False, the network ends after the last stage.
        """
        super(ResNet, self).__init__()
        self.include_top = include_top
        self.in_channel = 64

        # Stem: 7x7 stride-2 conv + BN + ReLU, then 3x3 stride-2 max-pool
        # (input resolution is reduced by 4x before the residual stages).
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=self.in_channel,
                               kernel_size=7, stride=2, padding=3, bias=False)
        self.bn1 = nn.BatchNorm2d(self.in_channel)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        # Four residual stages; every stage after the first halves the
        # spatial resolution via stride 2 in its first block.
        self.layer1 = self._make_layer(block, 64, blocks_num[0])
        self.layer2 = self._make_layer(block, 128, blocks_num[1], 2)
        self.layer3 = self._make_layer(block, 256, blocks_num[2], 2)
        self.layer4 = self._make_layer(block, 512, blocks_num[3], 2)

        if self.include_top:
            # Pool to 1x1 regardless of input size, then classify.
            self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
            self.fc = nn.Linear(512 * block.expansion, num_classes)

        # Kaiming-normal initialisation for every convolution, which suits
        # ReLU networks and counters vanishing/exploding gradients.
        for module in self.modules():
            if isinstance(module, nn.Conv2d):
                nn.init.kaiming_normal_(module.weight, mode='fan_out', nonlinearity='relu')

    def _make_layer(self, block, channel, block_num, stride=1):
        """Build one stage of ``block_num`` blocks.

        Only the first block may change stride/channels; a 1x1 conv + BN
        projection shortcut is created whenever the identity would not
        match the main branch's output shape.
        """
        downsample = None
        if stride != 1 or self.in_channel != channel * block.expansion:
            downsample = nn.Sequential(
                nn.Conv2d(in_channels=self.in_channel, out_channels=channel * block.expansion,
                          kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(channel * block.expansion)
            )

        stage = [block(self.in_channel, channel, stride, downsample)]
        self.in_channel = channel * block.expansion
        stage.extend(block(self.in_channel, channel) for _ in range(1, block_num))
        return nn.Sequential(*stage)

    def forward(self, x):
        """Run the stem, the four stages, and optionally the classifier."""
        x = self.maxpool(self.relu(self.bn1(self.conv1(x))))
        x = self.layer4(self.layer3(self.layer2(self.layer1(x))))
        if self.include_top:
            x = self.avgpool(x)
            x = torch.flatten(x, 1)  # (N, C, 1, 1) -> (N, C)
            x = self.fc(x)
        return x
def resnet34(num_classes=1000, include_top=True):
    """Build a ResNet-34: BasicBlock with stage depths [3, 4, 6, 3].

    Pretrained weights:
    https://download.pytorch.org/models/resnet34-333f7ec4.pth
    """
    # [3, 4, 6, 3] is the number of residual blocks in each of the four stages.
    return ResNet(BasicBlock, [3, 4, 6, 3], num_classes=num_classes, include_top=include_top)
def resnet50(num_classes=1000, include_top=True):
    """Build a ResNet-50: Bottleneck with stage depths [3, 4, 6, 3].

    Pretrained weights:
    https://download.pytorch.org/models/resnet50-19c8e357.pth
    """
    return ResNet(Bottleneck, [3, 4, 6, 3], num_classes=num_classes, include_top=include_top)
def resnet101(num_classes=1000, include_top=True):
    """Build a ResNet-101: Bottleneck with stage depths [3, 4, 23, 3].

    Pretrained weights:
    https://download.pytorch.org/models/resnet101-5d3b4d8f.pth
    """
    return ResNet(Bottleneck, [3, 4, 23, 3], num_classes=num_classes, include_top=include_top)
#train
import os#导入操作系统相关的模块
import sys#导入python内置库
import json#导入生成json文件的包
import torch#导入pytorch
from torchvision import datasets, transforms#从torchvision(负责可视化的包),datasets负责直接加载数据集,transforms负责对图片进行各种操作如反转
import torch.nn as nn#导入torch的神经网络模块
import torch.optim as optim#导入优化器
from tqdm import tqdm#导入表示进度条的工具
from model import resnet34
from torch.utils.data import DataLoader#导入数据加载器
def main():
    """Fine-tune a pretrained ResNet-34 on the 5-class flower dataset.

    Loads ImageNet-pretrained weights, replaces the classifier head with a
    5-way linear layer, trains for a few epochs, and saves the checkpoint
    with the best validation accuracy.
    """
    # Prefer the GPU when one is available.
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print("using {} device.".format(device))

    # Data augmentation / preprocessing. The normalisation constants are
    # the ImageNet channel means/stds the pretrained weights expect.
    data_transform = {
        "train": transforms.Compose([transforms.RandomResizedCrop(224),
                                     transforms.RandomHorizontalFlip(),
                                     transforms.ToTensor(),
                                     transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])]),
        "val": transforms.Compose([transforms.Resize(256),
                                   transforms.CenterCrop(224),
                                   transforms.ToTensor(),
                                   transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])}

    # Dataset location. (The previous `data_root = os.getcwd()` assignment
    # was dead code — it was immediately overwritten — so it is removed.)
    data_root = '/home/other/alexnet/resnet'
    image_path = os.path.join(data_root, 'data_set', 'flower_data')
    assert os.path.exists(image_path), "{} path does not exist.".format(image_path)

    # Dataset loading; labels are derived from sub-folder names.
    train_dataset = datasets.ImageFolder(root=os.path.join(image_path, 'train'),
                                         transform=data_transform['train'])
    validate_dataset = datasets.ImageFolder(root=os.path.join(image_path, 'val'),
                                            transform=data_transform['val'])
    val_num = len(validate_dataset)
    train_num = len(train_dataset)

    # class_to_idx maps e.g. {'daisy': 0, 'dandelion': 1, ...}; save the
    # inverted mapping to JSON so prediction indices can later be turned
    # back into human-readable class names.
    flower_list = train_dataset.class_to_idx
    class_dict = dict((v, k) for k, v in flower_list.items())
    json_str = json.dumps(class_dict, indent=4)
    with open('class_indices.json', 'w') as json_file:
        json_file.write(json_str)

    batch_size = 16
    # Worker count: min(CPU cores, batch size, hard cap of 8).
    nw = min([os.cpu_count(), batch_size if batch_size > 1 else 0, 8])  # number of workers
    print('Using {} dataloader workers every process'.format(nw))

    train_loader = DataLoader(dataset=train_dataset,
                              batch_size=batch_size, shuffle=True,
                              num_workers=nw)
    validate_loader = DataLoader(dataset=validate_dataset,
                                 batch_size=batch_size, shuffle=True,
                                 num_workers=nw)
    print("using {} images for training, {} images for validation.".format(train_num,
                                                                           val_num))

    # Load pretrained weights on CPU first, then replace the 1000-way
    # ImageNet head with a 5-way head for the flower classes.
    net = resnet34()
    model_weight_path = '/home/other/alexnet/resnet/.idea/resnet34_pre.pth'
    assert os.path.exists(model_weight_path), "file {} does not exist.".format(model_weight_path)
    net.load_state_dict(torch.load(model_weight_path, map_location='cpu'))
    net.fc = nn.Linear(net.fc.in_features, 5)
    net.to(device)

    criterion = nn.CrossEntropyLoss()  # standard loss for multi-class classification
    optimizer = optim.Adam(net.parameters(), lr=1e-4)

    epochs = 3
    best_acc = 0.
    save_path = 'resnet34.pth'
    for epoch in range(epochs):
        # ---- train ----
        net.train()
        train_bar = tqdm(train_loader)
        for img, label in train_bar:
            logits = net(img.to(device))
            label = label.to(device)
            loss = criterion(logits, label)
            optimizer.zero_grad()  # gradients accumulate; clear them every step
            loss.backward()
            optimizer.step()
            # (A leftover debug print of the logits/label types was removed here.)
            train_bar.desc = "train epoch[{}/{}] loss:{:.3f}".format(epoch + 1,
                                                                     epochs,
                                                                     loss)
        # ---- validate ----
        net.eval()
        acc = 0.
        test_loss = 0.
        with torch.no_grad():  # no gradient tracking needed for evaluation
            val_bar = tqdm(validate_loader)
            for img, label in val_bar:
                logits = net(img.to(device))
                label = label.to(device)
                loss = criterion(logits, label)
                test_loss += loss.item()
                pred = torch.max(logits, dim=1)[1]  # index of the highest logit
                # label is already on `device`; the redundant .to(device) was removed.
                acc += torch.eq(pred, label).sum().item()
                val_bar.desc = "validate epoch[{}/{}]".format(epoch + 1,
                                                              epochs)
        val_acc = acc / val_num
        print('[epoch {}] test_loss: {:.3f} val_accuracy: {:.3f}'.format(
            epoch + 1, test_loss / len(validate_loader), val_acc))
        # Keep only the checkpoint with the best validation accuracy.
        if val_acc > best_acc:
            best_acc = val_acc
            torch.save(net.state_dict(), save_path)
    print("finished training!")
# Run training only when executed as a script, not when imported.
if __name__ == '__main__':
    main()