本文参照B站一位UP主的视频,基于他的代码整理了一些笔记:
原视频
一.model.py 搭建网络
我把model.py分成四个部分:
1.定义残差结构 适用于18、34层的
# Residual block used by the 18- and 34-layer ResNets.
class BasicBlock(nn.Module):
    """Residual block made of two 3x3 convolutions plus a shortcut branch.

    The main branch is conv3x3 -> BN -> ReLU -> conv3x3 -> BN. Its result is
    added to the shortcut (the input itself, or ``downsample(input)`` when a
    projection is supplied) and passed through a final ReLU.
    """

    # Multiplier applied to ``out_channel`` to get the block's output depth.
    # The 18/34-layer blocks do not widen their output, so it is 1.
    expansion = 1

    def __init__(self, in_channel, out_channel, stride=1, downsample=None, **kwargs):
        """Create the layers of the block.

        Args:
            in_channel: Depth of the input feature map.
            out_channel: Depth of the output feature map.
            stride: Stride of the first convolution. With kernel 3 and
                padding 1, stride 1 keeps the spatial size
                ((n - 3 + 2) / 1 + 1 = n) and stride 2 halves it
                (floor((n - 3 + 2) / 2) + 1).
            downsample: Optional module applied to the input on the shortcut
                branch (the "dashed" shortcut); ``None`` selects the plain
                identity shortcut.
            **kwargs: Accepted and ignored, so the block can be built with
                the same keyword arguments as ``Bottleneck``.
        """
        super().__init__()

        # Each convolution is followed by BatchNorm, so the conv bias is
        # disabled; BN sits between the convolution and the ReLU.
        self.conv1 = nn.Conv2d(in_channel, out_channel, kernel_size=3,
                               stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(out_channel)
        self.relu = nn.ReLU()

        # The second convolution consumes conv1's output, hence out_channel
        # on both sides, and never changes the spatial size.
        self.conv2 = nn.Conv2d(out_channel, out_channel, kernel_size=3,
                               stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(out_channel)

        self.downsample = downsample

    def forward(self, x):
        """Run the block on ``x`` and return the activated residual sum."""
        # Shortcut branch: identity unless a projection module was supplied.
        shortcut = x if self.downsample is None else self.downsample(x)

        # Main branch.
        hidden = self.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(hidden))

        out += shortcut
        return self.relu(out)
2.定义残差结构 适用于50、101、152层的
# Residual block used by the 50-, 101- and 152-layer ResNets.
class Bottleneck(nn.Module):
    """Bottleneck residual block: 1x1 -> 3x3 -> 1x1 convolutions plus shortcut.

    Note: in the original paper the first 1x1 convolution of a downsampling
    block has stride 2 and the 3x3 convolution has stride 1. The official
    PyTorch implementation instead uses stride 1 for the 1x1 convolution and
    stride 2 for the 3x3 convolution, which improves top-1 accuracy by about
    0.5%. See ResNet v1.5:
    https://ngc.nvidia.com/catalog/model-scripts/nvidia:resnet_50_v1_5_for_pytorch
    """

    # The last 1x1 convolution outputs ``out_channel * expansion`` channels.
    expansion = 4

    def __init__(self, in_channel, out_channel, stride=1, downsample=None, groups=1, width_per_group=64):
        """Create the layers of the block.

        Args:
            in_channel: Depth of the input feature map.
            out_channel: Base channel count; the block outputs
                ``out_channel * expansion`` channels.
            stride: Stride of the middle 3x3 convolution.
            downsample: Optional module applied to the input on the shortcut
                branch; ``None`` selects the identity shortcut.
            groups: Number of groups of the 3x3 convolution.
            width_per_group: Per-group width; together with ``groups`` it
                sets the channel count of the two inner convolutions.
        """
        super().__init__()

        # Channel count of the inner layers (equals out_channel for the
        # default groups=1, width_per_group=64).
        scale = width_per_group / 64.
        width = int(out_channel * scale) * groups

        # 1x1 convolution: squeeze channels.
        self.conv1 = nn.Conv2d(in_channel, width, kernel_size=1, stride=1, bias=False)
        self.bn1 = nn.BatchNorm2d(width)

        # 3x3 (grouped) convolution: the only layer that uses `stride`.
        self.conv2 = nn.Conv2d(width, width, kernel_size=3, stride=stride,
                               padding=1, groups=groups, bias=False)
        self.bn2 = nn.BatchNorm2d(width)

        # 1x1 convolution: unsqueeze channels to the block's output depth.
        out_depth = out_channel * self.expansion
        self.conv3 = nn.Conv2d(width, out_depth, kernel_size=1, stride=1, bias=False)
        self.bn3 = nn.BatchNorm2d(out_depth)

        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample

    def forward(self, x):
        """Run the block on ``x`` and return the activated residual sum."""
        shortcut = x if self.downsample is None else self.downsample(x)

        hidden = self.relu(self.bn1(self.conv1(x)))
        hidden = self.relu(self.bn2(self.conv2(hidden)))
        out = self.bn3(self.conv3(hidden))

        out += shortcut
        return self.relu(out)
3.整个网络的框架部分
# Overall network skeleton.
class ResNet(nn.Module):
    """ResNet/ResNeXt backbone assembled from a configurable residual block."""

    def __init__(self, block, blocks_num, num_classes=1000, include_top=True, groups=1, width_per_group=64):
        """Build the stem, the four residual stages and the optional head.

        Args:
            block: Residual block class (``BasicBlock`` for the 18/34-layer
                nets, ``Bottleneck`` for the 50/101/152-layer nets). It must
                expose an ``expansion`` class attribute.
            blocks_num: Number of residual blocks in each of the four stages.
            num_classes: Output size of the final fully connected layer.
            include_top: When False, the average pool and fully connected
                layer are not created and ``forward`` returns the feature
                map of the last stage.
            groups: Forwarded to every block.
            width_per_group: Forwarded to every block.
        """
        super().__init__()
        self.include_top = include_top
        # Depth of the feature map entering the first stage (after max
        # pooling); it is 64 regardless of network depth. _make_layer
        # updates it as stages are added.
        self.in_channel = 64
        self.groups = groups
        self.width_per_group = width_per_group

        # Stem: 7x7 convolution with 64 kernels; padding 3 together with
        # stride 2 halves the height and width.
        self.conv1 = nn.Conv2d(3, self.in_channel, kernel_size=7, stride=2, padding=3, bias=False)
        self.bn1 = nn.BatchNorm2d(self.in_channel)
        self.relu = nn.ReLU(inplace=True)
        # 3x3 max pooling; padding 1 together with stride 2 halves the
        # height and width again.
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        # layer1..layer4 correspond to conv2..conv5: (base channels, stride).
        stage_plan = ((64, 1), (128, 2), (256, 2), (512, 2))
        for idx, (width, stride) in enumerate(stage_plan):
            stage = self._make_layer(block, width, blocks_num[idx], stride=stride)
            setattr(self, "layer{}".format(idx + 1), stage)

        if self.include_top:
            # Adaptive average pooling reduces every feature map to 1x1.
            self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
            self.fc = nn.Linear(512 * block.expansion, num_classes)

        # Kaiming-normal initialisation for every convolution in the model.
        conv_layers = (m for m in self.modules() if isinstance(m, nn.Conv2d))
        for conv in conv_layers:
            nn.init.kaiming_normal_(conv.weight, mode='fan_out', nonlinearity='relu')

    def _make_layer(self, block, channel, block_num, stride=1):
        """Assemble one stage of ``block_num`` residual blocks.

        Args:
            block: Residual block class (``BasicBlock`` or ``Bottleneck``).
            channel: Base channel count of the stage, i.e. the number of
                kernels of the first convolution in each block; the stage
                outputs ``channel * block.expansion`` channels.
            block_num: Number of residual blocks in the stage.
            stride: Stride of the first block of the stage.

        Returns:
            An ``nn.Sequential`` holding the blocks in order.
        """
        out_depth = channel * block.expansion

        # A projection shortcut is needed when the first block changes the
        # spatial size or the channel count of its input.
        shortcut = None
        if not (stride == 1 and self.in_channel == out_depth):
            shortcut = nn.Sequential(
                nn.Conv2d(self.in_channel, out_depth, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_depth))

        shared = dict(groups=self.groups, width_per_group=self.width_per_group)

        # Only the first block receives the stride and the shortcut module.
        stage = [block(self.in_channel, channel, downsample=shortcut, stride=stride, **shared)]
        self.in_channel = out_depth

        # The remaining blocks keep the shape and use identity shortcuts.
        stage.extend(block(self.in_channel, channel, **shared) for _ in range(1, block_num))

        # nn.Sequential runs its modules one after another in order.
        return nn.Sequential(*stage)

    def forward(self, x):
        """Return class scores, or the last feature map if ``include_top`` is False."""
        stem = self.maxpool(self.relu(self.bn1(self.conv1(x))))
        features = self.layer4(self.layer3(self.layer2(self.layer1(stem))))
        if not self.include_top:
            return features

        # Average pool, flatten everything after the batch dimension, then
        # apply the fully connected layer.
        pooled = torch.flatten(self.avgpool(features), 1)
        return self.fc(pooled)
4.定义网络的函数:
def resnet34(num_classes=1000, include_top=True):
    """Build a ResNet-34: BasicBlock stages holding 3, 4, 6 and 3 blocks."""
    # Pretrained weights: https://download.pytorch.org/models/resnet34-333f7ec4.pth
    stage_depths = [3, 4, 6, 3]
    return ResNet(BasicBlock, stage_depths, num_classes=num_classes, include_top=include_top)
def resnet50(num_classes=1000, include_top=True):
    """Build a ResNet-50: Bottleneck stages holding 3, 4, 6 and 3 blocks."""
    # Pretrained weights: https://download.pytorch.org/models/resnet50-19c8e357.pth
    stage_depths = [3, 4, 6, 3]
    return ResNet(Bottleneck, stage_depths, num_classes=num_classes, include_top=include_top)
def resnet101(num_classes=1000, include_top=True):
    """Build a ResNet-101: Bottleneck stages holding 3, 4, 23 and 3 blocks."""
    # Pretrained weights: https://download.pytorch.org/models/resnet101-5d3b4d8f.pth
    stage_depths = [3, 4, 23, 3]
    return ResNet(Bottleneck, stage_depths, num_classes=num_classes, include_top=include_top)
def resnext50_32x4d(num_classes=1000, include_top=True):
    """Build a ResNeXt-50 (32 groups, width 4 per group) from Bottleneck blocks."""
    # Pretrained weights: https://download.pytorch.org/models/resnext50_32x4d-7cdf4587.pth
    stage_depths = [3, 4, 6, 3]
    return ResNet(
        Bottleneck,
        stage_depths,
        num_classes=num_classes,
        include_top=include_top,
        groups=32,
        width_per_group=4,
    )
def resnext101_32x8d(num_classes=1000, include_top=True):
    """Build a ResNeXt-101 (32 groups, width 8 per group) from Bottleneck blocks."""
    # Pretrained weights: https://download.pytorch.org/models/resnext101_32x8d-8ba56ff5.pth
    stage_depths = [3, 4, 23, 3]
    return ResNet(
        Bottleneck,
        stage_depths,
        num_classes=num_classes,
        include_top=include_top,
        groups=32,
        width_per_group=8,
    )
二.split_data.py 处理图片
import os
from shutil import copy, rmtree
import random
def mk_file(file_path: str):
    """Create ``file_path`` as an empty directory tree.

    If the path already exists it is removed with ``rmtree`` first, so any
    previous contents are discarded before the directory is recreated.
    """
    already_present = os.path.exists(file_path)
    if already_present:
        # Start from a clean slate: delete the old folder, then recreate it.
        rmtree(file_path)
    os.makedirs(file_path)
def main():
    """Split ``flower_data/flower_photos`` into ``train`` and ``val`` folders.

    Every sub-directory of ``<cwd>/flower_data/flower_photos`` is treated as
    one class. For each class, ``split_rate`` of the images are sampled at
    random and copied to ``<cwd>/flower_data/val/<class>``; the rest are
    copied to ``<cwd>/flower_data/train/<class>``. Existing ``train`` and
    ``val`` folders are deleted and recreated.

    Raises:
        AssertionError: If the ``flower_photos`` folder does not exist.
    """
    # Fixed seed so the split is reproducible.
    random.seed(0)

    # Fraction of each class that goes to the validation set (10%).
    split_rate = 0.1

    # Points at the extracted flower_photos folder.
    cwd = os.getcwd()
    data_root = os.path.join(cwd, "flower_data")
    origin_flower_path = os.path.join(data_root, "flower_photos")
    assert os.path.exists(origin_flower_path), "path '{}' does not exist.".format(origin_flower_path)

    # os.listdir returns entries in arbitrary order; sort so the seeded
    # sampling below sees the same sequence on every machine and run.
    flower_class = sorted(cla for cla in os.listdir(origin_flower_path)
                          if os.path.isdir(os.path.join(origin_flower_path, cla)))

    # Create the train/ and val/ roots with one sub-folder per class.
    train_root = os.path.join(data_root, "train")
    mk_file(train_root)
    val_root = os.path.join(data_root, "val")
    mk_file(val_root)
    for cla in flower_class:
        mk_file(os.path.join(train_root, cla))
        mk_file(os.path.join(val_root, cla))

    for cla in flower_class:
        cla_path = os.path.join(origin_flower_path, cla)
        images = sorted(os.listdir(cla_path))
        num = len(images)

        # File names sampled for the validation set; a set gives O(1)
        # membership tests in the loop below.
        eval_index = set(random.sample(images, k=int(num * split_rate)))

        for index, image in enumerate(images):
            # Validation images go under val/, everything else under train/.
            target_root = val_root if image in eval_index else train_root
            copy(os.path.join(cla_path, image), os.path.join(target_root, cla))
            print("\r[{}] processing [{}/{}]".format(cla, index+1, num), end="")  # processing bar
        print()

    print("processing done!")


if __name__ == '__main__':
    main()
三.train.py 训练模型
import os
import sys
import json
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import transforms, datasets
from tqdm import tqdm
import torchvision.models.resnet
from model import resnet34
def main():
    """Fine-tune a pretrained ResNet-34 on the flower dataset.

    Steps:
      1. Build the train/val ``ImageFolder`` datasets and data loaders from
         ``<cwd>/data_set/flower_data``.
      2. Write the index -> class-name mapping to ``class_indices.json``.
      3. Load the weights in ``./resnet34-pre.pth`` and replace the fully
         connected layer with one sized for the dataset's classes.
      4. Train with Adam and cross-entropy loss, validate after every epoch
         and save the weights with the best validation accuracy to
         ``./resNet34.pth``.

    Raises:
        AssertionError: If the dataset folder or the pretrained weight file
            does not exist.
    """
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print("using {} device.".format(device))

    data_transform = {
        "train":
            transforms.Compose([  # Compose chains the steps below
                # Crop a random region and resize it to 224x224.
                transforms.RandomResizedCrop(224),
                # Flip horizontally with the default probability of 0.5.
                transforms.RandomHorizontalFlip(),
                # PIL image (H, W, C) -> float tensor (C, H, W) scaled to [0, 1].
                transforms.ToTensor(),
                # Per channel: subtract the mean, then divide by the standard
                # deviation. These statistics come from the ImageNet training set.
                transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
            ]),
        "val":
            transforms.Compose([
                transforms.Resize(256),      # scale the shorter side to 256
                transforms.CenterCrop(224),  # crop the central 224x224 patch
                transforms.ToTensor(),
                transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
            ])
    }

    data_root = os.path.abspath(os.path.join(os.getcwd(), "./"))  # get data root path
    image_path = os.path.join(data_root, "data_set", "flower_data")  # flower data set path
    assert os.path.exists(image_path), "{} path does not exist.".format(image_path)

    # datasets.ImageFolder expects one sub-folder per class under `root`;
    # `transform` is applied to every loaded image.
    train_dataset = datasets.ImageFolder(root=os.path.join(image_path, "train"),
                                         transform=data_transform["train"])
    train_num = len(train_dataset)

    # e.g. {'daisy':0, 'dandelion':1, 'roses':2, 'sunflower':3, 'tulips':4}
    flower_list = train_dataset.class_to_idx  # class name -> index
    cla_dict = {idx: name for name, idx in flower_list.items()}
    # write dict into json file
    json_str = json.dumps(cla_dict, indent=4)
    with open('class_indices.json', 'w') as json_file:
        json_file.write(json_str)

    batch_size = 16
    # os.cpu_count() may return None; fall back to a single worker then.
    nw = min([os.cpu_count() or 1, batch_size if batch_size > 1 else 0, 8])  # number of workers
    print('Using {} dataloader workers every process'.format(nw))

    # Use the worker count announced above so the log matches what runs.
    train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True,
                                               num_workers=nw)

    validate_dataset = datasets.ImageFolder(root=os.path.join(image_path, "val"),
                                            transform=data_transform["val"])
    val_num = len(validate_dataset)
    validate_loader = torch.utils.data.DataLoader(validate_dataset, batch_size=batch_size, shuffle=False,
                                                  num_workers=nw)

    print("using {} images for training, {} images for validation.".format(train_num, val_num))

    net = resnet34()
    # load pretrain weights
    # download url: https://download.pytorch.org/models/resnet34-333f7ec4.pth
    model_weight_path = "./resnet34-pre.pth"
    assert os.path.exists(model_weight_path), "file {} does not exist.".format(model_weight_path)
    net.load_state_dict(torch.load(model_weight_path, map_location='cpu'))

    # change fc layer structure: one output per class found in the training
    # folder (5 for the flower dataset) instead of a hard-coded count.
    in_channel = net.fc.in_features
    net.fc = nn.Linear(in_channel, len(flower_list))
    net.to(device)

    # define loss function
    loss_function = nn.CrossEntropyLoss()

    # construct an optimizer over the trainable parameters only
    params = [p for p in net.parameters() if p.requires_grad]
    optimizer = optim.Adam(params, lr=0.0001)  # tune the learning rate as needed

    epochs = 3
    best_acc = 0.0  # best validation accuracy seen so far
    save_path = './resNet34.pth'  # where the best weights are saved
    train_steps = len(train_loader)
    for epoch in range(epochs):
        # train
        # train() puts the BatchNorm layers in training mode (batch statistics).
        net.train()
        running_loss = 0.0  # sum of the batch losses of this epoch
        train_bar = tqdm(train_loader, file=sys.stdout)
        for step, data in enumerate(train_bar):
            images, labels = data
            optimizer.zero_grad()
            logits = net(images.to(device))
            loss = loss_function(logits, labels.to(device))
            loss.backward()
            optimizer.step()

            # print statistics
            loss_value = loss.item()
            running_loss += loss_value
            train_bar.desc = "train epoch[{}/{}] loss:{:.3f}".format(epoch + 1, epochs, loss_value)

        # validate
        # eval() makes the BatchNorm layers use their running statistics.
        net.eval()
        acc = 0.0  # accumulate accurate number / epoch
        with torch.no_grad():
            val_bar = tqdm(validate_loader, file=sys.stdout)
            for val_data in val_bar:
                val_images, val_labels = val_data
                outputs = net(val_images.to(device))
                predict_y = torch.max(outputs, dim=1)[1]
                # count the correctly predicted samples
                acc += torch.eq(predict_y, val_labels.to(device)).sum().item()
                val_bar.desc = "valid epoch[{}/{}]".format(epoch + 1, epochs)

        val_accurate = acc / val_num
        print('[epoch %d] train_loss: %.3f val_accuracy: %.3f' %
              (epoch + 1, running_loss / train_steps, val_accurate))

        # Keep only the weights of the best-performing epoch.
        if val_accurate > best_acc:
            best_acc = val_accurate
            torch.save(net.state_dict(), save_path)

    print('Finished Training')


if __name__ == '__main__':
    main()
四.predict.py 预测
import os
import json
import torch
from PIL import Image
from torchvision import transforms
import matplotlib.pyplot as plt
from model import resnet34
def main():
    """Classify a single image with the fine-tuned ResNet-34 and plot it.

    Loads ``./data_set/tulip.jpg``, applies the same preprocessing as the
    validation transform (resize to 256, center-crop 224, normalize), runs
    the model restored from ``./resNet34.pth`` and prints the probability of
    every class listed in ``./class_indices.json``. The image is shown with
    the top prediction as its title.

    Raises:
        AssertionError: If the image, the class-index JSON or the weight
            file does not exist.
    """
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    data_transform = transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])

    # load image
    img_path = "./data_set/tulip.jpg"
    assert os.path.exists(img_path), "file: '{}' dose not exist.".format(img_path)
    # Force three channels: grayscale or RGBA files would otherwise break
    # the 3-channel Normalize step and the model's first convolution.
    img = Image.open(img_path).convert("RGB")
    plt.imshow(img)
    # [C, H, W]
    img = data_transform(img)
    # expand batch dimension -> [N, C, H, W]
    img = torch.unsqueeze(img, dim=0)

    # read class_indict
    json_path = './class_indices.json'
    assert os.path.exists(json_path), "file: '{}' dose not exist.".format(json_path)
    with open(json_path, "r") as f:
        class_indict = json.load(f)

    # create model; the head size follows the class list instead of a
    # hard-coded count, so it must match the saved checkpoint.
    model = resnet34(num_classes=len(class_indict)).to(device)

    # load model weights
    weights_path = "./resNet34.pth"
    assert os.path.exists(weights_path), "file: '{}' dose not exist.".format(weights_path)
    model.load_state_dict(torch.load(weights_path, map_location=device))

    # prediction
    model.eval()
    with torch.no_grad():
        # Run the model and drop the batch dimension.
        output = torch.squeeze(model(img.to(device))).cpu()
        predict = torch.softmax(output, dim=0)  # probability distribution
        predict_cla = torch.argmax(predict).item()  # index of the most likely class

    print_res = "class: {} prob: {:.3}".format(class_indict[str(predict_cla)],
                                                predict[predict_cla].item())
    plt.title(print_res)
    for i, prob in enumerate(predict):
        print("class: {:10} prob: {:.3}".format(class_indict[str(i)], prob.item()))
    plt.show()


if __name__ == '__main__':
    main()