ResNet 实现
目录
- 网络结构
- 介绍一种写网络的便捷方法
- 函数
- 源码
参考:
- 【深度学习】PyTorch Dataset类的使用与实例分析
https://zhuanlan.zhihu.com/p/500839903
- bilibili
https://www.bilibili.com/video/BV14E411H7Uw
网络结构
网络结构如下:
resnet浅层结构(18-layer和34-layer)的残差模块主路径上有两个 3×3 卷积层;带虚线的 residual 结构在捷径分支上另有一个 1×1 卷积,用于升高 channel 并下采样:
resnet深层结构(50-layer、101-layer 和 152-layer)的残差模块主路径上有三个卷积层(1×1、3×3、1×1);带虚线的 residual 结构在捷径分支上另有一个 1×1 卷积:
介绍一种写网络的便捷方法
mac上对着网络写代码的便捷方式,我们往往将图片置于顶层再进行写代码,下面介绍两种将图片置于顶层的方法。
-
截图悬浮
使用command + shift + 4
鼠标左键选择你需要截图的区域(鼠标右键是取消截图)。截图后双击打开右下角截图图片,你的图片便是自动悬浮在最顶层。 -
使图片始终浮动在最上方
如果图片并不是截图生成,可以将需要图片放在最顶层后,在点屏幕任何地方的时候按住command
,便不会更改屏幕原有图层顺序。mac显示桌面快捷键command + F3
。
函数
torch.nn.BatchNorm2d类
用于对feature map特征矩阵进行BN操作,我们往往只需要指定输入BN层的feature map channels特征矩阵的通道数,BN需要训练参数,BN函数原型如下:
torch.nn.BatchNorm2d(num_features,
eps=1e-05, momentum=0.1, affine=True, track_running_stats=True, device=None, dtype=None)
torch.Tensor.to()方法
将tensor传到GPU上。
查看图片维度的方法
len(np.shape(image)) == 3 # image是一个from PIL import Image的对象
torch.utils.data.Dataset类
Dataset的作用是:提供一种方式去获取Image和对应的真实Label。
我们将子类继承自Dataset类后一定要重写__init__()
和__getitem__()
方法来获取每一个index对应的Image和Label。在__getitem__()
中我们还应该用PIL库打开图片,并使用transforms函数将打开的图片处理后(得转换为torch.Tensor格式)再将Image和Label一块return。
当我们得到一个数据集时,Dataset类可以帮我们提取我们需要的数据,我们用子类继承Dataset类,我们先给每个数据一个编号(idx),在后面的神经网络中,初始化Dataset子类实例后,就可以通过这个编号去实例对象中读取相应的数据,会自动调用__getitem__方法,同时子类对象也会获取相应真实的Label(人为去复写即可)。
卷积和全连接的区别
我们需要注意的是,卷积其实是用作特征提取的,真正的分类作用的是后面的全连接层网络。
python list添加元素
python list添加元素主要有两种实现形式,分别是+ []
和使用append()
函数。但不管使用哪一种方式,都是添加的变量指向的类的实例化对象或者字面量,而不是对象的变量名。
list_a = []
element_a = "b"
# 采用两个列表直接相加形成新列表
list_a += [element_a]
# 使用append()函数
list_a.append(element_a)
# 添加的是对象的实例化变量
conv2d = nn.Conv2d(in_channels, v, kernel_size=3, padding=1)
layers += [conv2d, nn.ReLU(True)]
# 输出 >>> [Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), ReLU(inplace=True)]
*和**
将一个大变量解包成一个一个的小变量。
terminal 操作
mac和linux的terminal操作不同,具体如下:
# 启动虚拟环境
source activate env_cp39_ymz # mac
conda activate env_cp39_ymz # linux
# terminal 清屏
command + k # mac
clear # linux
# terminal 终止进程
control + z # mac
ctrl + c # linux终止进程
我们需要注意的是mac下control + c
是结束键盘输入,control + z
是终止进程。但是mac下的control + z
在linux下是挂起进程的意思(暂停该进程)。在linux下挂起进程后需要用ps -ef | grep [关键字]
和kill -9 [PID]
杀死进程。
源码
model.py
from turtle import forward
import torch.nn as nn
import torch
class BasicBlock(nn.Module):
    """Residual block for shallow ResNets (18- and 34-layer).

    Main path: two 3x3 convolutions, each followed by BatchNorm; the
    first conv may downsample spatially via ``stride``.  When the
    shortcut branch needs a projection (the "dashed line" case in the
    paper), pass that module as ``downsample``.
    """

    # Output channels = out_channels * expansion; 1 for the basic block.
    expansion = 1

    def __init__(self, in_channels, out_channels, stride=1, downsample=None, **kwargs) -> None:
        """Build the block's layers.

        Args:
            in_channels: channels of the input feature map.
            out_channels: channels produced by both convolutions.
            stride: stride of the first conv (2 halves the spatial size).
            downsample: optional module projecting the identity branch
                so its shape matches the main path; None for the plain
                (solid-line) residual connection.
            **kwargs: ignored; lets ResNet pass groups/width_per_group
                uniformly to all block types.
        """
        super(BasicBlock, self).__init__()
        # nn.Conv2d's padding defaults to 0, so padding=1 is set
        # explicitly to keep 3x3 convs size-preserving (at stride 1).
        # bias=False because the following BatchNorm makes a bias redundant.
        self.conv1 = nn.Conv2d(in_channels=in_channels, out_channels=out_channels,
                               kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(out_channels)
        # ReLU has no trainable parameters, so a single instance is reused.
        self.relu = nn.ReLU()
        self.conv2 = nn.Conv2d(in_channels=out_channels, out_channels=out_channels,
                               kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(out_channels)
        self.downsample = downsample  # projection for the dashed-line shortcut

    def forward(self, x):
        """Forward pass: conv-BN-ReLU-conv-BN, add shortcut, final ReLU."""
        identity = x
        if self.downsample is not None:
            identity = self.downsample(x)

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)
        out = self.conv2(out)
        out = self.bn2(out)
        out += identity  # element-wise addition of the two tensors
        out = self.relu(out)
        return out
class Bottleneck(nn.Module):
    """Residual block for deep ResNets (50-, 101-, 152-layer).

    Main path: 1x1 conv (reduce channels) -> 3x3 conv (may downsample)
    -> 1x1 conv (expand channels by ``expansion``).  ``downsample`` is
    the optional 1x1 projection for the shortcut branch.
    """

    # The final 1x1 conv outputs out_channels * 4.
    expansion = 4

    def __init__(self, in_channels, out_channels, stride=1, downsample=None, **kwargs) -> None:
        """Build the block's layers.

        Args:
            in_channels: channels of the input feature map.
            out_channels: channels of the two inner convs; the block's
                output has out_channels * expansion channels.
            stride: stride of the middle 3x3 conv.
            downsample: optional shortcut projection; None keeps identity.
            **kwargs: ignored (accepted for a uniform block interface).
        """
        super(Bottleneck, self).__init__()
        # Bug fix: the 1x1 convs had padding=1, which grows the spatial
        # size by 2 per conv and makes `out += identity` fail with a
        # shape mismatch.  1x1 convs must use padding=0 (the default).
        self.conv1 = nn.Conv2d(in_channels=in_channels, out_channels=out_channels,
                               kernel_size=1, stride=1, bias=False)
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU(inplace=True)  # inplace saves memory
        self.conv2 = nn.Conv2d(in_channels=out_channels, out_channels=out_channels,
                               kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(out_channels)
        self.conv3 = nn.Conv2d(in_channels=out_channels, out_channels=out_channels * self.expansion,
                               kernel_size=1, stride=1, bias=False)
        self.bn3 = nn.BatchNorm2d(out_channels * self.expansion)
        self.downsample = downsample

    def forward(self, x):
        """Forward pass: three conv-BN stages, add shortcut, final ReLU."""
        identity = x
        if self.downsample is not None:
            identity = self.downsample(x)

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)
        out = self.conv2(out)
        out = self.bn2(out)
        out = self.relu(out)
        out = self.conv3(out)
        out = self.bn3(out)
        out += identity
        out = self.relu(out)
        return out
class ResNet(nn.Module):
    """ResNet backbone assembled from residual blocks.

    Args:
        block: residual block class (e.g. BasicBlock or Bottleneck);
            must expose an ``expansion`` class attribute.
        blocks_num: number of blocks in each of the four stages,
            e.g. [3, 4, 6, 3] for ResNet-34.
        num_classes: output size of the final fully-connected layer.
        include_top: if False, omit the average-pool + fc head so the
            network can serve as a feature extractor.
        groups / width_per_group: forwarded to every block (ResNeXt-style
            variants use them; the plain blocks absorb them via **kwargs).
    """

    def __init__(self, block, blocks_num, num_classes=1000,
                 include_top=True, groups=1, width_per_group=64) -> None:
        super(ResNet, self).__init__()
        # Bug fix: these three assignments were commented out, but
        # self.include_top is read in forward() and self.groups /
        # self.width_per_group in _make_layer(), raising AttributeError.
        self.include_top = include_top
        self.in_channel = 64
        self.groups = groups
        self.width_per_group = width_per_group
        # Stem: 7x7/2 conv + BN + ReLU + 3x3/2 max-pool.
        # bias=False because BN directly follows the conv.
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=self.in_channel,
                               kernel_size=7, stride=2, padding=3, bias=False)
        self.bn1 = nn.BatchNorm2d(self.in_channel)  # BN has trainable parameters
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        # Four residual stages; stages 2-4 halve the spatial size.
        self.layer1 = self._make_layer(block, 64, blocks_num[0])
        self.layer2 = self._make_layer(block, 128, blocks_num[1], stride=2)
        self.layer3 = self._make_layer(block, 256, blocks_num[2], stride=2)
        self.layer4 = self._make_layer(block, 512, blocks_num[3], stride=2)
        if self.include_top:
            self.avgpool = nn.AdaptiveAvgPool2d((1, 1))  # output size = (1, 1)
            self.fc = nn.Linear(512 * block.expansion, num_classes)

        # Kaiming initialization for every conv layer.
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')

    def _make_layer(self, block, channel, block_num, stride=1):
        """Build one stage of ``block_num`` residual blocks.

        The first block may downsample and/or widen the channels; in
        that case a 1x1 conv + BN projection is attached to its shortcut.
        """
        downsample = None
        if stride != 1 or self.in_channel != channel * block.expansion:
            downsample = nn.Sequential(
                nn.Conv2d(self.in_channel, channel * block.expansion, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(channel * block.expansion))

        layers = []
        layers.append(block(self.in_channel,
                            channel,
                            downsample=downsample,
                            stride=stride,
                            groups=self.groups,
                            width_per_group=self.width_per_group))
        self.in_channel = channel * block.expansion
        # Remaining blocks keep the shape, so no downsample is needed.
        for _ in range(1, block_num):
            layers.append(block(self.in_channel,
                                channel,
                                groups=self.groups,
                                width_per_group=self.width_per_group))
        # nn.Sequential takes modules as positional args, so unpack the list.
        return nn.Sequential(*layers)

    def forward(self, x):
        """Return logits, or the stage-4 feature map when include_top is False."""
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)

        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)

        if self.include_top:
            x = self.avgpool(x)
            x = torch.flatten(x, 1)
            x = self.fc(x)
        return x
def resnet34(num_classes=1000, include_top=True):
    """Build a ResNet-34 (BasicBlock, stage sizes 3-4-6-3)."""
    # Pretrained weights: https://download.pytorch.org/models/resnet34-333f7ec4.pth
    stage_blocks = [3, 4, 6, 3]
    return ResNet(BasicBlock, stage_blocks,
                  num_classes=num_classes, include_top=include_top)
def resnet101(num_classes=1000, include_top=True):
    """Build a ResNet-101 (Bottleneck, stage sizes 3-4-23-3)."""
    # Pretrained weights: https://download.pytorch.org/models/resnet101-5d3b4d8f.pth
    stage_blocks = [3, 4, 23, 3]
    return ResNet(Bottleneck, stage_blocks,
                  num_classes=num_classes, include_top=include_top)
train.py
import os
import sys
import json
import torch
import torch.nn as nn
from torchvision import transforms, datasets
import torch.optim as optim
from tqdm import tqdm
from model import vgg
def main():
    """Train ResNet-34 on the flower dataset and checkpoint the best model.

    Expects ../../data_set/flower_data/{train,val} laid out for
    torchvision.datasets.ImageFolder; writes class_indices.json and the
    best weights to ./resNet34.pth (the path predict.py loads).
    """
    # Bug fix: this script belongs to the ResNet example, but it imported
    # and called a non-existent vgg() builder and saved './vgg16Net.pth'
    # while predict.py loads './resNet34.pth'.  Import the real builder
    # locally; the stale module-level `from model import vgg` should be
    # removed once callers are updated.
    from model import resnet34

    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print("using {} device.".format(device))

    data_transform = {
        "train": transforms.Compose([transforms.RandomResizedCrop(224),
                                     transforms.RandomHorizontalFlip(),
                                     transforms.ToTensor(),
                                     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))]),
        "val": transforms.Compose([transforms.Resize((224, 224)),
                                   transforms.ToTensor(),
                                   transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])}

    data_root = os.path.abspath(os.path.join(os.getcwd(), "../.."))  # get data root path
    image_path = os.path.join(data_root, "data_set", "flower_data")  # flower data set path
    assert os.path.exists(image_path), "{} path does not exist.".format(image_path)
    train_dataset = datasets.ImageFolder(root=os.path.join(image_path, "train"),
                                         transform=data_transform["train"])
    train_num = len(train_dataset)

    # {'daisy':0, 'dandelion':1, 'roses':2, 'sunflower':3, 'tulips':4}
    flower_list = train_dataset.class_to_idx
    cla_dict = dict((val, key) for key, val in flower_list.items())
    # Write the index->name mapping so predict.py can decode predictions.
    json_str = json.dumps(cla_dict, indent=4)
    with open('class_indices.json', 'w') as json_file:
        json_file.write(json_str)

    batch_size = 32
    nw = min([os.cpu_count(), batch_size if batch_size > 1 else 0, 8])  # number of workers
    print('Using {} dataloader workers every process'.format(nw))

    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=batch_size, shuffle=True,
                                               num_workers=nw)

    validate_dataset = datasets.ImageFolder(root=os.path.join(image_path, "val"),
                                            transform=data_transform["val"])
    val_num = len(validate_dataset)
    validate_loader = torch.utils.data.DataLoader(validate_dataset,
                                                  batch_size=batch_size, shuffle=False,
                                                  num_workers=nw)
    print("using {} images for training, {} images for validation.".format(train_num,
                                                                           val_num))

    model_name = "resnet34"
    net = resnet34(num_classes=5)
    net.to(device)
    loss_function = nn.CrossEntropyLoss()
    optimizer = optim.Adam(net.parameters(), lr=0.0001)

    epochs = 30
    best_acc = 0.0
    save_path = './resNet34.pth'  # must match predict.py's weights_path
    train_steps = len(train_loader)
    for epoch in range(epochs):
        # train
        net.train()
        running_loss = 0.0
        train_bar = tqdm(train_loader, file=sys.stdout)
        for step, data in enumerate(train_bar):
            images, labels = data
            optimizer.zero_grad()
            outputs = net(images.to(device))
            loss = loss_function(outputs, labels.to(device))
            loss.backward()
            optimizer.step()

            # print statistics
            running_loss += loss.item()
            train_bar.desc = "train epoch[{}/{}] loss:{:.3f}".format(epoch + 1,
                                                                     epochs,
                                                                     loss)

        # validate
        net.eval()
        acc = 0.0  # accumulate accurate number / epoch
        with torch.no_grad():
            val_bar = tqdm(validate_loader, file=sys.stdout)
            for val_data in val_bar:
                val_images, val_labels = val_data
                outputs = net(val_images.to(device))
                predict_y = torch.max(outputs, dim=1)[1]
                acc += torch.eq(predict_y, val_labels.to(device)).sum().item()

        val_accurate = acc / val_num
        print('[epoch %d] train_loss: %.3f val_accuracy: %.3f' %
              (epoch + 1, running_loss / train_steps, val_accurate))
        # Keep only the best-performing checkpoint.
        if val_accurate > best_acc:
            best_acc = val_accurate
            torch.save(net.state_dict(), save_path)

    print('Finished Training')

if __name__ == '__main__':
    main()
predict.py
import os
import json
import torch
from PIL import Image
from torchvision import transforms
import matplotlib.pyplot as plt
from model import resnet34
def main():
    """Classify ../tulip.jpg with a trained ResNet-34 and show the result.

    Requires class_indices.json and resNet34.pth produced by train.py.
    """
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    # Standard ImageNet-style preprocessing (resize, center crop, normalize).
    data_transform = transforms.Compose(
        [transforms.Resize(256),
         transforms.CenterCrop(224),
         transforms.ToTensor(),
         transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])

    # load image
    img_path = "../tulip.jpg"
    # Typo fix in all three messages below: "dose" -> "does".
    assert os.path.exists(img_path), "file: '{}' does not exist.".format(img_path)
    img = Image.open(img_path)
    plt.imshow(img)
    # [N, C, H, W]
    img = data_transform(img)
    # expand batch dimension
    img = torch.unsqueeze(img, dim=0)

    # read class_indict (index -> class-name mapping written by train.py)
    json_path = './class_indices.json'
    assert os.path.exists(json_path), "file: '{}' does not exist.".format(json_path)

    with open(json_path, "r") as f:
        class_indict = json.load(f)

    # create model
    model = resnet34(num_classes=5).to(device)

    # load model weights
    weights_path = "./resNet34.pth"
    assert os.path.exists(weights_path), "file: '{}' does not exist.".format(weights_path)
    model.load_state_dict(torch.load(weights_path, map_location=device))

    # prediction
    model.eval()
    with torch.no_grad():
        # predict class
        output = torch.squeeze(model(img.to(device))).cpu()
        predict = torch.softmax(output, dim=0)
        predict_cla = torch.argmax(predict).numpy()

    print_res = "class: {}   prob: {:.3}".format(class_indict[str(predict_cla)],
                                                 predict[predict_cla].numpy())
    plt.title(print_res)
    # Print the probability of every class, not just the argmax.
    for i in range(len(predict)):
        print("class: {:10}   prob: {:.3}".format(class_indict[str(i)],
                                                  predict[i].numpy()))
    plt.show()

if __name__ == '__main__':
    main()