- 🍨 本文为🔗365天深度学习训练营 中的学习记录博客
- 🍖 原作者:K同学啊 | 接辅导、项目定制
J1 resnet50
前言
前面对基础知识有了详细的了解,已经学会了神经网络的训练流程,这周对深度网络 ResNet 进行学习。
resnet简介
ResNet-50(Residual Network with 50 layers)是一种深度卷积神经网络,由Microsoft Research提出。它是ResNet系列的一部分,具有50个层次,因此得名ResNet-50。ResNet-50是一种非常深的神经网络,通过使用残差块(Residual Blocks)解决了深度网络训练中的梯度消失问题,使得训练更加容易。
深度网络结构: ResNet-50包含50个层次,其中包括一些残差块。这使得网络可以学习非常复杂的特征和表示。
残差块(Residual Block): ResNet的核心是残差学习。在传统的卷积神经网络中,添加更多层次可能导致性能下降,因为梯度逐渐消失。而残差块通过引入跳跃连接(shortcut connections)允许信息直接在层次之间流通,从而避免了梯度消失问题。
平均池化: 在网络的最后,平均池化层用于将整个特征图的空间维度降为1x1,然后连接到最终的全连接层。这有助于减少参数数量,并为网络提供整体的特征。
预训练权重: ResNet-50通常使用在ImageNet数据集上预训练的权重。这些权重可以通过weights='imagenet'参数加载。
代码实现
tensorflow实现
from tensorflow.keras.layers import Dropout
import tensorflow as tf
import glob
import numpy as np
import matplotlib.pyplot as plt
from tensorflow.keras.layers import Input
from tensorflow.keras.layers import Dense,Conv2D,MaxPooling2D,ZeroPadding2D,AveragePooling2D
from tensorflow.keras.layers import Activation,BatchNormalization,Flatten
from tensorflow.keras.models import Model
from tensorflow.keras.preprocessing import image
import tensorflow.keras.backend as K
#from tensorflow.keras.utils.data_utils import get_file
from tensorflow.keras.applications.imagenet_utils import decode_predictions
from tensorflow.keras.applications.imagenet_utils import preprocess_input
from tensorflow.keras import layers
def conv_block(input_tensor, kernel_size, filters, stage, block, strides=(2, 2)):
    """Build a bottleneck residual block with a projection shortcut.

    The main path is 1x1 conv -> `kernel_size` conv -> 1x1 conv. Each
    convolution is followed by batch normalization and Dropout(0.2); the
    first two are additionally followed by ReLU. The shortcut is a strided
    1x1 convolution plus batch normalization, so its output can be added to
    the main path.

    Args:
        input_tensor: Keras tensor fed to both the main path and the shortcut.
        kernel_size: Kernel size of the middle convolution.
        filters: Sequence of exactly three filter counts, e.g. [64, 64, 256].
        stage: Stage label; only used to build layer names.
        block: Block label string such as 'a'; only used to build layer names.
        strides: Strides shared by the first main-path convolution and the
            shortcut convolution.

    Returns:
        The tensor produced by adding both paths and applying ReLU.
    """
    squeeze_ch, middle_ch, expand_ch = filters
    tag = str(stage) + block
    conv_prefix = "".join(("res", tag, "_branch"))
    bn_prefix = "".join(("bn", tag, "_branch"))

    # NOTE: layers are created strictly in the order below; auto-generated
    # layer names depend on creation order, so do not reorder.

    # Reduce the channel count (and apply the block's stride).
    main = Conv2D(
        filters=squeeze_ch,
        kernel_size=(1, 1),
        strides=strides,
        name=conv_prefix + "2a",
    )(input_tensor)
    main = BatchNormalization(name=bn_prefix + "2a")(main)
    main = Dropout(rate=0.2)(main)
    main = Activation("relu")(main)

    # Spatial convolution; 'same' padding keeps the feature-map size.
    main = Conv2D(
        filters=middle_ch,
        kernel_size=kernel_size,
        padding="same",
        name=conv_prefix + "2b",
    )(main)
    main = BatchNormalization(name=bn_prefix + "2b")(main)
    main = Dropout(rate=0.2)(main)
    main = Activation("relu")(main)

    # Expand the channel count again; no ReLU until after the addition.
    main = Conv2D(
        filters=expand_ch,
        kernel_size=(1, 1),
        name=conv_prefix + "2c",
    )(main)
    main = BatchNormalization(name=bn_prefix + "2c")(main)
    main = Dropout(rate=0.2)(main)

    # Projection shortcut: bring input_tensor to the main path's output shape.
    skip = Conv2D(
        filters=expand_ch,
        kernel_size=(1, 1),
        strides=strides,
        name=conv_prefix + "1",
    )(input_tensor)
    skip = BatchNormalization(name=bn_prefix + "1")(skip)

    merged = layers.add([main, skip])
    return Activation("relu")(merged)
def identity_block(input_tensor, kernel_size, filters, stage, block):
    """Build a bottleneck residual block with an identity shortcut.

    The main path is 1x1 conv -> `kernel_size` conv -> 1x1 conv. Every
    convolution is followed by BatchNormalization and then Dropout(0.2); the
    first two are additionally followed by ReLU. The unmodified input is
    added to the main path, so the last filter count must equal the channel
    count of `input_tensor` for the addition to be valid.

    Args:
        input_tensor: Keras tensor fed to the main path and reused as shortcut.
        kernel_size: Kernel size of the middle convolution.
        filters: Sequence of exactly three filter counts, e.g. [64, 64, 256].
        stage: Stage label; only used to build layer names.
        block: Block label string such as 'b'; only used to build layer names.

    Returns:
        The tensor produced by adding both paths and applying ReLU.
    """
    filters1, filters2, filters3 = filters

    conv_name_base = 'res' + str(stage) + block + '_branch'
    bn_name_base = 'bn' + str(stage) + block + '_branch'

    # Reduce the channel count.
    x = Conv2D(filters1, (1, 1), name=conv_name_base + '2a')(input_tensor)
    x = BatchNormalization(name=bn_name_base + '2a')(x)
    x = Dropout(0.2)(x)
    x = Activation('relu')(x)

    # Spatial convolution. BatchNormalization now precedes Dropout here, the
    # same Conv -> BN -> Dropout -> ReLU order used by every other branch of
    # this block and of conv_block (the two were previously swapped).
    x = Conv2D(filters2, kernel_size, padding='same', name=conv_name_base + '2b')(x)
    x = BatchNormalization(name=bn_name_base + '2b')(x)
    x = Dropout(0.2)(x)
    x = Activation('relu')(x)

    # Expand the channel count again; no ReLU until after the addition.
    x = Conv2D(filters3, (1, 1), name=conv_name_base + '2c')(x)
    x = BatchNormalization(name=bn_name_base + '2c')(x)
    x = Dropout(0.2)(x)

    # Identity shortcut: add the block input unchanged.
    x = layers.add([x, input_tensor])
    x = Activation('relu')(x)
    return x
# resnet18: ResNet(BasicBlock, [2, 2, 2, 2])
# resnet34: ResNet(BasicBlock, [3, 4, 6, 3])
# resnet50:ResNet(Bottleneck, [3, 4, 6, 3])
# resnet101:ResNet(Bottleneck, [3, 4, 23, 3])
# resnet152:ResNet(Bottleneck, [3, 8, 36, 3])
def ResNet50(input_shape=(224, 224, 3), classes=1000,
             weights_path="/content/drive/MyDrive/My_files/365_program/tensorflow/resnet50_weights_tf_dim_ordering_tf_kernels.h5"):
    """Assemble the ResNet-50 graph and optionally load weights from a file.

    Args:
        input_shape: Shape of one input image, without the batch dimension.
            The fixed 7x7 average pool at the end is sized for the default
            224x224 input.
        classes: Number of units in the final softmax layer.
        weights_path: File passed to `model.load_weights`. Defaults to the
            path this function has always loaded from; pass None to return
            the model with freshly initialized weights.
            NOTE(review): presumably the default checkpoint holds a
            1000-class head - confirm before combining it with another
            `classes` value.

    Returns:
        A Keras `Model` named 'resnet50'.
    """
    img_input = tf.keras.layers.Input(shape=input_shape)

    # Stem. Shapes in comments assume the default 224x224x3 input.
    x = ZeroPadding2D((3, 3))(img_input)                      # 230x230x3
    x = Conv2D(64, (7, 7), strides=(2, 2), name='conv1')(x)   # 112x112x64
    x = BatchNormalization(name='bn_conv1')(x)
    x = Activation('relu')(x)
    # The pool uses the default 'valid' padding, so 112 -> 55 (not 56).
    x = MaxPooling2D((3, 3), strides=(2, 2))(x)               # 55x55x64

    # (stage, bottleneck filters, block labels, strides of the first block).
    # The first label of each stage is a projection block, the rest are
    # identity blocks: 3 + 4 + 6 + 3 bottlenecks in total.
    stage_specs = (
        (2, [64, 64, 256], 'abc', (1, 1)),        # 55x55x256
        (3, [128, 128, 512], 'abcd', (2, 2)),     # 28x28x512
        (4, [256, 256, 1024], 'abcdef', (2, 2)),  # 14x14x1024
        (5, [512, 512, 2048], 'abc', (2, 2)),     # 7x7x2048
    )
    for stage, filters, labels, strides in stage_specs:
        x = conv_block(x, 3, filters, stage=stage, block=labels[0], strides=strides)
        for label in labels[1:]:
            x = identity_block(x, 3, filters, stage=stage, block=label)

    # Average-pool the 7x7 map down to 1x1 instead of using large dense layers.
    x = AveragePooling2D((7, 7), name='avg_pool')(x)

    # Classification head.
    x = Flatten()(x)
    x = Dense(classes, activation='softmax', name='fc1000')(x)

    model = Model(img_input, x, name='resnet50')

    if weights_path is not None:
        model.load_weights(weights_path)
    return model
Model: “model_1”
Layer (type) Output Shape Param # Connected to
input_10 (InputLayer) [(None, 224, 224, 3)] 0 []
conv1_pad (ZeroPadding2D) (None, 230, 230, 3) 0 [‘input_10[0][0]’]
conv1_conv (Conv2D) (None, 112, 112, 64) 9472 [‘conv1_pad[0][0]’]
conv1_bn (BatchNormalizati (None, 112, 112, 64) 256 [‘conv1_conv[0][0]’]
on)
conv1_relu (Activation) (None, 112, 112, 64) 0 [‘conv1_bn[0][0]’]
pool1_pad (ZeroPadding2D) (None, 114, 114, 64) 0 [‘conv1_relu[0][0]’]
pool1_pool (MaxPooling2D) (None, 56, 56, 64) 0 [‘pool1_pad[0][0]’]
conv2_block1_1_conv (Conv2 (None, 56, 56, 64) 4160 [‘pool1_pool[0][0]’]
D)
conv2_block1_1_bn (BatchNo (None, 56, 56, 64) 256 [‘conv2_block1_1_conv[0][0]’]
rmalization)
…………………………
conv5_block3_2_bn (BatchNo (None, 7, 7, 512) 2048 [‘conv5_block3_2_conv[0][0]’]
rmalization)
conv5_block3_2_relu (Activ (None, 7, 7, 512) 0 [‘conv5_block3_2_bn[0][0]’]
ation)
conv5_block3_3_conv (Conv2 (None, 7, 7, 2048) 1050624 [‘conv5_block3_2_relu[0][0]’]
D)
conv5_block3_3_bn (BatchNo (None, 7, 7, 2048) 8192 [‘conv5_block3_3_conv[0][0]’]
rmalization)
conv5_block3_add (Add) (None, 7, 7, 2048) 0 [‘conv5_block2_out[0][0]’,
‘conv5_block3_3_bn[0][0]’]
conv5_block3_out (Activati (None, 7, 7, 2048) 0 [‘conv5_block3_add[0][0]’]
on)
global_average_pooling2d_1 (None, 2048) 0 [‘conv5_block3_out[0][0]’]
(GlobalAveragePooling2D)
dense_2 (Dense) (None, 256) 524544 [‘global_average_pooling2d_1[0
][0]’]
dense_3 (Dense) (None, 4) 1028 [‘dense_2[0][0]’]
==================================================================================================
Total params: 24113284 (91.98 MB)
Trainable params: 525572 (2.00 MB)
Non-trainable params: 23587712 (89.98 MB)
pytorch实现
import torch.nn as nn
import torch
class BasicBlock(nn.Module):
    """Residual block made of two 3x3 convolutions.

    `expansion` is the ratio between the block's output channels and
    `out_channel`; it is 1 for this block.
    """

    expansion = 1

    def __init__(self, in_channel, out_channel, stride=1, downsample=None, **kwargs):
        """Create the block's layers.

        Args:
            in_channel: Channels of the incoming tensor.
            out_channel: Channels produced by both convolutions.
            stride: Stride of the first convolution.
            downsample: Optional module applied to the input to produce the
                shortcut; when None the input itself is the shortcut.
            **kwargs: Accepted and ignored, so the block can be constructed
                with the same keywords as Bottleneck.
        """
        super().__init__()
        # Submodules are registered in a fixed order (conv1, bn1, relu,
        # conv2, bn2) so that state_dict keys stay stable.
        self.conv1 = nn.Conv2d(
            in_channels=in_channel,
            out_channels=out_channel,
            kernel_size=3,
            stride=stride,
            padding=1,
            bias=False,
        )
        self.bn1 = nn.BatchNorm2d(out_channel)
        self.relu = nn.ReLU()
        self.conv2 = nn.Conv2d(
            in_channels=out_channel,
            out_channels=out_channel,
            kernel_size=3,
            stride=1,
            padding=1,
            bias=False,
        )
        self.bn2 = nn.BatchNorm2d(out_channel)
        self.downsample = downsample

    def forward(self, x):
        """Return relu(main_path(x) + shortcut(x))."""
        shortcut = x if self.downsample is None else self.downsample(x)

        y = self.relu(self.bn1(self.conv1(x)))
        y = self.bn2(self.conv2(y))

        y += shortcut
        return self.relu(y)
class Bottleneck(nn.Module):
    """Residual block of 1x1 -> 3x3 -> 1x1 convolutions (expansion factor 4).

    Note: in the original paper, on the main branch of the downsampling
    (dashed-line) residual structure, the first 1x1 convolution has stride 2
    and the 3x3 convolution has stride 1. The official PyTorch implementation
    instead gives the first 1x1 convolution stride 1 and the 3x3 convolution
    stride 2, which improves top-1 accuracy by roughly 0.5%.
    See ResNet v1.5:
    https://ngc.nvidia.com/catalog/model-scripts/nvidia:resnet_50_v1_5_for_pytorch
    """

    expansion = 4

    def __init__(self, in_channel, out_channel, stride=1, downsample=None,
                 groups=1, width_per_group=64):
        """Create the block's layers.

        Args:
            in_channel: Channels of the incoming tensor.
            out_channel: Base channel count; the block outputs
                `out_channel * expansion` channels.
            stride: Stride of the 3x3 convolution.
            downsample: Optional module applied to the input to produce the
                shortcut; when None the input itself is the shortcut.
            groups: Number of groups of the 3x3 convolution.
            width_per_group: Scales the inner width relative to a base of 64.
        """
        super().__init__()

        inner = int(out_channel * (width_per_group / 64.)) * groups
        expanded = out_channel * self.expansion

        # Submodules are registered in a fixed order (conv1, bn1, conv2, bn2,
        # conv3, bn3, relu) so that state_dict keys stay stable.

        # Squeeze channels.
        self.conv1 = nn.Conv2d(
            in_channels=in_channel,
            out_channels=inner,
            kernel_size=1,
            stride=1,
            bias=False,
        )
        self.bn1 = nn.BatchNorm2d(inner)

        # Spatial (optionally grouped and strided) convolution.
        self.conv2 = nn.Conv2d(
            in_channels=inner,
            out_channels=inner,
            groups=groups,
            kernel_size=3,
            stride=stride,
            bias=False,
            padding=1,
        )
        self.bn2 = nn.BatchNorm2d(inner)

        # Unsqueeze channels.
        self.conv3 = nn.Conv2d(
            in_channels=inner,
            out_channels=expanded,
            kernel_size=1,
            stride=1,
            bias=False,
        )
        self.bn3 = nn.BatchNorm2d(expanded)

        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample

    def forward(self, x):
        """Return relu(main_path(x) + shortcut(x))."""
        shortcut = x if self.downsample is None else self.downsample(x)

        y = self.relu(self.bn1(self.conv1(x)))
        y = self.relu(self.bn2(self.conv2(y)))
        y = self.bn3(self.conv3(y))

        y += shortcut
        return self.relu(y)
class ResNet(nn.Module):
    """Generic ResNet / ResNeXt backbone built from a residual block class.

    The network is a 7x7 stem convolution, a max pool, four stages of
    residual blocks and, when `include_top` is true, adaptive average
    pooling followed by a linear classifier.
    """

    def __init__(self,
                 block,
                 blocks_num,
                 num_classes=1000,
                 include_top=True,
                 groups=1,
                 width_per_group=64):
        """Create the network.

        Args:
            block: Residual block class; must expose an `expansion` attribute
                and accept (in_channel, out_channel, stride=, downsample=,
                groups=, width_per_group=).
            blocks_num: Number of blocks in each of the four stages.
            num_classes: Output size of the linear classifier.
            include_top: Whether to create and apply the pooling/classifier.
            groups: Forwarded to every block.
            width_per_group: Forwarded to every block.
        """
        super().__init__()
        self.include_top = include_top
        self.in_channel = 64  # running channel count, updated by _make_layer

        self.groups = groups
        self.width_per_group = width_per_group

        # Stem.
        self.conv1 = nn.Conv2d(
            3, self.in_channel, kernel_size=7, stride=2, padding=3, bias=False
        )
        self.bn1 = nn.BatchNorm2d(self.in_channel)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        # Four residual stages, registered as layer1 .. layer4 in order.
        stage_plan = ((64, 1), (128, 2), (256, 2), (512, 2))
        for index, (channel, stride) in enumerate(stage_plan):
            stage = self._make_layer(block, channel, blocks_num[index], stride=stride)
            setattr(self, "layer{}".format(index + 1), stage)

        if self.include_top:
            self.avgpool = nn.AdaptiveAvgPool2d((1, 1))  # output size = (1, 1)
            self.fc = nn.Linear(512 * block.expansion, num_classes)

        # Kaiming-normal initialization for every convolution in the network.
        for conv in (m for m in self.modules() if isinstance(m, nn.Conv2d)):
            nn.init.kaiming_normal_(conv.weight, mode='fan_out', nonlinearity='relu')

    def _make_layer(self, block, channel, block_num, stride=1):
        """Build one stage: a first (possibly projecting) block plus repeats.

        Updates `self.in_channel` to the stage's output channel count.
        """
        out_channel = channel * block.expansion

        # A projection shortcut is needed unless the first block keeps both
        # the spatial size and the channel count.
        projection = None
        if not (stride == 1 and self.in_channel == out_channel):
            projection = nn.Sequential(
                nn.Conv2d(self.in_channel, out_channel, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_channel),
            )

        shared = dict(groups=self.groups, width_per_group=self.width_per_group)

        first = block(self.in_channel, channel, downsample=projection, stride=stride, **shared)
        self.in_channel = out_channel

        rest = [block(self.in_channel, channel, **shared) for _ in range(1, block_num)]
        return nn.Sequential(first, *rest)

    def forward(self, x):
        """Run the stem and the four stages, then the head if it exists."""
        backbone = (
            self.conv1, self.bn1, self.relu, self.maxpool,
            self.layer1, self.layer2, self.layer3, self.layer4,
        )
        for module in backbone:
            x = module(x)

        if not self.include_top:
            return x

        x = self.avgpool(x)
        x = torch.flatten(x, 1)
        return self.fc(x)
def resnet34(num_classes=1000, include_top=True):
    """Build a ResNet from BasicBlock with stage depths 3, 4, 6, 3.

    Checkpoint URL noted by the original author:
    https://download.pytorch.org/models/resnet34-333f7ec4.pth
    """
    stage_depths = [3, 4, 6, 3]
    return ResNet(
        BasicBlock,
        stage_depths,
        num_classes=num_classes,
        include_top=include_top,
    )
def resnet50(num_classes=1000, include_top=True):
    """Build a ResNet from Bottleneck with stage depths 3, 4, 6, 3.

    Checkpoint URL noted by the original author:
    https://download.pytorch.org/models/resnet50-19c8e357.pth
    """
    stage_depths = [3, 4, 6, 3]
    return ResNet(
        Bottleneck,
        stage_depths,
        num_classes=num_classes,
        include_top=include_top,
    )
def resnet101(num_classes=1000, include_top=True):
    """Build a ResNet from Bottleneck with stage depths 3, 4, 23, 3.

    Checkpoint URL noted by the original author:
    https://download.pytorch.org/models/resnet101-5d3b4d8f.pth
    """
    stage_depths = [3, 4, 23, 3]
    return ResNet(
        Bottleneck,
        stage_depths,
        num_classes=num_classes,
        include_top=include_top,
    )
def resnext50_32x4d(num_classes=1000, include_top=True):
    """Build a ResNeXt: Bottleneck stages 3, 4, 6, 3 with 32 groups of width 4.

    Checkpoint URL noted by the original author:
    https://download.pytorch.org/models/resnext50_32x4d-7cdf4587.pth
    """
    cardinality = {"groups": 32, "width_per_group": 4}
    return ResNet(
        Bottleneck,
        [3, 4, 6, 3],
        num_classes=num_classes,
        include_top=include_top,
        **cardinality,
    )
def resnext101_32x8d(num_classes=1000, include_top=True):
    """Build a ResNeXt: Bottleneck stages 3, 4, 23, 3 with 32 groups of width 8.

    Checkpoint URL noted by the original author:
    https://download.pytorch.org/models/resnext101_32x8d-8ba56ff5.pth
    """
    cardinality = {"groups": 32, "width_per_group": 8}
    return ResNet(
        Bottleneck,
        [3, 4, 23, 3],
        num_classes=num_classes,
        include_top=include_top,
        **cardinality,
    )
对鸟类数据集分类
一共四种鸟类。
训练结果
总结
可以看到深度网络的分类准确率很高:通过加深网络结构,神经网络的学习能力得到增强。这里使用了预训练权重,即迁移学习的思想,对训练好的参数进行再利用。通过这次实验,对resnet50有了详细了解。