VGG-16的原理图与详细介绍这里省略。本文关注于模型实现,后面有空会慢慢完善内容。
依赖库
import paddle
from paddle.vision.transforms import Compose, ColorJitter, Resize,Transpose, Normalize
import cv2
import numpy as np
from PIL import Image
直接上paddle代码
# 分为两部分:网络块的定义、网络传播过程的定义
网络块的定义:
# 定义网络块
class ConvPool(paddle.nn.Layer):
    """Basic VGG building block: a stack of ``groups`` convolutions, each
    followed by ReLU, capped by a single max-pooling layer.
    """

    def __init__(self,
                 num_channels,    # input channels of the first conv
                 num_filters,     # output channels of every conv in the stack
                 filter_size,     # conv kernel size
                 pool_size,       # pooling kernel size
                 pool_stride,     # pooling stride
                 groups,          # number of stacked conv layers
                 conv_stride=1,
                 conv_padding=1,
                 ):
        super(ConvPool, self).__init__()
        self._conv2d_list = []
        in_ch = num_channels
        for idx in range(groups):
            # add_sublayer registers the conv under the name 'bb_<idx>' so its
            # parameters are tracked, and returns the registered layer.
            layer = self.add_sublayer(
                'bb_%d' % idx,
                paddle.nn.Conv2D(
                    in_channels=in_ch,
                    out_channels=num_filters,
                    kernel_size=filter_size,
                    stride=conv_stride,
                    padding=conv_padding,
                )
            )
            self._conv2d_list.append(layer)
            # Every conv after the first consumes num_filters channels.
            in_ch = num_filters
        self._pool2d = paddle.nn.MaxPool2D(
            kernel_size=pool_size,
            stride=pool_stride,
        )

    def forward(self, inputs):
        """Apply conv + ReLU ``groups`` times, then max-pool."""
        out = inputs
        for conv in self._conv2d_list:
            out = paddle.nn.functional.relu(conv(out))
        return self._pool2d(out)
网络传播过程架构
# 定义VGG网络结构
class VGGNet(paddle.nn.Layer):
    """VGG-16 (configuration D, Simonyan & Zisserman 2014) with 20 classes.

    Five conv blocks with 2, 2, 3, 3, 3 convolutions respectively
    (13 conv layers total), followed by three fully connected layers.
    Expects a 3x224x224 input so the final feature map is 512x7x7.
    """

    def __init__(self):
        super(VGGNet, self).__init__()
        # ConvPool(num_channels, num_filters, filter_size, pool_size, pool_stride, groups)
        # Fixed: the original used groups (1, 2, 4, 3, 3); VGG-16 is (2, 2, 3, 3, 3).
        self.convpool01 = ConvPool(3, 64, 3, 2, 2, 2)
        self.convpool02 = ConvPool(64, 128, 3, 2, 2, 2)
        self.convpool03 = ConvPool(128, 256, 3, 2, 2, 3)
        self.convpool04 = ConvPool(256, 512, 3, 2, 2, 3)
        self.convpool05 = ConvPool(512, 512, 3, 2, 2, 3)
        # 512 channels x 7x7 spatial size after five 2x poolings of a 224 input.
        self.pool_5_shape = 512 * 7 * 7
        self.fc01 = paddle.nn.Linear(self.pool_5_shape, 4096)
        self.fc02 = paddle.nn.Linear(4096, 4096)
        self.fc03 = paddle.nn.Linear(4096, 20)  # 20 output classes

    def forward(self, inputs, label=None):
        """Run the network.

        Args:
            inputs: float32 tensor of shape [N, 3, 224, 224].
            label: optional int64 tensor of shape [N, 1]; when provided,
                accuracy is computed on the logits and returned as well.

        Returns:
            logits, or (logits, accuracy) when ``label`` is not None.
        """
        out = self.convpool01(inputs)
        out = self.convpool02(out)
        out = self.convpool03(out)
        out = self.convpool04(out)
        out = self.convpool05(out)
        out = paddle.reshape(out, shape=[-1, self.pool_5_shape])
        # Fixed: the original chained the three Linear layers with no
        # non-linearity between them (collapsing them into one affine map);
        # VGG-16 applies ReLU after the first two FC layers.
        out = paddle.nn.functional.relu(self.fc01(out))
        out = paddle.nn.functional.relu(self.fc02(out))
        out = self.fc03(out)
        if label is not None:
            acc = paddle.metric.accuracy(input=out, label=label)
            return out, acc
        return out
网络具体结构可用API查看:查看结构
# Inspect the architecture: wrap the network in the high-level Model API
# and print a per-layer summary for a single 3x224x224 input.
model = paddle.Model(VGGNet())
model.summary((1, 3, 224, 224))
---------------------------------------------------------------------------
Layer (type) Input Shape Output Shape Param #
===========================================================================
Conv2D-1 [[1, 3, 224, 224]] [1, 64, 224, 224] 1,792
MaxPool2D-1 [[1, 64, 224, 224]] [1, 64, 112, 112] 0
ConvPool-1 [[1, 3, 224, 224]] [1, 64, 112, 112] 0
Conv2D-2 [[1, 64, 112, 112]] [1, 128, 112, 112] 73,856
Conv2D-3 [[1, 128, 112, 112]] [1, 128, 112, 112] 147,584
MaxPool2D-2 [[1, 128, 112, 112]] [1, 128, 56, 56] 0
ConvPool-2 [[1, 64, 112, 112]] [1, 128, 56, 56] 0
Conv2D-4 [[1, 128, 56, 56]] [1, 256, 56, 56] 295,168
Conv2D-5 [[1, 256, 56, 56]] [1, 256, 56, 56] 590,080
Conv2D-6 [[1, 256, 56, 56]] [1, 256, 56, 56] 590,080
Conv2D-7 [[1, 256, 56, 56]] [1, 256, 56, 56] 590,080
MaxPool2D-3 [[1, 256, 56, 56]] [1, 256, 28, 28] 0
ConvPool-3 [[1, 128, 56, 56]] [1, 256, 28, 28] 0
Conv2D-8 [[1, 256, 28, 28]] [1, 512, 28, 28] 1,180,160
Conv2D-9 [[1, 512, 28, 28]] [1, 512, 28, 28] 2,359,808
Conv2D-10 [[1, 512, 28, 28]] [1, 512, 28, 28] 2,359,808
MaxPool2D-4 [[1, 512, 28, 28]] [1, 512, 14, 14] 0
ConvPool-4 [[1, 256, 28, 28]] [1, 512, 14, 14] 0
Conv2D-11 [[1, 512, 14, 14]] [1, 512, 14, 14] 2,359,808
Conv2D-12 [[1, 512, 14, 14]] [1, 512, 14, 14] 2,359,808
Conv2D-13 [[1, 512, 14, 14]] [1, 512, 14, 14] 2,359,808
MaxPool2D-5 [[1, 512, 14, 14]] [1, 512, 7, 7] 0
ConvPool-5 [[1, 512, 14, 14]] [1, 512, 7, 7] 0
Linear-1 [[1, 25088]] [1, 4096] 102,764,544
Linear-2 [[1, 4096]] [1, 4096] 16,781,312
Linear-3 [[1, 4096]] [1, 20] 81,940
===========================================================================
Total params: 134,895,636
Trainable params: 134,895,636
Non-trainable params: 0
---------------------------------------------------------------------------
Input size (MB): 0.57
Forward/backward pass size (MB): 108.40
Params size (MB): 514.59
Estimated Total Size (MB): 623.56
---------------------------------------------------------------------------
Process finished with exit code 0
请认真观察上表中的全连接层(Linear)部分:
Linear-1 [[1, 25088]] [1, 4096] 102,764,544
Linear-2 [[1, 4096]] [1, 4096] 16,781,312
Linear-3 [[1, 4096]] [1, 20] 81,940
线性层的部分其实就是全连接的过程,输入输出数据之间通过全连接进行计算。参数量的计算方式为:输入维度 × 输出维度 + 输出维度(偏置),结果与上表 Param # 一列一致。
模型训练
# Declare the input and label specs (shape/dtype contracts for the Model API).
input_define = paddle.static.InputSpec(shape=[-1,3,224,224], dtype="float32", name="img")
label_define = paddle.static.InputSpec(shape=[-1,1], dtype="int64", name="label")
# Instantiate the network and set up the optimizer and training logic.
model = VGGNet()
model = paddle.Model(model,inputs=input_define,labels=label_define) # wrap the network with paddle.Model()
optimizer = paddle.optimizer.Adam(learning_rate=0.0001, parameters=model.parameters())
# The learning_rate above matters a lot: if the training accuracy oscillates
# (jumps up and down), try lowering it further.
model.prepare(optimizer=optimizer, # optimizer
              loss=paddle.nn.CrossEntropyLoss(), # loss function
              metrics=paddle.metric.Accuracy()) # evaluation metric
# NOTE(review): train_dataset / eval_dataset are defined elsewhere in the
# original notebook — not visible in this excerpt.
model.fit(train_data=train_dataset, # training dataset
          eval_data=eval_dataset, # evaluation dataset
          batch_size=64, # samples per batch
          epochs=30, # number of training epochs
          save_dir="/home/aistudio/lup", # folder for model/optimizer checkpoints
          save_freq=20, # save checkpoints every N epochs
          log_freq=100 # logging frequency (steps)
          )