Pytorch:深度神经网络搭建与训练,模型保存与复用
Copyright: Jingmin Wei, Pattern Recognition and Intelligent System, School of Artificial and Intelligence, Huazhong University of Science and Technology
本教程不商用,仅供学习和参考交流使用,如需转载,请联系本人。
nn.Module 库定义的网络层会自动注册并管理自身的可学习参数,比如 Fully-connected, Convolution 层等等(Pooling 这类没有可学习参数的层同样提供了 nn.Module 形式),适合于定义需要学习参数的层。
nn.functional 库提供的是函数式接口,不会自动创建或保存可学习参数,常用于 Activation 等不需要学习参数的操作;如果用它实现 Batch Normalization 等带参数的层,则需要自行用 nn.Parameter 封装参数并在调用时传入。
nn.Sequential() 使用时可以封装多个网络层,而不需要显式定义前向传播路径,常用于网络快速搭建。
调用 net(data) 时实际执行的是 net.__call__(data):nn.Module 实现的 __call__ 让实例 net 成为可调用对象,而在 net.__call__(data) 中主要调用的就是我们自己定义的 forward() 函数。
以全连接神经网络为例,介绍定义网络的过程,将使用到 nn.Module 和 nn.Sequential 两种不同的网络定义方式。
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.datasets import load_boston
from sklearn.preprocessing import StandardScaler
import torch
import torch.nn as nn
from torch import optim
import torch.utils.data as Data
from torchsummary import summary
数据准备
和之前数据预处理的内容类似
# Load the Boston housing data as a (features, targets) pair of arrays.
# NOTE(review): load_boston was deprecated in scikit-learn 1.0 and removed in
# 1.2, so this cell needs scikit-learn < 1.2 — confirm the pinned version.
boston_X, boston_y = load_boston(return_X_y = True)
# Show the raw feature matrix, then its shape.
print(boston_X)
print(boston_X.shape)
[[6.3200e-03 1.8000e+01 2.3100e+00 ... 1.5300e+01 3.9690e+02 4.9800e+00]
[2.7310e-02 0.0000e+00 7.0700e+00 ... 1.7800e+01 3.9690e+02 9.1400e+00]
[2.7290e-02 0.0000e+00 7.0700e+00 ... 1.7800e+01 3.9283e+02 4.0300e+00]
...
[6.0760e-02 0.0000e+00 1.1930e+01 ... 2.1000e+01 3.9690e+02 5.6400e+00]
[1.0959e-01 0.0000e+00 1.1930e+01 ... 2.1000e+01 3.9345e+02 6.4800e+00]
[4.7410e-02 0.0000e+00 1.1930e+01 ... 2.1000e+01 3.9690e+02 7.8800e+00]]
(506, 13)
# Visualize the distribution of the dependent variable (regression target).
plt.figure()
plt.hist(boston_y, bins=20)
plt.show()

# Standardize the independent variables (centering and scaling both enabled).
ss = StandardScaler(with_mean=True, with_std=True)
boston_Xs = ss.fit_transform(boston_X)

# Convert the arrays to float32 tensors, the default dtype of nn.Linear.
X_train = torch.from_numpy(boston_Xs.astype(np.float32))
y_train = torch.from_numpy(boston_y.astype(np.float32))

# Pair every feature row with its target value.
data = Data.TensorDataset(X_train, y_train)

# Mini-batch loader over the whole data set.
# num_workers=0 loads batches in the main process: the data is a small
# in-memory tensor, so a worker subprocess only adds start-up and IPC cost,
# and on spawn-based platforms (e.g. Windows) worker processes fail unless the
# script is wrapped in an `if __name__ == "__main__":` guard.
train_loader = Data.DataLoader(
    dataset=data,
    batch_size=128,  # 128 samples per batch
    shuffle=True,  # reshuffle the samples every epoch
    num_workers=0,
)
网络定义与训练方式1
class MLPmodel(nn.Module):
    """Fully-connected regression network with two ReLU hidden layers.

    Layers are declared one by one and wired together explicitly in
    ``forward``.

    Args:
        in_features: Number of input features per sample.
        hidden_features: Width of both hidden layers.
    """

    def __init__(self, in_features: int = 13, hidden_features: int = 10):
        super().__init__()
        # First hidden layer: in_features -> hidden_features, with bias.
        self.hidden1 = nn.Linear(
            in_features=in_features, out_features=hidden_features, bias=True
        )
        # ReLU holds no parameters, so one instance serves both hidden layers.
        self.active = nn.ReLU()
        # Second hidden layer: hidden_features -> hidden_features.
        self.hidden2 = nn.Linear(hidden_features, hidden_features)
        # Regression head producing one value per sample. The attribute name
        # (spelling included) is kept as-is because it is part of the
        # state_dict keys of saved parameters.
        self.regreesion = nn.Linear(hidden_features, 1)

    def forward(self, x):
        """Map inputs of shape (..., in_features) to predictions (..., 1)."""
        x = self.active(self.hidden1(x))
        x = self.active(self.hidden2(x))
        return self.regreesion(x)
# Instantiate the explicitly wired network.
mlp1 = MLPmodel()
# Print a per-layer table of output shapes and parameter counts.
# NOTE(review): the recorded output shows shapes like [-1, 506, 10], i.e. 506
# is treated as a data dimension, not the batch size — presumably
# input_size should exclude the batch dimension; confirm whether (13,) was meant.
summary(mlp1, input_size=(506, 13))
----------------------------------------------------------------
Layer (type) Output Shape Param #
================================================================
Linear-1 [-1, 506, 10] 140
ReLU-2 [-1, 506, 10] 0
Linear-3 [-1, 506, 10] 110
ReLU-4 [-1, 506, 10] 0
Linear-5 [-1, 506, 1] 11
================================================================
Total params: 261
Trainable params: 261
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.03
Forward/backward pass size (MB): 0.16
Params size (MB): 0.00
Estimated Total Size (MB): 0.18
----------------------------------------------------------------
# Optimizer and loss: plain stochastic gradient descent with a learning rate
# of 0.001, minimizing the mean squared error.
optimizer = optim.SGD(mlp1.parameters(), lr=0.001)
loss_func = nn.MSELoss()
train_loss_all = []  # training loss of every mini-batch, in iteration order

# Training loop: 30 passes over the loader, one optimizer step per batch.
for epoch in range(30):
    for b_x, b_y in train_loader:
        # The regression head emits a trailing dimension of size 1; flatten it
        # so predictions line up element-wise with the batch targets.
        output = mlp1(b_x).flatten()
        # Loss between the predictions and the training targets of this batch.
        train_loss = loss_func(output, b_y)
        optimizer.zero_grad()  # clear gradients left from the previous step
        train_loss.backward()  # back-propagate the loss
        optimizer.step()  # update the weights from the gradients
        train_loss_all.append(train_loss.item())

# Plot the per-iteration training loss.
plt.figure()
plt.plot(train_loss_all, 'r-')
plt.title('Train loss per iteration')
plt.show()
经过 30 个 epoch、共约 120 次迭代(506 个样本按 batch_size=128 划分,每个 epoch 迭代 4 次)的训练,损失函数逐渐下降并趋于收敛。
网络定义与训练方式2
使用Sequential进行封装
class MLPmodel2(nn.Module):
    """Fully-connected regression network built with ``nn.Sequential``.

    The hidden stack is wrapped in one ``nn.Sequential`` so ``forward`` does
    not have to chain the hidden layers by hand.

    Args:
        in_features: Number of input features per sample.
        hidden_features: Width of both hidden layers.
    """

    def __init__(self, in_features: int = 13, hidden_features: int = 10):
        super().__init__()
        self.hidden = nn.Sequential(
            nn.Linear(in_features=in_features, out_features=hidden_features, bias=True),
            nn.ReLU(),
            nn.Linear(hidden_features, hidden_features),
            # Activation after the second hidden layer. Without it the second
            # hidden layer and the regression head are two stacked linear maps
            # with no non-linearity between them, i.e. a single affine map.
            # ReLU has no parameters, so the state_dict keys
            # (hidden.0.*, hidden.2.*) are unaffected.
            nn.ReLU(),
        )
        # Regression head producing one value per sample. The attribute name
        # (spelling included) is kept as-is because it is part of the
        # state_dict keys of saved parameters.
        self.regreesion = nn.Linear(hidden_features, 1)

    def forward(self, x):
        """Map inputs of shape (..., in_features) to predictions (..., 1)."""
        return self.regreesion(self.hidden(x))
# Instantiate the Sequential-based network.
mlp2 = MLPmodel2()
# Print a per-layer table of output shapes and parameter counts.
# NOTE(review): the recorded output shows shapes like [-1, 506, 10], i.e. 506
# is treated as a data dimension, not the batch size — presumably
# input_size should exclude the batch dimension; confirm whether (13,) was meant.
summary(mlp2, input_size=(506, 13))
----------------------------------------------------------------
Layer (type) Output Shape Param #
================================================================
Linear-1 [-1, 506, 10] 140
ReLU-2 [-1, 506, 10] 0
Linear-3 [-1, 506, 10] 110
Linear-4 [-1, 506, 1] 11
================================================================
Total params: 261
Trainable params: 261
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.03
Forward/backward pass size (MB): 0.12
Params size (MB): 0.00
Estimated Total Size (MB): 0.15
----------------------------------------------------------------
# Optimizer and loss: plain stochastic gradient descent with a learning rate
# of 0.001, minimizing the mean squared error.
optimizer = optim.SGD(mlp2.parameters(), lr=0.001)
loss_func = nn.MSELoss()
train_loss_all = []  # training loss of every mini-batch, in iteration order

# Training loop: 30 passes over the loader, one optimizer step per batch.
for epoch in range(30):
    for b_x, b_y in train_loader:
        # The regression head emits a trailing dimension of size 1; flatten it
        # so predictions line up element-wise with the batch targets.
        output = mlp2(b_x).flatten()
        # Loss between the predictions and the training targets of this batch.
        train_loss = loss_func(output, b_y)
        optimizer.zero_grad()  # clear gradients left from the previous step
        train_loss.backward()  # back-propagate the loss
        optimizer.step()  # update the weights from the gradients
        train_loss_all.append(train_loss.item())

# Plot the per-iteration training loss.
plt.figure()
plt.plot(train_loss_all, 'r-')
plt.title('Train loss per iteration')
plt.show()
经过 30 个 epoch、共约 120 次迭代(506 个样本按 batch_size=128 划分,每个 epoch 迭代 4 次)的训练,损失函数逐渐下降并趋于收敛。
模型保存和加载方法
保存整个模型
import os

# torch.save does not create missing parent directories, so make sure the
# target folder exists before writing.
os.makedirs('./model', exist_ok=True)

# Serialize the complete model objects (structure and weights).
torch.save(mlp1, './model/mlp1.pkl')
torch.save(mlp2, './model/mlp2.pkl')

# Load the first saved model back. This must read mlp1.pkl: reading mlp2.pkl
# here would bind an MLPmodel2 instance to the name mlp1load.
# NOTE(review): torch.load unpickles arbitrary objects — only load files you
# trust. Recent PyTorch releases may require weights_only=False to load a
# whole pickled model; confirm against the installed version.
mlp1load = torch.load('./model/mlp1.pkl')
# Bare expression: shows the model's repr when run as a notebook cell.
mlp1load
MLPmodel2(
(hidden): Sequential(
(0): Linear(in_features=13, out_features=10, bias=True)
(1): ReLU()
(2): Linear(in_features=10, out_features=10, bias=True)
)
(regreesion): Linear(in_features=10, out_features=1, bias=True)
)
# Load the second saved model (written from mlp2) back from disk.
# NOTE(review): torch.load unpickles arbitrary objects — only load trusted files.
mlp2load = torch.load('./model/mlp2.pkl')
# Bare expression: shows the model's repr when run as a notebook cell.
mlp2load
MLPmodel2(
(hidden): Sequential(
(0): Linear(in_features=13, out_features=10, bias=True)
(1): ReLU()
(2): Linear(in_features=10, out_features=10, bias=True)
)
(regreesion): Linear(in_features=10, out_features=1, bias=True)
)
只保存模型的参数
import os

# torch.save does not create missing parent directories, so make sure the
# target folder exists before writing.
os.makedirs('./model', exist_ok=True)

# Save only the parameters (state_dict) of each network.
torch.save(mlp1.state_dict(), './model/mlp1_param.pkl')
torch.save(mlp2.state_dict(), './model/mlp2_param.pkl')

# Load the saved parameters of mlp1. The result is an ordered mapping from
# parameter names to tensors, not a model; to reuse it, build a fresh
# MLPmodel and pass this mapping to its load_state_dict().
mlp1param = torch.load('./model/mlp1_param.pkl')
# Bare expression: shows the parameter mapping when run as a notebook cell.
mlp1param
OrderedDict([('hidden1.weight',
tensor([[-2.5801e-02, 3.2484e-02, -1.8182e-01, 2.2075e-01, 1.3536e-02,
4.1140e-01, 2.0664e-01, -3.1524e-01, 1.6168e-01, -3.7853e-01,
-6.4648e-02, 6.6774e-02, -5.8944e-01],
[-2.3007e-01, 1.1350e-01, -1.4941e-01, -1.0846e-01, -1.2126e-01,
2.0522e-01, 9.8839e-02, -1.4642e-02, -2.5171e-03, 2.4971e-01,
2.5216e-01, -2.5383e-03, -1.6766e-02],
[ 1.2099e-01, 1.9879e-01, -5.2713e-02, -6.1219e-02, -2.4022e-01,
-1.0958e-02, -3.2612e-01, -9.3556e-02, -1.1896e-01, -2.5612e-01,
-2.5323e-01, -1.6342e-01, 9.9980e-02],
[-1.2623e-01, -1.2110e-01, -1.7218e-01, 1.7832e-01, -2.5239e-01,
5.1809e-01, 1.9213e-01, 1.3833e-01, 5.5852e-02, -1.1326e-01,
-2.2920e-01, 1.4154e-01, -1.3036e-01],
[-2.7367e-01, 1.2442e-01, 1.1439e-01, -4.4984e-02, 2.4807e-02,
-2.8409e-01, 2.0971e-01, -2.7090e-01, 2.5916e-01, 3.1423e-01,
7.2872e-02, -1.6399e-01, -1.4795e-01],
[ 4.1014e-02, 8.0610e-02, 1.6082e-01, -1.8276e-01, 2.0158e-01,
4.9238e-02, -1.9620e-01, 1.2445e-01, 1.9717e-01, 7.0750e-02,
-1.7211e-01, 7.1605e-04, 1.0114e-01],
[-3.2713e-01, -5.5323e-02, -4.1358e-01, -3.0313e-01, -4.0237e-01,
7.5061e-01, -1.3962e-02, -4.7558e-02, -1.6573e-01, -4.0670e-01,
-2.0012e-01, 3.4082e-01, -4.7709e-01],
[ 3.3092e-02, -3.6262e-01, 4.4745e-02, 1.9566e-01, 1.2359e-01,
-1.8763e-01, 3.5385e-01, -6.6399e-02, 5.7203e-02, -5.1705e-02,
-1.1205e-01, 1.7392e-01, 1.5158e-01],
[-3.2567e-01, -3.8570e-01, 1.9685e-01, 2.1886e-01, 2.1566e-01,
-9.1948e-02, -3.2256e-02, -8.5592e-02, -3.6367e-02, -1.7714e-01,
1.5858e-01, 1.6223e-01, 6.9193e-02],
[-7.6076e-02, -3.0033e-01, 3.8297e-01, 2.1962e-01, -1.6100e-01,
3.1488e-01, -6.7479e-02, -7.0767e-03, 1.5569e-01, 3.0179e-01,
2.0089e-01, 1.6438e-01, -2.2026e-01]])),
('hidden1.bias',
tensor([ 1.1105, -0.2977, 0.3859, 0.1978, 0.2857, -0.2002, 0.3738, 0.1649,
0.5521, 0.1787])),
('hidden2.weight',
tensor([[-0.2493, -0.2792, 0.2416, 0.1935, -0.2118, -0.2448, 0.1553, -0.3149,
-0.2937, 0.1664],
[-0.2636, -0.1028, 0.0557, -0.1204, 0.2341, -0.2500, -0.1694, -0.1365,
0.1862, -0.1001],
[ 0.0876, -0.1066, 0.1766, -0.0564, 0.2845, -0.2912, -0.0857, -0.0988,
-0.2746, 0.0536],
[-0.1058, 0.0544, -0.1484, 0.0585, -0.0573, 0.0817, -0.3041, -0.1898,
0.1997, -0.0746],
[-0.0840, 0.0639, -0.1507, -0.2741, 0.1074, -0.1856, 0.1269, 0.1701,
0.0766, -0.2042],
[-0.0094, 0.1448, -0.0476, -0.2505, 0.0540, 0.0837, 0.1881, -0.0848,
-0.3105, 0.2323],
[ 1.0399, 0.0519, 0.3843, 0.5839, 0.3009, -0.1345, 0.8830, 0.2418,
0.4228, 0.5138],
[ 0.7188, -0.1969, 0.2883, 0.2077, 0.5364, 0.0476, 0.8421, 0.2680,
0.4813, 0.2482],
[-0.1314, -0.1304, 0.0932, 0.1019, 0.1956, -0.0253, 0.0965, 0.0314,
-0.1878, -0.0294],
[ 0.2939, -0.1036, 0.1260, 0.0500, -0.2336, -0.1687, 0.0634, 0.1648,
0.1477, -0.1833]])),
('hidden2.bias',
tensor([-0.2753, -0.1767, -0.1408, -0.2944, -0.2409, -0.2291, 1.4637, 1.3473,
-0.2575, -0.1991])),
('regreesion.weight',
tensor([[-0.0689, -0.2884, -0.0589, -0.1619, 0.0780, -0.0351, 2.2136, 1.9229,
-0.2540, 0.2036]])),
('regreesion.bias', tensor([2.6824]))])
# Load the saved parameters of mlp2. The result is a mapping from parameter
# names (e.g. 'hidden.0.weight') to tensors, not a model object.
mlp2param = torch.load('./model/mlp2_param.pkl')
# Bare expression: shows the parameter mapping when run as a notebook cell.
mlp2param
OrderedDict([('hidden.0.weight',
tensor([[-3.7882e-01, -2.1213e-01, -1.1931e-01, -1.1345e-01, -7.0603e-02,
4.1154e-01, 9.6141e-03, -2.0682e-01, -4.0837e-01, -4.6498e-01,
-3.3819e-01, 4.1126e-01, -4.8775e-02],
[-3.9901e-01, 5.6975e-02, 1.5547e-01, 3.0070e-01, 1.4497e-01,
-1.8413e-01, 8.7258e-02, -3.6628e-01, 2.6932e-01, 5.2832e-01,
3.5038e-01, 1.0313e-01, -5.9314e-01],
[ 3.9728e-04, 6.2143e-02, -2.7005e-01, 1.9103e-01, -3.4464e-02,
7.8083e-01, -1.9403e-01, 3.1837e-02, -2.7593e-01, -1.8022e-01,
-4.4367e-01, -1.5718e-01, -4.3121e-01],
[ 1.8454e-01, -4.0077e-02, -4.8855e-02, -1.8623e-01, 1.8947e-02,
-1.9761e-01, 1.2482e-01, -1.6840e-01, -2.4291e-01, -3.8501e-02,
1.4017e-01, -6.8666e-02, 1.6561e-01],
[-1.9762e-01, 1.5395e-01, 2.5764e-01, 3.8647e-02, 2.8351e-01,
6.2669e-02, -1.7310e-01, 2.3517e-01, -2.8427e-02, 9.9237e-02,
-2.1677e-01, 1.6641e-01, 2.5929e-02],
[-1.3530e-01, -3.0910e-01, 1.9387e-01, 1.8563e-01, 1.7248e-01,
-7.3248e-02, 3.3756e-01, -2.7224e-01, 1.4616e-01, -2.9885e-02,
2.0379e-01, -1.8004e-01, -2.7933e-01],
[ 9.3557e-02, -6.7344e-02, 4.0941e-01, 2.3093e-02, 9.1351e-02,
2.6087e-01, -1.6865e-01, -1.8740e-01, 9.5988e-02, 1.5391e-01,
4.7002e-02, 2.3074e-01, -4.9821e-02],
[ 9.5425e-02, -1.0663e-01, 1.0190e-01, 1.8946e-01, -6.2244e-02,
4.0110e-01, -5.8842e-02, -1.0668e-01, -3.5096e-01, -4.2917e-01,
-1.5170e-01, 1.0003e-01, -7.3418e-02],
[-1.2922e-01, 7.6084e-02, -2.1245e-01, -1.0320e-01, -3.0408e-01,
6.6646e-03, -2.8006e-02, 3.7175e-03, -2.5638e-01, 1.0874e-01,
-2.9518e-01, 1.9825e-01, 5.1628e-02],
[ 5.6657e-02, -3.9290e-02, -2.5491e-01, -3.2850e-01, -3.0393e-01,
1.5562e-01, -1.5653e-01, -2.6869e-01, -3.8883e-01, -1.5824e-01,
2.6920e-01, 3.5387e-01, -2.6229e-01]])),
('hidden.0.bias',
tensor([ 0.8291, 0.3758, 0.4574, -0.2031, -0.1757, 0.0090, 0.5057, 0.2175,
0.4139, 0.3871])),
('hidden.2.weight',
tensor([[ 0.6125, 0.2457, -0.0235, 0.1202, -0.0678, 0.3525, 0.5065, 0.1228,
0.1286, 0.5249],
[ 0.2242, 0.0845, 0.1347, -0.0904, -0.0292, 0.1190, 0.3869, -0.2215,
0.0113, 0.0931],
[-0.1398, 0.1235, -0.2412, -0.3133, 0.1652, 0.0014, 0.1948, -0.1889,
0.1772, -0.2874],
[ 0.1530, 0.2623, 0.3819, -0.1784, 0.3407, -0.1312, -0.0243, 0.1617,
0.2015, -0.1002],
[-0.7185, -0.5606, -0.6867, 0.0243, 0.1756, -0.4220, -0.3690, -0.1479,
-0.4856, -0.0364],
[ 0.2623, 0.2144, 0.2790, 0.1752, 0.2042, 0.1720, 0.0624, 0.0425,
0.2600, 0.4524],
[ 0.2468, 0.2876, 0.2322, 0.0467, 0.2117, 0.0327, -0.0979, 0.3623,
-0.0187, 0.1619],
[-0.2574, -0.0452, 0.1506, -0.1167, -0.1946, -0.0150, -0.0213, -0.1401,
-0.1337, 0.0086],
[-0.1311, -0.0148, -0.1563, 0.0077, -0.2051, -0.2400, -0.1089, -0.1059,
-0.1817, -0.0868],
[-0.7173, -0.7765, -0.7238, 0.1524, 0.0629, -0.1739, -0.2796, -0.5206,
-0.0664, -0.6046]])),
('hidden.2.bias',
tensor([ 0.7569, 0.2052, -0.3467, 0.4155, -0.7522, 0.5108, 0.4010, 0.1794,
-0.3899, -1.2202])),
('regreesion.weight',
tensor([[ 1.1500, 0.4282, -0.3741, 0.6178, -1.4616, 0.7865, 0.6523, 0.0164,
-0.4694, -1.8913]])),
('regreesion.bias', tensor([1.1329]))])