Neural Networks with PyTorch

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch import tensor
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Set a font that can render CJK characters in plots
plt.rcParams['font.sans-serif'] = ['SimHei']

# Render minus signs correctly with the CJK font
plt.rcParams['axes.unicode_minus'] = False

np.random.seed(0)
torch.manual_seed(0)
<torch._C.Generator at 0x206c0f29610>
  • torch:2.2.1+cpu
  • numpy:1.24.3
  • pandas:2.2.1
  • matplotlib:3.7.1

1. Fundamentals

1.1 Automatic Differentiation in the Framework

# Explicitly mark the tensor as requiring gradients
x = torch.randn(3, 4, requires_grad=True)
x
tensor([[ 1.5410, -0.2934, -2.1788,  0.5684],
        [-1.0845, -1.3986,  0.4033,  0.8380],
        [-0.7193, -0.4033, -0.5966,  0.1820]], requires_grad=True)
b = torch.randn(3, 4, requires_grad=True)
t = x+b
y = t.sum()
y
tensor(-2.8990, grad_fn=<SumBackward0>)
# Compute each parameter's gradient with respect to the loss, for backpropagation and parameter updates
y.backward()
y
tensor(-2.8990, grad_fn=<SumBackward0>)
b.grad
tensor([[1., 1., 1., 1.],
        [1., 1., 1., 1.],
        [1., 1., 1., 1.]])
# Although y never set requires_grad explicitly, it inherits it from the tensors it depends on
x.requires_grad, b.requires_grad, y.requires_grad
(True, True, True)
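Tracking can also be switched off locally when gradients are not needed, for example during inference. A minimal sketch (t2 is a name introduced here for illustration):

# Inside torch.no_grad(), results of operations on tracked tensors
# do not require gradients
with torch.no_grad():
    t2 = x + b
print(t2.requires_grad)  # False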

1.1.1 A Simple Example

# The computation graph
x = torch.rand(1)
b = torch.rand(1, requires_grad=True)
w = torch.rand(1, requires_grad=True)
y = w * x
z = y + b
x.requires_grad, b.requires_grad, w.requires_grad, y.requires_grad
(False, True, True, True)

$z = wx + b, \quad y = wx$

$\frac{\partial z}{\partial b} = 1, \quad \frac{\partial z}{\partial y} = 1$

$\frac{\partial y}{\partial w} = x, \quad \frac{\partial y}{\partial x} = w$

$\frac{\partial z}{\partial x} = \frac{\partial z}{\partial y} \frac{\partial y}{\partial x} = 1 \cdot w$

$\frac{\partial z}{\partial w} = \frac{\partial z}{\partial y} \frac{\partial y}{\partial w} = 1 \cdot x$

The chain rule: gradients are computed layer by layer.

x.is_leaf, w.is_leaf, b.is_leaf, y.is_leaf, z.is_leaf
(True, True, True, False, False)
# Run backpropagation. If gradients are not cleared, each call accumulates onto the previous results.
# retain_graph=True keeps the computation graph so backward() can be called again.
z.backward(retain_graph=True)
w.grad
tensor([0.2783])
b.grad
tensor([1.])
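Because gradients accumulate, calling backward() a second time on the same graph adds onto the stored values; a short sketch:

# A second backward pass accumulates into .grad
z.backward(retain_graph=True)
print(w.grad)  # twice the previous value
print(b.grad)  # tensor([2.])
# Zero the gradients before the next backward pass
w.grad.zero_()
b.grad.zero_()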

1.1.2 A Linear Regression Example

x_values = [i for i in range(11)]
x_train = np.array(x_values, dtype=np.float32)
x_train = x_train.reshape(-1, 1)
x_train
array([[ 0.],
       [ 1.],
       [ 2.],
       [ 3.],
       [ 4.],
       [ 5.],
       [ 6.],
       [ 7.],
       [ 8.],
       [ 9.],
       [10.]], dtype=float32)
y_values = [2*i+1 for i in range(11)]
y_train = np.array(y_values, dtype=np.float32)
y_train = y_train.reshape(-1, 1)
y_train
array([[ 1.],
       [ 3.],
       [ 5.],
       [ 7.],
       [ 9.],
       [11.],
       [13.],
       [15.],
       [17.],
       [19.],
       [21.]], dtype=float32)

The linear regression model:

class LinearRegressionModel(nn.Module):
    def __init__(self, input_dim, output_dim):
        super(LinearRegressionModel, self).__init__()
        self.linear = nn.Linear(input_dim, output_dim)

    def forward(self, x):
        out = self.linear(x)
        return out
# Both the input x and the output y are single values
input_dim = 1
output_dim = 1
model = LinearRegressionModel(input_dim, output_dim)
model
LinearRegressionModel(
  (linear): Linear(in_features=1, out_features=1, bias=True)
)

Specify the hyperparameters and the loss function:

# Number of epochs and learning rate
epochs = 1000
learning_rate = 0.01
# SGD optimizer
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)
# MSE loss, since this is a regression problem
criterion = nn.MSELoss()

Train the model:

for epoch in range(epochs):
    epoch += 1
    # Convert to tensors
    inputs = torch.from_numpy(x_train)
    labels = torch.from_numpy(y_train)
    # Zero the gradients
    optimizer.zero_grad()
    # Forward pass
    outputs = model(inputs)
    # Compute the loss
    loss = criterion(outputs, labels)
    # Backward pass
    loss.backward()
    # Update the parameters
    optimizer.step()

    if epoch % 50 == 0:
        print('epoch {}, loss {}'.format(epoch, loss.item()))
epoch 50, loss 0.033222541213035583
epoch 100, loss 0.018948916345834732
epoch 150, loss 0.01080773863941431
epoch 200, loss 0.006164283026009798
epoch 250, loss 0.0035158887039870024
epoch 300, loss 0.002005331451073289
epoch 350, loss 0.0011437778593972325
epoch 400, loss 0.0006523674237541854
epoch 450, loss 0.00037207684363238513
epoch 500, loss 0.00021222172654233873
epoch 550, loss 0.00012104902998544276
epoch 600, loss 6.903993926243857e-05
epoch 650, loss 3.937735527870245e-05
epoch 700, loss 2.2458953026216477e-05
epoch 750, loss 1.2811175110982731e-05
epoch 800, loss 7.3064716161752585e-06
epoch 850, loss 4.1670832615636755e-06
epoch 900, loss 2.3768816390656866e-06
epoch 950, loss 1.3555484201788204e-06
epoch 1000, loss 7.73371482409857e-07

Model predictions:

model(torch.from_numpy(x_train)).detach().numpy()
array([[ 0.99836427],
       [ 2.9985998 ],
       [ 4.9988356 ],
       [ 6.999071  ],
       [ 8.999307  ],
       [10.999542  ],
       [12.999778  ],
       [15.000013  ],
       [17.000248  ],
       [19.000484  ],
       [21.00072   ]], dtype=float32)
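Since the training data were generated by y = 2x + 1, the learned parameters should land close to a weight of 2 and a bias of 1, which can be checked directly:

# Inspect the learned weight and bias; expect values near 2 and 1
for name, param in model.named_parameters():
    print(name, param.data)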

Saving and loading the model:

torch.save(model.state_dict(), 'model.pkl')
model.load_state_dict(torch.load('model.pkl'))
<All keys matched successfully>
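A minimal inference sketch after loading: switch to eval mode and disable gradient tracking (good habits in general, though this simple linear model has no dropout or batch-norm layers):

model.eval()
with torch.no_grad():
    # y = 2x + 1, so x = 5 should give roughly 11
    print(model(torch.tensor([[5.0]])))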

To train on a GPU, you only need to move the model and the data onto the CUDA device.

import torch
import torch.nn as nn
import numpy as np

class LinearRegressionModel(nn.Module):
    def __init__(self, input_dim, output_dim):
        super(LinearRegressionModel, self).__init__()
        self.linear = nn.Linear(input_dim, output_dim)

    def forward(self, x):
        out = self.linear(x)
        return out

input_dim = 1
output_dim = 1
model = LinearRegressionModel(input_dim, output_dim)

# Configure the device and move the model onto the GPU
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
model.to(device)

criterion = nn.MSELoss()
learning_rate = 0.01
optimizer = torch.optim.SGD(model.parameters(),lr=learning_rate)

epochs = 1000
for epoch in range(epochs):
    epoch += 1
    
# Move the data onto the device
    inputs = torch.from_numpy(x_train).to(device)
    labels = torch.from_numpy(y_train).to(device)
    optimizer.zero_grad()
    outputs = model(inputs)
    loss = criterion(outputs, labels)
    loss.backward()
    optimizer.step()
    if epoch % 50 == 0:
        print('epoch {}, loss {}'.format(epoch, loss.item()))
epoch 50, loss 0.0005876993527635932
epoch 100, loss 0.00033520444412715733
epoch 150, loss 0.0001911871222546324
epoch 200, loss 0.00010904521332122386
epoch 250, loss 6.219371425686404e-05
epoch 300, loss 3.547255619196221e-05
epoch 350, loss 2.0232542738085613e-05
epoch 400, loss 1.154030269390205e-05
epoch 450, loss 6.581999969057506e-06
epoch 500, loss 3.7540908124356065e-06
epoch 550, loss 2.1410655790532473e-06
epoch 600, loss 1.2212855153848068e-06
epoch 650, loss 6.96716256243235e-07
epoch 700, loss 3.971402122715517e-07
epoch 750, loss 2.264746257196748e-07
epoch 800, loss 1.2923057113312097e-07
epoch 850, loss 7.374973165497067e-08
epoch 900, loss 4.2081651940861775e-08
epoch 950, loss 2.4022835276582555e-08
epoch 1000, loss 1.3600415904591046e-08
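Predictions computed on the GPU also live on the GPU, so move them back to the CPU before converting to NumPy (predicted is a name introduced here):

with torch.no_grad():
    # .cpu() copies the tensor back to host memory before .numpy()
    predicted = model(torch.from_numpy(x_train).to(device)).cpu().numpy()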

2. Common Forms of Tensors

  • 0-dim: scalar
  • 1-dim: vector
  • 2-dim: matrix
  • 3-dim and above: n-dimensional tensor

2.1 Scalar

Usually a single numeric value.

x = tensor(42)
x
tensor(42)
x.dim()
0
x*2
tensor(84)
x.item()
42

2.2 Vector

Usually represents features along one dimension: $\vec{v} = [v_1, v_2, \dots, v_n]$

v = tensor([1.5,-0.5,3.0])
v
tensor([ 1.5000, -0.5000,  3.0000])
v.dim()
1
v.size()
torch.Size([3])

2.3 Matrix

Computation is generally done on matrices, which are typically multi-dimensional.

m = tensor([[1, 2], [3, 4]])
m
tensor([[1, 2],
        [3, 4]])
# Matrix multiplication
m.matmul(m)
tensor([[ 7, 10],
        [15, 22]])
# Element-wise (Hadamard) product, not matrix multiplication
m * m
tensor([[ 1,  4],
        [ 9, 16]])
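As a side note, the @ operator is shorthand for matmul, and torch.dot computes a true inner product, but only for 1-D tensors:

# @ is equivalent to matmul for 2-D tensors
m @ m
# torch.dot is the inner product of two 1-D tensors: 1*3 + 2*4 = 11
torch.dot(tensor([1., 2.]), tensor([3., 4.]))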

3. Building a PyTorch Neural Network for Temperature Prediction

3.1 Data Preparation

data = pd.read_csv('2006_temp.csv')
data
     date        year  month  day  week       temp_1  temp_2  average  actual
0    2006-01-01  2006  1      1    Sunday     23      15      19.6     24
1    2006-01-02  2006  1      2    Monday     25      18      22.0     25
2    2006-01-03  2006  1      3    Tuesday    22      18      20.2     22
3    2006-01-04  2006  1      4    Wednesday  22      16      19.5     22
4    2006-01-05  2006  1      5    Thursday   22      18      20.3     23
...  ...         ...   ...    ...  ...        ...     ...     ...      ...
360  2006-12-27  2006  12     27   Wednesday  17      13      15.5     18
361  2006-12-28  2006  12     28   Thursday   15      9       12.3     15
362  2006-12-29  2006  12     29   Friday     18      14      16.1     18
363  2006-12-30  2006  12     30   Saturday   18      11      14.9     18
364  2006-12-31  2006  12     31   Sunday     17      12      14.7     18

365 rows × 9 columns

  • date, year, month, day: the date
  • week: the day of the week
  • temp_1: yesterday's maximum temperature
  • temp_2: the maximum temperature two days ago
  • average: the historical average maximum temperature for this calendar day
  • actual: the actual maximum temperature on that day (the label)
# One-hot encode the categorical week column
features = pd.get_dummies(data.iloc[:,1:])
features
     year  month  day  temp_1  temp_2  average  actual  week_Friday  week_Monday  week_Saturday  week_Sunday  week_Thursday  week_Tuesday  week_Wednesday
0    2006  1      1    23      15      19.6     24      False        False        False          True         False          False         False
1    2006  1      2    25      18      22.0     25      False        True         False          False        False          False         False
2    2006  1      3    22      18      20.2     22      False        False        False          False        False          True          False
3    2006  1      4    22      16      19.5     22      False        False        False          False        False          False         True
4    2006  1      5    22      18      20.3     23      False        False        False          False        True           False         False
...  ...   ...    ...  ...     ...     ...      ...     ...          ...          ...            ...          ...            ...           ...
360  2006  12     27   17      13      15.5     18      False        False        False          False        False          False         True
361  2006  12     28   15      9       12.3     15      False        False        False          False        True           False         False
362  2006  12     29   18      14      16.1     18      True         False        False          False        False          False         False
363  2006  12     30   18      11      14.9     18      False        False        True           False        False          False         False
364  2006  12     31   17      12      14.7     18      False        False        False          True         False          False         False

365 rows × 14 columns

# Labels
labels = features['actual'].values

# Drop the label column from the features
features = features.drop(columns='actual')
features_list = list(features.columns)

features = features.values
labels.shape, features.shape
((365,), (365, 13))
from sklearn.preprocessing import StandardScaler

# Standardize the features to zero mean and unit variance
input_features = StandardScaler().fit_transform(features)
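As a quick sanity check, each standardized column should now have mean ≈ 0 and standard deviation ≈ 1:

# Verify the standardization column-wise
print(input_features.mean(axis=0).round(6))
print(input_features.std(axis=0).round(6))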

3.2 Building the Network

The input is 365 × 13. The first weight matrix w1 is 13 × 128, transforming the 13 input features into 128 hidden units, plus a bias term b1 (128 values); the hidden layer then passes through w2 (128 × 1) plus a bias term b2 (a single value).

The data dimensions change as follows (verified in the shape-check sketch after this list):

  • (365 × 13) × (13 × 128) + (128) = (365 × 128)
  • (365 × 128) × (128 × 1) + (1) = (365 × 1)
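A minimal shape-check sketch, using random stand-ins for the real data (X, w1, b1, w2, b2, h, out are names introduced here for illustration):

# Random stand-ins with the shapes described above
X = torch.randn(365, 13)
w1, b1 = torch.randn(13, 128), torch.randn(128)
w2, b2 = torch.randn(128, 1), torch.randn(1)
h = torch.relu(X.mm(w1) + b1)  # (365, 13) @ (13, 128) + (128,) -> (365, 128)
out = h.mm(w2) + b2            # (365, 128) @ (128, 1) + (1,) -> (365, 1)
print(h.shape, out.shape)      # torch.Size([365, 128]) torch.Size([365, 1])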
# Convert to tensors
x = torch.tensor(input_features, dtype=float)
# Reshape the labels to (365, 1) so they match the predictions' shape in the loop below
y = torch.tensor(labels, dtype=float).reshape(-1, 1)

# Initialize the weight parameters
torch.manual_seed(0)
weights = torch.randn((13, 128), dtype=float, requires_grad=True)
biases = torch.randn(128, dtype=float, requires_grad=True)
weights2 = torch.randn((128, 1), dtype=float, requires_grad=True)
biases2 = torch.randn(1, dtype=float, requires_grad=True)

learning_rate = 0.001
losses = []

for i in range(1000):
    # Compute the hidden layer
    hidden = x.mm(weights) + biases
    # Apply the activation function
    hidden = torch.relu(hidden)
    # Compute the predictions
    predictions = hidden.mm(weights2) + biases2
    # Compute the loss
    loss = torch.mean((predictions - y) ** 2)
    losses.append(loss.data.numpy())

    if i % 100 == 0:
        print('loss: ', loss)

    # Backward pass
    loss.backward()

    # Update the parameters
    weights.data.add_(-learning_rate * weights.grad.data)
    biases.data.add_(-learning_rate * biases.grad.data)
    weights2.data.add_(-learning_rate * weights2.grad.data)
    biases2.data.add_(-learning_rate * biases2.grad.data)

    # Remember to zero the gradients on every iteration
    weights.grad.data.zero_()
    biases.grad.data.zero_()
    weights2.grad.data.zero_()
    biases2.grad.data.zero_()
loss:  tensor(2591.0377, dtype=torch.float64, grad_fn=<MeanBackward0>)
loss:  tensor(89.3769, dtype=torch.float64, grad_fn=<MeanBackward0>)
loss:  tensor(85.7984, dtype=torch.float64, grad_fn=<MeanBackward0>)
loss:  tensor(84.2971, dtype=torch.float64, grad_fn=<MeanBackward0>)
loss:  tensor(83.3808, dtype=torch.float64, grad_fn=<MeanBackward0>)
loss:  tensor(82.7637, dtype=torch.float64, grad_fn=<MeanBackward0>)
loss:  tensor(82.3125, dtype=torch.float64, grad_fn=<MeanBackward0>)
loss:  tensor(81.9775, dtype=torch.float64, grad_fn=<MeanBackward0>)
loss:  tensor(81.7209, dtype=torch.float64, grad_fn=<MeanBackward0>)
loss:  tensor(81.5159, dtype=torch.float64, grad_fn=<MeanBackward0>)
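The losses recorded at every iteration can be plotted to check convergence:

# Plot the training curve collected in losses
plt.plot(losses)
plt.xlabel('iteration')
plt.ylabel('MSE loss')
plt.show()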

The same network can be built more concisely with torch's built-in modules:

input_size = input_features.shape[1]
hidden_size = 128
output_size = 1
batch_size = 16

my_nn = torch.nn.Sequential(
    torch.nn.Linear(input_size, hidden_size),
    torch.nn.Sigmoid(),
    torch.nn.Linear(hidden_size, output_size),
)

cost = torch.nn.MSELoss(reduction='mean')

# Adam optimizer
optimizer = torch.optim.Adam(my_nn.parameters(), lr=0.001)
# Train the network
losses = []
for i in range(1001):
    batch_loss = []
    # Train with mini-batches
    for start in range(0, len(input_features), batch_size):
        end = start + batch_size if start + batch_size < len(input_features) else len(input_features)
        xx = torch.tensor(input_features[start:end], dtype=torch.float)
        # Reshape the labels to (batch, 1) to match the prediction's shape
        yy = torch.tensor(labels[start:end].reshape(-1, 1), dtype=torch.float)
        prediction = my_nn(xx)
        loss = cost(prediction, yy)
        optimizer.zero_grad()
        loss.backward()   # backward pass
        optimizer.step()  # update the parameters
        batch_loss.append(loss.data.numpy())
    # Print the loss periodically
    if i % 100 == 0:
        losses.append(np.mean(batch_loss))
        print(i, np.mean(batch_loss))
0 1004.6153
100 16.486382
200 15.792033
300 15.071966
400 14.48944
500 13.912442
600 13.296806
700 12.735492
800 12.292979
900 11.964481
1000 11.716
x = torch.tensor(input_features, dtype=torch.float)
prediction = my_nn(x).data.numpy()

plt.figure(figsize=(12, 4))
plt.plot(data['date'], data['actual'], label='actual')
plt.plot(data['date'], prediction, label='predicted')
plt.xticks(data['date'][::60])
plt.legend()
plt.show()

[Figure: actual vs. predicted daily maximum temperatures over 2006]
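To complement the plot with a single number, a short evaluation sketch (mae is a name introduced here) computing the mean absolute error of the final predictions:

# Mean absolute error between the predictions and the true temperatures
mae = np.mean(np.abs(prediction.flatten() - labels))
print('MAE: {:.2f}'.format(mae))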
