框架干的最厉害的一件事就是:只需手动定义需要求导的前向传播,反向传播全部自动计算好了
import torch
# Method 1: request gradient tracking at construction time.
x = torch.randn(3, 4, requires_grad=True)  # 3x4 matrix; requires_grad defaults to False
x
# Method 2: flip the flag on an already-built tensor.
x = torch.randn(3, 4)
x.requires_grad = True
x
b = torch.randn(3, 4, requires_grad=True)
t = x + b
y = t.sum()
y  # treat y as the loss; backprop differentiates layer by layer starting from it
y.backward()
b.grad
out: tensor(2.1753, grad_fn=<SumBackward0>)
tensor([[1., 1., 1., 1.],
[1., 1., 1., 1.],
[1., 1., 1., 1.]])
虽然没有指定t的requires_grad,但因为计算中需要用到它,所以也会默认设为True
# t was produced from tensors that require grad, so it tracks gradients too.
x.requires_grad, b.requires_grad, t.requires_grad
out (True, True, True)
# Computation flow: initialize x, w and b with random scalars.
x = torch.rand(1)                      # input; gradients not tracked
b = torch.rand(1, requires_grad=True)  # bias
w = torch.rand(1, requires_grad=True)  # weight
y = w * x
z = y + b
# Backward pass. retain_graph=True keeps the graph alive so backward()
# could be called again; note PyTorch accumulates gradients across calls
# unless they are explicitly zeroed.
z.backward(retain_graph=True)
做一个线性回归试试水
构造一组输入数据X和其对应的标签y
import numpy as np
# Build inputs X and matching labels y for a toy linear fit (y = 2x + 1).
x_values = list(range(11))
# Start as a numpy ndarray; it will be converted to a torch tensor before
# training. Reshape to a column vector up front to avoid shape bugs later.
x_train = np.array(x_values, dtype=np.float32).reshape(-1, 1)
x_train.shape
y_values = [2 * i + 1 for i in x_values]
y_train = np.array(y_values, dtype=np.float32).reshape(-1, 1)
y_train.shape
import torch
import torch.nn as nn
线性回归模型:其实线性回归就是一个不加激活函数的全连接层
class LinearRegressionModel(nn.Module):
    """Linear regression = a single fully-connected layer, no activation.

    However complex the model, the pattern is the same: subclass nn.Module,
    declare the layers in __init__, and wire them up in forward().
    """

    def __init__(self, input_dim, output_dim):
        super().__init__()
        # The only layer: a dense mapping from input_dim to output_dim.
        self.linear = nn.Linear(input_dim, output_dim)

    def forward(self, x):
        # Forward pass simply applies the linear layer.
        return self.linear(x)
# One input feature in, one predicted value out.
input_dim = 1
output_dim = 1
model = LinearRegressionModel(input_dim, output_dim)
指定好参数和损失函数进行训练
# Hyper-parameters, loss and optimizer for training.
epochs = 1000         # number of passes over the data
learning_rate = 0.01
# SGD over the model's parameters; MSE is the natural loss for regression.
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)
criterion = nn.MSELoss()
训练模型
# Train the model.
# The numpy -> tensor conversion is loop-invariant, so do it once up front
# instead of rebuilding the same tensors on every iteration.
inputs = torch.from_numpy(x_train)
labels = torch.from_numpy(y_train)
for epoch in range(1, epochs + 1):
    # Gradients accumulate in PyTorch; zero them at every iteration.
    optimizer.zero_grad()
    # Forward pass.
    outputs = model(inputs)
    # Loss between predictions and labels.
    loss = criterion(outputs, labels)
    # Backward pass.
    loss.backward()
    # Update the weights.
    optimizer.step()
    if epoch % 50 == 0:
        print('epoch {}, loss {}'.format(epoch, loss.item()))
测试模型预测结果
# One forward pass over the training inputs to get predictions, converted to
# a numpy ndarray (handy for plotting or building pandas tables).
# no_grad() skips building an autograd graph for pure inference; the original
# used .data, but detaching via no_grad is the supported, safe equivalent.
with torch.no_grad():
    predicted = model(torch.from_numpy(x_train)).numpy()
predicted
模型的保存与读取
# Save only the state_dict (a dict of weights and biases), not the whole model.
torch.save(model.state_dict(), 'model.pkl')
# Restore the saved parameters into an existing model instance.
model.load_state_dict(torch.load('model.pkl'))
使用GPU进行训练
只需要把数据和模型传入到cuda里面就可以了
import torch
import torch.nn as nn
import numpy as np
class LinearRegressionModel(nn.Module):
    """A single fully-connected layer: linear regression without an activation."""

    def __init__(self, input_dim, output_dim):
        super().__init__()
        # Dense layer mapping input_dim features to output_dim outputs.
        self.linear = nn.Linear(input_dim, output_dim)

    def forward(self, x):
        # Apply the linear layer and return its output.
        return self.linear(x)
input_dim = 1
output_dim = 1
model = LinearRegressionModel(input_dim, output_dim)
# Train on the GPU when one is configured, otherwise fall back to the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# Move the model's parameters onto the chosen device.
model.to(device)
# Loss, optimizer and epoch count for the GPU run.
criterion = nn.MSELoss()
learning_rate = 0.01
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)
epochs = 1000
# GPU training loop.
# The host->device copies of the fixed training set are loop-invariant, so
# do them once up front instead of re-copying on every iteration.
inputs = torch.from_numpy(x_train).to(device)
labels = torch.from_numpy(y_train).to(device)
for epoch in range(1, epochs + 1):
    optimizer.zero_grad()              # gradients accumulate unless zeroed
    outputs = model(inputs)            # forward pass
    loss = criterion(outputs, labels)  # MSE against the labels
    loss.backward()                    # backward pass
    optimizer.step()                   # parameter update
    if epoch % 50 == 0:
        print('epoch {}, loss {}'.format(epoch, loss.item()))