-
打包数据
- 每次训练都是一批一批(batch)进行的,而不是一次性把全部样本送入模型
- batch size:每一批包含的样本数量(下面的代码中取 128)
- 黄金组合:Dataset(定义如何取出单个样本)+ DataLoader(负责分批与打乱)
- 固定套路,死规则,八股文,照办就行!
-
定义模型
-
代码实现:
import torch
from torch.utils.data import Dataset, DataLoader
from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split
import numpy as np
# ---------------------------------------------------------------------------
# Synthetic regression data
# ---------------------------------------------------------------------------
# 10,000 samples with 100 features, of which 10 are informative.
X, y = make_regression(n_samples=10000, n_features=100, n_informative=10, random_state=0)

# Add seeded Gaussian noise (scale 10) to every target.  The length is taken
# from ``y`` itself so it cannot drift out of sync with ``n_samples``.
np.random.seed(0)
noise = np.random.randn(y.shape[0]) * 10
y += noise

# Hold out 20% of the samples as the test split.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

# Add a second, independently seeded noise draw to the test targets only.
np.random.seed(1)
test_noise = np.random.randn(y_test.shape[0]) * 10
y_test += test_noise

# Standardise the features with statistics from the training split only, and
# apply the same transform to both splits.  ``_mean`` / ``_std`` stay at module
# level so inference code can reuse them.
_mean = X_train.mean(axis=0)
_std = X_train.std(axis=0)
_std[_std == 0] = 1.0  # constant columns: avoid division by zero
X_train = (X_train - _mean) / _std
X_test = (X_test - _mean) / _std

# ---------------------------------------------------------------------------
# Dataset definition
# ---------------------------------------------------------------------------
class MyDataset(Dataset):
    """Map-style dataset pairing each feature row with its scalar target."""

    def __init__(self, X, y):
        """Keep references to the feature container ``X`` and target container ``y``."""
        self.X = X
        self.y = y

    def __len__(self):
        """Return the number of samples, taken from the length of ``X``."""
        return len(self.X)

    def __getitem__(self, idx):
        """Return sample ``idx`` as a ``(features, target)`` pair of float32 tensors.

        The target is wrapped in a one-element list so it comes back as a
        tensor with one entry rather than a 0-d scalar.
        """
        features = torch.tensor(self.X[idx], dtype=torch.float32)
        target = torch.tensor([self.y[idx]], dtype=torch.float32)
        return features, target
# Training split: batches of 128 with shuffling enabled; the final partial
# batch is kept (drop_last=False).
train_dataset = MyDataset(X_train, y_train)
train_dataloader = DataLoader(
    train_dataset,
    batch_size=128,
    shuffle=True,
    drop_last=False,
)

# Test split: same batch size, fixed order, final partial batch kept.
test_dataset = MyDataset(X_test, y_test)
test_dataloader = DataLoader(
    test_dataset,
    batch_size=128,
    shuffle=False,
    drop_last=False,
)
from torch import nn
class Model(nn.Module):
    """Three-layer fully connected regressor with ReLU activations.

    The layer attribute names (``fc1``, ``fc2``, ``fc3``) are fixed, so the
    ``state_dict`` keys do not depend on the chosen sizes.
    """

    def __init__(self, n_features=100, hidden_sizes=(64, 32), n_outputs=1):
        """Build the three linear layers.

        Args:
            n_features: Width of each input row. Also stored as
                ``self.n_features`` so callers can validate inputs against it.
            hidden_sizes: Pair ``(h1, h2)`` giving the widths of the two
                hidden layers.
            n_outputs: Width of the output layer.

        The defaults reproduce the 100 -> 64 -> 32 -> 1 network.
        """
        super().__init__()
        hidden1, hidden2 = hidden_sizes
        # Plain int attribute: it is not a parameter or buffer, so it does
        # not appear in state_dict().
        self.n_features = n_features
        self.fc1 = nn.Linear(in_features=n_features, out_features=hidden1)
        self.fc2 = nn.Linear(in_features=hidden1, out_features=hidden2)
        self.fc3 = nn.Linear(in_features=hidden2, out_features=n_outputs)

    def forward(self, x):
        """Run the forward pass: fc1 -> ReLU -> fc2 -> ReLU -> fc3."""
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        return self.fc3(x)
# Optimisation settings.
epochs = 100
learning_rate = 1e-4

# Network, objective (mean squared error) and plain SGD optimiser.
model = Model()
loss_fn = nn.MSELoss()
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

# MSE history for the training and test splits; train() appends to these.
train_mses, test_mses = [], []

# Section marker for the monitoring helper that follows (string kept verbatim).
"""
定义过程监控函数
"""
def get_mse(dataloader, model=model):
    """Return the mean squared error of ``model`` over all of ``dataloader``.

    The squared error is summed over every target element and divided by the
    total element count.  Averaging the per-batch means instead would give a
    smaller final batch the same weight as a full one.

    Args:
        dataloader: Iterable yielding ``(batch_x, batch_y)`` tensor pairs.
        model: Module to evaluate; defaults to the module-level ``model``.

    Returns:
        The MSE as a Python float, or ``nan`` when ``dataloader`` yields no
        batches.

    Side effects:
        Switches ``model`` to evaluation mode and leaves it there.
    """
    model.eval()
    squared_error_sum = 0.0
    n_elements = 0
    with torch.no_grad():
        for batch_x, batch_y in dataloader:
            y_pred = model(batch_x)
            # reduction="sum" lets batches of different sizes be combined
            # exactly.
            squared_error_sum += torch.nn.functional.mse_loss(
                y_pred, batch_y, reduction="sum"
            ).item()
            n_elements += batch_y.numel()
    if n_elements == 0:
        return float("nan")
    return squared_error_sum / n_elements


# ---------------------------------------------------------------------------
# Training procedure
# ---------------------------------------------------------------------------
def train(dataloader=train_dataloader,
          model=model,
          loss_fn=loss_fn,
          optimizer=optimizer,
          epochs=epochs,
          test_dataloader=test_dataloader):
    """Train ``model`` with mini-batch gradient descent and record MSE history.

    The MSE on both splits is measured once before any update and once after
    every epoch.  Each measurement is appended to the module-level
    ``train_mses`` / ``test_mses`` lists and printed.

    Args:
        dataloader: Training batches; also used for the training-split MSE.
        model: Module to optimise.
        loss_fn: Callable ``loss_fn(y_pred, batch_y)`` returning a scalar loss.
        optimizer: Optimiser already bound to ``model``'s parameters.
        epochs: Number of full passes over ``dataloader``.
        test_dataloader: Batches used for the test-split MSE.

    Returns:
        None.
    """

    def _evaluate():
        # Measure both splits with the model being trained and store the
        # results in the shared history lists.
        train_mse = get_mse(dataloader=dataloader, model=model)
        test_mse = get_mse(dataloader=test_dataloader, model=model)
        train_mses.append(train_mse)
        test_mses.append(test_mse)
        return train_mse, test_mse

    # Baseline: measured once, before the first parameter update.
    train_mse, test_mse = _evaluate()
    print(f"在训练之前,模型在训练集和测试集上的 MSE 分别是:{train_mse:.6f} 和 {test_mse:.6f}")

    for epoch in range(epochs):
        # get_mse() leaves the model in eval mode, so switch back each epoch.
        model.train()
        for batch_x, batch_y in dataloader:
            # Clear gradients left over from the previous step.
            optimizer.zero_grad()
            # Forward pass and loss.
            y_pred = model(batch_x)
            loss = loss_fn(y_pred, batch_y)
            # Backward pass and parameter update.
            loss.backward()
            optimizer.step()

        # Performance on both splits after this epoch.
        train_mse, test_mse = _evaluate()
        print(f"第 {epoch + 1} 轮后,模型在训练集和测试集上的 MSE 分别是:{train_mse:.6f} 和 {test_mse:.6f}")


# ---------------------------------------------------------------------------
# Inference helper
# ---------------------------------------------------------------------------
def predict(X, model=model):
    """Run inference on a 2-D batch of feature rows.

    Args:
        X: Tensor or array-like of shape ``(n_samples, n_features)``.  It is
            converted to a float32 tensor and standardised with the
            module-level training statistics ``_mean`` / ``_std`` before being
            passed to the model.
        model: Module to run; defaults to the module-level ``model``.

    Returns:
        The model output for ``X``, computed without gradient tracking.

    Raises:
        ValueError: If ``X`` is not 2-D, or its last dimension does not match
            the input width the model expects (when that can be determined).

    Side effects:
        Switches ``model`` to evaluation mode.
    """
    # Accept tensors and array-likes alike, and normalise the dtype.
    X = torch.as_tensor(X, dtype=torch.float32)

    # Expected input width: prefer an explicit ``n_features`` attribute, else
    # fall back to the first linear layer registered on the model.
    expected = getattr(model, "n_features", None)
    if expected is None:
        first_linear = next(
            (m for m in model.modules() if isinstance(m, torch.nn.Linear)),
            None,
        )
        if first_linear is not None:
            expected = first_linear.in_features

    # Validate the shape before touching the model.
    if X.ndim != 2 or (expected is not None and X.size(-1) != expected):
        raise ValueError('数据格式有误')

    # Standardise with the training-set statistics kept at module level.
    mean = torch.as_tensor(_mean, dtype=X.dtype)
    std = torch.as_tensor(_std, dtype=X.dtype)
    X = (X - mean) / std

    model.eval()
    with torch.no_grad():
        y_pred = model(X)
    return y_pred
# Run training with the module-level defaults.
train()

# Plot the recorded MSE histories; labels keep the two curves distinguishable.
from matplotlib import pyplot as plt
plt.plot(train_mses, label="train MSE")
plt.plot(test_mses, label="test MSE")
plt.xlabel("evaluation step")
plt.ylabel("MSE")
plt.legend()

# Inference smoke test on the first test sample; unsqueeze adds the batch
# dimension that predict() requires.
# NOTE(review): predict() applies its own preprocessing -- confirm that samples
# taken from test_dataset are still in the raw feature space, otherwise they
# are scaled twice.
X_to_test, y_to_test = test_dataset[0]
predict(X=X_to_test.unsqueeze(dim=0))

# Save only the learned parameters (state_dict), not the whole module.
torch.save(model.state_dict(), 'model.pt')

# Load: rebuild the architecture, then restore the saved parameters into it.
model1 = Model()
model1.load_state_dict(state_dict=torch.load('model.pt'))