根据课程预代码,进行调试并且重构,得出模型的最低loss:1.2387
1. 模型的构建和训练代码:
import math
from torch import nn
from torch.utils.data import DataLoader, Dataset, random_split
import numpy as np
import torch
import pandas as pd
from torch.utils.tensorboard import SummaryWriter
# Data file locations.
train_data_path = "HW1_file/HW1/covid.train_new.csv"
test_data_path = "HW1_file/HW1/covid.test_un.csv"
# Use the GPU when available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
class COVID19Dataset(Dataset):
    """COVID-19 dataset backed by a CSV file.

    With ``is_train=True`` the last CSV column is split off as the
    regression target ``y``; otherwise only the features ``x`` are kept.
    """

    def __init__(self, path, is_train=True):
        raw = pd.read_csv(path).values.astype(np.float32)
        if is_train:
            # All columns but the last are features; the last is the target.
            self.x = torch.tensor(raw[:, :-1])
            self.y = torch.tensor(raw[:, -1])
        else:
            # Test split: features only, no target column present.
            self.x = torch.tensor(raw)

    def __len__(self):
        return len(self.x)

    def __getitem__(self, idx):
        # Train/valid items are (features, target); test items are features only.
        if hasattr(self, 'y'):
            return self.x[idx], self.y[idx]
        return self.x[idx]
def train_valid_split(data_set, valid_ratio, seed):
    """Randomly partition ``data_set`` into (train, valid) Subsets.

    ``valid_ratio`` is the fraction held out for validation; ``seed``
    makes the split reproducible across runs.
    """
    n_valid = int(valid_ratio * len(data_set))
    lengths = [len(data_set) - n_valid, n_valid]
    # A dedicated seeded generator keeps the partition deterministic.
    gen = torch.Generator().manual_seed(seed)
    return random_split(data_set, lengths, generator=gen)
def same_seed(seed):
    """Fix all relevant RNG seeds for reproducibility.

    Covers Python's stdlib ``random``, NumPy, and PyTorch (CPU and all
    GPUs), and forces cuDNN into deterministic mode.

    Args:
        seed: integer seed applied to every generator.
    """
    import random  # local import so the file's top-level imports stay untouched
    torch.backends.cudnn.deterministic = True  # fixed kernel selection => repeatable results
    torch.backends.cudnn.benchmark = False     # disable nondeterministic autotuning
    random.seed(seed)       # fix: stdlib RNG was previously left unseeded
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)
# Fix the random seed for reproducibility.
same_seed(1123)
# Load the full training data and the test data.
dataset = COVID19Dataset(train_data_path, is_train=True)
test_dataset = COVID19Dataset(test_data_path, is_train=False)
# Determine the input dimension (all columns except the target).
data = pd.read_csv(train_data_path).values.astype(np.float32)
input_dim = data.shape[1] - 1
print(f"Input dimension: {input_dim}")
# Split the training data into training and validation sets.
train_dataset, valid_dataset = train_valid_split(dataset, valid_ratio=0.2, seed=1123)
# Wrap each split in a DataLoader (only the training split is shuffled).
train_loader = DataLoader(train_dataset, batch_size=108, shuffle=True)
valid_loader = DataLoader(valid_dataset, batch_size=108, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=108, shuffle=False)
# 构建神经网络
class Mymodule(nn.Module):
    """Small fully-connected regression net: input_dim -> 16 -> 8 -> 1."""

    def __init__(self, input_dim):
        super().__init__()
        # Three linear layers with ReLU activations between them.
        self.layers = nn.Sequential(
            nn.Linear(input_dim, 16),
            nn.ReLU(),
            nn.Linear(16, 8),
            nn.ReLU(),
            nn.Linear(8, 1),
        )

    def forward(self, x):
        # Collapse the trailing singleton dimension: (B, 1) -> (B).
        return self.layers(x).squeeze(1)
net = Mymodule(input_dim).to(device)
# TensorBoard writer for logging training/validation curves.
writer = SummaryWriter("HW1")
# Regression loss.
loss_mse = nn.MSELoss().to(device)
# Optimizer: SGD with momentum and L2 weight decay.
learning_rate = 1e-5
optim = torch.optim.SGD(net.parameters(), learning_rate, momentum=0.9, weight_decay=0.001)
# Training-loop bookkeeping.
epoch_iter = 3000          # maximum number of epochs
train_step = 0             # global batch counter across epochs
early_stop_count = 0       # epochs since the validation loss last improved
best_loss = float('inf')   # best validation loss seen so far
import os  # local import: used only to create the checkpoint directory

# Main training loop with per-epoch validation, best-checkpoint saving,
# and early stopping.
for epoch in range(epoch_iter):
    net.train()  # training mode
    loss_mean = 0
    for batch in train_loader:
        x_train, y_train = batch
        x_train, y_train = x_train.to(device), y_train.to(device)
        y_pre = net(x_train)  # forward pass
        loss_train = loss_mse(y_pre, y_train)
        optim.zero_grad()      # clear accumulated gradients
        loss_train.backward()  # backpropagate
        optim.step()           # update parameters
        loss_mean += loss_train.item()
        train_step += 1
        # Log the running average of the last 20 batches.
        if train_step % 20 == 0:
            print(f"Epoch [{epoch + 1}/{epoch_iter}], Step [{train_step}], Loss: {loss_mean / 20:.4f}")
            writer.add_scalar("train_loss", loss_mean / 20, train_step)
            loss_mean = 0  # reset the accumulator
    # ---- validation ----
    net.eval()  # evaluation mode
    loss_record = []
    with torch.no_grad():  # gradients are not needed during evaluation
        for x, y in valid_loader:
            x, y = x.to(device), y.to(device)
            pred = net(x)
            loss = loss_mse(pred, y)
            loss_record.append(loss.item())
    # Mean validation loss for this epoch, logged to TensorBoard.
    mean_valid_loss = sum(loss_record) / len(loss_record)
    print(f'Epoch [{epoch + 1}/{epoch_iter}]: Valid loss: {mean_valid_loss:.4f}')
    writer.add_scalar('Loss/valid', mean_valid_loss, epoch + 1)
    # Keep the checkpoint with the lowest validation loss.
    if mean_valid_loss < best_loss:
        best_loss = mean_valid_loss
        os.makedirs('models', exist_ok=True)  # fix: torch.save fails if the directory is missing
        torch.save(net.state_dict(), 'models/model.ckpt')
        print(f'Saving model with loss {best_loss:.3f}...')
        early_stop_count = 0
    else:
        early_stop_count += 1  # no improvement this epoch
    # Early stopping: abort once validation has stalled for 300 epochs.
    if early_stop_count >= 300:
        print('\nModel is not improving, so we halt the training session.')
        break
print(f"-----best_loss:{best_loss}-----")
2. 验证集目标和模型的对比,测试集的预测:
import torch
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from torch.utils.data import DataLoader, Dataset
from torch import nn
# Use the GPU when available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# 定义数据集类
class COVID19Dataset(Dataset):
    """COVID-19 dataset loaded from a CSV file.

    Args:
        path: CSV file path.
        is_train: when True, the last column becomes the target ``y``;
            when False only the features ``x`` are loaded.
    """

    def __init__(self, path, is_train=True):
        # Fix: pandas opens the file itself — the previous redundant
        # `with open(path, "r")` handle was never used and opened the
        # file a second time.
        data = pd.read_csv(path).values.astype(np.float32)
        if is_train:
            self.x = torch.FloatTensor(data[:, :-1])  # features (all but last column)
            self.y = torch.FloatTensor(data[:, -1])   # target (last column)
        else:
            self.x = torch.FloatTensor(data[:, :])    # test set: features only

    def __len__(self):
        return len(self.x)

    def __getitem__(self, idx):
        # (features, target) for train/valid; features alone for test.
        if hasattr(self, 'y'):
            return self.x[idx], self.y[idx]
        else:
            return self.x[idx]
# 定义神经网络模型
class Mymodule(nn.Module):
def __init__(self, input_dim):
super(Mymodule, self).__init__()
self.layers = nn.Sequential(
nn.Linear(input_dim, 16), # 全连接层
nn.ReLU(), # 激活函数
nn.Linear(16, 8),
nn.ReLU(),
nn.Linear(8, 1)
)
def forward(self, x):
x = self.layers(x)
x = x.squeeze(1) # (B, 1) -> (B)
return x
# Paths for this project.
train_data_path = "HW1_file/HW1/covid.train_new.csv"
test_data_path = "HW1_file/HW1/covid.test_un.csv"
model_path = "models/model.ckpt"
# Determine the input dimension (all columns except the target).
data = pd.read_csv(train_data_path).values.astype(np.float32)
input_dim = data.shape[1] - 1
print(f"Input dimension: {input_dim}")
# Build DataLoaders over the training data (evaluated in order, so no
# shuffling) and the test data.
dataset = COVID19Dataset(train_data_path, is_train=True)
dataset_test = COVID19Dataset(test_data_path, is_train=False)
train_loader = DataLoader(dataset, batch_size=128, shuffle=False)
test_loader = DataLoader(dataset_test, batch_size=128, shuffle=False)
# Rebuild the model and load the best checkpoint.
net = Mymodule(input_dim).to(device)
# Fix: map_location lets a GPU-trained checkpoint load on a CPU-only machine.
net.load_state_dict(torch.load(model_path, map_location=device))
# Run the model over the training data to compare predictions against targets.
net.eval()  # evaluation mode
all_preds = []
all_targets = []
with torch.no_grad():  # gradients are not needed for inference
    for x_batch, y_batch in train_loader:
        x_batch, y_batch = x_batch.to(device), y_batch.to(device)
        y_pred = net(x_batch)
        all_preds.append(y_pred.cpu().numpy())
        all_targets.append(y_batch.cpu().numpy())
# Merge the per-batch results into flat arrays.
all_preds = np.concatenate(all_preds, axis=0)
all_targets = np.concatenate(all_targets, axis=0)
# Scatter plot of predictions vs. true values, with a y = x reference line.
plt.figure(figsize=(10, 6))
plt.scatter(all_targets, all_preds, alpha=0.5)
plt.xlabel('True Values')
plt.ylabel('Predictions')
plt.title('True Values vs Predictions')
plt.plot([min(all_targets), max(all_targets)], [min(all_targets), max(all_targets)], 'r--', lw=2)
plt.grid(True)
plt.savefig("true_vs_predictions.png")
plt.show()
# Predict on the test dataset and export the results.
output_excel_path = "HW1_file/HW1/covid.test.xlsx"
test_predictions = []
with torch.no_grad():  # inference only
    for x_batch in test_loader:
        x_batch = x_batch.to(device)  # move the batch to the selected device
        y_pred = net(x_batch)
        test_predictions.append(y_pred.cpu().numpy())
# Merge the per-batch predictions.
test_predictions = np.concatenate(test_predictions, axis=0)
# Save the predictions to an Excel file.
# NOTE(review): to_excel needs an Excel writer engine (e.g. openpyxl) installed — confirm.
df = pd.DataFrame(test_predictions, columns=["Predicted"])
df.to_excel(output_excel_path, index=False)
print(f"Predictions saved to {output_excel_path}")
(初学记录用,侵权即删)