# Import libraries
import csv  # convenient CSV file reading

import matplotlib.pyplot as plt  # data visualization
from matplotlib.pyplot import figure
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, Dataset
# Define the dataset class
class Mydata(Dataset):
    """Dataset that loads numeric samples from a CSV file.

    The CSV header row and the first (id) column are dropped. In 'train'
    mode the last column is taken as the regression target and the rest
    as features; in 'test' mode every remaining column is a feature and
    no target is stored.

    NOTE(review): the original left the mode-specific split unimplemented
    ("再根据模式的不同对数据进行不同的处理"); the last-column-as-target
    convention is assumed here — confirm against the actual data layout.
    """

    def __init__(self, path, mode='train'):
        # `path` comes first because Mydataloader calls Mydata(path, mode);
        # the original signature (mode='train', path) was invalid Python
        # (non-default parameter after a default one).
        self.mode = mode
        with open(path, 'r') as fp:
            rows = list(csv.reader(fp))
        # Drop the header row and the id column, keep numeric values.
        data = np.array(rows[1:])[:, 1:].astype(float)
        if mode == 'test':
            self.data = torch.FloatTensor(data)
            self.result = None
        else:
            self.data = torch.FloatTensor(data[:, :-1])
            self.result = torch.FloatTensor(data[:, -1])

    def __getitem__(self, index):
        # The test split has no targets, so it yields only the features.
        if self.mode == 'test':
            return self.data[index]
        return self.data[index], self.result[index]

    def __len__(self):
        return len(self.data)
# Define the DataLoader for the dataset
def Mydataloader(path, mode, batch_size, num_workers=0):
    """Build a DataLoader over a Mydata dataset.

    Args:
        path: CSV file to load.
        mode: 'train' or 'test' (forwarded to Mydata).
        batch_size: number of samples per batch.
        num_workers: worker processes for loading (default 0).

    Returns:
        A torch DataLoader over the dataset.
    """
    dataset = Mydata(path, mode)
    # Shuffle only while training: test-time batches must keep the CSV row
    # order so that saved predictions line up with their ids.
    # (The original also passed `num_workers` positionally after a keyword
    # argument, which is a SyntaxError.)
    dataloader = DataLoader(dataset, batch_size,
                            shuffle=(mode == 'train'),
                            num_workers=num_workers)
    return dataloader
# Build the model
class Mymodel(nn.Module):
    """Simple feed-forward regression network: input_dim -> 64 -> 1."""

    def __init__(self, input_dim):
        # `input_dim` was an undefined global in the original; making it a
        # constructor argument lets the model adapt to any feature count.
        super(Mymodel, self).__init__()
        self.net = nn.Sequential(
            nn.Linear(input_dim, 64),
            nn.ReLU(),
            nn.Linear(64, 1)
        )
        self.criterion = nn.MSELoss()

    def forward(self, x):
        """Return predictions for a (batch, input_dim) float tensor."""
        return self.net(x)

    def cal_loss(self, result, target):
        """MSE loss between predictions and targets.

        The original body referenced an undefined name `pred` instead of
        the `result` parameter — fixed here.
        """
        return self.criterion(result, target)
# Define the training function
def train(dataset, model, device, config):
    """Train `model` on batches drawn from `dataset`.

    Args:
        dataset: iterable yielding (x, y) batches (e.g. a DataLoader).
        model: module providing forward() and cal_loss(pred, target).
        device: torch device (or device string) to run on.
        config: dict with 'epoch' (int), 'optimizer' (name of a class in
            torch.optim) and 'save_path' (checkpoint destination).

    Returns:
        Dict of loss histories keyed 'train' and 'dev', for later
        visualization/analysis.
    """
    epochs = config['epoch']
    # The optimizer must be given the model parameters — the original
    # called the optimizer class with no arguments at all.
    optimizer = getattr(torch.optim, config['optimizer'])(model.parameters())
    # Key renamed 'val' -> 'dev' to match what plot_learning_curve reads.
    # NOTE(review): no validation loop exists here, so 'dev' stays empty.
    loss_record = {'train': [], 'dev': []}
    for epoch in range(epochs):
        model.train()
        for x, y in dataset:
            optimizer.zero_grad()
            x, y = x.to(device), y.to(device)
            result = model(x)
            loss = model.cal_loss(result, y)
            loss.backward()
            optimizer.step()
            loss_record['train'].append(loss.detach().cpu())
    # `epochs` is already the completed count; the original mutated the
    # loop variable (`epoch = epoch + 1`) to report it.
    print('Finished training after {} epochs'.format(epochs))
    # state_dict is a method — the original saved the bound method object
    # instead of the parameter dict.
    torch.save(model.state_dict(), config['save_path'])
    return loss_record
# Parameters and hyperparameters
# Use the GPU when available, otherwise fall back to the CPU.
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
# config: hyperparameters passed around as a dict.
# (The original literal used '=' instead of ':', omitted commas, and left
# Adam unquoted — all syntax errors.)
config = {
    'epoch': 1000,                    # number of training epochs
    'batch_size': 4,                  # samples per batch
    'optimizer': 'Adam',              # name of a torch.optim class
    'save_path': 'models/model.pth',  # checkpoint destination
}
# Define the testing (inference) function
def test(dataset, model, device):
model.eval()
preds = []
for x in dataset:
x = x.to(device)
with torch.no_grad():
pred = model(x)
preds.append(pred.detach().cpu())
preds = torch.cat(preds, dim=0).numpy()
return preds
# Save the test results
def save_pred(preds, file):
    """Write predictions to `file` as CSV with columns (id, tested_positive).

    Args:
        preds: iterable of per-sample predictions; row ids are their
            enumeration order.
        file: destination CSV path.
    """
    print('Saving results to {}'.format(file))
    # newline='' prevents the csv module from emitting blank rows on
    # Windows (csv.writer docs require it for files).
    with open(file, 'w', newline='') as fp:
        writer = csv.writer(fp)
        writer.writerow(['id', 'tested_positive'])
        for i, p in enumerate(preds):
            writer.writerow([i, p])
# NOTE(review): `tt_set` (the test DataLoader) and `model` are never defined
# in this file — they must be created first (presumably via Mydataloader and
# Mymodel + train); `device` is defined above. Verify before running.
preds = test(tt_set, model, device)
save_pred(preds, 'pred.csv')
# Visualize the results
def plot_learning_curve(loss_record, title=''):
    """Plot training (and, when present, dev) loss against training step.

    Args:
        loss_record: dict holding a 'train' list of per-step losses and
            optionally a 'dev' (or legacy 'val') list of validation losses.
        title: text appended to the plot title.
    """
    total_steps = len(loss_record['train'])
    x_1 = range(total_steps)
    # Accept either key name; the training loop historically used 'val'.
    dev_losses = loss_record.get('dev') or loss_record.get('val') or []
    figure(figsize=(6, 4))
    plt.plot(x_1, loss_record['train'], c='tab:red', label='train')
    # Guard against an empty dev record: the original divided by its
    # length, raising ZeroDivisionError whenever no validation was run.
    if dev_losses:
        x_2 = x_1[::total_steps // len(dev_losses)]
        plt.plot(x_2, dev_losses, c='tab:cyan', label='dev')
    plt.ylim(0.0, 5.)
    plt.xlabel('Training steps')
    plt.ylabel('MSE loss')
    plt.title('Learning curve of {}'.format(title))
    plt.legend()
    plt.show()