1. 代码详细解释
1. 第一段代码
这段代码首先定义了一些参数,包括编码器个数、输入维度、句子长度、词嵌入维度等。然后它保存了这些超参数到指定路径。接着,它加载训练和验证数据集,并创建了对应的数据加载器。之后,它定义了一个模型,使用了一个叫做DSCTransformer的模型,以及交叉熵损失函数和Adam 优化器。最后,它将模型移动到可用的设备(如果有 GPU 则移动到 GPU,否则移动到 CPU)。
def train(model_save_path, train_result_path, val_result_path, hp_save_path, epochs=100):
    """Train the DSCTransformer model; save hyper-parameters, results and best weights."""
    # Hyper-parameters
    N = 4             # number of stacked encoder blocks
    input_dim = 1024  # input feature dimension
    seq_len = 16      # sequence length (NOTE(review): not used below — confirm intent)
    d_model = 64      # embedding dimension
    d_ff = 256        # feed-forward layer dimension
    head = 4          # number of attention heads
    dropout = 0.1     # dropout rate
    lr = 3E-5         # learning rate
    batch_size = 64   # batch size
    # Persist the hyper-parameters as a plain-text dict for later reference
    hyper_parameters = {'任务编码器堆叠数: ': '{}'.format(N),
                        '全连接层维度: ': '{}'.format(d_ff),
                        '任务注意力头数: ': '{}'.format(head),
                        'dropout: ': '{}'.format(dropout),
                        '学习率: ': '{}'.format(lr),
                        'batch_size: ': '{}'.format(batch_size)}
    fs = open(hp_save_path, 'w')        # open the hyper-parameter file for writing
    fs.write(str(hyper_parameters))     # dump the dict as its string repr
    fs.close()                          # close the file
    # Load the data (Windows-style relative paths)
    train_path = r'.\data\train\train.csv'  # training data path
    val_path = r'.\data\val\val.csv'        # validation data path
    train_dataset = MyDataset(train_path, 'fd')  # training dataset
    val_dataset = MyDataset(val_path, 'fd')      # validation dataset
    # drop_last avoids a ragged final batch in both loaders
    train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True, drop_last=True)
    val_loader = DataLoader(dataset=val_dataset, batch_size=batch_size, shuffle=True, drop_last=True)
    # Build the model, loss and optimizer
    model = DSCTransformer(input_dim=input_dim, num_classes=10, dim=d_model, depth=N,
                           heads=head, mlp_dim=d_ff, dim_head=d_model, emb_dropout=dropout, dropout=dropout)
    criterion = nn.CrossEntropyLoss()  # classification loss
    params = [p for p in model.parameters() if p.requires_grad]  # only trainable parameters
    optimizer = optim.Adam(params, lr=lr)
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')  # pick GPU if available
    print("using {} device.".format(device))
    model.to(device)  # move the model to the selected device
2. 第二段代码
这段代码是一个训练循环,它用于在每个训练周期(epoch)中训练模型,并在每个周期结束后评估模型的性能。在每个训练周期中,代码首先使用模型在训练数据上进行训练,然后使用模型在验证数据上进行验证,并打印出每个周期的训练损失、训练准确率、验证损失和验证准确率。
best_acc_fd = 0.0        # best validation accuracy seen so far
train_result = []        # rows written to the training-result file
result_train_loss = []   # per-epoch mean training loss
result_train_acc = []    # per-epoch mean training accuracy
val_result = []          # rows written to the validation-result file
result_val_loss = []     # per-epoch mean validation loss
result_val_acc = []      # per-epoch mean validation accuracy
# Main training loop
for epoch in range(epochs):
    # ---- training phase ----
    train_loss = []      # per-batch training losses for this epoch
    train_acc = []       # per-batch training accuracies for this epoch
    model.train()        # training mode (enables dropout)
    train_bar = tqdm(train_loader)  # progress bar over training batches
    for datas, labels in train_bar:
        optimizer.zero_grad()                      # reset accumulated gradients
        outputs = model(datas.float().to(device))  # forward pass
        loss = criterion(outputs, labels.type(torch.LongTensor).to(device))  # cross-entropy loss
        loss.backward()                            # back-propagate
        optimizer.step()                           # update parameters
        # fraction of correct predictions in this batch
        acc = (outputs.argmax(dim=-1) == labels.to(device)).float().mean()
        # record this batch's loss and accuracy
        train_loss.append(loss.item())
        train_acc.append(acc)
        # show the current loss on the progress bar
        train_bar.desc = "train epoch[{}/{}] loss:{:.3f}".format(epoch + 1, epochs, loss.item())
    # ---- validation phase ----
    model.eval()         # evaluation mode (disables dropout)
    valid_loss = []      # per-batch validation losses
    valid_acc = []       # per-batch validation accuracies
    val_bar = tqdm(val_loader)  # progress bar over validation batches
    for datas, labels in val_bar:
        with torch.no_grad():  # no gradients needed during evaluation
            outputs = model(datas.float().to(device))  # forward pass
            loss = criterion(outputs, labels.type(torch.LongTensor).to(device))  # loss
        # fraction of correct predictions in this batch
        acc = (outputs.argmax(dim=-1) == labels.to(device)).float().mean()
        # record this batch's loss and accuracy
        valid_loss.append(loss.item())
        valid_acc.append(acc)
        # update the progress-bar description
        val_bar.desc = "valid epoch[{}/{}]".format(epoch + 1, epochs)
    # report per-epoch mean loss and accuracy
    print(f"[{epoch + 1:02d}/{epochs:02d}] train loss = "
          f"{sum(train_loss) / len(train_loss):.5f}, train acc = {sum(train_acc) / len(train_acc):.5f}", end=" ")
    print(f"valid loss = {sum(valid_loss) / len(valid_loss):.5f}, valid acc = {sum(valid_acc) / len(valid_acc):.5f}")
3. 第三段代码
这段代码是用于记录训练和验证结果,并保存这些结果到文件中。
# record the per-epoch mean loss/accuracy for later export
result_train_loss.append(sum(train_loss) / len(train_loss))
result_train_acc.append((sum(train_acc) / len(train_acc)).item())
result_val_loss.append(sum(valid_loss) / len(valid_loss))
result_val_acc.append((sum(valid_acc) / len(valid_acc)).item())
这几行代码分别计算并记录了每个训练周期中的平均训练损失、平均训练准确率、平均验证损失和平均验证准确率。
# keep the weights of the epoch with the best validation accuracy
if best_acc_fd <= sum(valid_acc) / len(valid_acc):
    best_acc_fd = sum(valid_acc) / len(valid_acc)
    torch.save(model.state_dict(), model_save_path)
这段代码用于更新最佳验证准确率并保存最佳模型参数。如果当前的验证准确率大于之前记录的最佳准确率,则更新最佳准确率为当前准确率,并保存当前模型参数到指定路径。
# assemble result matrices: row 0 = losses, row 1 = accuracies
train_result.append(result_train_loss)
train_result.append(result_train_acc)
val_result.append(result_val_loss)
val_result.append(result_val_acc)
这里将每个训练和验证结果存储到对应的列表中。
# export as CSV: one row of losses, one row of accuracies, one column per epoch
np.savetxt(train_result_path, np.array(train_result), fmt='%.5f', delimiter=',')
np.savetxt(val_result_path, np.array(val_result), fmt='%.5f', delimiter=',')
最后,将训练和验证结果保存到文件中。它使用了 NumPy 库的 savetxt 函数将列表转换为数组,并将数组保存到指定路径的文件中。
2. 附上所有代码
import os

import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from tqdm import tqdm

from data_set import MyDataset
from model import DSCTransformer
def train(model_save_path, train_result_path, val_result_path, hp_save_path, epochs=100):
    """Train a DSCTransformer classifier and persist results.

    Args:
        model_save_path: file that receives the weights of the best epoch
            (highest validation accuracy).
        train_result_path: CSV file receiving two rows — per-epoch mean
            training losses and accuracies.
        val_result_path: CSV file receiving two rows — per-epoch mean
            validation losses and accuracies.
        hp_save_path: text file receiving the hyper-parameter dict.
        epochs: number of training epochs.
    """
    # Hyper-parameters (the unused seq_len local was removed).
    N = 4             # number of stacked encoder blocks
    input_dim = 1024  # input feature dimension
    d_model = 64      # embedding dimension
    d_ff = 256        # feed-forward (MLP) dimension
    head = 4          # number of attention heads
    dropout = 0.1     # dropout rate
    lr = 3E-5         # learning rate
    batch_size = 64   # batch size

    # Persist the hyper-parameters for later reference.
    hyper_parameters = {'任务编码器堆叠数: ': '{}'.format(N),
                        '全连接层维度: ': '{}'.format(d_ff),
                        '任务注意力头数: ': '{}'.format(head),
                        'dropout: ': '{}'.format(dropout),
                        '学习率: ': '{}'.format(lr),
                        'batch_size: ': '{}'.format(batch_size)}
    # Context manager guarantees the file is closed even on error.
    with open(hp_save_path, 'w') as fs:
        fs.write(str(hyper_parameters))

    # Data loaders; drop_last avoids a ragged final batch.
    train_path = r'.\data\train\train.csv'
    val_path = r'.\data\val\val.csv'
    train_dataset = MyDataset(train_path, 'fd')
    val_dataset = MyDataset(val_path, 'fd')
    train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True, drop_last=True)
    val_loader = DataLoader(dataset=val_dataset, batch_size=batch_size, shuffle=True, drop_last=True)

    # Model / loss / optimizer. The model is moved to the device before the
    # optimizer is created (nn.Module.to is in-place, but this order is the
    # conventional, unambiguous one).
    model = DSCTransformer(input_dim=input_dim, num_classes=10, dim=d_model, depth=N,
                           heads=head, mlp_dim=d_ff, dim_head=d_model,
                           emb_dropout=dropout, dropout=dropout)
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    print("using {} device.".format(device))
    model.to(device)
    criterion = nn.CrossEntropyLoss()
    params = [p for p in model.parameters() if p.requires_grad]
    optimizer = optim.Adam(params, lr=lr)

    best_acc_fd = 0.0        # best validation accuracy seen so far
    train_result = []        # rows written to train_result_path
    result_train_loss = []   # per-epoch mean training loss
    result_train_acc = []    # per-epoch mean training accuracy
    val_result = []          # rows written to val_result_path
    result_val_loss = []     # per-epoch mean validation loss
    result_val_acc = []      # per-epoch mean validation accuracy

    for epoch in range(epochs):
        # ---- training phase ----
        train_loss = []
        train_acc = []
        model.train()
        train_bar = tqdm(train_loader)
        for datas, labels in train_bar:
            optimizer.zero_grad()
            outputs = model(datas.float().to(device))
            # .long() is the device-agnostic equivalent of
            # .type(torch.LongTensor) and avoids a forced CPU round-trip.
            loss = criterion(outputs, labels.long().to(device))
            loss.backward()
            optimizer.step()
            # argmax over the class dimension -> predicted class per sample.
            acc = (outputs.argmax(dim=-1) == labels.to(device)).float().mean()
            train_loss.append(loss.item())
            # Store plain floats rather than tensors so the accumulators do
            # not keep GPU tensors alive for the whole epoch.
            train_acc.append(acc.item())
            train_bar.desc = "train epoch[{}/{}] loss:{:.3f}".format(epoch + 1, epochs, loss.item())

        # ---- validation phase ----
        model.eval()
        valid_loss = []
        valid_acc = []
        val_bar = tqdm(val_loader)
        with torch.no_grad():  # no gradients needed during evaluation
            for datas, labels in val_bar:
                outputs = model(datas.float().to(device))
                loss = criterion(outputs, labels.long().to(device))
                acc = (outputs.argmax(dim=-1) == labels.to(device)).float().mean()
                valid_loss.append(loss.item())
                valid_acc.append(acc.item())
                val_bar.desc = "valid epoch[{}/{}]".format(epoch + 1, epochs)

        # Per-epoch averages, computed once and reused below.
        mean_train_loss = sum(train_loss) / len(train_loss)
        mean_train_acc = sum(train_acc) / len(train_acc)
        mean_val_loss = sum(valid_loss) / len(valid_loss)
        mean_val_acc = sum(valid_acc) / len(valid_acc)
        print(f"[{epoch + 1:02d}/{epochs:02d}] train loss = "
              f"{mean_train_loss:.5f}, train acc = {mean_train_acc:.5f}", end=" ")
        print(f"valid loss = {mean_val_loss:.5f}, valid acc = {mean_val_acc:.5f}")

        result_train_loss.append(mean_train_loss)
        result_train_acc.append(mean_train_acc)
        result_val_loss.append(mean_val_loss)
        result_val_acc.append(mean_val_acc)

        # Keep the weights of the best-performing epoch on the validation set.
        if best_acc_fd <= mean_val_acc:
            best_acc_fd = mean_val_acc
            torch.save(model.state_dict(), model_save_path)

    # Two rows per file: losses then accuracies, one column per epoch.
    train_result.append(result_train_loss)
    train_result.append(result_train_acc)
    val_result.append(result_val_loss)
    val_result.append(result_val_acc)
    np.savetxt(train_result_path, np.array(train_result), fmt='%.5f', delimiter=',')
    np.savetxt(val_result_path, np.array(val_result), fmt='%.5f', delimiter=',')
if __name__ == '__main__':
    group_index = 4
    # Run five repeated experiments (exp01 .. exp05) for this group.
    for i in range(5):
        model_save_path = "result/result_own_noisy/group{}/exp0{}/model.pth".format(group_index, i + 1)
        hp_save_path = "result/result_own_noisy/group{}/parameters.txt".format(group_index)
        train_result_path = "result/result_own_noisy/group{}/exp0{}/train_result.txt".format(group_index, i + 1)
        val_result_path = "result/result_own_noisy/group{}/exp0{}/val_result.txt".format(group_index, i + 1)
        # Create the output directory up front: torch.save and np.savetxt
        # both fail if the target directory does not exist.
        os.makedirs(os.path.dirname(model_save_path), exist_ok=True)
        train(model_save_path, train_result_path, val_result_path, hp_save_path)
3. 所有文件链接如下:
链接:https://pan.baidu.com/s/12SEwGc36TN-jAbfx5fmHrw
提取码:rmue