import csv  # restored: was fused into the gzip comment in the original
import gzip  # needed to open the .gz dataset files
import math
import time

import torch
from torch.nn.utils.rnn import (
    pack_padded_sequence,
    pack_sequence,
    pad_packed_sequence,
    pad_sequence,
)
from torch.utils.data import DataLoader, Dataset
def name2list(name):
    """Convert a name to a list of per-character ASCII codes.

    Returns:
        tuple: (list of int codes, length of the name).
    """
    arr = [ord(c) for c in name]
    return arr, len(arr)


def make_tensors(names, countries):
    """Build padded, length-sorted tensors for a batch of names.

    Args:
        names: iterable of strings (one name per sample).
        countries: tensor of class labels, aligned with `names`.

    Returns:
        tuple: (seq_tensor, seq_lengths, countries) where
            seq_tensor  -- LongTensor of shape (batch, max_len), zero-padded,
            seq_lengths -- LongTensor of per-name lengths, descending,
            countries   -- labels permuted to match the sorted order.
    """
    # Convert each name into (ascii-code list, length).
    sequences_and_lengths = [name2list(name) for name in names]
    name_sequences = [sl[0] for sl in sequences_and_lengths]
    seq_lengths = torch.LongTensor([sl[1] for sl in sequences_and_lengths])
    countries = countries.long()

    # Zero-padded tensor of shape BatchSize x SeqLen.
    seq_tensor = torch.zeros(len(name_sequences), seq_lengths.max()).long()
    for idx, (seq, seq_len) in enumerate(zip(name_sequences, seq_lengths), 0):
        seq_tensor[idx, :seq_len] = torch.LongTensor(seq)

    # Sort by descending length, as required by pack_padded_sequence;
    # permute labels with the same index so they stay aligned.
    seq_lengths, perm_idx = seq_lengths.sort(dim=0, descending=True)
    seq_tensor = seq_tensor[perm_idx]
    countries = countries[perm_idx]
    return seq_tensor, seq_lengths, countries
# --- Model training section ---
def time_since(since):
    """Return elapsed time since `since` (a time.time() stamp) as 'Xm Ys'."""
    s = time.time() - since
    m = math.floor(s / 60)
    s -= m * 60
    return '%dm %ds' % (m, s)


def trainModel():
    """Run one training epoch over `trainloader`.

    Relies on module-level globals defined elsewhere in the file:
    trainloader, classifier, criterion, optimizer, start, epoch, trainset.

    Returns:
        float: accumulated loss over the epoch.
    """
    total_loss = 0
    for i, (names, countries) in enumerate(trainloader, 1):
        inputs, seq_lengths, target = make_tensors(names, countries)
        # Forward pass through the classifier.
        output = classifier(inputs, seq_lengths)
        loss = criterion(output, target)
        # Standard step: zero grads, backprop, update weights.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
        # Periodic progress report every 10 mini-batches.
        if i % 10 == 0:
            print(f'[{time_since(start)}] Epoch {epoch} ', end='')
            print(f'[{i * len(inputs)}/{len(trainset)}] ', end='')
            print(f'loss={total_loss / (i * len(inputs))}')
    return total_loss