import torch
from torchtext import data
# Fixed seed shared by every random number generator used in this script.
SEED = 1234

# Seed the CPU generator, then the generator of the current CUDA device.
torch.manual_seed(SEED)
torch.cuda.manual_seed(SEED)

# Ask cuDNN to pick deterministic algorithms only.
torch.backends.cudnn.deterministic = True
# Define how each field is processed (text field and label field).
# TEXT is a torchtext.data.Field tokenized with spaCy.
TEXT = data.Field(tokenize='spacy')
# LABEL is a LabelField whose values are stored as torch.float.
LABEL = data.LabelField(dtype=torch.float)
from torchtext import datasets
# Load the IMDB dataset through torchtext, processing examples with the
# TEXT and LABEL fields, and report the size of each split.
train_data, test_data = datasets.IMDB.splits(TEXT, LABEL)
print(f'Number of training examples: {len(train_data)}')
print(f'Number of testing examples: {len(test_data)}')
# 2. Build the training and validation sets
import random
# Carve a validation set out of the training data.
# random.seed() returns None, so its result must not be passed as
# random_state; seed Python's RNG first and hand over the resulting state.
# NOTE(review): torchtext documents random_state as a random.getstate()
# value - confirm against the installed torchtext version.
random.seed(SEED)
train_data, valid_data = train_data.split(random_state=random.getstate())
def count_parameters(model):
    """Return the number of trainable parameters in ``model``.

    Args:
        model: Object exposing ``parameters()``; each yielded parameter must
            provide ``numel()`` and ``requires_grad``.

    Returns:
        The sum of ``numel()`` over all parameters with ``requires_grad`` set.
    """
    return sum(p.numel() for p in model.parameters() if p.requires_grad)


print(f'The model has {count_parameters(model):,} trainable parameters')
import torch.optim as optim
# Move the model and the loss module to the target device first, then build
# the optimizer from the model's parameters, so the optimizer is created
# over parameters that already live where they will be trained.
model = model.to(device)
criterion = nn.BCEWithLogitsLoss().to(device)

# Adam with its default hyper-parameters over every model parameter.
optimizer = optim.Adam(model.parameters())
def binary_accuracy(preds, y):
    """Return the accuracy of one batch as a fraction.

    For example, 8 correct predictions out of 10 gives 0.8, NOT 8.

    Args:
        preds: Tensor of raw (pre-sigmoid) scores; the sigmoid is applied
            here before rounding.
        y: Tensor of target labels, compared element-wise with the rounded
            predictions.

    Returns:
        A zero-dimensional tensor: the number of matching entries divided by
        ``len(correct)``, i.e. the size of the first dimension.
    """
    # Round the sigmoid output to the closest integer (0 or 1).
    rounded_preds = torch.round(torch.sigmoid(preds))
    # Convert the boolean matches to float so they can be summed and divided.
    correct = (rounded_preds == y).float()
    acc = correct.sum() / len(correct)
    return acc