目录
Part3 05LinearRegression
Part5 07Multiple_Dimension_Input
Part6 08Dataset and Dataloader
Part7 09Softmax_Classifier(运行结果见另一篇博客)
Part1 03Gradient_Descent1
# Training data: each target is twice its input.
x_data, y_data = [1.0, 2.0, 3.0], [2.0, 4.0, 6.0]
# Initial parameters.
alpha = 0.005  # learning rate for gradient descent
w = 1.0  # initial weight
# Prediction function.
def forward(x):
    """Return the prediction y_hat for ``x`` using the module-level weight ``w``."""
    y_hat = x * w
    return y_hat
# Mean squared error over the whole data set.
def cost(xs, ys):
    """Return the mean of ``(forward(x) - y) ** 2`` over the paired samples."""
    squared_errors = ((forward(x) - y) ** 2 for x, y in zip(xs, ys))
    return sum(squared_errors) / len(xs)
def gradient(xs, ys):
    """Return the mean of ``2 * x * (x * w - y)`` over the paired samples.

    Reads the module-level weight ``w``.
    """
    per_sample = (2 * x * (x * w - y) for x, y in zip(xs, ys))
    return sum(per_sample) / len(xs)
# Prediction made with the initial weight.
print('Predict (before training)', 4, forward(4))
for epoch in range(1000):
    # Both values are computed from the weight as it stands before this update.
    grad_val = gradient(x_data, y_data)
    cost_val = cost(x_data, y_data)
    w = w - alpha * grad_val  # gradient-descent step
    print('Epoch', epoch, 'w = ', w, 'loss = ', cost_val)
# Prediction made with the trained weight.
print('Predict (after training)', 4, forward(4))
Part2 03Gradient_Descent2
# Training data (inputs and targets).
x_data = [1.0, 2.0, 3.0]
y_data = [2.0 * value for value in x_data]
# Initial parameters: weight and learning rate.
w, alpha = 1.0, 0.005
# Prediction function.
def forward(x):
    """Return ``x`` multiplied by the module-level weight ``w``."""
    prediction = x * w
    return prediction
# Squared error of a single sample.
def loss(xs, ys):
    """Return the squared error of one sample.

    Args:
        xs: input value of the sample.
        ys: target value of the sample.

    Returns:
        ``(forward(xs) - ys) ** 2``.
    """
    # Use the parameter, not a module-level ``x``, so the result depends
    # only on the arguments passed in.
    y_pred = forward(xs)
    return (y_pred - ys) ** 2
def gradient(xs, ys):
    """Return the gradient of the squared error of one sample with respect to ``w``.

    Args:
        xs: input value of the sample.
        ys: target value of the sample.

    Returns:
        ``2 * xs * (xs * w - ys)``, using the module-level weight ``w``.
    """
    # Use the parameters rather than module-level ``x``/``y`` so the function
    # works no matter what the caller names its loop variables.
    return 2 * xs * (xs * w - ys)
# Prediction made with the initial weight.
print('Predict (before training)', 4, forward(4))
for epoch in range(1000):  # number of passes over the data
    for x, y in zip(x_data, y_data):  # one weight update per sample
        grad_val = gradient(x, y)
        w = w - alpha * grad_val
        print("\tgrad: ", x, y, grad_val)
        los = loss(x, y)  # loss of this sample, evaluated after the update
    # Report the loss of the last sample seen in this epoch.
    print('progress: ', epoch, 'w = ', w, 'loss = ', los)
# Prediction made with the trained weight.
print('Predict (after training)', 4, forward(4))
Part3 05LinearRegression
# Prepare dataset
import torch
# Column vectors of shape (3, 1): one sample per row.
x_data = torch.Tensor([1.0, 2.0, 3.0]).view(-1, 1)
y_data = torch.Tensor([2.0, 4.0, 6.0]).view(-1, 1)
# Model definition.
class LinearModel(torch.nn.Module):
    """Linear model made of a single ``torch.nn.Linear(1, 1)`` layer."""

    def __init__(self):
        super().__init__()  # initialise torch.nn.Module
        # in_features = 1, out_features = 1
        self.linear = torch.nn.Linear(1, 1)

    def forward(self, x):
        """Apply the linear layer to ``x`` and return the result."""
        return self.linear(x)
model = LinearModel()  # instantiate the model
# Loss and optimizer.
# reduction='sum' is the supported replacement for the deprecated
# size_average=False: the squared errors are summed, not averaged.
criterion = torch.nn.MSELoss(reduction='sum')
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
# Training cycle.
for epoch in range(1000):
    y_pred = model(x_data)
    loss = criterion(y_pred, y_data)
    print(epoch, loss.item())  # print the scalar value rather than the tensor repr
    optimizer.zero_grad()  # reset accumulated gradients
    loss.backward()  # compute gradients
    optimizer.step()  # update the parameters
# Output weight and bias as plain Python numbers.
print('w = ', model.linear.weight.item())
print('b = ', model.linear.bias.item())
# Evaluate the trained model on one input.
x_test = torch.Tensor([[4.0]])
y_test = model(x_test)
print('y_pred = ', y_test.data)
Part4 06Logistic_Regression
import torch
import torch.nn.functional as F
import numpy as np
import matplotlib.pyplot as plt
# Column vectors of shape (3, 1): inputs and their binary labels.
x_data = torch.Tensor([1.0, 2.0, 3.0]).view(-1, 1)
y_data = torch.Tensor([0, 0, 1]).view(-1, 1)
##
class LogisticRegressionModel(torch.nn.Module):
    """Logistic regression: one ``torch.nn.Linear(1, 1)`` layer followed by a sigmoid."""

    def __init__(self):
        super(LogisticRegressionModel, self).__init__()
        self.linear = torch.nn.Linear(1, 1)  # linear layer

    def forward(self, x):
        """Return the sigmoid of the linear layer's output for ``x``."""
        # torch.sigmoid replaces the deprecated torch.nn.functional.sigmoid.
        y_pred = torch.sigmoid(self.linear(x))
        return y_pred
model = LogisticRegressionModel()
# Loss and optimizer.
# reduction='sum' is the supported replacement for the deprecated
# size_average=False: the per-sample losses are summed, not averaged.
criterion = torch.nn.BCELoss(reduction='sum')
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
# Training cycle.
for epoch in range(1000):
    y_pred = model(x_data)
    loss = criterion(y_pred, y_data)
    print(epoch, loss.item())
    optimizer.zero_grad()  # reset accumulated gradients
    loss.backward()  # back-propagate
    optimizer.step()  # update the parameters
# Plot the model output for 200 inputs between 0 and 10.
x = np.linspace(0, 10, 200)
x_t = torch.Tensor(x).view((200, 1))
y_t = model(x_t)
y = y_t.data.numpy()
plt.plot(x, y)
plt.plot([0, 10], [0.5, 0.5], c='r')  # horizontal reference line at 0.5
plt.xlabel('Hours')
plt.ylabel('Probability of Pass')
plt.grid()
plt.show()
Part5 07Multiple_Dimension_Input
import numpy as np
import torch
# Read the comma-separated file as float32.
xy = np.loadtxt('diabetes.csv.gz', delimiter=',', dtype=np.float32)
# Every column but the last holds the features; the last column (kept 2-D) holds the labels.
x_data, y_data = (torch.from_numpy(part) for part in (xy[:, :-1], xy[:, [-1]]))
class Model(torch.nn.Module):
    """Three linear layers (8 -> 6 -> 4 -> 1), each followed by a sigmoid."""

    def __init__(self):
        super().__init__()  # initialise the torch.nn.Module part of the object
        self.linear1 = torch.nn.Linear(8, 6)
        self.linear2 = torch.nn.Linear(6, 4)
        self.linear3 = torch.nn.Linear(4, 1)
        self.sigmoid = torch.nn.Sigmoid()

    def forward(self, x):
        """Pass ``x`` through the three layers, applying the sigmoid after each one."""
        for layer in (self.linear1, self.linear2, self.linear3):
            x = self.sigmoid(layer(x))
        return x
model = Model()
# reduction='mean' is the supported replacement for the deprecated
# size_average=True: the per-sample losses are averaged.
criterion = torch.nn.BCELoss(reduction='mean')
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
for epoch in range(1000):
    # Forward pass on the full data set.
    y_pred = model(x_data)
    loss = criterion(y_pred, y_data)
    print(epoch, loss.item())
    # Backward pass.
    optimizer.zero_grad()
    loss.backward()
    # Parameter update.
    optimizer.step()
Part6 08Dataset and Dataloader
import numpy as np
import torch
from torch.utils.data import Dataset, DataLoader
class DiabetesDataset(Dataset):
    """Dataset read from a comma-separated numeric file; the last column is the label."""

    def __init__(self, filepath):
        # Parse the file as float32.
        table = np.loadtxt(filepath, delimiter=',', dtype=np.float32)
        self.len = table.shape[0]  # number of samples (rows)
        self.x_data = torch.from_numpy(table[:, :-1])  # every column but the last
        self.y_data = torch.from_numpy(table[:, [-1]])  # last column, kept 2-D

    def __getitem__(self, index):
        """Return the ``(features, label)`` pair stored at ``index``."""
        features = self.x_data[index]
        label = self.y_data[index]
        return features, label

    def __len__(self):
        """Return the number of samples."""
        return self.len
# Load the diabetes sample data.
dataset = DiabetesDataset('diabetes.csv.gz')
# Mini-batches of 32 shuffled samples; num_workers=0 loads data in the main process.
train_loader = DataLoader(dataset, batch_size=32, shuffle=True, num_workers=0)
class Model(torch.nn.Module):
    """Feed-forward network 8 -> 6 -> 4 -> 1 with a sigmoid after every linear layer."""

    def __init__(self):
        super().__init__()  # initialise the torch.nn.Module part of the object
        self.linear1 = torch.nn.Linear(8, 6)
        self.linear2 = torch.nn.Linear(6, 4)
        self.linear3 = torch.nn.Linear(4, 1)
        self.sigmoid = torch.nn.Sigmoid()

    def forward(self, x):
        """Return the network output for ``x``."""
        hidden1 = self.sigmoid(self.linear1(x))
        hidden2 = self.sigmoid(self.linear2(hidden1))
        return self.sigmoid(self.linear3(hidden2))
model = Model()
# reduction='mean' is the supported replacement for the deprecated
# size_average=True: the per-sample losses are averaged.
criterion = torch.nn.BCELoss(reduction='mean')
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
# One epoch is one pass over all samples; one iteration processes one mini-batch.
for epoch in range(100):
    for i, data in enumerate(train_loader, 0):  # i counts the batches from 0
        # 1. Prepare data: feature columns and label column of this batch.
        inputs, labels = data
        # 2. Forward
        y_pred = model(inputs)
        loss = criterion(y_pred, labels)
        print(epoch, i, loss.item())
        # 3. Backward
        optimizer.zero_grad()
        loss.backward()
        # 4. Update
        optimizer.step()
说明:为节省运行时间,可减少训练轮数(例如设定 EPOCHS=10)。运行过程中如果出现 “DataLoader worker (pid(s) 1982) exited unexpectedly” 错误,需将 num_workers 的值改为 0(源码中为 2)。
Part7 09Softmax_Classifier(运行结果见另一篇博客)
import torch
from torchvision import transforms
from torchvision import datasets
from torch.utils.data import DataLoader
import torch.nn.functional as F
import torch.optim as optim
batch_size = 64
# ToTensor turns an (H, W, C) image into a (C, H, W) tensor scaled to [0, 1];
# Normalize then standardises each channel with the given mean and std.
transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.1307, ), (0.3081, ))])
# Training split (shuffled).
train_dataset = datasets.MNIST(root='../Pycharm/dataset/mnist/', train=True, download=True, transform=transform)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
# Test split (fixed order).
test_dataset = datasets.MNIST(root='../Pycharm/dataset/mnist/', train=False, download=True, transform=transform)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)
class Net(torch.nn.Module):
    """Fully connected classifier: 784 -> 512 -> 256 -> 128 -> 64 -> 10."""

    def __init__(self):
        super().__init__()
        # Linear layers given as (input dimension, output dimension).
        self.l1 = torch.nn.Linear(784, 512)
        self.l2 = torch.nn.Linear(512, 256)
        self.l3 = torch.nn.Linear(256, 128)
        self.l4 = torch.nn.Linear(128, 64)
        self.l5 = torch.nn.Linear(64, 10)

    def forward(self, x):
        """Flatten ``x`` to rows of 784 values and return the 10 outputs of the last layer."""
        # view works like numpy's reshape; -1 lets the row count be inferred.
        x = x.view(-1, 784)
        for hidden_layer in (self.l1, self.l2, self.l3, self.l4):
            x = F.relu(hidden_layer(x))
        # No activation on the last layer: its raw outputs are returned.
        return self.l5(x)
model = Net()
# Cross-entropy loss.
criterion = torch.nn.CrossEntropyLoss()
# SGD optimizer: lr is the learning rate, momentum the momentum factor.
optimizer = optim.SGD(model.parameters(), momentum=0.5, lr=0.01)
def train(epoch):
    """Run one pass over ``train_loader`` and print the mean loss of every 300 batches.

    Args:
        epoch: zero-based epoch index; only used in the printed progress line.
    """
    running_loss = 0.0
    for batch_idx, (inputs, target) in enumerate(train_loader):  # (features, labels) per batch
        optimizer.zero_grad()  # reset accumulated gradients
        # forward + backward + update
        loss = criterion(model(inputs), target)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()  # accumulate the loss as a Python float
        if (batch_idx + 1) % 300 == 0:
            print('[%d, %5d] loss: %.3f' % (epoch + 1, batch_idx + 1, running_loss / 300))
            running_loss = 0.0
def test():
    """Evaluate ``model`` on ``test_loader`` and print the accuracy as a truncated percentage."""
    correct = total = 0
    with torch.no_grad():  # no gradients are needed for evaluation
        for images, labels in test_loader:
            outputs = model(images)
            # Index of the largest value in each row is the predicted class.
            predicted = torch.max(outputs.data, dim=1)[1]
            total += labels.size(0)  # samples seen so far
            correct += (predicted == labels).sum().item()  # correct predictions so far
    print('Accuracy on test set: %d %%' % (100 * correct / total))
def main():
    """Train for 10 epochs, evaluating on the test set after each one."""
    epoch = 0
    while epoch < 10:
        train(epoch)
        test()
        epoch += 1


main()
Part8 10_Basic_CNN
import torch
from torchvision import transforms
from torchvision import datasets
from torch.utils.data import DataLoader
import torch.nn.functional as F
import torch.optim as optim
batch_size = 64
# ToTensor turns an (H, W, C) image into a (C, H, W) tensor scaled to [0, 1];
# Normalize then standardises each channel with the given mean and std.
transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.1307, ), (0.3081, ))])
# Training split (shuffled).
train_dataset = datasets.MNIST(root='../Pycharm/dataset/mnist/', train=True, download=True, transform=transform)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
# Test split (fixed order).
test_dataset = datasets.MNIST(root='../Pycharm/dataset/mnist/', train=False, download=True, transform=transform)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)
class Net(torch.nn.Module):
    """Two convolution + max-pool + ReLU stages followed by one linear layer with 10 outputs."""

    def __init__(self):
        super().__init__()
        self.conv1 = torch.nn.Conv2d(1, 10, kernel_size=5)  # convolution layer 1
        self.conv2 = torch.nn.Conv2d(10, 20, kernel_size=5)  # convolution layer 2
        self.pooling = torch.nn.MaxPool2d(2)  # 2x2 max pooling
        self.fc = torch.nn.Linear(320, 10)

    def forward(self, x):
        """Return the 10 outputs of the linear layer for every sample in the batch ``x``."""
        n_samples = x.size(0)
        # Each stage: convolution, then pooling, then ReLU.
        for conv in (self.conv1, self.conv2):
            x = F.relu(self.pooling(conv(x)))
        # Flatten every sample to one row; -1 lets the row length be inferred
        # (it must be 320 to match self.fc).
        flat = x.view(n_samples, -1)
        return self.fc(flat)
model = Net()
# Use the first GPU when CUDA is available, otherwise the CPU.
device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")
model.to(device)
# Cross-entropy loss.
criterion = torch.nn.CrossEntropyLoss()
# SGD optimizer: lr is the learning rate, momentum the momentum factor.
optimizer = optim.SGD(model.parameters(), momentum=0.5, lr=0.01)
def train(epoch):
    """Run one pass over ``train_loader`` on ``device`` and print the mean loss of every 300 batches.

    Args:
        epoch: zero-based epoch index; only used in the printed progress line.
    """
    running_loss = 0.0
    for batch_idx, (inputs, target) in enumerate(train_loader):  # (features, labels) per batch
        # Move the batch to the device the model lives on.
        inputs = inputs.to(device)
        target = target.to(device)
        optimizer.zero_grad()  # reset accumulated gradients
        # forward + backward + update
        loss = criterion(model(inputs), target)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()  # accumulate the loss as a Python float
        if (batch_idx + 1) % 300 == 0:
            print('[%d, %5d] loss: %.3f' % (epoch + 1, batch_idx + 1, running_loss / 300))
            running_loss = 0.0
def test():
    """Evaluate ``model`` on ``test_loader`` and print the accuracy as a truncated percentage."""
    correct = total = 0
    with torch.no_grad():  # no gradients are needed for evaluation
        for images, labels in test_loader:
            images = images.to(device)
            labels = labels.to(device)
            outputs = model(images)
            # Index of the largest value in each row is the predicted class.
            predicted = torch.max(outputs.data, dim=1)[1]
            total += labels.size(0)  # samples seen so far
            correct += (predicted == labels).sum().item()  # correct predictions so far
    print('Accuracy on test set: %d %%' % (100 * correct / total))
def main():
    """Train for 10 epochs, evaluating on the test set after each one."""
    epoch = 0
    while epoch < 10:
        train(epoch)
        test()
        epoch += 1


main()
Part9 12RNN_Basic-RNNCell
import torch
input_size = hidden_size = 4
batch_size = 1
# Train the mapping 'hello' -> 'ohlol'.
idx2char = ['e', 'h', 'l', 'o']  # index -> character
x_data = [1, 0, 2, 2, 3]  # indices of the input characters 'hello'
y_data = [3, 1, 2, 3, 2]  # indices of the target characters 'ohlol'
# 4x4 identity matrix: row i is the one-hot vector of index i.
one_hot_lookup = [[int(row == col) for col in range(4)] for row in range(4)]
# One-hot vectors of the input characters, fed to the RNN cell one step at a time.
x_one_hot = [one_hot_lookup[x] for x in x_data]
# Shape (seq_len, batch_size, input_size); -1 lets seq_len be inferred.
inputs = torch.Tensor(x_one_hot).view(-1, batch_size, input_size)
# Shape (seq_len, 1); -1 lets seq_len be inferred.
labels = torch.LongTensor(y_data).view(-1, 1)
class Model(torch.nn.Module):
    """Thin wrapper around a single ``torch.nn.RNNCell``."""

    def __init__(self, input_size, hidden_size, batch_size):
        super().__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.batch_size = batch_size
        # The recurrent cell itself.
        self.rnncell = torch.nn.RNNCell(input_size=input_size, hidden_size=hidden_size)

    def forward(self, input, hidden):
        """Return the next hidden state: h_t = RNNCell(x_t, h_(t-1))."""
        return self.rnncell(input, hidden)

    def init_hidden(self):
        """Return the all-zero initial hidden state h_0 of shape (batch_size, hidden_size)."""
        shape = (self.batch_size, self.hidden_size)
        return torch.zeros(*shape)
net = Model(input_size, hidden_size, batch_size)
criterion = torch.nn.CrossEntropyLoss()  # cross-entropy loss
optimizer = torch.optim.Adam(net.parameters(), lr=0.1)  # parameter optimizer
for epoch in range(15):
    loss = 0
    optimizer.zero_grad()  # reset accumulated gradients
    hidden = net.init_hidden()  # start from h_0
    print('Predicted string:', end='')
    for input, label in zip(inputs, labels):  # one time step per iteration
        hidden = net(input, hidden)  # advance the RNN cell by one step
        loss += criterion(hidden, label)  # accumulate the loss over all steps
        idx = hidden.max(dim=1)[1]  # index of the largest output
        print(idx2char[idx.item()], end='')  # print the corresponding character
    loss.backward()  # back-propagate through the whole sequence
    optimizer.step()  # update the parameters
    print(', Epoch [%d/15] loss = %.4f' % (epoch + 1, loss.item()))
Part10 13_RNN_Classifier
Pycharm 运行(顺序略有修改)
# 导入第三方库
from torch.utils.data import Dataset, DataLoader
from torch.nn.utils.rnn import pack_padded_sequence
import torch
import gzip
import csv
import matplotlib.pyplot as plt
import numpy as np
import time
import math
# Hyper-parameters.
HIDDEN_SIZE, N_LAYER = 100, 2
BATCH_SIZE, N_EPOCHS = 256, 100
N_CHARS = 128
# Set to True to move tensors and the model to "cuda:0".
USE_GPU = False
# Dataset class.
class NameDataset(Dataset):
    """Names paired with country labels, read from a gzip-compressed CSV file."""

    def __init__(self, is_train_set=True):
        # File location; adjust it to wherever the data set is stored.
        if is_train_set:
            filename = 'names_train.csv.gz'
        else:
            filename = 'names_test.csv.gz'
        with gzip.open(filename, 'rt') as f:
            rows = list(csv.reader(f))
        self.names = [row[0] for row in rows]  # first column: names
        self.len = len(self.names)  # number of samples
        self.countries = [row[1] for row in rows]  # second column: country names
        self.country_list = sorted(set(self.countries))  # unique country names, sorted
        self.country_dict = self.getCountryDict()
        self.country_num = len(self.country_list)  # number of distinct countries

    def __getitem__(self, index):
        """Return ``(name, country index)`` for the sample at ``index``."""
        country = self.countries[index]
        return self.names[index], self.country_dict[country]

    def __len__(self):
        """Return the number of samples."""
        return self.len

    def getCountryDict(self):
        """Return a dict mapping each country name to its position in ``country_list`` (from 0)."""
        return {country_name: idx for idx, country_name in enumerate(self.country_list)}

    def idx2country(self, index):
        """Return the country name stored at ``index`` in ``country_list``."""
        return self.country_list[index]

    def getCountriesNum(self):
        """Return the number of distinct countries."""
        return self.country_num
# Load the data sets.
trainset = NameDataset(is_train_set=True)  # training split
testset = NameDataset(is_train_set=False)  # test split
trainloader = DataLoader(trainset, shuffle=True, batch_size=BATCH_SIZE)
testloader = DataLoader(testset, shuffle=False, batch_size=BATCH_SIZE)
N_COUNTRY = trainset.getCountriesNum()  # number of countries in the training split
#
class RNNClassifier(torch.nn.Module):
    """Embedding -> GRU -> linear classifier for padded, variable-length sequences.

    Args:
        input_size: number of distinct input tokens (size of the embedding table).
        hidden_size: embedding dimension and GRU hidden size.
        output_size: number of outputs of the final linear layer.
        n_layers: number of stacked GRU layers.
        bidirectional: whether the GRU runs in both directions.
    """

    def __init__(self, input_size, hidden_size, output_size, n_layers = 1, bidirectional = True):
        super(RNNClassifier, self).__init__()
        self.hidden_size = hidden_size
        self.n_layers = n_layers
        # A bidirectional GRU yields one final hidden state per direction.
        self.n_directions = 2 if bidirectional else 1
        self.embedding = torch.nn.Embedding(input_size, hidden_size)
        self.gru = torch.nn.GRU(hidden_size, hidden_size, n_layers, bidirectional = bidirectional)
        # Both directions are concatenated before the linear layer.
        self.fc = torch.nn.Linear(hidden_size * self.n_directions, output_size)

    def _init_hidden(self, batch_size):
        """Return the all-zero initial hidden state h_0 on the model's own device."""
        # Creating h_0 on the embedding's device keeps it next to the weights
        # wherever the model has been moved, without a module-level helper.
        return torch.zeros(self.n_layers * self.n_directions, batch_size, self.hidden_size,
                           device = self.embedding.weight.device)

    def forward(self, input, seq_lengths):
        """Return the linear layer's output, shape (batch, output_size).

        Args:
            input: padded token indices, shape (batch, seq_len), rows ordered
                by decreasing sequence length.
            seq_lengths: sequence lengths in the same order as the rows.
        """
        # input shape : B x S -> S x B
        input = input.t()
        batch_size = input.size(1)
        hidden = self._init_hidden(batch_size)
        embedding = self.embedding(input)
        # pack_padded_sequence requires the lengths on the CPU, even when the
        # data itself lives on the GPU.
        lengths = torch.as_tensor(seq_lengths, dtype = torch.int64).cpu()
        gru_input = pack_padded_sequence(embedding, lengths)
        output, hidden = self.gru(gru_input, hidden)
        if self.n_directions == 2:
            # Concatenate the final states of both directions of the last layer.
            hidden_cat = torch.cat([hidden[-1], hidden[-2]], dim = 1)
        else:
            hidden_cat = hidden[-1]
        fc_output = self.fc(hidden_cat)
        return fc_output
def name2list(name):
    """Return ``(codes, length)``: the ``ord()`` value of every character in ``name`` and their count."""
    codes = list(map(ord, name))
    return codes, len(codes)
def make_tensors(names, countries):
    """Turn a batch of names and country indices into tensors sorted by decreasing name length.

    Returns:
        ``(seq_tensor, seq_lengths, countries)``, each passed through ``create_tensor``;
        ``seq_tensor`` is a zero-padded BatchSize x SeqLen matrix of character codes.
    """
    encoded = [name2list(name) for name in names]  # (codes, length) per name
    name_sequences = [codes for codes, _ in encoded]
    seq_lengths = torch.LongTensor([length for _, length in encoded])
    countries = countries.long()
    # Zero matrix with one row per name and as many columns as the longest name.
    seq_tensor = torch.zeros(len(name_sequences), seq_lengths.max()).long()
    for row, (codes, length) in enumerate(zip(name_sequences, seq_lengths)):
        # Overwrite the leading zeros of the row with the character codes.
        seq_tensor[row, :length] = torch.LongTensor(codes)
    # Sort by decreasing length, as pack_padded_sequence expects.
    seq_lengths, perm_idx = seq_lengths.sort(dim=0, descending=True)
    sorted_names = seq_tensor[perm_idx]
    sorted_countries = countries[perm_idx]
    return create_tensor(sorted_names), create_tensor(seq_lengths), create_tensor(sorted_countries)
def create_tensor(tensor):
    """Return ``tensor`` moved to "cuda:0" when ``USE_GPU`` is set, otherwise unchanged."""
    if not USE_GPU:
        return tensor
    return tensor.to(torch.device("cuda:0"))
def time_since(since):
    """Return the time elapsed since the timestamp ``since`` as ``'<minutes>m <seconds>s'``."""
    elapsed = time.time() - since
    minutes = math.floor(elapsed / 60)
    seconds = elapsed - minutes * 60
    return '%dm %ds' % (minutes, seconds)
def trainModel(epoch):
    """Train ``classifier`` for one pass over ``trainloader`` and return the sum of the batch losses.

    Args:
        epoch: epoch number shown in the progress lines printed every 10 batches.
    """
    total_loss = 0
    for i, (names, countries) in enumerate(trainloader, 1):
        # Build padded, length-sorted tensors for this batch.
        inputs, seq_lengths, target = make_tensors(names, countries)
        loss = criterion(classifier(inputs, seq_lengths), target)
        optimizer.zero_grad()  # reset accumulated gradients
        loss.backward()  # back-propagate
        optimizer.step()  # update the parameters
        total_loss += loss.item()
        if i % 10 != 0:
            continue
        seen = i * len(inputs)  # batch count times the size of the current batch
        print(f'[{time_since(start)}] Epoch {epoch}', end='')
        print(f'[{seen}/{len(trainset)}]', end='')
        print(f'loss={total_loss / seen}')
    return total_loss
def testModel():
    """Evaluate ``classifier`` on ``testloader``, print the accuracy and return it as a fraction."""
    total = len(testset)
    correct = 0
    print("evaluating trained model ...")
    with torch.no_grad():
        for names, countries in testloader:
            inputs, seq_lengths, target = make_tensors(names, countries)
            output = classifier(inputs, seq_lengths)
            # Index of the largest output per row is the predicted class.
            _, pred = output.max(dim=1, keepdim=True)
            correct += pred.eq(target.view_as(pred)).sum().item()
        percent = '%.2f' % (100 * correct / total)
        print(f'Test set: Accuracy {correct}/{total} {percent}%')
    return correct / total
def main():
    """Train for ``N_EPOCHS`` epochs, test after each one, then plot accuracy against epoch."""
    if USE_GPU:
        classifier.to(torch.device("cuda:0"))
    print("Training for %d epochs..." % N_EPOCHS)
    acc_list = []
    for epoch in range(1, N_EPOCHS + 1):
        # Train cycle followed by an evaluation on the test set.
        trainModel(epoch)
        acc_list.append(testModel())
    # Plot the recorded accuracies, one point per epoch.
    epochs = np.arange(1, len(acc_list) + 1, 1)
    plt.plot(epochs, np.array(acc_list))
    plt.xlabel('Epoch')
    plt.ylabel('Accuracy')
    plt.grid()
    plt.show()
# Build the model.
classifier = RNNClassifier(input_size=N_CHARS, hidden_size=HIDDEN_SIZE, output_size=N_COUNTRY, n_layers=N_LAYER)
criterion = torch.nn.CrossEntropyLoss()  # cross-entropy loss
optimizer = torch.optim.Adam(classifier.parameters(), lr=0.001)  # optimizer
start = time.time()  # start time, read by the progress output
main()
Anaconda(Jupyter Notebook 运行)-刘二老师源码
# 导入第三方库
from torch.utils.data import Dataset, DataLoader
from torch.nn.utils.rnn import pack_padded_sequence
import torch
import gzip
import csv
import matplotlib.pyplot as plt
import numpy as np
import time
import math
# Hyper-parameters.
HIDDEN_SIZE, N_LAYER = 100, 2
BATCH_SIZE, N_EPOCHS = 256, 10
N_CHARS = 128
# Set to True to move tensors and the model to "cuda:0".
USE_GPU = False
# Dataset class.
class NameDataset(Dataset):
    """(name, country index) samples loaded from a gzip-compressed CSV file."""

    def __init__(self, is_train_set=True):
        # Change the file names to match the folder the data lives in.
        filename = 'names_train.csv.gz' if is_train_set else 'names_test.csv.gz'
        with gzip.open(filename, 'rt') as f:
            rows = [row for row in csv.reader(f)]
        names, countries = [], []
        for row in rows:
            names.append(row[0])
            countries.append(row[1])
        self.names = names
        self.len = len(names)
        self.countries = countries
        self.country_list = sorted(set(countries))  # unique, sorted country names
        self.country_dict = self.getCountryDict()
        self.country_num = len(self.country_list)

    def __getitem__(self, index):
        """Return the name at ``index`` together with the index of its country."""
        return self.names[index], self.country_dict[self.countries[index]]

    def __len__(self):
        """Return the number of samples."""
        return self.len

    def getCountryDict(self):
        """Return a dict mapping every country name to its position in ``country_list``."""
        return dict(zip(self.country_list, range(len(self.country_list))))

    def idx2country(self, index):
        """Return the country name at position ``index``."""
        return self.country_list[index]

    def getCountriesNum(self):
        """Return the number of distinct countries."""
        return self.country_num
# Load the data sets.
trainset = NameDataset(is_train_set=True)
testset = NameDataset(is_train_set=False)
trainloader = DataLoader(trainset, shuffle=True, batch_size=BATCH_SIZE)
testloader = DataLoader(testset, shuffle=False, batch_size=BATCH_SIZE)
# Number of countries in the training split.
N_COUNTRY = trainset.getCountriesNum()
class RNNClassifier(torch.nn.Module):
    """Embedding -> GRU -> linear classifier for padded, variable-length sequences.

    Args:
        input_size: number of distinct input tokens (size of the embedding table).
        hidden_size: embedding dimension and GRU hidden size.
        output_size: number of outputs of the final linear layer.
        n_layers: number of stacked GRU layers.
        bidirectional: whether the GRU runs in both directions.
    """

    def __init__(self, input_size, hidden_size, output_size, n_layers = 1, bidirectional = True):
        super(RNNClassifier, self).__init__()
        self.hidden_size = hidden_size
        self.n_layers = n_layers
        # A bidirectional GRU yields one final hidden state per direction.
        self.n_directions = 2 if bidirectional else 1
        self.embedding = torch.nn.Embedding(input_size, hidden_size)
        self.gru = torch.nn.GRU(hidden_size, hidden_size, n_layers, bidirectional = bidirectional)
        # Both directions are concatenated before the linear layer.
        self.fc = torch.nn.Linear(hidden_size * self.n_directions, output_size)

    def _init_hidden(self, batch_size):
        """Return the all-zero initial hidden state h_0 on the model's own device."""
        # Creating h_0 on the embedding's device keeps it next to the weights
        # wherever the model has been moved, without a module-level helper.
        return torch.zeros(self.n_layers * self.n_directions, batch_size, self.hidden_size,
                           device = self.embedding.weight.device)

    def forward(self, input, seq_lengths):
        """Return the linear layer's output, shape (batch, output_size).

        Args:
            input: padded token indices, shape (batch, seq_len), rows ordered
                by decreasing sequence length.
            seq_lengths: sequence lengths in the same order as the rows.
        """
        # input shape : B x S -> S x B
        input = input.t()
        batch_size = input.size(1)
        hidden = self._init_hidden(batch_size)
        embedding = self.embedding(input)
        # pack_padded_sequence requires the lengths on the CPU, even when the
        # data itself lives on the GPU.
        lengths = torch.as_tensor(seq_lengths, dtype = torch.int64).cpu()
        gru_input = pack_padded_sequence(embedding, lengths)
        output, hidden = self.gru(gru_input, hidden)
        if self.n_directions == 2:
            # Concatenate the final states of both directions of the last layer.
            hidden_cat = torch.cat([hidden[-1], hidden[-2]], dim = 1)
        else:
            hidden_cat = hidden[-1]
        fc_output = self.fc(hidden_cat)
        return fc_output
def name2list(name):
    """Return the ``ord()`` code of every character in ``name`` together with the number of codes."""
    arr = []
    for ch in name:
        arr.append(ord(ch))
    return arr, len(arr)
def make_tensors(names, countries):
    """Build padded, length-sorted tensors from a batch of names and country indices.

    Returns:
        ``(seq_tensor, seq_lengths, countries)`` after ``create_tensor`` has been applied to
        each; rows are ordered by decreasing name length.
    """
    pairs = [name2list(name) for name in names]  # (codes, length) per name
    name_sequences = [pair[0] for pair in pairs]
    seq_lengths = torch.LongTensor([pair[1] for pair in pairs])
    countries = countries.long()
    # Zero-padded matrix of character codes, BatchSize x SeqLen.
    n_rows, n_cols = len(name_sequences), seq_lengths.max()
    seq_tensor = torch.zeros(n_rows, n_cols).long()
    for idx, seq in enumerate(name_sequences):
        seq_tensor[idx, :seq_lengths[idx]] = torch.LongTensor(seq)
    # Sort by decreasing length so the batch can be packed with pack_padded_sequence.
    seq_lengths, perm_idx = seq_lengths.sort(dim=0, descending=True)
    seq_tensor, countries = seq_tensor[perm_idx], countries[perm_idx]
    return create_tensor(seq_tensor), create_tensor(seq_lengths), create_tensor(countries)
def create_tensor(tensor):
    """Move ``tensor`` to "cuda:0" if ``USE_GPU`` is true; return it either way."""
    return tensor.to(torch.device("cuda:0")) if USE_GPU else tensor
def time_since(since):
    """Format the time elapsed since the timestamp ``since`` as ``'<minutes>m <seconds>s'``."""
    now = time.time()
    total = now - since
    whole_minutes = math.floor(total / 60)
    return '%dm %ds' % (whole_minutes, total - whole_minutes * 60)
def trainModel():
    """Train ``classifier`` for one pass over ``trainloader`` and return the sum of the batch losses.

    The progress lines printed every 10 batches read the module-level name ``epoch``.
    """
    total_loss = 0
    for i, (names, countries) in enumerate(trainloader, 1):
        inputs, seq_lengths, target = make_tensors(names, countries)
        loss = criterion(classifier(inputs, seq_lengths), target)
        optimizer.zero_grad()  # reset accumulated gradients
        loss.backward()  # back-propagate
        optimizer.step()  # update the parameters
        total_loss += loss.item()
        if i % 10 != 0:
            continue
        seen = i * len(inputs)  # batch count times the size of the current batch
        print(f'[{time_since(start)}] Epoch {epoch}', end='')
        print(f'[{seen}/{len(trainset)}]', end='')
        print(f'loss={total_loss / seen}')
    return total_loss
def testModel():
    """Evaluate ``classifier`` on ``testloader``; print and return the accuracy (returned as a fraction)."""
    total = len(testset)
    correct = 0
    print("evaluating trained model ...")
    with torch.no_grad():
        for names, countries in testloader:
            inputs, seq_lengths, target = make_tensors(names, countries)
            output = classifier(inputs, seq_lengths)
            # Index of the largest output per row is the predicted class.
            _, pred = output.max(dim=1, keepdim=True)
            correct += pred.eq(target.view_as(pred)).sum().item()
        percent = '%.2f' % (100 * correct / total)
        print(f'Test set: Accuracy {correct}/{total} {percent}%')
    return correct / total
if __name__ == '__main__':
    # Build the model and, if requested, move it to the GPU.
    classifier = RNNClassifier(input_size=N_CHARS, hidden_size=HIDDEN_SIZE, output_size=N_COUNTRY, n_layers=N_LAYER)
    if USE_GPU:
        device = torch.device("cuda:0")
        classifier.to(device)
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(classifier.parameters(), lr=0.001)
    start = time.time()  # read by the progress output in trainModel
    print("Training for %d epochs..." % N_EPOCHS)
    acc_list = []
    # The loop variable must stay named ``epoch``: trainModel reads it as a global.
    for epoch in range(1, N_EPOCHS + 1):
        # Train cycle followed by an evaluation on the test set.
        trainModel()
        acc = testModel()
        acc_list.append(acc)
    # Plot the recorded accuracies, one point per epoch.
    epoch = np.arange(1, len(acc_list) + 1, 1)
    acc_list = np.array(acc_list)
    plt.plot(epoch, acc_list)
    plt.xlabel('Epoch')
    plt.ylabel('Accuracy')
    plt.grid()
    plt.show()