Task 1.0 Study Notes: Linear Regression; Softmax and Classification Models; Multilayer Perceptron
Concise implementation of a linear regression model with PyTorch
In [16]:
import torch
from torch import nn
import numpy as np
torch.manual_seed(1)
print(torch.__version__)
torch.set_default_tensor_type('torch.FloatTensor')
Generate the dataset
Generating the dataset here is exactly the same as in the from-scratch implementation.
In [17]:
num_inputs = 2
num_examples = 1000
true_w = [2, -3.4]
true_b = 4.2
features = torch.tensor(np.random.normal(0, 1, (num_examples, num_inputs)), dtype=torch.float)
labels = true_w[0] * features[:, 0] + true_w[1] * features[:, 1] + true_b
labels += torch.tensor(np.random.normal(0, 0.01, size=labels.size()), dtype=torch.float)
Read the dataset
In [18]:
import torch.utils.data as Data
batch_size = 10
#combine features and labels of the dataset
dataset = Data.TensorDataset(features, labels)
#put dataset into DataLoader
data_iter = Data.DataLoader(
    dataset=dataset,        # torch TensorDataset format
    batch_size=batch_size,  # mini batch size
    shuffle=True,           # whether to shuffle the data
    num_workers=2,          # read the data with multiple worker processes
)
In [19]:
for X, y in data_iter:
    print(X, '\n', y)
    break
Define the model
In [20]:
class LinearNet(nn.Module):
    def __init__(self, n_feature):
        super(LinearNet, self).__init__()  # call the parent class constructor
        self.linear = nn.Linear(n_feature, 1)  # function prototype: torch.nn.Linear(in_features, out_features, bias=True)

    def forward(self, x):
        y = self.linear(x)
        return y
net = LinearNet(num_inputs)
print(net)
In [21]:
#ways to build a multilayer network
#method one
net = nn.Sequential(
    nn.Linear(num_inputs, 1)
    # other layers can be added here
)
#method two
net = nn.Sequential()
net.add_module('linear', nn.Linear(num_inputs, 1))
#net.add_module ...
#method three
from collections import OrderedDict
net = nn.Sequential(OrderedDict([
    ('linear', nn.Linear(num_inputs, 1))
    # ...
]))
print(net)
print(net[0])
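In methods two and three the layer is registered under the name 'linear', so it can be accessed either by index or by attribute name. A minimal sketch (the name follows the code above):
#access the same layer by position or by its registered name
print(net[0])      # by index
print(net.linear)  # by name; works for the add_module and OrderedDict constructions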
Initialize model parameters
In [22]:
from torch.nn import init
init.normal_(net[0].weight, mean=0.0, std=0.01)
init.constant_(net[0].bias, val=0.0)  # or you can use net[0].bias.data.fill_(0) to modify it directly
In [23]:
for param in net.parameters():
    print(param)
Define the loss function
In [24]:
loss = nn.MSELoss() # nn built-in squared loss function
# function prototype: torch.nn.MSELoss(size_average=None, reduce=None, reduction='mean')
Define the optimization function
In [25]:
import torch.optim as optim
optimizer = optim.SGD(net.parameters(), lr=0.03)  # built-in stochastic gradient descent function
print(optimizer) # function prototype: torch.optim.SGD(params, lr=, momentum=0, dampening=0, weight_decay=0, nesterov=False)
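optim.SGD also accepts parameter groups, so different parts of a network can use different learning rates. A minimal sketch that puts the weight and bias of our single linear layer into separate groups, purely for illustration (it is not used in the training below):
#illustrative only: per-group learning rates via parameter groups
optimizer_groups = optim.SGD([
    {'params': [net[0].weight]},             # uses the default lr given below
    {'params': [net[0].bias], 'lr': 0.003},  # overrides lr for this group
], lr=0.03)
print(optimizer_groups)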
Training
In [26]:
num_epochs = 3
for epoch in range(1, num_epochs + 1):
    for X, y in data_iter:
        output = net(X)
        l = loss(output, y.view(-1, 1))
        optimizer.zero_grad()  # reset gradient, equal to net.zero_grad()
        l.backward()
        optimizer.step()
    print('epoch %d, loss: %f' % (epoch, l.item()))
In [27]:
#result comparison
dense = net[0]
print(true_w, dense.weight.data)
print(true_b, dense.bias.data)
Obtain the Fashion-MNIST training set and read the data
Before introducing the implementation of softmax regression, let's first introduce a multi-class image classification dataset. It will be used repeatedly in later chapters to make it easier to observe and compare algorithms in terms of model accuracy and computational efficiency. The most commonly used image classification dataset is the handwritten digit recognition dataset MNIST [1]. However, most models achieve classification accuracy above 95% on MNIST. To observe the differences between algorithms more clearly, we will use Fashion-MNIST [2], a dataset with more complex image content.
Here we will use the torchvision package, which serves the PyTorch deep learning framework and is mainly used to build computer vision models. torchvision consists mainly of the following components (a short usage sketch follows the list):
- torchvision.datasets: functions for loading data and interfaces to commonly used datasets;
- torchvision.models: common model architectures (including pretrained models), e.g. AlexNet, VGG, ResNet;
- torchvision.transforms: common image transforms, e.g. cropping and rotation;
- torchvision.utils: other useful utilities.
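A minimal sketch of how these components fit together (the model and transform choices below are illustrative and are not used later in this note):
#illustrative only: transforms, models and utils from torchvision
import torch
import torchvision
import torchvision.transforms as transforms
#torchvision.transforms: chain common image transforms
transform = transforms.Compose([transforms.Resize(224), transforms.ToTensor()])
#torchvision.models: common architectures (pretrained=True would download weights)
resnet = torchvision.models.resnet18(pretrained=False)
#torchvision.utils: e.g. arrange a batch of images into a grid
grid = torchvision.utils.make_grid(torch.rand(4, 3, 224, 224))
print(resnet.fc, grid.shape)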
In [37]:
#import needed package
%matplotlib inline
from IPython import display
import matplotlib.pyplot as plt
import torch
import torchvision
import torchvision.transforms as transforms
import time
import sys
sys.path.append("/home/kesci/input")
import d2lzh1981 as d2l
print(torch.__version__)
print(torchvision.__version__)
1.3.0
0.4.1a0+d94043a
Get the dataset
In [38]:
mnist_train = torchvision.datasets.FashionMNIST(root='/home/kesci/input/FashionMNIST2065', train=True, download=True, transform=transforms.ToTensor())
mnist_test = torchvision.datasets.FashionMNIST(root='/home/kesci/input/FashionMNIST2065', train=False, download=True, transform=transforms.ToTensor())
class torchvision.datasets.FashionMNIST(root, train=True, transform=None, target_transform=None, download=False)
- root (string) – root directory of the dataset, containing the processed/training.pt and processed/test.pt files.
- train (bool, optional) – if True, create the dataset from training.pt, otherwise from test.pt.
- download (bool, optional) – if True, download the data from the internet and place it under root. If the data already exists under root, it will not be downloaded again.
- transform (callable, optional) – a function/transform that takes a PIL image and returns the transformed data, e.g. transforms.RandomCrop.
- target_transform (callable, optional) – a function/transform that takes the target and transforms it (a short sketch of transform and target_transform follows this list).
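A minimal sketch of the transform and target_transform arguments (the root path follows the cells above; the label transform is purely illustrative):
#illustrative only: transform acts on the PIL image, target_transform on the integer label
mnist_aug = torchvision.datasets.FashionMNIST(
    root='/home/kesci/input/FashionMNIST2065', train=True, download=True,
    transform=transforms.Compose([transforms.RandomCrop(24), transforms.ToTensor()]),
    target_transform=lambda y: torch.tensor(y, dtype=torch.long),
)
img, lbl = mnist_aug[0]
print(img.shape, lbl)  # expected: torch.Size([1, 24, 24]) and the label as a tensor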
In [39]:
#show result
print(type(mnist_train))
print(len(mnist_train), len(mnist_test))
<class 'torchvision.datasets.mnist.FashionMNIST'>
60000 10000
In [40]:
#we can access any sample by its index
feature, label = mnist_train[0]
print(feature.shape, label) # Channel x Height x Width
torch.Size([1, 28, 28]) 9
If no transform is applied, the input data is an image, so let's take a look at the image type:
In [41]:
mnist_PIL = torchvision.datasets.FashionMNIST(root='/home/kesci/input/FashionMNIST2065', train=True, download=True)
PIL_feature, label = mnist_PIL[0]
print(PIL_feature)
<PIL.Image.Image image mode=L size=28x28 at 0x7F57E8736F28>
In [42]:
#this function is saved in the d2lzh package for later use
def get_fashion_mnist_labels(labels):
    text_labels = ['t-shirt', 'trouser', 'pullover', 'dress', 'coat',
                   'sandal', 'shirt', 'sneaker', 'bag', 'ankle boot']
    return [text_labels[int(i)] for i in labels]
In [43]:
def show_fashion_mnist(images, labels):
    d2l.use_svg_display()
    # the _ here denotes a variable we ignore (do not use)
    _, figs = plt.subplots(1, len(images), figsize=(12, 12))
    for f, img, lbl in zip(figs, images, labels):
        f.imshow(img.view((28, 28)).numpy())
        f.set_title(lbl)
        f.axes.get_xaxis().set_visible(False)
        f.axes.get_yaxis().set_visible(False)
    plt.show()
In [44]:
X, y = [], []
for i in range(10):
    X.append(mnist_train[i][0])  # append the i-th feature to X
    y.append(mnist_train[i][1])  # append the i-th label to y
show_fashion_mnist(X, get_fashion_mnist_labels(y))
In [45]:
#read the data
batch_size = 256
num_workers = 4
train_iter = torch.utils.data.DataLoader(mnist_train, batch_size=batch_size, shuffle=True, num_workers=num_workers)
test_iter = torch.utils.data.DataLoader(mnist_test, batch_size=batch_size, shuffle=False, num_workers=num_workers)
In [46]:
start = time.time()
for X, y in train_iter:
    continue
print('%.2f sec' % (time.time() - start))
4.89 sec
Implementing softmax from scratch
In [47]:
import torch
import torchvision
import numpy as np
import sys
sys.path.append("/home/kesci/input")
import d2lzh1981 as d2l
print(torch.__version__)
print(torchvision.__version__)
1.3.0
0.4.1a0+d94043a
Get the training and test data
In [48]:
batch_size = 256
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size)
Initialize model parameters
In [49]:
num_inputs = 784
print(28*28)
num_outputs = 10
W = torch.tensor(np.random.normal(0, 0.01, (num_inputs, num_outputs)), dtype=torch.float)
b = torch.zeros(num_outputs, dtype=torch.float)
784
In [50]:
W.requires_grad_(requires_grad=True)
b.requires_grad_(requires_grad=True)
Out[50]:
tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], requires_grad=True)
Operating on a multi-dimensional Tensor along a dimension
In [51]:
X = torch.tensor([[1, 2, 3], [4, 5, 6]])
print(X.sum(dim=0, keepdim=True))   # dim=0: sum within each column, keep the reduced dimension in the result
print(X.sum(dim=1, keepdim=True))   # dim=1: sum within each row, keep the reduced dimension in the result
print(X.sum(dim=0, keepdim=False))  # dim=0: sum within each column, do not keep the reduced dimension
print(X.sum(dim=1, keepdim=False))  # dim=1: sum within each row, do not keep the reduced dimension
tensor([[5, 7, 9]])
tensor([[ 6],
[15]])
tensor([5, 7, 9])
tensor([ 6, 15])
Define the softmax operation
$$\hat{y}_j = \frac{\exp(o_j)}{\sum_{i=1}^{3} \exp(o_i)}$$
In [52]:
def softmax(X):
    X_exp = X.exp()
    partition = X_exp.sum(dim=1, keepdim=True)
    # print("X size is ", X_exp.size())
    # print("partition size is ", partition, partition.size())
    return X_exp / partition  # broadcasting is applied here
In [53]:
X = torch.rand((2, 5))
X_prob = softmax(X)
print(X_prob, '\n', X_prob.sum(dim=1))
tensor([[0.1927, 0.2009, 0.1823, 0.1887, 0.2355],
[0.1274, 0.1843, 0.2536, 0.2251, 0.2096]])
tensor([1., 1.])
Softmax regression model
$$o^{(i)} = x^{(i)} W + b, \qquad \hat{y}^{(i)} = \mathrm{softmax}(o^{(i)}).$$
In [54]:
def net(X):
    return softmax(torch.mm(X.view((-1, num_inputs)), W) + b)
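A quick shape check clarifies the model: a (2, 1, 28, 28) batch is flattened to (2, 784), multiplied by W of shape (784, 10), and softmax is applied row-wise, so each row sums to 1. A minimal sketch with a random batch (not taken from the dataset):
X_check = torch.rand((2, 1, 28, 28))
y_check = net(X_check)
print(y_check.shape, y_check.sum(dim=1))  # torch.Size([2, 10]); each row sums to 1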
Define the loss function
$$H\left(y^{(i)}, \hat{y}^{(i)}\right) = -\sum_{j=1}^{q} y_j^{(i)} \log \hat{y}_j^{(i)},$$
$$\ell(\Theta) = \frac{1}{n} \sum_{i=1}^{n} H\left(y^{(i)}, \hat{y}^{(i)}\right),$$
$$\ell(\Theta) = -\frac{1}{n} \sum_{i=1}^{n} \log \hat{y}^{(i)}_{y^{(i)}}$$
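As a worked example with the y_hat and y used in the next cell (the true classes are 0 and 2):
$$-\log \hat{y}^{(1)}_{0} = -\log 0.1 \approx 2.303, \qquad -\log \hat{y}^{(2)}_{2} = -\log 0.5 \approx 0.693$$
gather(1, y.view(-1, 1)) selects exactly these two predicted probabilities, so cross_entropy below returns these per-sample losses directly.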
In [55]:
y_hat = torch.tensor([[0.1, 0.3, 0.6], [0.3, 0.2, 0.5]])
y = torch.LongTensor([0, 2])
y_hat.gather(1, y.view(-1, 1))
Out[55]:
tensor([[0.1000],
[0.5000]])
In [56]:
def cross_entropy(y_hat, y):
    return - torch.log(y_hat.gather(1, y.view(-1, 1)))
Define accuracy
When we make predictions after the model is trained, we will use the accuracy defined here.
In [57]:
def accuracy(y_hat, y):
    return (y_hat.argmax(dim=1) == y).float().mean().item()
In [58]:
print(accuracy(y_hat, y))
0.5
In [59]:
#this function is saved in the d2lzh_pytorch package for later use. It will be improved gradually: its complete implementation is described in the 'Image Augmentation' section
def evaluate_accuracy(data_iter, net):
    acc_sum, n = 0.0, 0
    for X, y in data_iter:
        acc_sum += (net(X).argmax(dim=1) == y).float().sum().item()
        n += y.shape[0]
    return acc_sum / n
In [60]:
print(evaluate_accuracy(test_iter, net))
0.1457
Train the model
In [61]:
num_epochs, lr = 5, 0.1
#this function is saved in the d2lzh_pytorch package for later use
def train_ch3(net, train_iter, test_iter, loss, num_epochs, batch_size,
              params=None, lr=None, optimizer=None):
    for epoch in range(num_epochs):
        train_l_sum, train_acc_sum, n = 0.0, 0.0, 0
        for X, y in train_iter:
            y_hat = net(X)
            l = loss(y_hat, y).sum()
            # zero the gradients
            if optimizer is not None:
                optimizer.zero_grad()
            elif params is not None and params[0].grad is not None:
                for param in params:
                    param.grad.data.zero_()
            l.backward()
            if optimizer is None:
                d2l.sgd(params, lr, batch_size)
            else:
                optimizer.step()
            train_l_sum += l.item()
            train_acc_sum += (y_hat.argmax(dim=1) == y).sum().item()
            n += y.shape[0]
        test_acc = evaluate_accuracy(test_iter, net)
        print('epoch %d, loss %.4f, train acc %.3f, test acc %.3f'
              % (epoch + 1, train_l_sum / n, train_acc_sum / n, test_acc))

train_ch3(net, train_iter, test_iter, cross_entropy, num_epochs, batch_size, [W, b], lr)
epoch 1, loss 0.7870, train acc 0.751, test acc 0.794
epoch 2, loss 0.5702, train acc 0.813, test acc 0.809
epoch 3, loss 0.5254, train acc 0.826, test acc 0.814
epoch 4, loss 0.5009, train acc 0.832, test acc 0.822
epoch 5, loss 0.4853, train acc 0.837, test acc 0.828
Model prediction
Now that our model is trained, we can make some predictions to see how accurate it really is, and demonstrate how to classify images. Given a series of images (the third row of output), we compare their true labels (the first row of text output) with the model's predictions (the second row of text output).
In [62]:
X, y = next(iter(test_iter))
true_labels = d2l.get_fashion_mnist_labels(y.numpy())
pred_labels = d2l.get_fashion_mnist_labels(net(X).argmax(dim=1).numpy())
titles = [true + '\n' + pred for true, pred in zip(true_labels, pred_labels)]
d2l.show_fashion_mnist(X[0:9], titles[0:9])
Concise implementation of softmax
In [63]:
#load the needed packages and modules
import torch
from torch import nn
from torch.nn import init
import numpy as np
import sys
sys.path.append("/home/kesci/input")
import d2lzh1981 as d2l
print(torch.__version__)
1.3.0
Initialize parameters and get the data
In [64]:
batch_size = 256
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size)
Define the network model
In [65]:
num_inputs = 784
num_outputs = 10
class LinearNet(nn.Module):
    def __init__(self, num_inputs, num_outputs):
        super(LinearNet, self).__init__()
        self.linear = nn.Linear(num_inputs, num_outputs)
    def forward(self, x):  # x shape: (batch, 1, 28, 28)
        y = self.linear(x.view(x.shape[0], -1))
        return y
#net = LinearNet(num_inputs, num_outputs)

class FlattenLayer(nn.Module):
    def __init__(self):
        super(FlattenLayer, self).__init__()
    def forward(self, x):  # x shape: (batch, *, *, ...)
        return x.view(x.shape[0], -1)

from collections import OrderedDict
net = nn.Sequential(
    # FlattenLayer(),
    # LinearNet(num_inputs, num_outputs)
    OrderedDict([
        ('flatten', FlattenLayer()),
        ('linear', nn.Linear(num_inputs, num_outputs))])  # or our own LinearNet(num_inputs, num_outputs) works as well
)
Initialize model parameters
In [66]:
init.normal_(net.linear.weight, mean=0, std=0.01)
init.constant_(net.linear.bias, val=0)
Out[66]:
Parameter containing:
tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], requires_grad=True)
Define the loss function
In [67]:
loss = nn.CrossEntropyLoss()  # its function prototype is shown below
#class torch.nn.CrossEntropyLoss(weight=None, size_average=None, ignore_index=-100, reduce=None, reduction='mean')
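nn.CrossEntropyLoss expects raw logits and internally combines LogSoftmax and NLLLoss, which is why the network above has no explicit softmax layer. A minimal sketch of this equivalence (the logits and targets below are illustrative):
#illustrative only: CrossEntropyLoss == NLLLoss applied to LogSoftmax outputs
logits = torch.tensor([[0.1, 0.3, 0.6], [0.3, 0.2, 0.5]])
target = torch.tensor([0, 2])
ce = nn.CrossEntropyLoss()(logits, target)
manual = nn.NLLLoss()(nn.LogSoftmax(dim=1)(logits), target)
print(ce, manual)  # the two values should match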
Define the optimization function
In [68]:
optimizer = torch.optim.SGD(net.parameters(), lr=0.1)  # its function prototype is shown below
#class torch.optim.SGD(params, lr=, momentum=0, dampening=0, weight_decay=0, nesterov=False)
Training
In [69]:
num_epochs = 5
d2l.train_ch3(net, train_iter, test_iter, loss, num_epochs, batch_size, None, None, optimizer)
epoch 1, loss 0.0031, train acc 0.749, test acc 0.794
epoch 2, loss 0.0022, train acc 0.814, test acc 0.800
epoch 3, loss 0.0021, train acc 0.826, test acc 0.811
epoch 4, loss 0.0020, train acc 0.833, test acc 0.826
epoch 5, loss 0.0019, train acc 0.837, test acc 0.825
Multilayer perceptron: PyTorch implementation
In [21]:
import torch
from torch import nn
from torch.nn import init
import numpy as np
import sys
sys.path.append("/home/kesci/input")
import d2lzh1981 as d2l
print(torch.__version__)
1.3.0
Initialize the model and its parameters
In [22]:
num_inputs, num_outputs, num_hiddens = 784, 10, 256
net = nn.Sequential(
    d2l.FlattenLayer(),
    nn.Linear(num_inputs, num_hiddens),
    nn.ReLU(),
    nn.Linear(num_hiddens, num_outputs),
)

for params in net.parameters():
    init.normal_(params, mean=0, std=0.01)
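A quick sketch to inspect the resulting architecture and parameter shapes (expected: a 256x784 weight and 256 bias for the hidden layer, and a 10x256 weight and 10 bias for the output layer):
print(net)
for name, param in net.named_parameters():
    print(name, param.shape)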
Training
In [23]:
batch_size = 256
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size, root='/home/kesci/input/FashionMNIST2065')
loss = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(net.parameters(), lr=0.5)
num_epochs = 5
d2l.train_ch3(net, train_iter, test_iter, loss, num_epochs, batch_size, None, None, optimizer)
epoch 1, loss 0.0031, train acc 0.701, test acc 0.774
epoch 2, loss 0.0019, train acc 0.821, test acc 0.806
epoch 3, loss 0.0017, train acc 0.841, test acc 0.805
epoch 4, loss 0.0015, train acc 0.855, test acc 0.834
epoch 5, loss 0.0014, train acc 0.866, test acc 0.840