PyTorch Learning (3): Softmax Regression

Adapted from https://tangshusen.me/Dive-into-DL-PyTorch/#/

Official docs: https://pytorch.org/docs/stable/tensors.html

Softmax regression

Fashion-MNIST[2]

Getting the dataset

import torch
import torchvision
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
import time
import sys
sys.path.append("..") # so that d2lzh_pytorch in the parent directory can be imported
import d2lzh_pytorch as d2l
mnist_train = torchvision.datasets.FashionMNIST(root='~/Datasets/FashionMNIST', train=True, download=True, transform=transforms.ToTensor())
mnist_test = torchvision.datasets.FashionMNIST(root='~/Datasets/FashionMNIST', train=False, download=True, transform=transforms.ToTensor())
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz to C:\Users\jiame/Datasets/FashionMNIST\FashionMNIST\raw\train-images-idx3-ubyte.gz
Extracting C:\Users\jiame/Datasets/FashionMNIST\FashionMNIST\raw\train-images-idx3-ubyte.gz to C:\Users\jiame/Datasets/FashionMNIST\FashionMNIST\raw
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz to C:\Users\jiame/Datasets/FashionMNIST\FashionMNIST\raw\train-labels-idx1-ubyte.gz
Extracting C:\Users\jiame/Datasets/FashionMNIST\FashionMNIST\raw\train-labels-idx1-ubyte.gz to C:\Users\jiame/Datasets/FashionMNIST\FashionMNIST\raw
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz to C:\Users\jiame/Datasets/FashionMNIST\FashionMNIST\raw\t10k-images-idx3-ubyte.gz
Extracting C:\Users\jiame/Datasets/FashionMNIST\FashionMNIST\raw\t10k-images-idx3-ubyte.gz to C:\Users\jiame/Datasets/FashionMNIST\FashionMNIST\raw
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz to C:\Users\jiame/Datasets/FashionMNIST\FashionMNIST\raw\t10k-labels-idx1-ubyte.gz
Extracting C:\Users\jiame/Datasets/FashionMNIST\FashionMNIST\raw\t10k-labels-idx1-ubyte.gz to C:\Users\jiame/Datasets/FashionMNIST\FashionMNIST\raw
Processing...
Done!
print(type(mnist_train))
print(len(mnist_train),len(mnist_test))
<class 'torchvision.datasets.mnist.FashionMNIST'>
60000 10000
feature,label = mnist_train[0]
print(feature.shape,label)
torch.Size([1, 28, 28]) 9
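The first dimension is the channel (Fashion-MNIST images are grayscale, so it is 1), followed by height and width; the label 9 corresponds to 'ankle boot'.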
def get_fashion_mnist_labels(labels):
    text_labels = ['t-shirt', 'trouser', 'pullover', 'dress', 'coat',
                   'sandal', 'shirt', 'sneaker', 'bag', 'ankle boot']
    return [text_labels[int(i)] for i in labels]
from IPython import display
def show_fashion_mnist(images, labels):
    display.set_matplotlib_formats('svg')  # render figures as SVG in the notebook
    # the underscore denotes a variable we ignore (the Figure object)
    _, figs = plt.subplots(1, len(images), figsize=(12, 12))
    for f, img, lbl in zip(figs, images, labels):
        f.imshow(img.view((28, 28)).numpy())
        f.set_title(lbl)
        f.axes.get_xaxis().set_visible(False)
        f.axes.get_yaxis().set_visible(False)
    plt.show()
x, y = [], []
for i in range(10):
    x.append(mnist_train[i][0])
    y.append(mnist_train[i][1])  # labels must come from the same dataset as the images
show_fashion_mnist(x, get_fashion_mnist_labels(y))
Note: if display is imported as a function (from IPython.display import display), calling display.set_matplotlib_formats('svg') fails with AttributeError: 'function' object has no attribute 'set_matplotlib_formats'. Importing the module itself, from IPython import display as above, resolves the error.
batch_size = 256
if sys.platform.startswith('win'):
    num_workers = 0  # multi-process data loading can be problematic on Windows
else:
    num_workers = 4
train_iter = torch.utils.data.DataLoader(mnist_train, batch_size, shuffle=True, num_workers=num_workers)
test_iter = torch.utils.data.DataLoader(mnist_test, batch_size, shuffle=False, num_workers=num_workers)

start = time.time()
for x,y in train_iter:
    continue
print('%.2f sec'%(time.time()-start))
8.49 sec
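As a quick sanity check, you can pull a single mini-batch from the iterator and inspect its shapes (a minimal sketch; the shapes follow from batch_size=256 and the 1×28×28 images):

# fetch one mini-batch and check its dimensions
X, y = next(iter(train_iter))
print(X.shape, y.shape)  # torch.Size([256, 1, 28, 28]) torch.Size([256])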

Initializing model parameters

import numpy as np
num_inputs=784
num_outputs = 10
W = torch.tensor(np.random.normal(0,0.01,(num_inputs,num_outputs)),dtype=torch.float)
b = torch.zeros(num_outputs,dtype=torch.float)

W.requires_grad_(requires_grad=True)
b.requires_grad_(requires_grad=True)
tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], requires_grad=True)

Implementing the softmax operation

x=torch.tensor([[1,2,3],[4,5,6]])
print(x.sum(dim=0,keepdim=True))
print(x.sum(dim=1,keepdim=True))

tensor([[5, 7, 9]])
tensor([[ 6],
        [15]])
def softmax(X):
    X_exp=X.exp()
    partition = X_exp.sum(dim=1,keepdim=True)
    return X_exp/partition

x=torch.rand((2,5))
x_prob=softmax(x)
print(x_prob,x_prob.sum(dim=1))
tensor([[0.2245, 0.1597, 0.2563, 0.2303, 0.1293],
        [0.2678, 0.2187, 0.1224, 0.1078, 0.2834]]) tensor([1., 1.])
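Note that softmax as implemented above exponentiates X directly, which overflows for large inputs. Softmax is invariant to subtracting a per-row constant, so a common fix (a minimal sketch, not from the original text) subtracts the row-wise maximum first:

def stable_softmax(X):
    # softmax(X) == softmax(X - c) for any per-row constant c,
    # so shifting by the row max only improves numerical stability
    X_shifted = X - X.max(dim=1, keepdim=True)[0]
    X_exp = X_shifted.exp()
    return X_exp / X_exp.sum(dim=1, keepdim=True)

big = torch.tensor([[1000.0, 0.0]])
print(softmax(big))         # tensor([[nan, 0.]]) -- exp(1000) overflows to inf
print(stable_softmax(big))  # tensor([[1., 0.]])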

Defining the model

def net(x):
    return softmax(torch.mm(x.view((-1,num_inputs)),W)+b)
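The view call flattens each 1×28×28 image into a length-784 row vector, so the matrix product with W (shape 784×10) produces one score per class for each example.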

The loss function

y_hat = torch.tensor([[0.1,0.3,0.6],[0.3,0.2,0.5]])
y=torch.LongTensor([0,2])
y_hat.gather(1,y.view(-1,1))
tensor([[0.1000],
        [0.5000]])
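gather(1, y.view(-1, 1)) picks out, for each row i, the entry of y_hat indexed by the true label, i.e. y_hat[i, y[i]]: 0.1 for the first example (label 0) and 0.5 for the second (label 2). The cross-entropy loss below therefore returns one loss value per example.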
def cross_entropy(y_hat,y):
    return -torch.log(y_hat.gather(1,y.view(-1,1)))

Classification accuracy

def accuracy(y_hat, y):
    return (y_hat.argmax(dim=1) == y).float().mean().item()
def evaluate_accuracy(data_iter, net):
    acc_sum, n = 0.0, 0
    for X, y in data_iter:
        acc_sum += (net(X).argmax(dim=1) == y).float().sum().item()
        n += y.shape[0]
    return acc_sum / n
print(evaluate_accuracy(test_iter, net))
0.0791
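With randomly initialized parameters the model guesses essentially at random, so the accuracy is close to 1/10 for the 10 classes. Since evaluation needs no gradients, a variant of evaluate_accuracy that wraps the loop in torch.no_grad() saves memory; behavior is otherwise identical (a minimal sketch):

def evaluate_accuracy(data_iter, net):
    acc_sum, n = 0.0, 0
    with torch.no_grad():  # gradients are not needed for evaluation
        for X, y in data_iter:
            acc_sum += (net(X).argmax(dim=1) == y).float().sum().item()
            n += y.shape[0]
    return acc_sum / n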

Training the model

def sgd(params, lr, batch_size):  # this function is also saved in the d2lzh_pytorch package for later use
    for param in params:
        param.data -= lr * param.grad / batch_size  # update via param.data so autograd does not track it
num_epochs, lr = 5, 0.1
def train_ch3(net, train_iter, test_iter, loss, num_epochs, batch_size, params=None, lr=None, optimizer=None):
    for epoch in range(num_epochs):
        train_l_sum, train_acc_sum, n = 0.0, 0.0, 0
        for x, y in train_iter:
            y_hat = net(x)
            l = loss(y_hat, y).sum()
            # clear gradients before the backward pass
            if optimizer is not None:
                optimizer.zero_grad()
            elif params is not None and params[0].grad is not None:
                for param in params:
                    param.grad.data.zero_()
            l.backward()
            if optimizer is None:
                sgd(params, lr, batch_size)
            else:
                optimizer.step()
            train_l_sum += l.item()
            train_acc_sum += (y_hat.argmax(dim=1) == y).sum().item()
            n += y.shape[0]
        test_acc = evaluate_accuracy(test_iter, net)
        print('epoch %d, loss %.4f, train acc %.3f, test acc %.3f'
              % (epoch + 1, train_l_sum / n, train_acc_sum / n, test_acc))
train_ch3(net, train_iter, test_iter, cross_entropy, num_epochs, batch_size, [W, b], lr)
epoch 1, loss 0.7859, train acc 0.749, test acc 0.787
epoch 2, loss 0.5697, train acc 0.813, test acc 0.811
epoch 3, loss 0.5243, train acc 0.826, test acc 0.820
epoch 4, loss 0.5018, train acc 0.833, test acc 0.821
epoch 5, loss 0.4851, train acc 0.838, test acc 0.827
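After training, the model can be used for prediction. The sketch below (following the book's flow; it assumes the corrected show_fashion_mnist above) compares true and predicted labels for the first few test images:

X, y = next(iter(test_iter))
true_labels = get_fashion_mnist_labels(y.numpy())
pred_labels = get_fashion_mnist_labels(net(X).argmax(dim=1).numpy())
titles = [t + '\n' + p for t, p in zip(true_labels, pred_labels)]
show_fashion_mnist(X[0:9], titles[0:9])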

Part 2: Concise implementation

from torch import nn
from torch.nn import init
num_inputs=784
num_outputs=10

class LinearNet(nn.Module):
    def __init__(self,num_inputs,num_outputs):
        super(LinearNet,self).__init__()
        self.linear=nn.Linear(num_inputs,num_outputs)
    def forward(self,x):
        y=self.linear(x.view(x.shape[0],-1))
        return y
net=LinearNet(num_inputs,num_outputs)
class FlattenLayer(nn.Module):
    def __init__(self):
        super(FlattenLayer, self).__init__()
    def forward(self, x):
        return x.view(x.shape[0], -1)  # flatten each example to a 1-D vector

The model

from collections import OrderedDict
net = nn.Sequential(
    OrderedDict([
        ('flatten', FlattenLayer()),
        ('linear', nn.Linear(num_inputs, num_outputs))
    ])
)
# initialize the weight and bias parameters
init.normal_(net.linear.weight,mean=0,std=0.01)
init.constant_(net.linear.bias,val=0)
Parameter containing:
tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], requires_grad=True)
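Both init.normal_ and init.constant_ modify the parameter in place and return it, which is why the bias tensor is echoed above.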
loss = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(net.parameters(),lr=0.1)
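Note that nn.CrossEntropyLoss combines log-softmax and the negative log-likelihood loss in one numerically stable operation, so the network outputs raw scores (logits) and no explicit softmax layer is needed.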
num_epochs = 5
# train_ch3 is the same training function defined in Part 1, so we simply reuse it here
train_ch3(net,train_iter,test_iter,loss,num_epochs,batch_size,None,None,optimizer)
epoch 1, loss 0.0031, train acc 0.749, test acc 0.791
epoch 2, loss 0.0022, train acc 0.813, test acc 0.813
epoch 3, loss 0.0021, train acc 0.826, test acc 0.818
epoch 4, loss 0.0020, train acc 0.832, test acc 0.824
epoch 5, loss 0.0019, train acc 0.837, test acc 0.814
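The printed losses are much smaller than in Part 1 because nn.CrossEntropyLoss averages over the batch by default (reduction='mean'), so l is already a per-example mean; summing these per-batch means and then dividing by the total number of examples effectively divides by the batch size a second time. The from-scratch cross_entropy, by contrast, returned one loss per example before summing.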