深度学习实验（二）——使用AutoGrad

introversi0n

已于 2024-01-29 20:33:57 修改

阅读量473

点赞数

分类专栏： # 深度学习实验　文章标签： 深度学习 python

于 2022-12-03 22:37:30 首次发布

本文链接：https://blog.csdn.net/code_bro/article/details/128167665

版权

深度学习实验专栏收录该内容

5 篇文章

订阅专栏

深度学习实验二: 使用AutoGrad

# Submitter identity, echoed once at the top of the lab output.
name = '杨宇海'  # fill in your name
sid = 'B02014152'  # fill in your student ID

print('姓名:%s, 学号:%s' % (name, sid))

import torch
import numpy as np
import matplotlib.pyplot as plt

1. 使用张量的自动求导计算一元函数的导数

请按以下要求，使用张量编写代码，计算sigmoid函数的导数，并绘制曲线图。

#Logistic sigmoid used by the autograd derivative exercise below
def sigmoid(z):
    """Return the element-wise logistic sigmoid ``1 / (1 + exp(-z))`` of tensor ``z``."""
    denominator = 1 + torch.exp(-z)
    return 1 / denominator

#Sanity-check sigmoid on three sample points
sample_points = torch.tensor([-1, 0, 1], dtype=torch.float32)
print(sigmoid(sample_points))

#Take 100 evenly spaced points in [-5, 5] to build the 1-D tensor x

x = torch.linspace(-5,5,100,dtype = torch.float32)#one-line implementation requested by the exercise

print(x.shape)

# Notebook-style inspection: in a REPL this displays whether x currently tracks gradients
x.requires_grad

#Set the requires_grad attribute of tensor x to True below

x.requires_grad = True

# Inspect x.grad before any backward pass has been run
x.grad is None

# Inspect x.grad_fn; x was created directly by linspace, not computed from other tensors
x.grad_fn is None

y = sigmoid(x)

# Inspect the autograd node attached to y, which was computed from x
y.grad_fn

# Plot sigmoid(x) in red; detach() gives tensors cut off from the autograd graph for numpy()
plt.plot(x.detach().numpy(),y.detach().numpy(),'r-')
plt.show()

#Compute the sum of the elements of y below
z =  y.sum() #one-line implementation requested by the exercise


# print(z.item())

#Use autograd to back-propagate and obtain the gradient of x
#One-line implementation requested; hint: Tensor.backward()
z.backward(retain_graph=True) # retain_graph=True keeps the graph alive so backward can be run again later
# z.backward()

# x.grad.zero_()
x.grad

# Red curve: sigmoid(x); black curve: the gradient autograd stored in x.grad
plt.plot(x.detach(), y.detach(),'r-')
plt.plot(x.detach(), x.grad.detach(),'k-')
plt.show()

#Run back-propagation a second time below and see what the result is
z.backward()
print(x.grad)
# Same plot as before, now with x.grad as it stands after the second backward call
plt.plot(x.detach(), y.detach(),'r-')
plt.plot(x.detach(), x.grad.detach(),'k-')
plt.show()
# Observation: the derivative is doubled relative to the first pass, i.e. gradients accumulate

2. 使用torch.optim训练线性分类器

编写一个softmax分类器模型，并用torch.optim训练

下面的代码生成一组随机2维样本，包含三个类别，每个类别有100个样本

# Three Gaussian blobs (scale 0.5), 100 two-dimensional samples each, one sample per column.
x0 = 0.5 * torch.randn((2, 100)) + torch.tensor([[0.5], [0.5]])
x1 = 0.5 * torch.randn_like(x0) - 1
x2 = 0.5 * torch.randn_like(x0) + torch.tensor([[-1.5], [1.5]])

# Concatenate along the sample axis; labels are 0.0, 1.0, 2.0 repeated 100 times each.
x = torch.cat((x0, x1, x2), dim=1)
y = torch.cat([torch.full((1, 100), float(k)) for k in range(3)], dim=1).squeeze()

# One marker style per class.
for class_id, marker in enumerate(('r*', 'bo', 'kd')):
    members = y == class_id
    plt.plot(x[0, members], x[1, members], marker)
plt.axis('tight')
plt.axis('equal')
plt.show()

#Column-wise softmax
def softmax(z):
    """Apply softmax along dim 0.

    ``z`` is treated as a d-by-n tensor whose columns are samples; each column
    of the result is a probability distribution.
    """
    # Step 1: shift every column by its own maximum so the exponentials stay bounded.
    column_max = z.max(dim=0).values
    # Step 2: exponentiate the shifted scores.
    exp_scores = (z - column_max).exp()
    # Step 3: normalize each column so it sums to one.
    return exp_scores / exp_scores.sum(dim=0)

#Try softmax on a random 3x10 input
dummy = torch.rand(3, 10, dtype=torch.float32)
softmax(dummy)

import torch.optim as optim

#Softmax classifier model, represented as a class
class SoftmaxClassifier:
    """Linear softmax classifier trained by full-batch gradient descent.

    Samples are laid out column-wise: ``X`` has shape (in_features, n) and the
    predicted probabilities have shape (out_features, n).
    """

    def __init__(self, in_features, out_features):
        """Record the input/output feature dimensions and create the parameters."""
        self.in_features = in_features
        self.out_features = out_features
        self.__initialize__()

    def __initialize__(self):
        """Reset the weight matrix ``W`` and bias vector ``b`` to zeros.

        Both tensors are created with ``requires_grad=True`` so autograd can
        fill in their gradients during training.
        """
        self.W = torch.zeros([self.out_features, self.in_features], requires_grad=True)
        self.b = torch.zeros([self.out_features, 1], requires_grad=True)

    def predict(self, X):
        """Return class probabilities for the column-wise samples in ``X``.

        Delegates to the module-level ``softmax`` helper and prints the sizes
        of ``W`` and ``X`` on every call.
        """
        print("W.size():", self.W.size())
        print("X.size():", X.size())
        z = self.W @ X + self.b
        return softmax(z)

    def fit(self, X, Y, lr=0.1, max_iter=None):
        """Train from scratch on ``(X, Y)`` with SGD until the loss stops changing.

        Args:
            X: input tensor with one sample per column.
            Y: integer class indices, one per column of ``X``; used to index
                the one-hot target matrix.
            lr: learning rate passed to ``optim.SGD``.
            max_iter: optional cap on the number of iterations. ``None``
                (the default) keeps the original run-until-converged behavior.

        Raises:
            FloatingPointError: if the loss becomes NaN or infinite. Without
                this check the convergence test can never succeed and the
                loop would spin forever.
        """
        # Step 1: start from freshly initialized parameters.
        self.__initialize__()

        # Convert the labels Y into a one-hot matrix Y1 (one column per sample).
        Y1 = torch.zeros((self.out_features, X.size(1)))
        Y1[Y, torch.arange(X.size(1))] = 1

        prev_loss = 0
        epsilon = 1e-6
        step = 0

        # Step 2: plain SGD over the two parameter tensors.
        optimizer = optim.SGD(params=[self.W, self.b], lr=lr)
        while max_iter is None or step < max_iter:
            # 1. Clear gradients left over from the previous iteration.
            optimizer.zero_grad()

            # 2. Forward pass: predict on the training samples.
            rho = self.predict(X)

            # 3. Softmax loss: mean negative log-probability of the true class.
            loss = -torch.log(rho[Y1 == 1]).mean()
            print(rho.size())
            print(Y1.size())
            print(rho[Y1==1].size())

            loss_value = loss.detach().item()

            print('iter = %d, loss = %f\n'%(step, loss_value))
            step = step + 1

            if not torch.isfinite(loss).item():
                raise FloatingPointError(
                    'softmax loss became non-finite at iteration %d' % (step - 1))

            # Converged once the loss moved by less than epsilon since the last step.
            if abs(loss_value - prev_loss) < epsilon:
                break
            prev_loss = loss_value

            # 4. Backward pass: compute gradients.
            loss.backward()

            # 5. Gradient-descent update of W and b.
            optimizer.step()

#Smoke-test SoftmaxClassifier on the first ten samples
cls = SoftmaxClassifier(in_features=2, out_features=3)
cls.predict(x[:, :10])

#Train a fresh model; the labels are cast to int64 because fit uses them as indices

cls = SoftmaxClassifier(in_features=2, out_features=3)
y = y.long()
cls.fit(x, y)

#Evaluate the training result
#Accuracy is expected to exceed 95%
with torch.no_grad():
    rho = cls.predict(x)
    print(rho.size())
    # Predicted class = row holding the largest probability in each column
    yhat = torch.argmax(rho, dim=0)
    print(yhat.size())
    hits = (yhat == y).to(torch.float32)
    acc = hits.mean().item()

    print('accuracy = ',acc)

3. （选做）用上面定义的softmax对MNIST手写体分类

import torch
import numpy as np
import matplotlib.pyplot as plt
from torchvision import datasets,transforms

# MNIST train/test splits, downloaded into data_path if missing; images are converted to tensors.
data_path = '../data/'
mnist_train = datasets.MNIST(
    data_path,
    train=True,
    transform=transforms.ToTensor(),
    download=True,
)
mnist_test = datasets.MNIST(
    data_path,
    train=False,
    transform=transforms.ToTensor(),
    download=True,
)

import torch
# Mini-batch iterators over both splits: 32 samples per batch, shuffled.
train_loader = torch.utils.data.DataLoader(dataset=mnist_train, batch_size=32, shuffle=True)
test_loader = torch.utils.data.DataLoader(dataset=mnist_test, batch_size=32, shuffle=True)

import torch.optim as optim

#Column-wise softmax
def softmax(z):
    """Apply softmax along dim 0.

    ``z`` is treated as a d-by-n tensor whose columns are samples; each column
    of the result is a probability distribution.
    """
    # Step 1: shift every column by its own maximum so the exponentials stay bounded.
    column_max = z.max(dim=0).values
    # Step 2: exponentiate the shifted scores.
    exp_scores = (z - column_max).exp()
    # Step 3: normalize each column so it sums to one.
    return exp_scores / exp_scores.sum(dim=0)

# Softmax classifier model, represented as a class
class SoftmaxClassifier:
    """Linear softmax classifier that can be trained incrementally, batch by batch.

    Samples are laid out column-wise: ``X`` has shape (in_features, n) and the
    predicted probabilities have shape (out_features, n). Unlike a one-shot
    trainer, ``fit`` does not reset the parameters, so successive calls keep
    refining the same ``W`` and ``b``.
    """

    def __init__(self, in_features, out_features):
        """Record the input/output feature dimensions and create the parameters."""
        self.in_features = in_features
        self.out_features = out_features
        self.__initialize__()

    def __initialize__(self):
        """Reset ``W`` and ``b`` to zeros (tracking gradients) and clear ``l0``."""
        self.W = torch.zeros([self.out_features, self.in_features], requires_grad=True)
        self.b = torch.zeros([self.out_features, 1], requires_grad=True)
        # Most recent training loss, exposed so callers can report it.
        self.l0 = 0

    def predict(self, X):
        """Return class probabilities for the column-wise samples in ``X``.

        Delegates to the module-level ``softmax`` helper.
        """
        z = self.W @ X + self.b
        return softmax(z)

    def fit(self, X, Y, lr=0.1, max_iter=None):
        """Continue training on ``(X, Y)`` with SGD until the loss stops changing.

        The parameters are deliberately NOT re-initialized, so this can be
        called once per mini-batch. After the call ``self.l0`` holds the last
        loss computed on this batch.

        Args:
            X: input tensor with one sample per column.
            Y: integer class indices, one per column of ``X``; used to index
                the one-hot target matrix.
            lr: learning rate passed to ``optim.SGD``.
            max_iter: optional cap on the number of iterations. ``None``
                (the default) keeps the original run-until-converged behavior.

        Raises:
            FloatingPointError: if the loss becomes NaN or infinite. Without
                this check the convergence test can never succeed and the
                loop would spin forever.
        """
        # Convert the labels Y into a one-hot matrix Y1 (one column per sample).
        Y1 = torch.zeros((self.out_features, X.size(1)))
        Y1[Y, torch.arange(X.size(1))] = 1

        epsilon = 1e-6
        # The convergence baseline is local to this call. Reusing self.l0 here
        # would compare the first loss of this batch against the final loss of
        # the previous batch, which is unrelated data.
        prev_loss = 0
        step = 0

        # Plain SGD over the two parameter tensors.
        optimizer = optim.SGD(params=[self.W, self.b], lr=lr)
        while max_iter is None or step < max_iter:
            # 1. Clear gradients left over from the previous iteration.
            optimizer.zero_grad()

            # 2. Forward pass: predict on the training samples.
            rho = self.predict(X)

            # 3. Softmax loss: mean negative log-probability of the true class.
            loss = -torch.log(rho[Y1 == 1]).mean()
            loss_value = loss.detach().item()
            step += 1
            self.l0 = loss_value

            if not torch.isfinite(loss).item():
                raise FloatingPointError(
                    'softmax loss became non-finite at iteration %d' % (step - 1))

            # Converged once the loss moved by less than epsilon since the last step.
            if abs(loss_value - prev_loss) < epsilon:
                break
            prev_loss = loss_value

            # 4. Backward pass: compute gradients.
            loss.backward()

            # 5. Gradient-descent update of W and b.
            optimizer.step()

# Create the MNIST model: 784 pixel features in, 10 digit classes out
cls = SoftmaxClassifier(in_features=784, out_features=10)
# Index of the last mini-batch that train() will use
max_batch_idx = 50
# Train the model
def train():
    """Fit the global classifier ``cls`` on successive mini-batches from ``train_loader``.

    Stops after the batch whose index equals ``max_batch_idx``, so at most
    ``max_batch_idx + 1`` batches are used. Prints the loss stored in
    ``cls.l0`` after each batch.
    """
    for batch_idx, (inputs, label) in enumerate(train_loader):
        # Flatten every image to 784 values, then put one sample per column.
        # The batch dimension is inferred (-1) instead of hard-coded to 32, so
        # a smaller final batch or a different loader batch size still works.
        inputs = inputs.reshape(-1, 784).permute(1, 0)
        label = label.to(torch.int64)

        cls.fit(inputs, label)
        print('batch_idx = %d, loss = %f'%(batch_idx,cls.l0))
        # Stop once the batch with index max_batch_idx has been trained on
        if batch_idx == max_batch_idx:
            break
# Run the training pass; train() stops after the batch with index max_batch_idx.
train()

def test():
    """Print the classification accuracy of the global ``cls`` over ``test_loader``."""
    n_correct = 0  # correctly classified samples so far
    n_seen = 0  # samples seen so far
    # Evaluation only: no gradients are recorded inside this context.
    with torch.no_grad():
        for inputs, label in test_loader:
            # Number of images in this batch, derived from the element count.
            batch_len = int(inputs.numel() / 784)
            columns = inputs.reshape([batch_len, 784]).permute(1, 0)
            label = label.to(torch.int64)
            # Predicted class = row index of the largest probability per column.
            predicted = torch.max(cls.predict(columns), dim=0).indices
            n_seen += label.size(0)
            n_correct += (predicted == label).sum().item()
    # Report correct / total as a percentage.
    print('accuracy on test set: %d %% ' % (100*n_correct/n_seen))

# Evaluate the trained classifier on the test loader.
test()
# Accuracy recorded by the author for different max_batch_idx settings:
# max_batch_idx = 10  ===>  81% 
# max_batch_idx = 20  ===>  82% 
# max_batch_idx = 30  ===>  81% 
# max_batch_idx = 50  ===>  86% 
# max_batch_idx = 100  ===>  83%

# Show four training images, each titled with the digit the model predicts

# Figure size
fig = plt.figure(figsize=(5, 5))
axes = []
for slot, sample_idx in enumerate((0, 10, 20, 30), start=1):
    image = mnist_train[sample_idx][0]
    # Panel `slot` of a 1x4 grid
    axis = fig.add_subplot(1, 4, slot)
    # Flatten the image to a single 784x1 column and take the most probable class
    predicted = torch.max(cls.predict(image.reshape([784, 1])), dim=0)[1].item()
    axis.set_title(predicted)
    plt.imshow(transforms.ToPILImage()(image))
    axes.append(axis)
# Keep the original per-panel names available
ax1, ax2, ax3, ax4 = axes