Python随机梯度下降法（四）【完结篇】

最新推荐文章于 2024-04-09 14:12:11 发布

寅恪光潜

最新推荐文章于 2024-04-09 14:12:11 发布

阅读量1.8k

点赞数 4

分类专栏： Python 文章标签： python 机器学习神经网络随机梯度下降深度学习

本文链接：https://blog.csdn.net/weixin_41896770/article/details/120264473

版权

Python 专栏收录该内容

120 篇文章 33 订阅

订阅专栏

有了前面知识的铺垫，现在来做一个总结，利用随机梯度下降法来实现MNIST数据集的手写识别，关于MNIST的详细介绍，可以参考我的前面两篇文章 MNIST数据集手写数字识别（一），MNIST数据集手写数字识别（二），详细介绍了这个数据集的应用。

在下面代码中，导入模块的时候需要用到two_layer_net.py，common目录里的functions.py里面的sigmoid、softmax、cross_entropy_error以及gradient.py里面的numerical_gradient等函数。我将这些函数统一放在文章最后，方便查阅。

先来实现简单的神经网络求梯度

import sys,os
import numpy as np
# Raw strings keep the Windows backslashes literal; in a normal string
# '\A' and '\T' are invalid escape sequences and trigger a SyntaxWarning.
os.chdir(r'D:\Anaconda3\TONYTEST')
sys.path.append(r'D:\Anaconda3\TONYTEST')
from common.functions import *
from common.gradient import numerical_gradient
from dataset.mnist import load_mnist
from two_layer_net import TwoLayerNet

class simpleNet:
    """Single weight-matrix network used to demonstrate gradient computation."""

    def __init__(self):
        # Weights: 2 rows x 3 columns drawn from a standard normal distribution.
        self.W = np.random.randn(2, 3)

    def predict(self, x):
        """Return the raw scores ``np.dot(x, W)``."""
        scores = np.dot(x, self.W)
        return scores

    def loss(self, x, t):
        """Return the cross-entropy error of softmax(predict(x)) against ``t``."""
        probabilities = softmax(self.predict(x))
        return cross_entropy_error(probabilities, t)
# Build the demo network; its constructor draws W from np.random.randn(2,3).
net=simpleNet()
# One input sample with two features.
x=np.array([0.6,0.9])
# Raw scores np.dot(x, W): one value per column of W.
p=net.predict(x)
print(p)
[ 0.92817761  0.710316    0.03639342]
# One-hot target: the correct class is index 0.
t=np.array([1,0,0])
net.loss(x,t)# sample output: 0.79487308372513821 (W is random, so the value varies)

# The argument w is a placeholder: numerical_gradient perturbs net.W in place
# and net.loss reads the perturbed weights through net.
f=lambda w:net.loss(x,t)
# Numerical gradient of the loss with respect to every element of net.W.
dW=numerical_gradient(f,net.W)
print(dW)
[[-0.32901686  0.21793449  0.11108236]
 [-0.49352528  0.32690174  0.16662355]]

其中f=lambda w:net.loss(x,t)是匿名函数的简便写法，参数w只是占位、并不会被用到：numerical_gradient会直接在net.W上原地加减微小量h，而net.loss通过net读取到被修改后的权重。它的完整写法如下：
def f(W):
    """Return the loss of ``net`` on ``(x, t)``; the argument ``W`` is ignored.

    The parameter exists only so the function matches the ``f(x)`` call made
    by ``numerical_gradient``, which perturbs the weight array in place while
    ``net.loss`` reads the weights through ``net``.
    """
    return net.loss(x,t)

# net must be a TwoLayerNet here: simpleNet has no numerical_gradient method.
# hiddenSize=100 matches the gradient shapes listed below.
net=TwoLayerNet(inputSize=784,hiddenSize=100,outputSize=10)
# Dummy mini-batch: 100 samples with 784 features and 10 target values each.
x=np.random.rand(100,784)
t=np.random.rand(100,10)
# Each gradient has the same shape as the parameter it belongs to.
grads=net.numerical_gradient(x,t)
grads['W1'].shape#(784, 100)
grads['b1'].shape#(100,)
grads['W2'].shape#(100, 10)
grads['b2'].shape#(10,)

求出了神经网络的梯度，那么接下来只需要根据梯度法，不断更新权重和偏置的参数即可。
现在我们来加载前面的MNIST数据集，通过迭代自动更新权重和偏置参数达到最优化，并查看精度的变化情况。计算精度不需要在for循环的每一次迭代都进行，只需每个epoch（把全部训练数据过一遍所需的mini-batch更新次数，这里是60000/100=600次）计算一次，查看是否在顺利学习即可。

# load_mnist returns two (images, labels) pairs: training data first, then
# test data. Both pairs are needed below.
(x_train,t_train),(x_test,t_test)=load_mnist(normalize=True,one_hot_label=True)
train_loss_list=[]
itersNum=10000
trainSize=x_train.shape[0]
batchSize=100
lr=0.1
# Accuracy histories for the training data and the test data.
train_acc_list=[]
test_acc_list=[]
# Number of mini-batch updates that make up one epoch (integer division keeps
# it an int; at least 1 so the modulo below is always valid).
epochNum=max(trainSize//batchSize,1)#600

network=TwoLayerNet(inputSize=784,hiddenSize=50,outputSize=10)
# Run itersNum gradient-descent updates.
for i in range(itersNum):
    # Draw batchSize random sample indices from the training set.
    batchMask=np.random.choice(trainSize,batchSize)#(100,)
    x_batch=x_train[batchMask]#(100,784)
    t_batch=t_train[batchMask]
    # Slow alternative (numerical differentiation):
    #grads=network.numerical_gradient(x_batch,t_batch)
    grads=network.gradient(x_batch,t_batch)# fast version (backpropagation)
    # Update the weights and biases in place.
    for k in ('W1','b1','W2','b2'):
        network.params[k]-=lr*grads[k]
    # Record the loss on the current mini-batch after the update.
    loss=network.loss(x_batch,t_batch)
    train_loss_list.append(loss)
    # Evaluate accuracy only once per epoch.
    if i%epochNum==0:
        train_acc=network.accuracy(x_train,t_train)
        test_acc=network.accuracy(x_test,t_test)
        train_acc_list.append(train_acc)
        test_acc_list.append(test_acc)
        print(train_acc,test_acc,end=' | ')

如果使用数值微分（上面代码注释里的network.numerical_gradient）来求梯度，这个过程很耗时很占CPU，我电脑CPU一直处于97%左右，所以一般都使用误差反向传播法（即上面代码中的network.gradient，实现见文末的two_layer_net.py）来求梯度，速度非常之快。结果可以看到训练数据和测试数据的精度都是一直在增长，10000次的循环有94%左右的精度，10万次有99.3%以上的精度。
我们也可以通过画图来更直观的查看精度的变化（循环10万次之后做的图形）

import matplotlib.pyplot as plt
# One x position per recorded accuracy value.
x=np.arange(len(train_acc_list))
# Training accuracy in red with a dashed line, test accuracy in blue.
plt.plot(x,train_acc_list,'r--',label='Train Accuracy')
plt.plot(x,test_acc_list,'b',label='Test Accuracy')
# Show the legend built from the two labels, then display the figure.
plt.legend()
plt.show()

functions.py

import numpy as np
def sigmoid(x):
    """Sigmoid activation: map ``x`` element-wise to ``1 / (1 + exp(-x))``."""
    denominator = 1 + np.exp(-x)
    return 1 / denominator
def sigmoid_grad(x):
    """Return the derivative of the sigmoid at ``x``: ``(1 - s) * s`` with ``s = sigmoid(x)``."""
    s = sigmoid(x)
    return (1.0 - s) * s
def softmax(x):
    """Softmax: turn scores into values in [0, 1] that sum to 1.

    For a 0-D or 1-D array the whole array is normalised. For arrays with two
    or more dimensions the last axis is normalised independently for each
    leading index, so every sample of a batch gets its own distribution.

    Args:
        x: NumPy array of scores.

    Returns:
        Array with the same shape as ``x``.
    """
    if x.ndim <= 1:
        # Subtracting the maximum guards against overflow in exp.
        shifted = x - np.max(x)
        exps = np.exp(shifted)
        return exps / np.sum(exps)

    # keepdims lets the per-sample max and sum broadcast back over the last
    # axis, so no transposes are needed and exp is evaluated only once.
    shifted = x - np.max(x, axis=-1, keepdims=True)
    exps = np.exp(shifted)
    return exps / np.sum(exps, axis=-1, keepdims=True)
	
def cross_entropy_error(y, t):
    """Cross-entropy error (loss function), averaged over the batch.

    ``y`` holds predicted probabilities; ``t`` holds either one-hot targets
    (same number of elements as ``y``) or class indices. A 1-D ``y`` is
    treated as a batch containing one sample.
    """
    if y.ndim == 1:
        # Promote a single sample to a batch of one row.
        y = y.reshape(1, y.size)
        t = t.reshape(1, t.size)

    # Targets with as many elements as y are one-hot: convert to class indices.
    if t.size == y.size:
        t = t.argmax(axis=1)

    n_samples = y.shape[0]
    # Probability assigned to the correct class of each sample.
    picked = y[np.arange(n_samples), t]
    # The small constant keeps log away from log(0).
    return -np.sum(np.log(picked + 1e-7)) / n_samples

gradient.py

# coding: utf-8
import numpy as np

def _numerical_gradient_1d(f, x):
    """Estimate the gradient of ``f`` at ``x`` by central differences.

    Each element of ``x`` is shifted in place by +h and then -h, ``f(x)`` is
    evaluated for both shifts, and the element is restored afterwards.
    Returns an array created with ``np.zeros_like(x)``.
    """
    step = 1e-4  # 0.0001
    result = np.zeros_like(x)

    for i in range(x.size):
        saved = x[i]

        x[i] = float(saved) + step
        f_plus = f(x)  # f(x+h)

        x[i] = saved - step
        f_minus = f(x)  # f(x-h)

        x[i] = saved  # restore the original value
        result[i] = (f_plus - f_minus) / (2 * step)

    return result


def numerical_gradient_2d(f, X):
    """Numerical gradient for a 1-D array, or row by row for a batch.

    A 1-D ``X`` is passed straight to ``_numerical_gradient_1d``. Otherwise
    each ``X[i]`` is differentiated separately and the results are stored in
    an array created with ``np.zeros_like(X)``.
    """
    if X.ndim != 1:
        rows = np.zeros_like(X)
        for i, row in enumerate(X):
            rows[i] = _numerical_gradient_1d(f, row)
        return rows

    return _numerical_gradient_1d(f, X)


def numerical_gradient(f, x):
    """Estimate the gradient of ``f`` with respect to every element of ``x``.

    Uses central differences: each element is shifted in place by +h and -h,
    ``f(x)`` is evaluated for both, and the element is then restored.
    Returns an array created with ``np.zeros_like(x)``.
    """
    step = 1e-4  # 0.0001
    result = np.zeros_like(x)

    # multi_index exposes the full index of the element currently visited.
    walker = np.nditer(x, flags=['multi_index'], op_flags=['readwrite'])
    for _ in walker:
        pos = walker.multi_index
        saved = x[pos]

        x[pos] = float(saved) + step
        f_plus = f(x)  # f(x+h)

        x[pos] = saved - step
        f_minus = f(x)  # f(x-h)

        x[pos] = saved  # restore the original value
        result[pos] = (f_plus - f_minus) / (2 * step)

    return result

two_layer_net.py

import sys, os
sys.path.append(os.pardir)
from common.functions import *
from common.gradient import numerical_gradient
class TwoLayerNet:
    """Two-layer fully connected classifier: sigmoid hidden layer, softmax output.

    All parameters live in ``self.params`` under the keys ``'W1'``, ``'b1'``,
    ``'W2'`` and ``'b2'``.
    """

    def __init__(self,inputSize,hiddenSize,outputSize,weightStd=0.01):
        """Create the parameters.

        Weights are drawn from a standard normal distribution scaled by
        ``weightStd``; biases start at zero.
        """
        self.params={}
        self.params['W1']=weightStd*np.random.randn(inputSize,hiddenSize)
        self.params['b1']=np.zeros(hiddenSize)
        self.params['W2']=weightStd*np.random.randn(hiddenSize,outputSize)
        self.params['b2']=np.zeros(outputSize)

    def _forward(self,x):
        """Run the forward pass and return ``(a1, z1, y)`` for reuse in backprop."""
        W1,W2=self.params['W1'],self.params['W2']
        b1,b2=self.params['b1'],self.params['b2']
        a1=np.dot(x,W1)+b1
        z1=sigmoid(a1)
        a2=np.dot(z1,W2)+b2
        return a1,z1,softmax(a2)

    def predict(self,x):
        """Return the softmax output of the network for input ``x``."""
        return self._forward(x)[2]

    def loss(self,x,t):
        """Return the cross-entropy error between ``predict(x)`` and the labels ``t``."""
        return cross_entropy_error(self.predict(x),t)

    def accuracy(self,x,t):
        """Return the fraction of samples in ``x`` whose predicted class matches ``t``.

        ``t`` may be one-hot rows or a 1-D array of class indices, matching
        what ``loss`` accepts.
        """
        predicted=np.argmax(self.predict(x),axis=1)
        if t.ndim!=1:
            # One-hot labels: reduce each row to its class index.
            t=np.argmax(t,axis=1)
        return np.sum(predicted==t)/float(x.shape[0])

    def numerical_gradient(self,x,t):
        """Return the gradient of the loss for every parameter by numerical differentiation.

        The result is a dict with the same keys as ``self.params``.
        """
        # The lambda argument is a placeholder: the module-level
        # numerical_gradient perturbs the parameter array in place and
        # self.loss reads it through self.params.
        lossW=lambda W:self.loss(x,t)
        return {key:numerical_gradient(lossW,self.params[key])
                for key in ('W1','b1','W2','b2')}

    def gradient(self, x, t):
        """Return the gradient of the loss for every parameter by backpropagation.

        ``t`` may be one-hot rows (same number of elements as the output) or a
        1-D array of class indices. The result is a dict with the same keys as
        ``self.params``.
        """
        W2 = self.params['W2']
        batch_num = x.shape[0]

        # forward
        a1, z1, y = self._forward(x)

        # backward: gradient of the batch-averaged loss w.r.t. the output scores
        if t.size == y.size:
            dy = (y - t) / batch_num
        else:
            # Index labels: subtract 1 at the correct class of each sample.
            dy = y.copy()
            dy[np.arange(batch_num), t] -= 1
            dy = dy / batch_num

        grads = {}
        grads['W2'] = np.dot(z1.T, dy)
        grads['b2'] = np.sum(dy, axis=0)

        dz1 = np.dot(dy, W2.T)           # gradient w.r.t. the hidden activations
        da1 = sigmoid_grad(a1) * dz1     # gradient w.r.t. the hidden pre-activations
        grads['W1'] = np.dot(x.T, da1)
        grads['b1'] = np.sum(da1, axis=0)

        return grads

寅恪光潜

关注

4
点赞
踩
13

收藏

觉得还不错? 一键收藏
打赏
1
评论
Python随机梯度下降法（四）【完结篇】

有了前面知识的铺垫，现在就做一个总结，利用随机梯度下降法来实现MNIST数据集的手写识别，关于MNIST的详细介绍，可以参考我的前面两篇文章 MNIST数据集手写数字识别（一）MNIST数据集手写数字识别（二），详细介绍了这个数据集的应用。在下面代码中，导入模块的时候需要用到common目录里的functions.py里面的sigmoid、softmax、cross_entropy_error以及gradient.py里面的numerical_gradient函数，two_layer_net.py。我将
复制链接

扫一扫