Andrew Ng's Machine Learning Assignments in Python 3 (4): Neural Networks and Backpropagation

Welcome to follow my WeChat official account: 破壳Ai, where I share learning paths, tutorials, and resources. I'll be with you along the way.

All of the code in this assignment uses numpy arrays for its computations rather than np.matrix.
The two types use different operators for elementwise multiplication and the dot product,
so it is best not to mix them; decide which one your code will use before you start.
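As a toy illustration of the difference (for np.matrix, * would already mean matrix multiplication, which is why mixing the two types causes bugs):

import numpy as np

a = np.array([[1, 2], [3, 4]])
print(a * a)  # elementwise: [[1 4] [9 16]]
print(a @ a)  # matrix product: [[7 10] [15 22]]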

Neural Networks

Visualizing the data

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib
from scipy.io import loadmat 
data = loadmat('ex4data1.mat')
data
{'__header__': b'MATLAB 5.0 MAT-file, Platform: GLNXA64, Created on: Sun Oct 16 13:09:09 2011',
 '__version__': '1.0',
 '__globals__': [],
 'X': array([[0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        ...,
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.]]),
 'y': array([[10],
        [10],
        [10],
        ...,
        [ 9],
        [ 9],
        [ 9]], dtype=uint8)}
data['X'].shape, data['y'].shape
((5000, 400), (5000, 1))
# Randomly pick 100 distinct images (replace=False avoids drawing the same row twice)
sample_idx = np.random.choice(np.arange(data['X'].shape[0]), 100, replace=False)
sample_images = data['X'][sample_idx, :]
sample_images.shape
(100, 400)
# Display the images
fig, ax = plt.subplots(10, 10, sharey=True, sharex=True, figsize=(10, 10))
for r in range(10):
    for c in range(10):
        ax[r, c].matshow(np.array(sample_images[10 * r + c].reshape(20, 20)).T, cmap=matplotlib.cm.binary)
        plt.xticks(np.array([]))
        plt.yticks(np.array([])) 

[Figure: 10 x 10 grid of the 100 randomly sampled digit images]

If the formulas are not visible in this article, you can view it on my GitHub.

Preprocessing the training data

Re-encoding the labels

First we convert the label values (1, 2, 3, 4, ..., 10) into one-hot vectors: position label-1 of each vector is set to 1 and all other positions to 0.
For example, y[0]=6 becomes y[0]=[0,0,0,0,0,1,0,0,0,0].

def expand_y(y):
    result = []
    # turn each element of y into a one-hot vector: 1 at the label's position, 0 elsewhere
    for label in y:
        y_array = np.zeros(10)
        y_array[label - 1] = 1
        result.append(y_array)
    return np.array(result)

'''
Alternatively, use sklearn's encoder
(in newer scikit-learn versions the argument is named sparse_output instead of sparse):

from sklearn.preprocessing import OneHotEncoder
encoder = OneHotEncoder(sparse=False)
y_onehot = encoder.fit_transform(y)
y_onehot.shape
'''

Verifying y

y1 = np.arange(1, 11, 1).reshape((10, 1))
y1
array([[ 1],
       [ 2],
       [ 3],
       [ 4],
       [ 5],
       [ 6],
       [ 7],
       [ 8],
       [ 9],
       [10]])
y2 = expand_y(y1)
y2.shape
(10, 10)
y2
array([[1., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 1., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 1., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 1., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 1., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 1., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 1., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 1.]])

Building the training set

X = np.insert(data['X'], 0, np.ones(len(data['X'])), axis=1)
raw_y = data['y']
y = expand_y(raw_y)

X.shape, y.shape
((5000, 401), (5000, 10))

Loading the pre-trained weights

weight = loadmat('ex4weights.mat')
weight
{'__header__': b'MATLAB 5.0 MAT-file, Platform: GLNXA64, Created on: Tue Oct 18 14:57:02 2011',
 '__version__': '1.0',
 '__globals__': [],
 'Theta1': array([[-2.25623899e-02, -1.05624163e-08,  2.19414684e-09, ...,
         -1.30529929e-05, -5.04175101e-06,  2.80464449e-09],
        [-9.83811294e-02,  7.66168682e-09, -9.75873689e-09, ...,
         -5.60134007e-05,  2.00940969e-07,  3.54422854e-09],
        [ 1.16156052e-01, -8.77654466e-09,  8.16037764e-09, ...,
         -1.20951657e-04, -2.33669661e-06, -7.50668099e-09],
        ...,
        [-1.83220638e-01, -8.89272060e-09, -9.81968100e-09, ...,
          2.35311186e-05, -3.25484493e-06,  9.02499060e-09],
        [-7.02096331e-01,  3.05178374e-10,  2.56061008e-09, ...,
         -8.61759744e-04,  9.43449909e-05,  3.83761998e-09],
        [-3.50933229e-01,  8.85876862e-09, -6.57515140e-10, ...,
         -1.80365926e-06, -8.14464807e-06,  8.79454531e-09]]),
 'Theta2': array([[-0.76100352, -1.21244498, -0.10187131, -2.36850085, -1.05778129,
         -2.20823629,  0.56383834,  1.21105294,  2.21030997,  0.44456156,
         -1.18244872,  1.04289112, -1.60558756,  1.30419943,  1.37175046,
          1.74825095, -0.23365648, -1.52014483,  1.15324176,  0.10368082,
         -0.37207719, -0.61530019, -0.1256836 , -2.27193038, -0.71836208,
         -1.29690315],
        [-0.61785176,  0.61559207, -1.26550639,  1.85745418, -0.91853319,
         -0.05502589, -0.38589806,  1.29520853, -1.56843297, -0.97026419,
         -2.18334895, -2.85033578, -2.07733086,  1.63163164,  0.3490229 ,
          1.82789117, -2.44174379, -0.8563034 , -0.2982564 , -2.07947873,
         -1.2933238 ,  0.89982032,  0.28306578,  2.31180525, -2.46444086,
          1.45656548],
        [-0.68934072, -1.94538151,  2.01360618, -3.12316188, -0.2361763 ,
          1.38680947,  0.90982429, -1.54774416, -0.79830896, -0.65599834,
          0.7353833 , -2.58593294,  0.47210839,  0.55349499,  2.51255453,
         -2.4167454 , -1.63898627,  1.2027302 , -1.20245851, -1.83445959,
         -1.88013027, -0.34056098,  0.23692483, -1.06137919,  1.02759232,
         -0.47690832],
        [-0.67832479,  0.46299226,  0.58492321, -0.1650184 ,  1.93264192,
         -0.22965765, -1.84731492,  0.49011768,  1.07146054, -3.31905643,
          1.54113507,  0.37371947, -0.86484681, -2.58273522,  0.97062447,
         -0.51021867, -0.68427897, -1.64713607,  0.21153145, -0.27422442,
          1.72599755,  1.32418658, -2.63984479, -0.08055871, -2.03510803,
         -1.46123776],
        [-0.59664339, -2.04481799,  2.05698407,  1.95100909,  0.17637699,
         -2.16141218, -0.40394736,  1.80157532, -1.56278739, -0.25253004,
          0.23586497,  0.71656699,  1.07689092, -0.35457279, -1.67743058,
         -0.12939255, -0.67488849,  1.14066535,  1.32431237,  3.21158484,
         -2.15888898, -2.60164082, -3.2226466 , -1.89612906, -0.87488068,
          2.51038628],
        [-0.87794907,  0.4344112 , -0.93161049,  0.18390778, -0.36078216,
          0.61958137,  0.38624948, -2.65150343,  2.29710773, -2.08818098,
         -1.86382323,  1.06057836,  0.77562146,  2.1346861 , -1.14973702,
         -0.52081426,  0.99743429, -1.48309353, -2.3139424 ,  0.29517333,
         -0.38704879, -2.20607697,  0.30702191, -1.17646114, -1.63462966,
         -0.82467661],
        [-0.52746527,  1.21564288, -1.50095981, -2.03195359, -1.52366734,
         -2.43732079, -2.37570311, -1.39987277, -0.88735315, -0.63278873,
          1.50450176, -1.580763  ,  0.58599217, -0.77540416,  0.94257331,
          2.10919653,  0.54479132,  0.43773612, -1.28024228, -0.04360994,
          1.4774997 , -1.13276949, -0.72846904,  0.04734716,  1.6574566 ,
          1.68540944],
        [-0.7490154 , -0.72249056, -3.15228173,  0.36577778,  0.19811362,
         -0.73059946,  1.65263918, -2.300357  , -1.87468162,  0.98095387,
         -1.58825159,  1.35434142,  2.17895331, -1.99239762, -2.00371362,
         -0.388613  , -2.33992976, -2.91719062,  0.99398645, -2.70476768,
         -1.27139772,  1.86091461, -1.20519404, -0.38014194,  0.7087181 ,
         -2.11014003],
        [-0.6665468 ,  0.53601845,  1.30307573, -1.03372714, -4.03084753,
          0.58173469, -2.65717902,  0.80379994, -1.09241928,  2.49910058,
          0.362008  ,  0.66195337, -0.92160534, -0.83123666, -2.00200952,
         -2.94897501,  0.64564202, -1.10114694,  0.74510309,  0.58506717,
         -1.99545251,  0.62591105,  1.80596103, -0.22309315, -1.40442136,
         -2.1319153 ],
        [-0.46089119, -1.43944954, -1.21809509,  0.71093011,  0.45216919,
         -0.35953381,  0.62284954, -0.67005297, -0.7069138 ,  0.06311351,
         -1.23199074, -1.74645233, -2.71960897, -2.21437178, -1.69307505,
         -0.90927394,  0.87852311,  1.18664814, -1.87041262,  0.39796295,
          1.72113872, -1.36934055,  0.8580668 , -0.24779579,  1.28009118,
         -1.32752042]])}
theta1 = weight['Theta1']
theta2 = weight['Theta2']

Unrolling and recombining the parameters

When we use an advanced optimization method to train the neural network, the parameter matrices must be unrolled into a single vector before being passed to the optimizer, and then reshaped back afterwards.

def serialize(a, b):
    return np.concatenate((np.ravel(a), np.ravel(b)))
a = np.arange(10)
b = np.ones(5)
c = serialize(a, b)
c
array([0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 1., 1., 1., 1., 1.])
def deserialize(seq):
    # split the flat vector back into Theta1 (25 x 401) and Theta2 (10 x 26)
    return seq[:25*401].reshape(25, 401), seq[25*401:].reshape(10, 26)
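As a quick sanity check (a small sketch using the theta1 and theta2 loaded above), serialize followed by deserialize should round-trip the matrices unchanged:

t1_chk, t2_chk = deserialize(serialize(theta1, theta2))
print(np.array_equal(t1_chk, theta1), np.array_equal(t2_chk, theta2))  # True True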

Model representation

Our network has three layers: an input layer, a hidden layer, and an output layer. The inputs are the pixel values of the digit images; since each digit image is 20*20, the input layer has 400 units (not counting the bias unit that is always added).
[Figure: the three-layer network architecture]
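The layer sizes can also be read off the shapes of the pre-trained weights loaded above:

# theta1: 25 x 401 -> 400 inputs (+ bias) feeding 25 hidden units
# theta2: 10 x 26  -> 25 hidden units (+ bias) feeding 10 output units
print(theta1.shape, theta2.shape)  # (25, 401) (10, 26)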


Feedforward

# sigmoid function
def sigmoid(z):
    return 1 / (1 + np.exp(-z))
# forward propagation
def forward_propagation(theta, X): # theta and X are numpy arrays, not matrices
    t1, t2 = deserialize(theta)
    
    a1 = X # 5000*401
    
    z2 = a1 @ t1.T # 5000*25
    a2 = sigmoid(z2)
    a2 = np.insert(a2, 0, np.ones(len(a2)), axis=1) # 5000*26
    
    z3 = a2 @ t2.T # 5000*10
    a3 = sigmoid(z3)
    h = a3  # 5000*10
    
    return a1, z2, a2, z3, h
theta = serialize(theta1, theta2)
a1, z2, a2, z3, h = forward_propagation(theta, X)
a1.shape, z2.shape, a2.shape, z3.shape, h.shape
((5000, 401), (5000, 25), (5000, 26), (5000, 10), (5000, 10))
forward_propagation(theta, X)[-1]
array([[1.12661530e-04, 1.74127856e-03, 2.52696959e-03, ...,
        4.01468105e-04, 6.48072305e-03, 9.95734012e-01],
       [4.79026796e-04, 2.41495958e-03, 3.44755685e-03, ...,
        2.39107046e-03, 1.97025086e-03, 9.95696931e-01],
       [8.85702310e-05, 3.24266731e-03, 2.55419797e-02, ...,
        6.22892325e-02, 5.49803551e-03, 9.28008397e-01],
       ...,
       [5.17641791e-02, 3.81715020e-03, 2.96297510e-02, ...,
        2.15667361e-03, 6.49826950e-01, 2.42384687e-05],
       [8.30631310e-04, 6.22003774e-04, 3.14518512e-04, ...,
        1.19366192e-02, 9.71410499e-01, 2.06173648e-04],
       [4.81465717e-05, 4.58821829e-04, 2.15146201e-05, ...,
        5.73434571e-03, 6.96288990e-01, 8.18576980e-02]])

Cost function

$$J(\theta) = \frac{1}{m} \sum_{i=1}^{m} \sum_{k=1}^{K} \left[ -y_k^{(i)} \log\big((h_\theta(x^{(i)}))_k\big) - (1 - y_k^{(i)}) \log\big(1 - (h_\theta(x^{(i)}))_k\big) \right]$$

def nnCost(theta, X, y): # all arguments are numpy arrays
    h = forward_propagation(theta, X)[-1]
    temp = -y * np.log(h) - (1 - y) * np.log(1- h)
    return  temp.sum() / len(X)
nnCost(theta, X, y)
0.2876291651613189
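This agrees with the cost of about 0.287629 that the exercise expects for the pre-trained weights.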

Regularized cost function

$$J(\theta) = \frac{1}{m} \sum_{i=1}^{m} \sum_{k=1}^{K} \left[ -y_k^{(i)} \log\big((h_\theta(x^{(i)}))_k\big) - (1 - y_k^{(i)}) \log\big(1 - (h_\theta(x^{(i)}))_k\big) \right] + \frac{\lambda}{2m} \left[ \sum_{j=1}^{25} \sum_{k=1}^{400} \big(\Theta_{j,k}^{(1)}\big)^2 + \sum_{j=1}^{10} \sum_{k=1}^{25} \big(\Theta_{j,k}^{(2)}\big)^2 \right]$$

def nnCostReg(theta, X, y, lambdaRate):
    theta1, theta2 = deserialize(theta)
    
    first = nnCost(theta, X, y)
    
    reg1 = (np.power(theta1[:, 1: ], 2)).sum()
    reg2 = (np.power(theta2[:, 1: ], 2)).sum()
    reg = lambdaRate / (2 * len(X)) * (reg1 + reg2)
    
    return first + reg
nnCostReg(theta, X, y, lambdaRate=1)
0.38376985909092365
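This matches the regularized cost of about 0.383770 that the exercise quotes for the pre-trained weights with lambda = 1. As a further consistency check (a small sketch), setting lambdaRate=0 should recover the unregularized cost:

print(np.isclose(nnCostReg(theta, X, y, lambdaRate=0), nnCost(theta, X, y)))  # True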

Backpropagation

Sigmoid gradient

$$g'(z) = \frac{d}{dz}\, g(z) = g(z)\big(1 - g(z)\big), \qquad g(z) = \frac{1}{1 + e^{-z}}$$

def sigmoid_gradient(z):
    return sigmoid(z) * (1- sigmoid(z))
sigmoid_gradient(0)
0.25
siggra_x = np.arange(-10, 10, 0.01)
plt.plot(siggra_x, sigmoid_gradient(siggra_x))
plt.grid()
plt.show()

[Figure: plot of sigmoid_gradient over [-10, 10], peaking at 0.25 at z = 0]

As the plot shows, the sigmoid derivative is at most 0.25. Backpropagation multiplies one such factor per layer, so gradients shrink geometrically with depth (the vanishing gradient problem), which is why sigmoid activations work poorly in deep networks.
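A quick numeric illustration of that geometric shrinkage (a minimal sketch):

for n in [1, 5, 10, 20]:
    print(n, 0.25 ** n)  # upper bound on the gradient scale after n sigmoid layers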

Random initialization of theta

When training a neural network, it is important to initialize the parameters randomly to break symmetry; otherwise every hidden unit would compute the same function.
An effective strategy is to draw values uniformly from $(-\epsilon, \epsilon)$. Choosing $\epsilon = 0.12$ keeps the parameters small, which makes training more efficient.

$$\epsilon_{init} = \frac{\sqrt{6}}{\sqrt{L_{in} + L_{out}}} \approx 0.12 \quad (L_{in} = 400,\ L_{out} = 25)$$

# Return `size` values drawn uniformly from (-0.12, 0.12)
def random_init(size):
    return np.random.uniform(-0.12, 0.12, size)
random_init((3, 4))
array([[-0.03588089, -0.01860106,  0.0231179 ,  0.08834786],
       [-0.00270229, -0.04455686, -0.10708566,  0.09343351],
       [-0.04520061,  0.06483762, -0.08712267,  0.0488686 ]])

The backpropagation algorithm

[Figure: backpropagation schematic from the exercise]

Goal: obtain the gradient of the whole network's cost function so that it can be minimized by an optimization algorithm.
Steps: given the training set, first run forward propagation; then, for each node in each layer, compute an error term that measures how much that node "contributed" to the error of the final output.

You must understand the forward and backward passes to keep track of the dimensions of all the parameters in the network; writing out the equation for each step by hand helps a great deal.

Formulas

  1. $\delta^{(3)} = a^{(3)} - y$
  2. $\delta^{(2)} = (\Theta^{(2)})^T \delta^{(3)} \odot g'(z^{(2)})$ (dropping the bias column of $\Theta^{(2)}$)
  3. $\Delta^{(l)} = \delta^{(l+1)} (a^{(l)})^T$
  4. $D^{(l)} = \frac{1}{m} \Delta^{(l)}$

Computing the gradient

# compute the gradient
def nnGradient(theta, X, y):
    theta1, theta2 = deserialize(theta)
    # first run forward propagation
    a1, z2, a2, z3, h = forward_propagation(theta, X)
    # then compute the error term for each node in each layer
    d3 = h - y # output-layer error, 5000*10
    d2 = d3 @ theta2[:, 1:] * sigmoid_gradient(z2) # hidden-layer error, 5000*25
    
    delta1 = d2.T @ a1 # 25 * 401
    delta2 = d3.T @ a2 # 10 * 26
    delta = serialize(delta1, delta2) # (10285, )
    
    return delta / len(X) # (10285, )
d1, d2 = deserialize(nnGradient(theta, X, y))
d1.shape, d2.shape
((25, 401), (10, 26))
theta1.shape, theta2.shape
((25, 401), (10, 26))

Gradient checking

For gradient checking, the parameters $\Theta^{(1)}$ and $\Theta^{(2)}$ are first concatenated into one long vector $\theta$. Each partial derivative can then be approximated numerically:

$$\frac{\partial J}{\partial \theta_i} \approx \frac{J(\theta_1, \ldots, \theta_i + \varepsilon, \ldots, \theta_n) - J(\theta_1, \ldots, \theta_i - \varepsilon, \ldots, \theta_n)}{2\varepsilon}$$

def gradient_checking(theta, X, y, eps, lambdaRate=1, regularized=False):
    m = len(theta)
    
    def a_numeric_grad(plus, minus, X, y):
        # nnCostReg requires a lambdaRate argument, so pass it through
        if regularized:
            return (nnCostReg(plus, X, y, lambdaRate) - nnCostReg(minus, X, y, lambdaRate)) / (2 * eps)
        else:
            return (nnCost(plus, X, y) - nnCost(minus, X, y)) / (2 * eps)
        
    approxGrad = np.zeros(m)
    # numerically approximate each partial derivative
    for i in range(m):  
        thetaPlus = theta.copy()
        thetaPlus[i] = theta[i] + eps
        
        thetaMinus = theta.copy()
        thetaMinus[i] = theta[i] - eps
        
        grad = a_numeric_grad(thetaPlus, thetaMinus, X, y)
        approxGrad[i] = grad
    
    # the analytic gradient from backpropagation
    funcGrad = nnGradientReg(theta, X, y, lambdaRate) if regularized else nnGradient(theta, X, y)
    
    diff = np.linalg.norm(approxGrad - funcGrad) / np.linalg.norm(approxGrad + funcGrad)
    
    print('If your backpropagation implementation is correct,\n'
          + 'the relative difference will be smaller than 1e-9 (assuming epsilon=0.0001).\n'
          + 'Relative Difference: {}\n'.format(diff))
    
# gradient_checking(theta, X, y, eps=0.0001, regularized=False) # this is very slow; run with care
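Since checking all 10285 components is slow, a cheaper spot check (a sketch, not part of the original exercise; quick_gradient_check is a name introduced here) compares the analytic gradient against the numeric one on just a few randomly chosen components:

def quick_gradient_check(theta, X, y, eps=1e-4, n_checks=5):
    analytic = nnGradient(theta, X, y)
    for i in np.random.choice(len(theta), n_checks, replace=False):
        plus, minus = theta.copy(), theta.copy()
        plus[i] += eps
        minus[i] -= eps
        numeric = (nnCost(plus, X, y) - nnCost(minus, X, y)) / (2 * eps)
        print(i, analytic[i], numeric)  # the two columns should agree closely
# quick_gradient_check(theta, X, y)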

Regularized neural network

$$D_{ij}^{(l)} = \frac{1}{m} \Delta_{ij}^{(l)} \quad \text{for } j = 0, \qquad D_{ij}^{(l)} = \frac{1}{m} \Delta_{ij}^{(l)} + \frac{\lambda}{m} \Theta_{ij}^{(l)} \quad \text{for } j \geq 1$$

def nnGradientReg(theta, X, y, lambdaRate):
    first = nnGradient(theta, X, y)
    
    # copy theta before zeroing the bias columns: deserialize returns views,
    # so modifying t1/t2 in place would silently corrupt the caller's theta
    t1, t2 = deserialize(theta.copy())
    t1[:, 0] = 0
    t2[:, 0] = 0
    
    reg = lambdaRate / len(X) * serialize(t1, t2)
    
    return first + reg
d1, d2 = deserialize(nnGradientReg(theta, X, y, lambdaRate=1))
d1.shape, d2.shape
((25, 401), (10, 26))
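As a quick consistency check (a small sketch), with lambdaRate=0 the regularized gradient should match the unregularized one:

print(np.allclose(nnGradientReg(theta, X, y, lambdaRate=0), nnGradient(theta, X, y)))  # True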

Learning the parameters (fmincg in the original exercise; scipy.optimize here)

from scipy import optimize as opt

def nnTraining(X, y):
    # randomly initialize theta (25*401 + 10*26 = 10285 parameters)
    init_theta = random_init(10285)
    
    res = opt.minimize(fun=nnCostReg,
                       x0=init_theta,
                       args=(X, y, 1),
                       method='TNC',
                       jac=nnGradientReg,
                       options={'maxiter':500})
    return res
res = nnTraining(X, y)
res
     fun: 0.33298285824302043
     jac: array([ 1.41541160e-04, -2.29401553e-07, -2.55382916e-07, ...,
       -5.01885346e-06, -3.60910199e-05, -2.47061167e-05])
 message: 'Max. number of function evaluations reached'
    nfev: 250
     nit: 21
  status: 3
 success: False
       x: array([ 0.00000000e+00, -1.14700777e-03, -1.27691458e-03, ...,
        1.63199029e+00, -2.96556538e+00, -1.95288757e+00])
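Note that success is False only because TNC hit its function-evaluation limit (status 3): the cost has already fallen to about 0.333, and the evaluation below shows the learned parameters classify the training set well. Since the initialization is random, the exact numbers will vary from run to run.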

Evaluating performance

final_theta = res.x
final_theta.shape
(10285,)
h = forward_propagation(final_theta, X)[-1]
h.shape
(5000, 10)
# column k of h corresponds to label k+1 (the dataset maps digit 0 to label 10), hence the +1
y_predict = np.argmax(h, axis=1) + 1
y_predict = y_predict.reshape(5000, 1)
y_predict
array([[10],
       [10],
       [10],
       ...,
       [ 9],
       [ 9],
       [10]], dtype=int64)
raw_y
array([[10],
       [10],
       [10],
       ...,
       [ 9],
       [ 9],
       [ 9]], dtype=uint8)
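A single overall accuracy figure (the classification report below breaks it down per class):

print('accuracy:', (y_predict == raw_y).mean())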
from sklearn.metrics import classification_report

print(classification_report(raw_y, y_predict))
              precision    recall  f1-score   support

           1       0.97      0.99      0.98       500
           2       0.98      0.98      0.98       500
           3       0.98      0.98      0.98       500
           4       1.00      0.95      0.97       500
           5       1.00      0.98      0.99       500
           6       0.98      0.99      0.99       500
           7       0.98      0.99      0.98       500
           8       0.98      0.99      0.99       500
           9       0.99      0.96      0.97       500
          10       0.96      1.00      0.98       500

    accuracy                           0.98      5000
   macro avg       0.98      0.98      0.98      5000
weighted avg       0.98      0.98      0.98      5000

Visualizing the hidden layer

[Figure: each hidden unit can be visualized by reshaping its 400 input weights (ignoring the bias term) into a 20 x 20 image]

theta1, theta2 = deserialize(final_theta)
hidden_layer = theta1[:, 1: ]
hidden_layer.shape
(25, 400)
fig, ax = plt.subplots(5, 5, sharey=True, sharex=True, figsize=(10, 10))
for r in range(5):
    for c in range(5):
        ax[r, c].matshow(np.array(hidden_layer[5 * r + c].reshape(20, 20)).T, cmap=matplotlib.cm.binary)
        plt.xticks(np.array([]))
        plt.yticks(np.array([])) 

[Figure: 5 x 5 grid visualizing what each of the 25 hidden units has learned]
