Andrew Ng's Machine Learning ex4 in Python

This project is a Python implementation of ex4 from Andrew Ng's Machine Learning course; the main topic is the backpropagation neural network.

1. Neural Network

1.1 Data Visualization

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib
from scipy.io import loadmat
from sklearn.preprocessing import OneHotEncoder
data=loadmat(r'C:\Users\xxx\Desktop\机器学习\machine-learning-ex4\machine-learning-ex4\ex4\ex4data1.mat')
data
{'__header__': b'MATLAB 5.0 MAT-file, Platform: GLNXA64, Created on: Sun Oct 16 13:09:09 2011',
 '__version__': '1.0',
 '__globals__': [],
 'X': array([[0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        ...,
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.]]),
 'y': array([[10],
        [10],
        [10],
        ...,
        [ 9],
        [ 9],
        [ 9]], dtype=uint8)}
X=data['X']
y=data['y']
X.shape,y.shape
((5000, 400), (5000, 1))
weight=loadmat(r'C:\Users\xxx\Desktop\机器学习\machine-learning-ex4\machine-learning-ex4\ex4\ex4weights.mat')
theta1,theta2=weight['Theta1'],weight['Theta2']
theta1.shape,theta2.shape
((25, 401), (10, 26))
sample_idx = np.random.choice(np.arange(data['X'].shape[0]), 100)
sample_images = data['X'][sample_idx, :]
fig, ax_array = plt.subplots(nrows=10, ncols=10, sharey=True, sharex=True, figsize=(12, 12))
for r in range(10):
    for c in range(10):
        # each example is a 20x20 image stored as a 400-dimensional row vector
        ax_array[r, c].matshow(np.array(sample_images[10 * r + c].reshape((20, 20))).T, cmap=matplotlib.cm.binary)
        ax_array[r, c].set_xticks([])
        ax_array[r, c].set_yticks([])

[Figure: 100 randomly selected 20×20 digit images displayed in a 10×10 grid]

1.2 Define the Forward Propagation and Cost Functions

def sigmoid(z):
    return 1/(1+np.exp(-z))

Network architecture: an input layer with 400 units (the 20×20 pixel images), one hidden layer with 25 units, and an output layer with 10 units (one per digit class). Theta1 has shape (25, 401) and Theta2 has shape (10, 26), each including a bias column.
[Figure: the three-layer network architecture]

def forward_propagate(X, theta1, theta2):
    # X and the thetas are np.matrix objects, so * below is matrix multiplication
    m = X.shape[0]

    a1 = np.insert(X, 0, values=np.ones(m), axis=1)              # add bias column -> (m, 401)
    z2 = a1 * theta1.T                                           # (m, 401) x (401, 25) -> (m, 25)
    a2 = np.insert(sigmoid(z2), 0, values=np.ones(m), axis=1)    # add bias column -> (m, 26)
    z3 = a2 * theta2.T                                           # (m, 26) x (26, 10) -> (m, 10)
    h = sigmoid(z3)
    return a1, z2, a2, z3, h
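
As a quick sanity check (a usage sketch, assuming the X, theta1 and theta2 loaded above), a forward pass over the whole training set should produce the following shapes:

a1, z2, a2, z3, h = forward_propagate(np.matrix(X), theta1, theta2)
print(a1.shape, z2.shape, a2.shape, z3.shape, h.shape)
# (5000, 401) (5000, 25) (5000, 26) (5000, 10) (5000, 10)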

The cost function without regularization:

$$J(\Theta) = \frac{1}{m}\sum_{i=1}^{m}\sum_{k=1}^{K}\Bigl[-y_k^{(i)}\log\bigl(h_\Theta(x^{(i)})\bigr)_k - \bigl(1-y_k^{(i)}\bigr)\log\bigl(1-(h_\Theta(x^{(i)}))_k\bigr)\Bigr]$$

def cost(theta1, theta2, input_size, hidden_size, num_labels, X, y, learning_rate):
    m = X.shape[0]
    X = np.matrix(X)
    y = np.matrix(y)
    a1, z2, a2, z3, h = forward_propagate(X, theta1, theta2)
    J = 0
    for i in range(m):
        first_term = np.multiply(-y[i,:], np.log(h[i,:]))
        second_term = np.multiply((1 - y[i,:]), np.log(1 - h[i,:]))
        J += np.sum(first_term - second_term)
    J = J / m

    return J
        

1.3 Data Preprocessing and Parameter Initialization

encoder = OneHotEncoder(sparse=False)  # sparse=False returns a dense array, True returns a sparse matrix (newer scikit-learn versions call this argument sparse_output)
y_onehot=encoder.fit_transform(y)
y_onehot.shape
(5000, 10)
y[0],y_onehot[0,:]
(array([10], dtype=uint8), array([0., 0., 0., 0., 0., 0., 0., 0., 0., 1.]))
input_size=400
hidden_size=25
num_labels=10
learning_rate=1

Compute the cost

cost(theta1,theta2,input_size,hidden_size,num_labels,X,y_onehot,learning_rate)
0.2876291651613188

1.4 Define the Regularized Cost Function and the Backpropagation Function

The regularized cost function adds a penalty on all non-bias weights:

$$J(\Theta) = \frac{1}{m}\sum_{i=1}^{m}\sum_{k=1}^{K}\Bigl[-y_k^{(i)}\log\bigl(h_\Theta(x^{(i)})\bigr)_k - \bigl(1-y_k^{(i)}\bigr)\log\bigl(1-(h_\Theta(x^{(i)}))_k\bigr)\Bigr] + \frac{\lambda}{2m}\Bigl[\sum_{j=1}^{25}\sum_{k=1}^{400}\bigl(\Theta^{(1)}_{j,k}\bigr)^2 + \sum_{j=1}^{10}\sum_{k=1}^{25}\bigl(\Theta^{(2)}_{j,k}\bigr)^2\Bigr]$$

def costReg(theta1, theta2, input_size, hidden_size, num_labels, X, y, learning_rate):
    m = X.shape[0]
    X = np.matrix(X)
    y = np.matrix(y)

    # run the feed-forward pass
    a1, z2, a2, z3, h = forward_propagate(X, theta1, theta2)
    
    # compute the cost
    J = 0
    for i in range(m):
        first_term = np.multiply(-y[i,:], np.log(h[i,:]))
        second_term = np.multiply((1 - y[i,:]), np.log(1 - h[i,:]))
        J += np.sum(first_term - second_term)
    
    J = J / m
    
    J += (float(learning_rate) / (2 * m)) * (np.sum(np.power(theta1[:,1:], 2)) + np.sum(np.power(theta2[:,1:], 2)))  # learning_rate plays the role of the regularization parameter lambda here
    return J
costReg(theta1, theta2, input_size, hidden_size, num_labels, X, y_onehot, learning_rate)
0.38376985909092354
def sigmoid_gradient(z):
    return np.multiply(sigmoid(z),(1-sigmoid(z)))
sigmoid_gradient(0)
0.25
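
The value 0.25 matches the closed-form derivative: differentiating the sigmoid gives $g'(z) = g(z)\bigl(1 - g(z)\bigr)$, so $g'(0) = 0.5 \times 0.5 = 0.25$.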

Random initialization

params = (np.random.random(size=hidden_size * (input_size + 1) + num_labels * (hidden_size + 1)) - 0.5) * 0.24  # uniform in [-0.12, 0.12)
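
Subtracting 0.5 and scaling by 0.24 yields values uniformly distributed in [-0.12, 0.12), which breaks the symmetry between hidden units. An equivalent formulation with an explicit epsilon, as recommended in the exercise handout, is (a sketch):

epsilon_init = 0.12  # same range as the line above
params = np.random.uniform(-epsilon_init, epsilon_init,
                           size=hidden_size * (input_size + 1) + num_labels * (hidden_size + 1))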

Backpropagation. For each training example, the output-layer error, hidden-layer error and gradient accumulators are

$$\delta^{(3)} = a^{(3)} - y,\qquad \delta^{(2)} = \bigl(\Theta^{(2)}\bigr)^T\delta^{(3)} \circ g'\bigl(z^{(2)}\bigr),\qquad \Delta^{(l)} := \Delta^{(l)} + \delta^{(l+1)}\bigl(a^{(l)}\bigr)^T,\qquad \frac{\partial J}{\partial \Theta^{(l)}} = \frac{1}{m}\Delta^{(l)}$$

def backprop(params, input_size, hidden_size, num_labels, X, y, learning_rate):
    m = X.shape[0]
    X = np.matrix(X)
    y = np.matrix(y)

    # unroll the parameter vector into the weight matrices before the forward pass
    theta1 = np.matrix(np.reshape(params[:hidden_size * (input_size + 1)], (hidden_size, (input_size + 1))))
    theta2 = np.matrix(np.reshape(params[hidden_size * (input_size + 1):], (num_labels, (hidden_size + 1))))

    a1, z2, a2, z3, h = forward_propagate(X, theta1, theta2)

    J = 0
    delta1 = np.zeros(theta1.shape)
    delta2 = np.zeros(theta2.shape)

    for i in range(m):
        first_term = np.multiply(-y[i,:], np.log(h[i,:]))
        second_term = np.multiply((1 - y[i,:]), np.log(1 - h[i,:]))
        J += np.sum(first_term - second_term)

    J = J / m

    for t in range(m):
        a1t = a1[t,:]
        z2t = z2[t,:]
        a2t = a2[t,:]
        ht = h[t,:]
        yt = y[t,:]

        d3t = ht - yt

        z2t = np.insert(z2t, 0, values=np.ones(1))
        d2t = np.multiply((theta2.T * d3t.T).T, sigmoid_gradient(z2t))

        delta1 = delta1 + (d2t[:,1:]).T * a1t
        delta2 = delta2 + d3t.T * a2t

    delta1 = delta1 / m
    delta2 = delta2 / m

    return J, delta1, delta2

Backpropagation with regularization

def backpropReg(params, input_size, hidden_size, num_labels, X, y, learning_rate):
    m = X.shape[0]
    X = np.matrix(X)
    y = np.matrix(y)
    
    # reshape the parameter array into parameter matrices for each layer
    theta1 = np.matrix(np.reshape(params[:hidden_size * (input_size + 1)], (hidden_size, (input_size + 1))))
    theta2 = np.matrix(np.reshape(params[hidden_size * (input_size + 1):], (num_labels, (hidden_size + 1))))
    
    # run the feed-forward pass
    a1, z2, a2, z3, h = forward_propagate(X, theta1, theta2)
    
    # initializations
    J = 0
    delta1 = np.zeros(theta1.shape)  # (25, 401)
    delta2 = np.zeros(theta2.shape)  # (10, 26)
    
    # compute the cost
    for i in range(m):
        first_term = np.multiply(-y[i,:], np.log(h[i,:]))
        second_term = np.multiply((1 - y[i,:]), np.log(1 - h[i,:]))
        J += np.sum(first_term - second_term)
    
    J = J / m
    
    # add the cost regularization term
    J += (float(learning_rate) / (2 * m)) * (np.sum(np.power(theta1[:,1:], 2)) + np.sum(np.power(theta2[:,1:], 2)))
    
    # perform backpropagation
    for t in range(m):
        a1t = a1[t,:]  # (1, 401)
        z2t = z2[t,:]  # (1, 25)
        a2t = a2[t,:]  # (1, 26)
        ht = h[t,:]  # (1, 10)
        yt = y[t,:]  # (1, 10)
        
        d3t = ht - yt  # (1, 10)
        
        z2t = np.insert(z2t, 0, values=np.ones(1))  # (1, 26)
        d2t = np.multiply((theta2.T * d3t.T).T, sigmoid_gradient(z2t))  # (1, 26)
        
        delta1 = delta1 + (d2t[:,1:]).T * a1t
        delta2 = delta2 + d3t.T * a2t
        
    delta1 = delta1 / m
    delta2 = delta2 / m
    
    delta1[:,1:] = delta1[:,1:] + (theta1[:,1:] * learning_rate)/ m
    delta2[:,1:] = delta2[:,1:] + (theta2[:,1:] * learning_rate) / m
    
    grad = np.concatenate((np.ravel(delta1), np.ravel(delta2)))
    # concatenate the two gradient matrices into one flattened vector
    
    return J,grad
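
Before handing backpropReg to an optimizer it is worth verifying the analytic gradient numerically. The following is a minimal gradient-checking sketch (gradient_check is a hypothetical helper, not part of the exercise code); the analytic and central-finite-difference values should agree to roughly four significant digits:

def gradient_check(params, *args, eps=1e-4, n_checks=5):
    # analytic gradient from backpropagation
    _, grad = backpropReg(params, *args)
    # compare a few randomly chosen entries against central finite differences
    for i in np.random.choice(params.size, n_checks, replace=False):
        plus, minus = params.copy(), params.copy()
        plus[i] += eps
        minus[i] -= eps
        J_plus, _ = backpropReg(plus, *args)
        J_minus, _ = backpropReg(minus, *args)
        numeric = (J_plus - J_minus) / (2 * eps)
        print('param %d: analytic=%.6e, numeric=%.6e' % (i, grad[i], numeric))

# slow on the full data set; in practice run it on a small subset of X / y_onehot
# gradient_check(params, input_size, hidden_size, num_labels, X, y_onehot, learning_rate)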

1.5 Use an Optimization Library to Find the Optimal Parameters

from scipy.optimize import minimize

fmin=minimize(fun=backpropReg,x0=(params),args=(input_size,hidden_size,num_labels, X, y_onehot, learning_rate),method='TNC',jac=True,options={'maxiter':250})
# maxiter: maximum number of iterations
fmin
     fun: 0.32940356143623817
     jac: array([ 1.72209086e-04,  7.10131568e-08, -7.66715574e-08, ...,
        3.31147356e-06, -2.45840176e-05, -2.04963093e-04])
 message: 'Max. number of function evaluations reached'
    nfev: 250
     nit: 19
  status: 3
 success: False
       x: array([-7.62726613e-01,  3.55065784e-04, -3.83357787e-04, ...,
       -9.58437980e-02, -3.56685537e+00, -5.83283702e-02])
X = np.matrix(X)
thetafinal1 = np.matrix(np.reshape(fmin.x[:hidden_size * (input_size + 1)], (hidden_size, (input_size + 1))))
thetafinal2 = np.matrix(np.reshape(fmin.x[hidden_size * (input_size + 1):], (num_labels, (hidden_size + 1))))
a1, z2, a2, z3, h = forward_propagate(X, thetafinal1, thetafinal2 )
y_pred = np.array(np.argmax(h, axis=1) + 1)  # argmax is 0-based; +1 maps the column index back to the labels 1..10
y_pred
array([[10],
       [10],
       [10],
       ...,
       [ 9],
       [ 9],
       [ 9]], dtype=int64)
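
As a quick check before the full report, the overall training accuracy can be computed directly (y and y_pred are both (5000, 1) arrays of labels 1–10):

accuracy = np.mean(y_pred == np.array(y))  # fraction of correctly classified examples
print('training accuracy: %.4f' % accuracy)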

Evaluation report

from sklearn.metrics import classification_report  # per-class precision/recall/F1 report
print(classification_report(y, y_pred))
              precision    recall  f1-score   support

           1       0.99      0.99      0.99       500
           2       1.00      1.00      1.00       500
           3       0.99      0.99      0.99       500
           4       1.00      0.99      1.00       500
           5       1.00      1.00      1.00       500
           6       1.00      0.99      1.00       500
           7       0.99      1.00      0.99       500
           8       0.99      1.00      1.00       500
           9       0.99      0.98      0.99       500
          10       0.99      1.00      1.00       500

    accuracy                           0.99      5000
   macro avg       0.99      0.99      0.99      5000
weighted avg       0.99      0.99      0.99      5000

1.6 Visualize the Hidden Layer

hidden_layer=thetafinal1[:,1:]
hidden_layer.shape
(25, 400)
fig, ax_array = plt.subplots(nrows=5, ncols=5, sharey=True, sharex=True, figsize=(12, 12))
for r in range(5):
    for c in range(5):
        # each row of the hidden-layer weights is rendered as a 20x20 image
        ax_array[r, c].matshow(np.array(hidden_layer[5 * r + c].reshape((20, 20))), cmap=matplotlib.cm.binary)
        ax_array[r, c].set_xticks([])
        ax_array[r, c].set_yticks([])

[Figure: the 25 hidden-unit weight vectors, each reshaped to a 20×20 image]

2. Summary

  1. Meaning of the jac argument of scipy.optimize.minimize:
jac : {callable, '2-point', '3-point', 'cs', bool}, optional
Method for computing the gradient vector. Only relevant for CG, BFGS, Newton-CG, L-BFGS-B, TNC, SLSQP, dogleg, trust-ncg, trust-krylov, trust-exact and trust-constr.
If it is a callable, it should be a function returning the gradient vector: jac(x, *args) -> array_like, shape (n,), where x is an array of shape (n,) and args is a tuple of the fixed parameters.
If jac is a boolean and True, fun is assumed to return both the objective and the gradient as an (f, g) tuple; this is exactly how backpropReg is passed to minimize above. The Newton-CG, trust-ncg, dogleg, trust-exact and trust-krylov methods require either a callable jac or a fun that returns the objective and gradient together.
If None or False, the gradient is estimated with a two-point finite-difference scheme using an absolute step size. Alternatively, the keywords {'2-point', '3-point', 'cs'} select a finite-difference scheme with a relative step size; these schemes respect any specified bounds. A minimal example of the jac=True convention is sketched after this list.
  2. Gradient checking can be used to verify that the backpropagation implementation is correct (a sketch is given at the end of section 1.4).
  3. OneHotEncoder(sparse=False): sparse=False returns a dense array, while True returns a sparse matrix.
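
Below is a minimal, self-contained illustration of the jac=True convention (a toy quadratic, unrelated to the exercise data): fun returns an (objective, gradient) tuple, exactly as backpropReg does above.

from scipy.optimize import minimize
import numpy as np

def quad_with_grad(x):
    f = np.sum(x ** 2)   # objective value
    g = 2 * x            # analytic gradient
    return f, g

res = minimize(fun=quad_with_grad, x0=np.array([3.0, -4.0]), method='TNC', jac=True)
print(res.x)  # approximately [0, 0]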