This project contains a Python implementation of ex4 from Andrew Ng's Machine Learning course; the main topic is the backpropagation neural network.
1. Neural Network
1.1 Data Visualization
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib
from scipy.io import loadmat
from sklearn.preprocessing import OneHotEncoder
data=loadmat(r'C:\Users\xxx\Desktop\机器学习\machine-learning-ex4\machine-learning-ex4\ex4\ex4data1.mat')
data
{'__header__': b'MATLAB 5.0 MAT-file, Platform: GLNXA64, Created on: Sun Oct 16 13:09:09 2011',
'__version__': '1.0',
'__globals__': [],
'X': array([[0., 0., 0., ..., 0., 0., 0.],
[0., 0., 0., ..., 0., 0., 0.],
[0., 0., 0., ..., 0., 0., 0.],
...,
[0., 0., 0., ..., 0., 0., 0.],
[0., 0., 0., ..., 0., 0., 0.],
[0., 0., 0., ..., 0., 0., 0.]]),
'y': array([[10],
[10],
[10],
...,
[ 9],
[ 9],
[ 9]], dtype=uint8)}
X=data['X']
y=data['y']
X.shape,y.shape
((5000, 400), (5000, 1))
weight=loadmat(r'C:\Users\xxx\Desktop\机器学习\machine-learning-ex4\machine-learning-ex4\ex4\ex4weights.mat')
theta1,theta2=weight['Theta1'],weight['Theta2']
theta1.shape,theta2.shape
((25, 401), (10, 26))
sample_idx = np.random.choice(np.arange(data['X'].shape[0]), 100)
sample_images = data['X'][sample_idx, :]
fig, ax_array = plt.subplots(nrows=10, ncols=10, sharey=True, sharex=True, figsize=(12, 12))
for r in range(10):
    for c in range(10):
        ax_array[r, c].matshow(np.array(sample_images[10 * r + c].reshape((20, 20))).T, cmap=matplotlib.cm.binary)
        plt.xticks(np.array([]))
        plt.yticks(np.array([]))
1.2 Defining the Forward-Propagation and Cost Functions
def sigmoid(z):
    return 1 / (1 + np.exp(-z))
Neural network structure
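From the shapes above, the network is the same as in the exercise: 400 input units (the 20×20 pixel images), 25 hidden units, and 10 output units. As a sketch of what forward_propagate below computes (samples are rows of X, a bias column of ones is prepended at each layer, and g denotes the sigmoid):
$$a^{(1)} = [\,1,\; X\,],\quad z^{(2)} = a^{(1)}\Theta_1^{T},\quad a^{(2)} = [\,1,\; g(z^{(2)})\,],\quad z^{(3)} = a^{(2)}\Theta_2^{T},\quad h = g(z^{(3)})$$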
def forward_propagate(X, theta1, theta2):
    m = X.shape[0]
    a1 = np.insert(X, 0, values=np.ones(m), axis=1)
    z2 = a1 * theta1.T
    a2 = np.insert(sigmoid(z2), 0, values=np.ones(m), axis=1)
    z3 = a2 * theta2.T
    h = sigmoid(z3)
    return a1, z2, a2, z3, h
This is the cost function without regularization.
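For reference, the loop below accumulates the cross-entropy cost over all m samples and K = 10 output units:
$$J(\theta) = \frac{1}{m}\sum_{i=1}^{m}\sum_{k=1}^{K}\Big[-y_k^{(i)}\log\big(h_\theta(x^{(i)})\big)_k-\big(1-y_k^{(i)}\big)\log\big(1-(h_\theta(x^{(i)}))_k\big)\Big]$$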
def cost(theta1, theta2, input_size, hidden_size, num_labels, X, y, learning_rate):
    m = X.shape[0]
    X = np.matrix(X)
    y = np.matrix(y)
    a1, z2, a2, z3, h = forward_propagate(X, theta1, theta2)
    J = 0
    for i in range(m):
        first_term = np.multiply(-y[i,:], np.log(h[i,:]))
        second_term = np.multiply((1 - y[i,:]), np.log(1 - h[i,:]))
        J += np.sum(first_term - second_term)
    J = J / m
    return J
1.3 Data Preprocessing and Parameter Initialization
encoder = OneHotEncoder(sparse=False)  # sparse=False returns a dense array; True returns a sparse matrix
y_onehot=encoder.fit_transform(y)
y_onehot.shape
(5000, 10)
y[0],y_onehot[0,:]
(array([10], dtype=uint8), array([0., 0., 0., 0., 0., 0., 0., 0., 0., 1.]))
input_size=400
hidden_size=25
num_labels=10
learning_rate=1
Compute the cost
cost(theta1,theta2,input_size,hidden_size,num_labels,X,y_onehot,learning_rate)
0.2876291651613188
1.4 Defining the Regularized Cost Function and Backpropagation
Regularized cost function
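Compared with the unregularized version, costReg adds a penalty on all weights except the bias columns; note that the argument named learning_rate actually plays the role of the regularization parameter λ here:
$$J_{reg}(\theta) = J(\theta) + \frac{\lambda}{2m}\Big[\sum_{j,k\ge 1}\big(\Theta^{(1)}_{j,k}\big)^2 + \sum_{j,k\ge 1}\big(\Theta^{(2)}_{j,k}\big)^2\Big]\qquad(\text{bias columns excluded})$$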
def costReg(theta1, theta2, input_size, hidden_size, num_labels, X, y, learning_rate):
    m = X.shape[0]
    X = np.matrix(X)
    y = np.matrix(y)
    # run the feed-forward pass
    a1, z2, a2, z3, h = forward_propagate(X, theta1, theta2)
    # compute the cost
    J = 0
    for i in range(m):
        first_term = np.multiply(-y[i,:], np.log(h[i,:]))
        second_term = np.multiply((1 - y[i,:]), np.log(1 - h[i,:]))
        J += np.sum(first_term - second_term)
    J = J / m
    # add the cost regularization term
    J += (float(learning_rate) / (2 * m)) * (np.sum(np.power(theta1[:,1:], 2)) + np.sum(np.power(theta2[:,1:], 2)))
    return J
costReg(theta1, theta2, input_size, hidden_size, num_labels, X, y_onehot, learning_rate)
0.38376985909092354
def sigmoid_gradient(z):
    return np.multiply(sigmoid(z), (1 - sigmoid(z)))
sigmoid_gradient(0)
0.25
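The value 0.25 is exactly what the derivative of the sigmoid predicts at z = 0:
$$g'(z) = \frac{d}{dz}\,\frac{1}{1+e^{-z}} = g(z)\big(1-g(z)\big),\qquad g'(0)=0.5\times 0.5 = 0.25$$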
Random initialization
params=(np.random.random(size=hidden_size*(input_size+1)+num_labels*(hidden_size+1))-0.5)*0.24
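Multiplying by 0.24 draws each weight uniformly from roughly (−0.12, 0.12), i.e. ε_init ≈ 0.12 as suggested in the exercise. A minimal per-layer sketch of the more general heuristic ε_init = √6 / √(L_in + L_out) (the helper name random_init is my own, not part of the original code):

def random_init(l_in, l_out):
    # heuristic: epsilon_init = sqrt(6) / sqrt(L_in + L_out); for 400 -> 25 this gives ~0.12
    epsilon_init = np.sqrt(6) / np.sqrt(l_in + l_out)
    # weight matrix mapping l_in inputs (+1 bias) to l_out units, entries in (-epsilon_init, epsilon_init)
    return (np.random.random((l_out, l_in + 1)) * 2 - 1) * epsilon_init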
Backpropagation
def backprop(params, input_size, hidden_size, num_labels, X, y, learning_rate):
    m = X.shape[0]
    X = np.matrix(X)
    y = np.matrix(y)
    # reshape the parameter vector into the weight matrices first,
    # then run the feed-forward pass with them
    theta1 = np.matrix(np.reshape(params[:hidden_size * (input_size + 1)], (hidden_size, (input_size + 1))))
    theta2 = np.matrix(np.reshape(params[hidden_size * (input_size + 1):], (num_labels, (hidden_size + 1))))
    a1, z2, a2, z3, h = forward_propagate(X, theta1, theta2)
    J = 0
    delta1 = np.zeros(theta1.shape)
    delta2 = np.zeros(theta2.shape)
    for i in range(m):
        first_term = np.multiply(-y[i,:], np.log(h[i,:]))
        second_term = np.multiply((1 - y[i,:]), np.log(1 - h[i,:]))
        J += np.sum(first_term - second_term)
    J = J / m
    for t in range(m):
        a1t = a1[t,:]
        z2t = z2[t,:]
        a2t = a2[t,:]
        ht = h[t,:]
        yt = y[t,:]
        d3t = ht - yt
        z2t = np.insert(z2t, 0, values=np.ones(1))
        d2t = np.multiply((theta2.T * d3t.T).T, sigmoid_gradient(z2t))
        delta1 = delta1 + (d2t[:,1:]).T * a1t
        delta2 = delta2 + d3t.T * a2t
    delta1 = delta1 / m
    delta2 = delta2 / m
    return J, delta1, delta2
Backpropagation with regularization
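For each sample t, the loop below computes the output-layer error, propagates it back through the hidden layer, and accumulates the gradients; in the exercise's notation (λ again comes from learning_rate, the bias entry of δ^(2) is dropped, and the bias column is not regularized):
$$\delta^{(3)} = h - y,\qquad \delta^{(2)} = \big(\Theta_2\big)^{T}\delta^{(3)} \circ g'(z^{(2)}),\qquad \Delta^{(l)} \mathrel{+}= \delta^{(l+1)}\big(a^{(l)}\big)^{T}$$
$$\frac{\partial J}{\partial \Theta^{(l)}} = \frac{1}{m}\,\Delta^{(l)} + \frac{\lambda}{m}\,\Theta^{(l)}\qquad(\text{regularization not applied to the bias column})$$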
def backpropReg(params, input_size, hidden_size, num_labels, X, y, learning_rate):
    m = X.shape[0]
    X = np.matrix(X)
    y = np.matrix(y)
    # reshape the parameter array into parameter matrices for each layer
    theta1 = np.matrix(np.reshape(params[:hidden_size * (input_size + 1)], (hidden_size, (input_size + 1))))
    theta2 = np.matrix(np.reshape(params[hidden_size * (input_size + 1):], (num_labels, (hidden_size + 1))))
    # run the feed-forward pass
    a1, z2, a2, z3, h = forward_propagate(X, theta1, theta2)
    # initializations
    J = 0
    delta1 = np.zeros(theta1.shape)  # (25, 401)
    delta2 = np.zeros(theta2.shape)  # (10, 26)
    # compute the cost
    for i in range(m):
        first_term = np.multiply(-y[i,:], np.log(h[i,:]))
        second_term = np.multiply((1 - y[i,:]), np.log(1 - h[i,:]))
        J += np.sum(first_term - second_term)
    J = J / m
    # add the cost regularization term
    J += (float(learning_rate) / (2 * m)) * (np.sum(np.power(theta1[:,1:], 2)) + np.sum(np.power(theta2[:,1:], 2)))
    # perform backpropagation
    for t in range(m):
        a1t = a1[t,:]  # (1, 401)
        z2t = z2[t,:]  # (1, 25)
        a2t = a2[t,:]  # (1, 26)
        ht = h[t,:]    # (1, 10)
        yt = y[t,:]    # (1, 10)
        d3t = ht - yt  # (1, 10)
        z2t = np.insert(z2t, 0, values=np.ones(1))  # (1, 26)
        d2t = np.multiply((theta2.T * d3t.T).T, sigmoid_gradient(z2t))  # (1, 26)
        delta1 = delta1 + (d2t[:,1:]).T * a1t
        delta2 = delta2 + d3t.T * a2t
    delta1 = delta1 / m
    delta2 = delta2 / m
    # add the gradient regularization term (bias columns excluded)
    delta1[:,1:] = delta1[:,1:] + (theta1[:,1:] * learning_rate) / m
    delta2[:,1:] = delta2[:,1:] + (theta2[:,1:] * learning_rate) / m
    # concatenate the two gradient arrays into a single vector
    grad = np.concatenate((np.ravel(delta1), np.ravel(delta2)))
    return J, grad
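As a quick sanity check before optimizing, the function can be evaluated once to confirm it returns a finite cost and one gradient entry per parameter (a small sketch using the variables defined above):

J, grad = backpropReg(params, input_size, hidden_size, num_labels, X, y_onehot, learning_rate)
J, grad.shape  # grad.shape should equal params.shape, i.e. (25*401 + 10*26,) = (10285,)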
1.5 Finding the Optimal Parameters with an Optimization Library
from scipy.optimize import minimize
fmin = minimize(fun=backpropReg, x0=(params), args=(input_size, hidden_size, num_labels, X, y_onehot, learning_rate), method='TNC', jac=True, options={'maxiter': 250})
# maxiter: maximum number of iterations
fmin
fun: 0.32940356143623817
jac: array([ 1.72209086e-04, 7.10131568e-08, -7.66715574e-08, ...,
3.31147356e-06, -2.45840176e-05, -2.04963093e-04])
message: 'Max. number of function evaluations reached'
nfev: 250
nit: 19
status: 3
success: False
x: array([-7.62726613e-01, 3.55065784e-04, -3.83357787e-04, ...,
-9.58437980e-02, -3.56685537e+00, -5.83283702e-02])
X = np.matrix(X)
thetafinal1 = np.matrix(np.reshape(fmin.x[:hidden_size * (input_size + 1)], (hidden_size, (input_size + 1))))
thetafinal2 = np.matrix(np.reshape(fmin.x[hidden_size * (input_size + 1):], (num_labels, (hidden_size + 1))))
a1, z2, a2, z3, h = forward_propagate(X, thetafinal1, thetafinal2 )
y_pred = np.array(np.argmax(h, axis=1) + 1)
y_pred
array([[10],
[10],
[10],
...,
[ 9],
[ 9],
[ 9]], dtype=int64)
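Before the full report, overall accuracy can be checked directly from the predictions (a simple sketch; the value should be close to the 0.99 reported below):

accuracy = np.mean(y_pred == y)
accuracy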
Evaluation report
from sklearn.metrics import classification_report  # classification_report produces the evaluation report
print(classification_report(y, y_pred))
precision recall f1-score support
1 0.99 0.99 0.99 500
2 1.00 1.00 1.00 500
3 0.99 0.99 0.99 500
4 1.00 0.99 1.00 500
5 1.00 1.00 1.00 500
6 1.00 0.99 1.00 500
7 0.99 1.00 0.99 500
8 0.99 1.00 1.00 500
9 0.99 0.98 0.99 500
10 0.99 1.00 1.00 500
accuracy 0.99 5000
macro avg 0.99 0.99 0.99 5000
weighted avg 0.99 0.99 0.99 5000
1.6 Visualizing the Hidden Layer
hidden_layer=thetafinal1[:,1:]
hidden_layer.shape
(25, 400)
fig, ax_array = plt.subplots(nrows=5, ncols=5, sharey=True, sharex=True, figsize=(12, 12))
for r in range(5):
    for c in range(5):
        ax_array[r, c].matshow(np.array(hidden_layer[5 * r + c].reshape((20, 20))), cmap=matplotlib.cm.binary)
        plt.xticks(np.array([]))
        plt.yticks(np.array([]))
2 Summary
- Meaning of the possible values of the jac argument of minimize (from the SciPy documentation):
jac : {callable, '2-point', '3-point', 'cs', bool}, optional
Method for computing the gradient vector. Only relevant for CG, BFGS, Newton-CG, L-BFGS-B, TNC, SLSQP, dogleg, trust-ncg, trust-krylov, trust-exact and trust-constr.
If it is callable, it should be a function that returns the gradient vector: jac(x, *args) -> array_like, shape (n,), where x is an array of shape (n,) and args is the tuple of fixed parameters.
If jac is a Boolean and is True, fun is assumed to return the objective and the gradient as an (f, g) tuple; this is why backpropReg above returns J and grad together and is passed with jac=True. The 'Newton-CG', 'trust-ncg', 'dogleg', 'trust-exact' and 'trust-krylov' methods require either a callable or that fun return the objective and gradient.
If None or False, the gradient is estimated with two-point finite differences using an absolute step size. Alternatively, the keywords {'2-point', '3-point', 'cs'} select a finite-difference scheme for a numerical estimate of the gradient with a relative step size. These finite-difference schemes obey any specified bounds.
- Gradient checking can be used to verify that the backpropagation implementation is correct (see the sketch after this list).
- OneHotEncoder(sparse=False): sparse=False returns a dense array, True returns a sparse matrix (in recent scikit-learn versions the argument is named sparse_output).
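A minimal gradient-checking sketch (the function name gradient_check, ε = 1e-4, and the number of spot checks are my own choices; it reuses the global variables defined above and is slow, since backpropReg loops over all 5000 samples):

def gradient_check(params, epsilon=1e-4, num_checks=10):
    # analytic gradient from backpropagation
    _, grad = backpropReg(params, input_size, hidden_size, num_labels, X, y_onehot, learning_rate)
    for i in np.random.choice(params.size, num_checks, replace=False):
        plus, minus = params.copy(), params.copy()
        plus[i] += epsilon
        minus[i] -= epsilon
        # two-sided numerical estimate: (J(theta + e) - J(theta - e)) / (2e)
        J_plus, _ = backpropReg(plus, input_size, hidden_size, num_labels, X, y_onehot, learning_rate)
        J_minus, _ = backpropReg(minus, input_size, hidden_size, num_labels, X, y_onehot, learning_rate)
        numeric = (J_plus - J_minus) / (2 * epsilon)
        print(i, numeric, grad[i])  # the two values should agree to roughly 1e-4 or better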