Introduction
This post is a Python implementation of Exercise 4 from Andrew Ng's Machine Learning course. Exercise 4 asks us to implement a neural network from scratch to recognize images of handwritten digits, which is essentially a multi-class classification problem.
Code
This part is the same as in ex3: a dataset of 5000 handwritten digit images, 20×20 pixels each, together with their labels (digits 1-9, with 0 stored as 10).
# import libs
import numpy as np
import matplotlib.pyplot as plt
from scipy.io import loadmat
import matplotlib
from sklearn.preprocessing import OneHotEncoder
# load and plot data
data = loadmat('../data_sets/ex4data1.mat')
sample_idx = np.random.choice(np.arange(data['X'].shape[0]), 100)
sample_images = data['X'][sample_idx, :]
fig, ax_array = plt.subplots(nrows=10, ncols=10, sharey=True, sharex=True, figsize=(12, 12))
for r in range(10):
    for c in range(10):
        ax_array[r, c].matshow(np.array(sample_images[10 * r + c].reshape((20, 20))).T, cmap=matplotlib.cm.binary)
        plt.xticks(np.array([]))
        plt.yticks(np.array([]))
# activation function; we use sigmoid here, but feel free to try others such as tanh or relu
def sigmoid(x):
    return 1 / (1 + np.exp(-x))
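For reference, minimal NumPy sketches of the alternatives mentioned in the comment (they are not used anywhere else in this post) could look like:

# sketches of alternative activations; not used elsewhere in this post
def tanh(x):
    return np.tanh(x)

def relu(x):
    return np.maximum(0, x)

Swapping one of these in would also require replacing the sigmoid derivative used in the backward pass below with the matching derivative.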
# define the NN forward pass; the network has one hidden layer (two weight matrices)
def forward(theta1, theta2, X):
    theta1 = np.matrix(theta1)
    theta2 = np.matrix(theta2)
    X = np.insert(X, 0, np.ones(X.shape[0]), axis=1)  # prepend bias column
    X = np.matrix(X)
    A0 = X
    Z1 = X * theta1.T
    A1 = sigmoid(Z1)
    A1 = np.insert(A1, 0, np.ones(A1.shape[0]), axis=1)  # prepend bias column
    A1 = np.matrix(A1)
    Z2 = A1 * theta2.T
    A2 = sigmoid(Z2)
    return A0, Z1, A1, Z2, A2
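In matrix form, with a bias column of ones prepended at each layer, the forward pass computes:

$$A_0 = [\mathbf{1},\,X],\quad Z_1 = A_0\,\Theta_1^{T},\quad A_1 = [\mathbf{1},\,\sigma(Z_1)],\quad Z_2 = A_1\,\Theta_2^{T},\quad \hat{y} = A_2 = \sigma(Z_2)$$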
# define the cost function that we will minimize
def cost(theta1, theta2, X, y, lambd):
    A0, Z1, A1, Z2, y_hat = forward(theta1, theta2, X)
    first = np.multiply(-y, np.log(y_hat))
    second = np.multiply(1 - y, np.log(1 - y_hat))
    J = (first - second)
    J = J / len(X)
    J = np.sum(J)
    # the regularization term skips the bias column of each theta
    reg = float(lambd) * (np.sum(np.power(theta1[:, 1:], 2)) + np.sum(np.power(theta2[:, 1:], 2))) / (2 * len(X))
    J += reg
    return J
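This implements the regularized cross-entropy cost from the exercise, with m = 5000 samples and K = 10 classes, where the regularization sums run over all non-bias entries of the two weight matrices:

$$J(\Theta) = \frac{1}{m}\sum_{i=1}^{m}\sum_{k=1}^{K}\Big[-y_k^{(i)}\log \hat{y}_k^{(i)} - \big(1-y_k^{(i)}\big)\log\big(1-\hat{y}_k^{(i)}\big)\Big] + \frac{\lambda}{2m}\Big(\sum (\Theta_{1,[:,1:]})^2 + \sum (\Theta_{2,[:,1:]})^2\Big)$$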
data['X'].shape,data['y'].shape
# ((5000, 400), (5000, 1))
encoder = OneHotEncoder(sparse=False)  # for scikit-learn >= 1.2, use sparse_output=False instead
y_onehot = encoder.fit_transform(data['y'])
y_onehot.shape
# (5000, 10)
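Note that the cost call below references theta1 and theta2, which are never defined in this post. In the original exercise they are the pre-trained weights provided in ex4weights.mat (keys 'Theta1' and 'Theta2'). A minimal loading sketch, assuming the file sits in the same directory as ex4data1.mat:

# load the pre-trained weights shipped with ex4 (the path is an assumption)
weight = loadmat('../data_sets/ex4weights.mat')
theta1, theta2 = weight['Theta1'], weight['Theta2']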
cost(theta1,theta2,data['X'],y_onehot,1)
# 0.3837698590909236
# derivative of the sigmoid function
def d_sigmoid(x):
    return np.multiply(sigmoid(x), (1 - sigmoid(x)))
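This uses the standard identity: since $\sigma(x) = 1/(1+e^{-x})$,

$$\sigma'(x) = \frac{e^{-x}}{(1+e^{-x})^2} = \sigma(x)\,\big(1-\sigma(x)\big)$$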
# backward pass: unroll params into the two thetas, then compute the cost and gradients
def back(params, hidden, label, input, X, y, lambd):
    theta1 = np.matrix(np.reshape(params[:hidden * (input + 1)], (hidden, (input + 1))))
    theta2 = np.matrix(np.reshape(params[hidden * (input + 1):], (label, (hidden + 1))))
    A0, Z1, A1, Z2, y_pred = forward(theta1, theta2, X)
    J = cost(theta1, theta2, X, y, lambd)
    DZ2 = y_pred - y
    Z1 = np.insert(Z1, 0, values=np.ones(len(X)), axis=1)  # prepend bias so shapes match theta2
    DZ1 = np.multiply(DZ2 * theta2, d_sigmoid(Z1))
    DT2 = (DZ2.T * A1) / len(X)
    DT1 = (DZ1[:, 1:].T * A0) / len(X)
    # regularize all but the bias columns
    DT2[:, 1:] = DT2[:, 1:] + lambd * theta2[:, 1:] / len(X)
    DT1[:, 1:] = DT1[:, 1:] + lambd * theta1[:, 1:] / len(X)
    grad = np.concatenate((np.ravel(DT1), np.ravel(DT2)))
    return J, grad
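These lines implement the usual backpropagation equations for a sigmoid output with cross-entropy loss. With $\delta$ denoting the error terms and the bias column dropped from $\delta_1$ before computing the gradient:

$$\delta_2 = \hat{y} - y, \qquad \delta_1 = \big((\delta_2\,\Theta_2) \odot \sigma'([\mathbf{1},\,Z_1])\big)_{[:,\,1:]}$$

$$\frac{\partial J}{\partial \Theta_2} = \frac{1}{m}\,\delta_2^{T} A_1, \qquad \frac{\partial J}{\partial \Theta_1} = \frac{1}{m}\,\delta_1^{T} A_0$$

plus the $\frac{\lambda}{m}\Theta$ regularization term on the non-bias columns.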
# get params ready
hidden = 25
input = 400
label = 10
# initialize thetas randomly, uniform in (-0.12, 0.12)
params = (np.random.random(size=hidden * (input + 1) + label * (hidden + 1)) - 0.5) * 0.24
# the last argument is the regularization parameter lambda = 1
J, grad = back(params, hidden, label, input, data['X'], y_onehot, 1)
# it is good practice to gradient-check, to make sure our implementation is correct
def get_approx_grad(params, change, hidden, label, input, X, y, lambd):
    grad = np.zeros(len(params))
    for i in range(len(params)):
        params[i] += change
        theta1 = np.matrix(np.reshape(params[:hidden * (input + 1)], (hidden, (input + 1))))
        theta2 = np.matrix(np.reshape(params[hidden * (input + 1):], (label, (hidden + 1))))
        J_up = cost(theta1, theta2, X, y, lambd)
        params[i] -= 2 * change
        theta1 = np.matrix(np.reshape(params[:hidden * (input + 1)], (hidden, (input + 1))))
        theta2 = np.matrix(np.reshape(params[hidden * (input + 1):], (label, (hidden + 1))))
        J_down = cost(theta1, theta2, X, y, lambd)
        grad[i] = (J_up - J_down) / (2 * change)
        params[i] += change  # restore the parameter
    return grad
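Each component of the gradient is approximated with a centered finite difference (here $\varepsilon = 0.001$):

$$\frac{\partial J}{\partial \theta_i} \approx \frac{J(\theta + \varepsilon e_i) - J(\theta - \varepsilon e_i)}{2\varepsilon}$$

Be warned that looping over all 10285 parameters with the full 5000-sample dataset is slow; in practice gradient checking is often done on a smaller network or a subset of the data.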
grad_approx = get_approx_grad(params, 0.001, hidden, label, input, data['X'], y_onehot, 1)
grad_diff = np.sqrt(np.sum(np.power(grad_approx - grad, 2)))  # Euclidean distance between the two gradients
grad_diff
# 3.2421633421969986e-08
# fit model
from scipy.optimize import minimize
# minimize the objective function
fmin = minimize(fun=back, x0=(params), args=(hidden, label, input, data['X'], y_onehot, 1),
method='TNC', jac=True,options={'maxfun': 250})
fmin
# message: Max. number of function evaluations reached
# success: False
# status: 3
# fun: 0.3256656137323782
# x: [-2.727e+00 3.072e-03 ... -6.346e-01 -3.700e+00]
# nit: 22
# jac: [-9.848e-05 6.145e-07 ... 1.824e-05 1.513e-05]
# nfev: 250
# success is False only because we hit the maxfun cap of 250; the cost has already dropped to about 0.33
# reshape the optimized parameters back into the two weight matrices
X = np.matrix(data['X'])
thetafinal1 = np.matrix(np.reshape(fmin.x[:hidden * (input + 1)], (hidden, (input + 1))))
thetafinal2 = np.matrix(np.reshape(fmin.x[hidden * (input + 1):], (label, (hidden + 1))))
# model performance
A0, Z1, A1, Z2, h = forward(thetafinal1, thetafinal2, data['X'])
y_pred = np.array(np.argmax(h, axis=1) + 1)  # +1 because the labels run from 1 to 10
from sklearn.metrics import classification_report
print(classification_report(data['y'], y_pred))
# precision recall f1-score support
#
# 1 0.99 1.00 1.00 500
# 2 0.99 0.99 0.99 500
# 3 0.99 0.98 0.99 500
# 4 1.00 0.99 0.99 500
# 5 1.00 0.99 1.00 500
# 6 1.00 1.00 1.00 500
# 7 0.99 1.00 0.99 500
# 8 1.00 1.00 1.00 500
# 9 0.99 0.99 0.99 500
# 10 0.99 1.00 1.00 500
#
# accuracy 0.99 5000
# macro avg 0.99 0.99 0.99 5000
# weighted avg 0.99 0.99 0.99 5000
Dataset
Link: https://pan.baidu.com/s/1zteJBsMJ0GRwqRb5opOgwg (extraction code: 78ah)