神经网络
对于这个练习,我们将再次处理手写数字数据集。这次使用反向传播的前馈神经网络,自动学习神经网络的参数。
这部分和ex3里是一样的,5000张20*20像素的手写数字数据集,以及对应的数字(1-9,0对应10)
导入数据
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib
from scipy.io import loadmat
from sklearn.preprocessing import OneHotEncoder
# Load the MATLAB-format dataset: 5000 flattened 20x20 digit images plus labels.
# BUG FIX: use a raw string for the Windows path — "\s" and "\e" only work in a
# plain string because they happen not to be escape sequences, and emit a
# SyntaxWarning (future error) on modern Python.
data = loadmat(r"E:\shujuji\ex4data1.mat")
data
X = data["X"]  # (5000, 400) pixel features
y = data["y"]  # (5000, 1) labels 1..10 (10 encodes digit 0)
X.shape, y.shape
((5000, 400), (5000, 1))
sigmoid
def sigmoid(z):
    """Element-wise logistic function g(z) = 1 / (1 + e^-z)."""
    exp_neg = np.exp(-z)
    return 1.0 / (1.0 + exp_neg)
前向传播
def forward_propagate(X, theta1, theta2):
    """Forward pass through the 3-layer network.

    X: (m, input) design matrix; theta1: (hidden, input+1); theta2: (labels, hidden+1).
    Returns (a1, z2, a2, z3, h): activations and pre-activations per layer,
    h being the (m, labels) output hypothesis.
    """
    m = X.shape[0]  # number of examples
    # np.insert(arr, obj, values, axis): prepend a column (axis=1) of ones —
    # the bias unit — at position 0.
    a1 = np.insert(X, 0, values=np.ones(m), axis=1)
    # GENERALIZATION: use `@` (matmul) instead of `*`. `*` is matrix multiply
    # only for np.matrix operands; for plain ndarrays it is element-wise,
    # which would silently compute the wrong thing. `@` is correct for both.
    z2 = a1 @ theta1.T
    a2 = np.insert(sigmoid(z2), 0, values=np.ones(m), axis=1)  # bias for hidden layer
    z3 = a2 @ theta2.T
    h = sigmoid(z3)
    return a1, z2, a2, z3, h
代价函数
def cost(theta1, theta2, input_size, hidden_size, num_labels, X, y, learning_rate):
    """Unregularized cross-entropy cost of the network over all m examples.

    y must be one-hot encoded, shape (m, num_labels), matching h.
    learning_rate is unused here; it is kept for signature parity with
    costReg/backprop. Returns the scalar cost J.
    """
    m = X.shape[0]
    X = np.matrix(X)
    y = np.matrix(y)
    a1, z2, a2, z3, h = forward_propagate(X, theta1, theta2)
    # PERF: the original summed example-by-example in a Python loop; the
    # cross-entropy is computed here in one vectorized expression instead.
    J = np.sum(np.multiply(-y, np.log(h)) - np.multiply(1 - y, np.log(1 - h))) / m
    return J
one-hot 标签 y
是我刚刚在 sklearn 中学的,具体可参考 sklearn 官方文档中 OneHotEncoder(数据预处理)的相关说明。
# One-hot encode the labels: class k (1..10) becomes a length-10 indicator
# vector with a 1 at index k-1 and 0 elsewhere.
# BUG FIX: sklearn >= 1.2 renamed `sparse` to `sparse_output` and removed
# `sparse` in 1.4, so `OneHotEncoder(sparse=False)` now raises TypeError.
# Try the new keyword first and fall back for old sklearn versions.
try:
    encoder = OneHotEncoder(sparse_output=False)
except TypeError:
    encoder = OneHotEncoder(sparse=False)
y_onehot = encoder.fit_transform(y)
y_onehot.shape
(5000, 10)
初始化
第一种:随机生成theta1,theta2
# Network architecture and regularization hyperparameters.
input_size = 400   # 20x20 pixel images, flattened
hidden_size = 25
num_labels = 10
learning_rate = 1
# Randomly initialize one flat parameter array covering both weight matrices.
params = (np.random.random(size=hidden_size * (input_size + 1) + num_labels * (hidden_size + 1)) - 0.5) * 0.25
# np.random.random draws uniform floats in [0, 1); shifting by -0.5 and
# scaling by 0.25 gives values in (-0.125, 0.125).
m = X.shape[0]
X = np.matrix(X)
y = np.matrix(y)
# Unroll the flat parameter array into the per-layer weight matrices
# (np.reshape re-interprets the slice with the given dimensions).
theta1 = np.matrix(np.reshape(params[:hidden_size * (input_size + 1)], (hidden_size, (input_size + 1))))
theta2 = np.matrix(np.reshape(params[hidden_size * (input_size + 1):], (num_labels, (hidden_size + 1))))
a1, z2, a2, z3, h = forward_propagate(X, theta1, theta2)
a1.shape, z2.shape, a2.shape, z3.shape, h.shape
((5000, 401), (5000, 25), (5000, 26), (5000, 10), (5000, 10))
第二种:根据数据集所给权重来生成theta1,theta2
# Alternative initialization: load the pre-trained weights shipped with the
# exercise. BUG FIX: raw string for the Windows path — "\s" in a plain string
# only works because it is not an escape sequence (SyntaxWarning on modern
# Python). The mixed "\" and "/" separators are preserved byte-for-byte.
weight = loadmat(r"E:\shujuji/ex4weights.mat")
theta1, theta2 = weight['Theta1'], weight['Theta2']
theta1.shape, theta2.shape
((25, 401), (10, 26))
cost(theta1, theta2, input_size, hidden_size, num_labels, X, y_onehot, learning_rate)
第一种随机生成结果可能不同
第二种结果:0.2876291651613187
正则化代价函数
def costReg(theta1, theta2, input_size, hidden_size, num_labels, X, y, learning_rate):
    """Regularized cost: cross-entropy plus an L2 penalty on all non-bias weights.

    y must be one-hot encoded. learning_rate plays the role of the
    regularization strength lambda. Returns the scalar cost J.
    """
    # BUG FIX: the original read `m` from the enclosing/global scope, which
    # silently ties the function to the notebook's global state (and breaks
    # for any other dataset size). Derive it from X instead.
    m = X.shape[0]
    J = cost(theta1, theta2, input_size, hidden_size, num_labels, X, y, learning_rate)
    # Penalize every weight except the bias column (index 0) of each layer.
    J += (float(learning_rate) / (2 * m)) * (np.sum(np.power(theta1[:,1:], 2)) + np.sum(np.power(theta2[:,1:], 2)))
    return J
costReg(theta1, theta2, input_size, hidden_size, num_labels, X, y_onehot, learning_rate)
反向传播
sigmoid梯度
def sigmoid_gradient(z):
    """Element-wise derivative of the sigmoid: g'(z) = g(z) * (1 - g(z))."""
    g = sigmoid(z)
    return np.multiply(g, 1 - g)
反向传播神经网络
def backprop(params, input_size, hidden_size, num_labels, X, y, learning_rate):
    """Regularized cost and gradient of the 3-layer network, for use with
    scipy.optimize.minimize(jac=True).

    params: flat vector holding theta1 and theta2 unrolled.
    y: one-hot encoded labels, shape (m, num_labels).
    learning_rate: regularization strength lambda.
    Returns (J, grad) where grad is the unrolled gradient vector.
    """
    # Initialization
    m=X.shape[0]
    X=np.matrix(X)
    y=np.matrix(y)
    # Unroll params into the two per-layer weight matrices.
    theta1 = np.matrix(np.reshape(params[:hidden_size * (input_size + 1)],(hidden_size, (input_size + 1))))
    theta2 = np.matrix(np.reshape(params[hidden_size * (input_size + 1):], (num_labels, (hidden_size + 1))))
    a1, z2, a2, z3, h = forward_propagate(X, theta1, theta2)
    J = 0
    delta1=np.zeros(theta1.shape)# (25, 401) gradient accumulator for layer 1
    delta2=np.zeros(theta2.shape)# (10, 26) gradient accumulator for layer 2
    # Regularized cost (same computation as costReg).
    for i in range(m):
        first_term = np.multiply(-y[i,:],np.log(h[i,:]))
        second_term = np.multiply((1 - y[i,:]),np.log(1 - h[i,:]))
        J += np.sum(first_term - second_term)
    J = J / m
    J += (float(learning_rate) / (2 * m)) * (np.sum(np.power(theta1[:,1:], 2)) + np.sum(np.power(theta2[:,1:], 2)))
    # Back-propagate the error one training example at a time.
    for t in range(m):
        a1t=a1[t,:]# (1, 401)
        z2t=z2[t,:]# (1, 25)
        a2t=a2[t,:]# (1, 26)
        ht=h[t,:] # (1, 10)
        yt=y[t,:] # (1, 10)
        d3t=ht-yt # (1, 10) output-layer error (original comment wrongly said (1, 26))
        z2t=np.insert(z2t, 0, values=np.ones(1)) # (1, 26) prepend bias so the shape matches theta2's columns
        d2t=np.multiply(d3t*theta2,sigmoid_gradient(z2t))# (1, 26) hidden-layer error
        delta1=delta1+(d2t[:,1:]).T*a1t # drop d2t's bias component before accumulating
        delta2=delta2+d3t.T*a2t
    delta1 = delta1 / m # accumulated over m examples, so take the average
    delta2 = delta2 / m
    # Regularize the gradient; the bias column (index 0) is excluded.
    delta1[:,1:]=delta1[:,1:]+(theta1[:,1:] * learning_rate)/ m
    delta2[:,1:]=delta2[:,1:]+(theta2[:,1:] * learning_rate)/ m
    # Unroll both gradient matrices back into one flat vector
    # (np.concatenate joins the raveled arrays).
    grad=np.concatenate((np.ravel(delta1),np.ravel(delta2)))
    return J, grad
# Sanity-check backprop with the random initial parameters.
# BUG FIX: backprop expects one-hot labels (it indexes y[i,:] as a (1, 10)
# row); the original passed the raw (5000, 1) label column, which broadcasts
# silently and yields a wrong cost and gradient. Pass y_onehot, as the
# minimize() call below already does.
J, grad = backprop(params, input_size, hidden_size, num_labels, X, y_onehot, learning_rate)
J, grad.shape
使用工具库计算参数最优解
from scipy.optimize import minimize
# Minimize the objective function (cost + gradient supplied jointly via jac=True).
# NOTE(review): backpropReg is not defined anywhere in this file; backprop
# above already includes the regularization term, so backpropReg is presumably
# an identical/variant function defined elsewhere in the notebook -- confirm,
# or substitute backprop here.
fmin = minimize(fun=backpropReg, x0=(params), args=(input_size, hidden_size, num_labels, X, y_onehot, learning_rate),
                method='TNC', jac=True, options={'maxiter': 250})
fmin
# Forward-propagate with the optimized parameters to obtain predictions.
X = np.matrix(X)
# Unroll the optimized flat parameter vector fmin.x into the weight matrices.
theta1 = np.matrix(np.reshape(fmin.x[:hidden_size * (input_size + 1)], (hidden_size, (input_size + 1))))
theta2 = np.matrix(np.reshape(fmin.x[hidden_size * (input_size + 1):], (num_labels, (hidden_size + 1))))
a1, z2, a2, z3, h = forward_propagate(X, theta1, theta2)
# argmax over the 10 output units gives a column index 0..9; +1 maps it back
# to the label convention 1..10 used by y.
y_pred = np.array(np.argmax(h, axis=1) + 1)
y_pred
预测
# Training-set accuracy: fraction of predicted labels that match the truth.
matches = [int(a == b) for (a, b) in zip(y_pred, y)]
accuracy = sum(matches) / float(len(matches))
print ('accuracy = {0}%'.format(accuracy * 100))