Andrew Ng Machine Learning Assignment 4: Backpropagation Neural Network

Neural network

For this exercise we will again work with the handwritten digit dataset, this time using a feed-forward neural network trained with backpropagation, so that the network parameters are learned automatically.
The data is the same as in ex3: 5000 handwritten digit images of 20x20 pixels each, together with their labels (digits 1-9, with the digit 0 stored as label 10).

Importing the data

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib
from scipy.io import loadmat
from sklearn.preprocessing import OneHotEncoder
data = loadmat(r"E:\shujuji\ex4data1.mat")  # raw string so the backslashes are not treated as escapes
data

[Output: the dictionary loaded from ex4data1.mat, containing the 'X' and 'y' arrays]

X=data["X"]
y=data["y"]
X.shape,y.shape

((5000, 400), (5000, 1))

Sigmoid

def sigmoid(z):
    return 1/(1+np.exp(-z))

Forward propagation

[Figure: the three-layer network architecture and the forward-propagation equations]
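Restating the forward-propagation equations from the assignment (the original images did not survive): with g the sigmoid function and a bias unit of 1 prepended to the input and to the hidden activations,

$$a^{(1)} = x,\qquad z^{(2)} = \Theta^{(1)} a^{(1)},\qquad a^{(2)} = g\big(z^{(2)}\big),\qquad z^{(3)} = \Theta^{(2)} a^{(2)},\qquad h_\Theta(x) = a^{(3)} = g\big(z^{(3)}\big)$$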

def forward_propagate(X, theta1, theta2):
    m = X.shape[0]  # number of examples
    a1 = np.insert(X, 0, values=np.ones(m), axis=1)
    # np.insert(arr, obj, values, axis): insert values into arr at position obj;
    # axis=1 inserts a column, axis=0 inserts a row.
    # np.ones(m) creates a length-m vector of ones (the bias units).
    # '*' is matrix multiplication here because X, theta1 and theta2 are np.matrix objects.
    z2 = a1 * theta1.T
    a2 = np.insert(sigmoid(z2), 0, values=np.ones(m), axis=1)
    z3 = a2 * theta2.T
    h = sigmoid(z3)
    return a1, z2, a2, z3, h
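A side note: `*` above performs matrix multiplication only because the inputs are converted to np.matrix before the call. A minimal sketch of the same function with plain ndarrays (my own variant, not part of the assignment) would use the `@` operator instead:

def forward_propagate_ndarray(X, theta1, theta2):
    # identical computation for plain ndarrays; @ is matrix multiplication
    m = X.shape[0]
    a1 = np.insert(X, 0, values=np.ones(m), axis=1)            # add the bias column
    z2 = a1 @ theta1.T
    a2 = np.insert(sigmoid(z2), 0, values=np.ones(m), axis=1)  # add the bias column
    z3 = a2 @ theta2.T
    return a1, z2, a2, z3, sigmoid(z3)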

Cost function

[Figure: the cross-entropy cost function]
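Restating the cost function from the assignment (the original image did not survive): for K = 10 classes and one-hot labels y^{(i)},

$$J(\Theta) = \frac{1}{m}\sum_{i=1}^{m}\sum_{k=1}^{K}\Big[-y^{(i)}_k \log\big(h_\Theta(x^{(i)})\big)_k - \big(1-y^{(i)}_k\big)\log\Big(1-\big(h_\Theta(x^{(i)})\big)_k\Big)\Big]$$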

def cost(theta1, theta2, input_size, hidden_size, num_labels, X, y, learning_rate):
    m = X.shape[0]
    X = np.matrix(X)
    y = np.matrix(y)
    a1, z2, a2, z3, h = forward_propagate(X, theta1, theta2)
    J = 0
    for i in range(m):
        first = np.multiply(-y[i,:], np.log(h[i,:]))
        second = np.multiply(1 - y[i,:], np.log(1 - h[i,:]))
        J = J + np.sum(first - second)
    J = J / m
    return J
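For comparison, the per-example loop can be collapsed into a single vectorized expression (a sketch of my own; it assumes y is already one-hot encoded, as done in the next step, and returns the same value as cost):

def cost_vectorized(theta1, theta2, X, y_onehot):
    # cross-entropy over all m examples at once; y_onehot has shape (m, K)
    m = X.shape[0]
    a1, z2, a2, z3, h = forward_propagate(np.matrix(X), theta1, theta2)
    return np.sum(np.multiply(-y_onehot, np.log(h)) - np.multiply(1 - y_onehot, np.log(1 - h))) / m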

One-hot encoding the labels y

This uses the sklearn encoder I just learned about; for details see the link: sklearn one-hot encoding in data preprocessing.

encoder = OneHotEncoder(sparse=False)  # in newer scikit-learn (>= 1.2) this argument is named sparse_output
y_onehot = encoder.fit_transform(y)
# One-hot encode the y labels: a class label n (out of k classes) becomes a length-k vector
# with a 1 ("hot") at index n and 0 everywhere else.
y_onehot.shape

(5000, 10)
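A quick sanity check (my own addition): each row of y_onehot should contain a single 1, at index y - 1, since the labels run from 1 to 10:

# the hot index in every row should be the original label minus 1
assert np.array_equal(np.argmax(y_onehot, axis=1) + 1, y.ravel())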

Initialization

Option 1: randomly initialize theta1 and theta2

input_size = 400
hidden_size = 25
num_labels = 10
learning_rate = 1
# randomly initialize one flat parameter array covering the full network
params = (np.random.random(size=hidden_size * (input_size + 1) + num_labels * (hidden_size + 1)) - 0.5) * 0.25
# np.random.random draws uniformly from [0, 1); shifting by 0.5 and scaling by 0.25
# gives small random values in [-0.125, 0.125)
m = X.shape[0]
X = np.matrix(X)
y = np.matrix(y)
# unroll the parameter array into the per-layer weight matrices; reshape sets the dimensions
theta1 = np.matrix(np.reshape(params[:hidden_size * (input_size + 1)], (hidden_size, (input_size + 1))))
theta2 = np.matrix(np.reshape(params[hidden_size * (input_size + 1):], (num_labels, (hidden_size + 1))))

a1, z2, a2, z3, h = forward_propagate(X, theta1, theta2)
a1.shape, z2.shape, a2.shape, z3.shape, h.shape

((5000, 401), (5000, 25), (5000, 26), (5000, 10), (5000, 10))

Option 2: load theta1 and theta2 from the pre-trained weights supplied with the dataset

weight = loadmat(r"E:\shujuji/ex4weights.mat")
theta1, theta2 = weight['Theta1'], weight['Theta2']
theta1.shape, theta2.shape

((25, 401), (10, 26))

cost(theta1, theta2, input_size, hidden_size, num_labels, X, y_onehot, learning_rate)

With option 1 (random initialization) the value will vary from run to run.
With option 2 (the provided weights) the cost is 0.2876291651613187.

Regularized cost function

[Figure: the regularized cost function]
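Restating the regularized cost from the assignment (the original image did not survive): the squared weights are penalized, excluding the bias columns, and in the code learning_rate plays the role of λ:

$$J_{reg}(\Theta) = J(\Theta) + \frac{\lambda}{2m}\left[\sum_{j,k}\big(\Theta^{(1)}_{j,k}\big)^2 + \sum_{j,k}\big(\Theta^{(2)}_{j,k}\big)^2\right]\quad(\text{bias columns excluded})$$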

def costReg(theta1, theta2, input_size, hidden_size, num_labels, X, y, learning_rate):
    # note: learning_rate is used here as the regularization parameter lambda
    m = X.shape[0]
    J = cost(theta1, theta2, input_size, hidden_size, num_labels, X, y, learning_rate)
    J += (float(learning_rate) / (2 * m)) * (np.sum(np.power(theta1[:,1:], 2)) + np.sum(np.power(theta2[:,1:], 2)))
    return J
costReg(theta1, theta2, input_size, hidden_size, num_labels, X, y_onehot, learning_rate)

Backpropagation

Sigmoid gradient

[Figure: the sigmoid gradient formula]
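Restating the formula from the assignment (the original image did not survive):

$$g'(z) = \frac{d}{dz}\,g(z) = g(z)\big(1 - g(z)\big),\qquad g(z) = \frac{1}{1+e^{-z}}$$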

def sigmoid_gradient(z):
    return np.multiply(sigmoid(z),(1 - sigmoid(z)))   
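As a quick check, the gradient is largest at z = 0, where it equals 0.25:

sigmoid_gradient(0)   # 0.5 * (1 - 0.5) = 0.25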

Backpropagation for the neural network

[Figure: the backpropagation algorithm]
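Restating the per-example backpropagation equations from the assignment (the original image did not survive): the output-layer error, the hidden-layer error, and the gradient accumulators are

$$\delta^{(3)} = a^{(3)} - y,\qquad \delta^{(2)} = \big(\Theta^{(2)}\big)^T \delta^{(3)} \circ g'\big(z^{(2)}\big),\qquad \Delta^{(l)} := \Delta^{(l)} + \delta^{(l+1)}\big(a^{(l)}\big)^T$$

After the loop, the regularized gradient is $D^{(l)} = \frac{1}{m}\Delta^{(l)} + \frac{\lambda}{m}\Theta^{(l)}$, with the regularization term skipped for the bias column.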

def backprop(params, input_size, hidden_size, num_labels, X, y, learning_rate):
    # initialization; note: learning_rate is used as the regularization parameter lambda
    m = X.shape[0]
    X = np.matrix(X)
    y = np.matrix(y)

    theta1 = np.matrix(np.reshape(params[:hidden_size * (input_size + 1)], (hidden_size, (input_size + 1))))
    theta2 = np.matrix(np.reshape(params[hidden_size * (input_size + 1):], (num_labels, (hidden_size + 1))))
    a1, z2, a2, z3, h = forward_propagate(X, theta1, theta2)

    J = 0
    delta1 = np.zeros(theta1.shape)  # (25, 401)
    delta2 = np.zeros(theta2.shape)  # (10, 26)
    # cost (same computation as costReg above)
    for i in range(m):
        first_term = np.multiply(-y[i,:], np.log(h[i,:]))
        second_term = np.multiply((1 - y[i,:]), np.log(1 - h[i,:]))
        J += np.sum(first_term - second_term)
    J = J / m
    J += (float(learning_rate) / (2 * m)) * (np.sum(np.power(theta1[:,1:], 2)) + np.sum(np.power(theta2[:,1:], 2)))
    # J now holds the regularized cost

    # backpropagation: accumulate the gradient one example at a time
    for t in range(m):
        a1t = a1[t,:]  # (1, 401)
        z2t = z2[t,:]  # (1, 25)
        a2t = a2[t,:]  # (1, 26)
        ht = h[t,:]    # (1, 10)
        yt = y[t,:]    # (1, 10)
        d3t = ht - yt  # (1, 10) output-layer error
        z2t = np.insert(z2t, 0, values=np.ones(1))  # (1, 26) prepend a bias term so shapes match theta2
        d2t = np.multiply(d3t * theta2, sigmoid_gradient(z2t))  # (1, 26) hidden-layer error
        delta1 = delta1 + (d2t[:,1:]).T * a1t  # drop the bias error term
        delta2 = delta2 + d3t.T * a2t
    delta1 = delta1 / m  # average over the m examples
    delta2 = delta2 / m

    # regularize the gradient (bias column excluded)
    delta1[:,1:] = delta1[:,1:] + (theta1[:,1:] * learning_rate) / m
    delta2[:,1:] = delta2[:,1:] + (theta2[:,1:] * learning_rate) / m
    # np.concatenate joins the unrolled gradients into one flat vector
    grad = np.concatenate((np.ravel(delta1), np.ravel(delta2)))

    return J, grad
J, grad = backprop(params, input_size, hidden_size, num_labels, X, y_onehot, learning_rate)
J,grad.shape
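J should be the regularized cost for the current random params, and grad is the unrolled gradient with one entry per parameter, i.e. 25 * 401 + 10 * 26 = 10285 entries, the same length as params.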

Using scipy.optimize to find the optimal parameters

from scipy.optimize import minimize
# minimize the objective function
fmin = minimize(fun=backprop, x0=(params), args=(input_size, hidden_size, num_labels, X, y_onehot, learning_rate), 
                method='TNC', jac=True, options={'maxiter': 250})
fmin


X = np.matrix(X)
theta1 = np.matrix(np.reshape(fmin.x[:hidden_size * (input_size + 1)], (hidden_size, (input_size + 1))))
theta2 = np.matrix(np.reshape(fmin.x[hidden_size * (input_size + 1):], (num_labels, (hidden_size + 1))))

a1, z2, a2, z3, h = forward_propagate(X, theta1, theta2)
y_pred = np.array(np.argmax(h, axis=1) + 1)  # +1 because the labels are 1-indexed (10 stands for the digit 0)
y_pred

Prediction

correct = [1 if a == b else 0 for (a, b) in zip(y_pred, y)]
accuracy = (sum(map(int, correct)) / float(len(correct)))
print ('accuracy = {0}%'.format(accuracy * 100))
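The same accuracy can be computed in one vectorized line (an equivalent alternative, not part of the original assignment code):

accuracy = float(np.mean(y_pred == np.array(y)))  # element-wise comparison, then the mean of the booleans
print('accuracy = {0}%'.format(accuracy * 100))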