神经网络
对于这个练习,我们将再次处理手写数字数据集。这次使用反向传播的前馈神经网络,自动学习神经网络的参数。
这部分和ex3里是一样的,5000张20*20像素的手写数字数据集,以及对应的数字(1-9,0对应10)
导入数据
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib
from scipy.io import loadmat
from sklearn.preprocessing import OneHotEncoder
# Load the MATLAB-format dataset: 5000 flattened 20x20 digit images plus labels.
# BUG FIX: use a raw string for the Windows path — "\s" and "\e" only work in a
# plain string because they happen not to be escape sequences, and emit a
# SyntaxWarning (future error) on modern Python.
data = loadmat(r"E:\shujuji\ex4data1.mat")
data
X = data["X"]  # (5000, 400) pixel features
y = data["y"]  # (5000, 1) labels 1..10 (10 encodes digit 0)
X.shape, y.shape
((5000, 400), (5000, 1))
sigmoid
def sigmoid(z):
    """Element-wise logistic function g(z) = 1 / (1 + e^-z)."""
    exp_neg = np.exp(-z)
    return 1.0 / (1.0 + exp_neg)
前向传播
def forward_propagate(X, theta1, theta2):
    """Forward pass through the 3-layer network.

    X: (m, input) design matrix; theta1: (hidden, input+1); theta2: (labels, hidden+1).
    Returns (a1, z2, a2, z3, h): activations and pre-activations per layer,
    h being the (m, labels) output hypothesis.
    """
    m = X.shape[0]  # number of examples
    # np.insert(arr, obj, values, axis): prepend a column (axis=1) of ones —
    # the bias unit — at position 0.
    a1 = np.insert(X, 0, values=np.ones(m), axis=1)
    # GENERALIZATION: use `@` (matmul) instead of `*`. `*` is matrix multiply
    # only for np.matrix operands; for plain ndarrays it is element-wise,
    # which would silently compute the wrong thing. `@` is correct for both.
    z2 = a1 @ theta1.T
    a2 = np.insert(sigmoid(z2), 0, values=np.ones(m), axis=1)  # bias for hidden layer
    z3 = a2 @ theta2.T
    h = sigmoid(z3)
    return a1, z2, a2, z3, h
代价函数
def cost(theta1, theta2, input_size, hidden_size, num_labels, X, y, learning_rate):
    """Unregularized cross-entropy cost of the network over all m examples.

    y must be one-hot encoded, shape (m, num_labels), matching h.
    learning_rate is unused here; it is kept for signature parity with
    costReg/backprop. Returns the scalar cost J.
    """
    m = X.shape[0]
    X = np.matrix(X)
    y = np.matrix(y)
    a1, z2, a2, z3, h = forward_propagate(X, theta1, theta2)
    # PERF: the original summed example-by-example in a Python loop; the
    # cross-entropy is computed here in one vectorized expression instead.
    J = np.sum(np.multiply(-y, np.log(h)) - np.multiply(1 - y, np.log(1 - h))) / m
    return J
one-hot 标签 y
是我刚刚在 sklearn 中学的,具体可参考 sklearn 官方文档中 OneHotEncoder(数据预处理)的相关说明。
# One-hot encode the labels: class k (1..10) becomes a length-10 indicator
# vector with a 1 at index k-1 and 0 elsewhere.
# BUG FIX: sklearn >= 1.2 renamed `sparse` to `sparse_output` and removed
# `sparse` in 1.4, so `OneHotEncoder(sparse=False)` now raises TypeError.
# Try the new keyword first and fall back for old sklearn versions.
try:
    encoder = OneHotEncoder(sparse_output=False)
except TypeError:
    encoder = OneHotEncoder(sparse=False)
y_onehot = encoder.fit_transform(y)
y_onehot.shape
(5000, 10)
初始化
第一种:随机生成theta1,theta2
# Network architecture and regularization hyperparameters.
input_size = 400   # 20x20 pixel images, flattened
hidden_size = 25
num_labels = 10
learning_rate = 1
# Randomly initialize one flat parameter array covering both weight matrices.
params = (np.random.random(size=hidden_size * (input_size + 1) + num_labels * (hidden_size + 1)) - 0.5) * 0.25
# np.random.random draws uniform floats in [0, 1); shifting by -0.5 and
# scaling by 0.25 gives values in (-0.125, 0.125).
m = X.shape[0]
X = np.matrix(X)
y = np.matrix(y)
# Unroll the flat parameter array into the per-layer weight matrices
# (np.reshape re-interprets the slice with the given dimensions).
theta1 = np.matrix(np.reshape(params[:hidden_size * (input_size + 1)], (hidden_size, (input_size + 1))))
theta2 = np.matrix(np.reshape(params[hidden_size * (input_size + 1):], (num_labels, (hidden_size + 1))))
a1, z2, a2, z3, h = forward_propagate(X, theta1, theta2)
a1.shape, z2.shape, a2.shape, z3.shape, h.shape
((5000, 401), (5000, 25), (5000, 26), (5000, 10), (5000, 10))
第二种:根据数据集所给权重来生成theta1,theta2
# Alternative initialization: load the pre-trained weights shipped with the
# exercise. BUG FIX: raw string for the Windows path — "\s" in a plain string
# only works because it is not an escape sequence (SyntaxWarning on modern
# Python). The mixed "\" and "/" separators are preserved byte-for-byte.
weight = loadmat(r"E:\shujuji/ex4weights.mat")
theta1, theta2 = weight['Theta1'], weight['Theta2']
theta1.shape, theta2.shape
((25, 401), (10, 26))
cost(theta1, theta2, input_size, hidden_size, num_labels, X, y_onehot, learning_rate)
第一种随机生成结果可能不同
第二种结果:0.2876291651613187
正则化代价函数
def costReg(theta1, theta2, input_size, hidden_size, num_labels, X, y, learning_rate):
    """Regularized cost: cross-entropy plus an L2 penalty on all non-bias weights.

    y must be one-hot encoded. learning_rate plays the role of the
    regularization strength lambda. Returns the scalar cost J.
    """
    # BUG FIX: the original read `m` from the enclosing/global scope, which
    # silently ties the function to the notebook's global state (and breaks
    # for any other dataset size). Derive it from X instead.
    m = X.shape[0]
    J = cost(theta1, theta2, input_size, hidden_size, num_labels, X, y, learning_rate)
    # Penalize every weight except the bias column (index 0) of each layer.
    J += (float(learning_rate) / (2 * m)) * (np.sum(np.power(theta1[:,1:], 2)) + np.sum(np.power(theta2[:,1:], 2)))
    return J
costReg(theta1, theta2, input_size, hidden_size, num_labels, X, y_onehot, learning_rate)
反向传播
sigmoid梯度
def sigmoid_gradient(z):
    """Element-wise derivative of the sigmoid: g'(z) = g(z) * (1 - g(z))."""
    g = sigmoid(z)
    return np.multiply(g, 1 - g)
反向传播神经网络
def backprop(params, input_size, hidden_size, num_labels, X, y, learning_rate):
    """Regularized cost and gradient of the 3-layer network, for use with
    scipy.optimize.minimize(jac=True).

    params: flat vector holding theta1 and theta2 unrolled.
    y: one-hot encoded labels, shape (m, num_labels).
    learning_rate: regularization strength lambda.
    Returns (J, grad) where grad is the unrolled gradient vector.
    """
    # Initialization
    m=X.shape[0]
    X=np.matrix(X)
    y=np.matrix(y)
    # Unroll params into the two per-layer weight matrices.
    theta1 = np.matrix(np.reshape(params[:hidden_size * (input_size + 1)],(hidden_size, (input_size + 1))))
    theta2 = np.matrix(np.reshape(params[hidden_size * (input_size + 1):], (num_labels, (hidden_size + 1))))
    a1, z2, a2, z3, h = forward_propagate(X, theta1, theta2)
    J = 0
    delta1=np.zeros(theta1.shape)# (25, 401) gradient accumulator for layer 1
    delta2=np.zeros(theta2.shape)# (10, 26) gradient accumulator for layer 2
    # Regularized cost (same computation as costReg).
    for i in range(m):
        first_term = np.multiply(-y[i,:],np.log(h[i,:]))
        second_term = np.multiply((1 - y[i,:]),np.log(1 - h[i,:]))
        J += np.sum(first_term - second_term)
    J = J / m
    J += (float(learning_rate) / (2 * m)) * (np.sum(np.power(theta1[:,1:], 2)) + np.sum(np.power(theta2[:,1:], 2)))
    # Back-propagate the error one training example at a time.
    for t in range(m):
        a1t=a1[t,:]# (1, 401)
        z2t=z2[t,:]# (1, 25)
        a2t=a2[t,:]# (1, 26)
        ht=h[t,:] # (1, 10)
        yt=y[t,:] # (1, 10)
        d3t=ht-yt # (1, 10) output-layer error (original comment wrongly said (1, 26))
        z2t=np.insert(z2t, 0, values=np.ones(1)) # (1, 26) prepend bias so the shape matches theta2's columns
        d2t=np.multiply(d3t*theta2,sigmoid_gradient(z2t))# (1, 26) hidden-layer error
        delta1=delta1+(d2t[:,1:]).T*a1t # drop d2t's bias component before accumulating
        delta2=delta2+d3t.T*a2t
    delta1 = delta1 / m # accumulated over m examples, so take the average
    delta2 = delta2 / m
    # Regularize the gradient; the bias column (index 0) is excluded.
    delta1[:,1:]=delta1[:,1:]+(theta1[:,1:] * learning_rate)/ m
    delta2[:,1:]=delta2[:,1:]+(theta2[:,1:] * learning_rate)/ m
    # Unroll both gradient matrices back into one flat vector
    # (np.concatenate joins the raveled arrays).
    grad=np.concatenate((np.ravel(delta1),np.ravel(delta2)))
    return J, grad
# Sanity-check backprop with the random initial parameters.
# BUG FIX: backprop expects one-hot labels (it indexes y[i,:] as a (1, 10)
# row); the original passed the raw (5000, 1) label column, which broadcasts
# silently and yields a wrong cost and gradient. Pass y_onehot, as the
# minimize() call below already does.
J, grad = backprop(params, input_size, hidden_size, num_labels, X, y_onehot, learning_rate)
J, grad.shape
使用工具库计算参数最优解
from scipy.optimize import minimize
# Minimize the objective function (cost + gradient supplied jointly via jac=True).
# NOTE(review): backpropReg is not defined anywhere in this file; backprop
# above already includes the regularization term, so backpropReg is presumably
# an identical/variant function defined elsewhere in the notebook -- confirm,
# or substitute backprop here.
fmin = minimize(fun=backpropReg, x0=(params), args=(input_size, hidden_size, num_labels, X, y_onehot, learning_rate),
                method='TNC', jac=True, options={'maxiter': 250})
fmin
# Forward-propagate with the optimized parameters to obtain predictions.
X = np.matrix(X)
# Unroll the optimized flat parameter vector fmin.x into the weight matrices.
theta1 = np.matrix(np.reshape(fmin.x[:hidden_size * (input_size + 1)], (hidden_size, (input_size + 1))))
theta2 = np.matrix(np.reshape(fmin.x[hidden_size * (input_size + 1):], (num_labels, (hidden_size + 1))))
a1, z2, a2, z3, h = forward_propagate(X, theta1, theta2)
# argmax over the 10 output units gives a column index 0..9; +1 maps it back
# to the label convention 1..10 used by y.
y_pred = np.array(np.argmax(h, axis=1) + 1)
y_pred
预测
# Training-set accuracy: fraction of predicted labels that match the truth.
matches = [int(a == b) for (a, b) in zip(y_pred, y)]
accuracy = sum(matches) / float(len(matches))
print ('accuracy = {0}%'.format(accuracy * 100))