吴恩达机器学习第四次作业
这次作业是利用BP神经网络完成手写数字的识别,数据集依然和上次作业相同。
这次课上完感觉有点懵逼,作业参考了一些博客,下面做一个简单记录。
先是对数据集的处理,x的处理依然是增加一列全为1的列,因为本次用到的损失函数和逻辑回归的相同,所以这里对y的处理是进行one-hot编码处理,代码如下。
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib
from scipy.io import loadmat
# Machine-specific location of the MATLAB data file used by this exercise.
path = 'C:/Users/ASUS/Desktop/ex3data1.mat'
data = loadmat(path)
# Notebook-style inspection (no effect when run as a script); the author
# records X as (5000, 400) and y as (5000, 1).
data['X'].shape, data['y'].shape
# Prepend a constant column of ones (the bias input) as column 0 of X.
raw_x = np.insert(data['X'], obj=0, values=1, axis=1)
# One-hot encode the label column y.
def one_hot(y, num_classes=10):
    """Turn 1-based class labels into one-hot target rows.

    Label ``k`` (with ``1 <= k <= num_classes``) sets position ``k - 1`` of
    its row to 1 and leaves every other position at 0.

    Args:
        y: Labels as an (m, 1) column (only the first column is read) or as
            a flat sequence of m labels.
        num_classes: Length of each encoded row. Defaults to 10, the value
            that was previously hard-coded.

    Returns:
        A list of m one-dimensional float arrays of length ``num_classes``.

    Raises:
        ValueError: If any label lies outside ``1..num_classes``.
    """
    labels = np.asarray(y)
    if labels.ndim == 0:
        labels = labels.reshape(1)
    elif labels.ndim > 1:
        # Mirror the original per-row ``i[0]`` access: keep the first entry.
        labels = labels[(slice(None),) + (0,) * (labels.ndim - 1)]
    # Cast before subtracting 1 so unsigned dtypes (e.g. uint8) cannot wrap.
    labels = labels.astype(np.intp)

    if labels.size and (labels.min() < 1 or labels.max() > num_classes):
        raise ValueError(
            "labels must lie in 1..%d, got range %d..%d"
            % (num_classes, labels.min(), labels.max())
        )

    encoded = np.zeros((labels.size, num_classes))
    encoded[np.arange(labels.size), labels - 1] = 1
    # Callers receive a list of per-sample rows, as before.
    return list(encoded)
# One-hot encode the label column so each sample gets its own target row.
raw_y = one_hot(data['y'])
本次的权重参数θ1的形状是(25, 401)，θ2的形状是(10, 26)，一共 25×401+10×26=10285 个参数。因为后面用到的优化函数要求传入的θ是一维数组，所以这里先要写一个序列化函数，把两个权重矩阵展开并拼接成一维；同时还需要一个反序列化函数，把一维数组恢复成原有的形状，代码如下。
# Serialize the weights: the optimizer used later needs theta with shape (n,).
def serialize(a, b):
    """Flatten two weight arrays and join them into one 1-D vector.

    Args:
        a: First array-like (an ndarray or a nested sequence); flattened in
            row-major order.
        b: Second array-like; flattened the same way and appended after
            ``a``.

    Returns:
        A new 1-D ndarray holding every element of ``a`` followed by every
        element of ``b``.
    """
    # np.ravel accepts any array-like, unlike the ndarray-only .flatten().
    return np.concatenate((np.ravel(a), np.ravel(b)))
#theta_serialize = serialize(theta1,theta2)
# Deserialize the weights: split the flat vector back into two matrices.
def deserialize(theta_serialize, input_size=400, hidden_size=25, num_labels=10):
    """Rebuild the two weight matrices from a flat parameter vector.

    Args:
        theta_serialize: 1-D array-like holding theta1 followed by theta2,
            both flattened in row-major order.
        input_size: Number of input features, bias excluded. Defaults to 400.
        hidden_size: Number of hidden units, bias excluded. Defaults to 25.
        num_labels: Number of output units. Defaults to 10.

    Returns:
        ``(theta1, theta2)`` with shapes ``(hidden_size, input_size + 1)``
        and ``(num_labels, hidden_size + 1)``; with the defaults these are
        (25, 401) and (10, 26).

    Raises:
        ValueError: If ``theta_serialize`` is not 1-D or its length does not
            match the requested layer sizes.
    """
    theta_serialize = np.asarray(theta_serialize)
    split = hidden_size * (input_size + 1)
    expected = split + num_labels * (hidden_size + 1)
    if theta_serialize.ndim != 1 or theta_serialize.size != expected:
        raise ValueError(
            "expected a 1-D vector of length %d, got shape %s"
            % (expected, theta_serialize.shape)
        )
    theta1 = np.reshape(theta_serialize[:split], (hidden_size, input_size + 1))
    theta2 = np.reshape(theta_serialize[split:], (num_labels, hidden_size + 1))
    return theta1, theta2
#theta1,theta2 = deserialize(theta_serialize )
然后完成前向传播部分,代码如下。
def sigmoid(z):
    """Evaluate the logistic function 1 / (1 + exp(-z)) element-wise.

    The value is computed as ``exp(-log(1 + exp(-z)))`` through
    ``np.logaddexp``, so large negative inputs do not overflow inside
    ``exp`` the way the direct formula does.

    Args:
        z: Scalar or array-like of real numbers; converted to float first so
            that negating unsigned integer input cannot wrap around.

    Returns:
        Values in [0, 1] with the same shape as ``z``.
    """
    z = np.asarray(z, dtype=float)
    # logaddexp(0, -z) == log(exp(0) + exp(-z)) == log(1 + exp(-z)).
    return np.exp(-np.logaddexp(0, -z))
def feed_forward(X, theta_serialize):
    """Run one forward pass through the input -> hidden -> output network.

    Args:
        X: Input matrix with the bias column already prepended; it is
            multiplied by ``theta1.T``, so its column count must equal the
            number of columns of theta1.
        theta_serialize: Flat parameter vector understood by ``deserialize``.

    Returns:
        The tuple ``(a1, z2, a2, z3, h)``: the input itself, the hidden
        pre-activation, the hidden activation with a leading column of ones,
        the output pre-activation, and the output activation.
    """
    theta1, theta2 = deserialize(theta_serialize)
    hidden_in = X @ theta1.T
    # Column 0 of the hidden activation is the constant bias unit.
    hidden_out = np.insert(sigmoid(hidden_in), 0, 1, axis=1)
    output_in = hidden_out @ theta2.T
    return X, hidden_in, hidden_out, output_in, sigmoid(output_in)
之后是损失函数部分,代码如下。这里写了无正则化的损失函数和含正则化的损失函数。
# Cost function without regularization.
def cost(theta_serialize, X, y):
    """Compute the mean cross-entropy cost of the network on ``(X, y)``.

    Args:
        theta_serialize: Flat parameter vector passed on to ``feed_forward``.
        X: Input matrix, one sample per row, bias column included.
        y: One-hot targets, one row per sample (an array or a list of rows).

    Returns:
        The summed cross-entropy over all samples and outputs divided by
        ``len(X)``.
    """
    h = feed_forward(X, theta_serialize)[-1]
    targets = np.asarray(y)
    # A saturated sigmoid yields exactly 0 or 1; then 0 * log(0) is NaN and
    # poisons the whole sum. Nudge only those exact endpoints inside (0, 1);
    # every other value passes through unchanged.
    h = np.clip(h, np.finfo(float).tiny, np.nextafter(1.0, 0.0))
    J = -np.sum(targets * np.log(h) + (1 - targets) * np.log(1 - h)) / len(X)
    return J
def cost_reg(theta_serialize, X, y, lamda):
    """Compute the cross-entropy cost plus an L2 penalty on the weights.

    Args:
        theta_serialize: Flat parameter vector understood by ``deserialize``.
        X: Input matrix, one sample per row.
        y: One-hot targets, forwarded unchanged to ``cost``.
        lamda: Regularization strength.

    Returns:
        ``cost(theta_serialize, X, y)`` plus
        ``lamda / (2 * len(X))`` times the sum of squared non-bias weights.
    """
    theta1, theta2 = deserialize(theta_serialize)
    m = len(X)
    # Column 0 of each matrix multiplies the bias unit and is not penalized.
    squared_weights = np.square(theta1[:, 1:]).sum() + np.square(theta2[:, 1:]).sum()
    penalty = squared_weights * lamda / (2 * m)
    return penalty + cost(theta_serialize, X, y)
接下来是反向传播求梯度的部分，这也是我最懵的一部分。
def sigmoid_gra(z):
    """Return the derivative of the logistic function evaluated at ``z``.

    Uses the identity sigmoid'(z) = sigmoid(z) * (1 - sigmoid(z)).
    """
    activation = sigmoid(z)
    return activation * (1 - activation)
def gradient(theta_serialize, X, y):
    """Backpropagate to get the unregularized gradient of the cost.

    Args:
        theta_serialize: Flat parameter vector understood by ``deserialize``.
        X: Input matrix, one sample per row, bias column included.
        y: One-hot targets with one row per sample.

    Returns:
        A flat vector with the gradient for theta1 followed by the gradient
        for theta2, in the same layout ``serialize`` produces.
    """
    _, theta2 = deserialize(theta_serialize)
    a1, z2, a2, _, h = feed_forward(X, theta_serialize)
    m = len(X)

    # Output-layer error; the original notes h as (5000, 10).
    delta_out = h - y
    # Push the error back through theta2, dropping its bias column, then
    # scale by the hidden layer's activation derivative.
    delta_hidden = (delta_out @ theta2[:, 1:]) * sigmoid_gra(z2)

    grad_theta2 = delta_out.T @ a2 / m      # same shape as theta2
    grad_theta1 = delta_hidden.T @ a1 / m   # same shape as theta1
    return serialize(grad_theta1, grad_theta2)
def gradient_reg(theta_serialize, X, y, lamda):
    """Return the gradient of the L2-regularized cost as a flat vector.

    Args:
        theta_serialize: Flat parameter vector understood by ``deserialize``.
        X: Input matrix, one sample per row.
        y: One-hot targets, forwarded to ``gradient``.
        lamda: Regularization strength.

    Returns:
        The unregularized gradient with ``theta * lamda / len(X)`` added to
        every non-bias entry, serialized as theta1's part then theta2's.
    """
    theta1, theta2 = deserialize(theta_serialize)
    grad_theta1, grad_theta2 = deserialize(gradient(theta_serialize, X, y))
    m = len(X)
    # Column 0 (bias weights) is left unregularized.
    grad_theta2[:, 1:] += theta2[:, 1:] * lamda / m
    grad_theta1[:, 1:] += theta1[:, 1:] * lamda / m
    return serialize(grad_theta1, grad_theta2)
最后是用优化函数进行网络的优化,第一种是不带正则化的,代码如下。
from scipy.optimize import minimize
def training(X, y):
    """Fit the network without regularization using SciPy's TNC method.

    Args:
        X: Training inputs, forwarded to ``cost`` and ``gradient``.
        y: One-hot targets, forwarded to ``cost`` and ``gradient``.

    Returns:
        The result object from ``scipy.optimize.minimize``; its ``x``
        attribute holds the fitted flat parameter vector.
    """
    # 10285 = 25 * 401 + 10 * 26, the length deserialize expects.
    initial_theta = np.random.uniform(-0.5, 0.5, 10285)
    # NOTE(review): confirm the installed SciPy still accepts 'maxiter' for TNC.
    return minimize(
        cost,
        initial_theta,
        args=(X, y),
        method='TNC',
        jac=gradient,
        options={'maxiter': 300},
    )
# Train with at most 300 optimizer iterations, then score on the training set.
res = training(raw_x, raw_y)
_, _, _, _, h = feed_forward(raw_x, res.x)
# Output column k stands for label k + 1, so shift argmax to 1-based labels.
y_pre = h.argmax(axis=1) + 1
acc = (y_pre == data['y'].ravel()).mean()  # the author reports an accuracy of 0.99
第二种是带正则化的,代码如下。
from scipy.optimize import minimize
def training(X, y):
    """Fit the network with L2 regularization using SciPy's TNC method.

    The regularization strength is not a parameter: it is read from the
    module-level name ``lamda`` when this function is called, so that name
    must be assigned before the call.

    Args:
        X: Training inputs, forwarded to ``cost_reg`` and ``gradient_reg``.
        y: One-hot targets, forwarded to ``cost_reg`` and ``gradient_reg``.

    Returns:
        The result object from ``scipy.optimize.minimize``; its ``x``
        attribute holds the fitted flat parameter vector.
    """
    # 10285 = 25 * 401 + 10 * 26, the length deserialize expects.
    initial_theta = np.random.uniform(-0.5, 0.5, 10285)
    # NOTE(review): confirm the installed SciPy still accepts 'maxiter' for TNC.
    return minimize(
        cost_reg,
        initial_theta,
        args=(X, y, lamda),
        method='TNC',
        jac=gradient_reg,
        options={'maxiter': 300},
    )
# Train with at most 300 optimizer iterations and a regularization strength of 10.
lamda = 10
res = training(raw_x, raw_y)
_, _, _, _, h = feed_forward(raw_x, res.x)
# Output column k stands for label k + 1, so shift argmax to 1-based labels.
y_pre = h.argmax(axis=1) + 1
acc = (y_pre == data['y'].ravel()).mean()  # the author reports an accuracy of 0.94