文中的英文文档图片均来自原作业 PDF 的截图,网址:https://www.coursera.org/learn/machine-learning/programming/8f3qT/linear-regression
Neural Networks Learning
本次书写的代码是3层神经网络,即一层隐含层
1.前馈
先将上节课的前馈网络写成函数:
https://blog.csdn.net/Cowry5/article/details/80399350
从大佬的博客中学到:minimize 只接受一维的参数向量,因此 theta1 和 theta2 要先展开并合并成一个向量之后才能传给 minimize。
def serialize(a, b):
    """Flatten two parameter matrices into one 1-D vector.

    Approach taken from https://blog.csdn.net/Cowry5/article/details/80399350

    Returns the row-major elements of ``a`` followed by those of ``b``.
    """
    flat_a = a.flatten()
    flat_b = b.flatten()
    return np.concatenate((flat_a, flat_b))
def deserialize(seq, input_size=400, hidden_size=25, num_labels=10):
    """Split a flat parameter vector back into the two weight matrices.

    Approach taken from https://blog.csdn.net/Cowry5/article/details/80399350

    Args:
        seq: 1-D array holding theta1 followed by theta2 (row-major).
        input_size: number of input features, excluding the bias unit.
        hidden_size: number of hidden units, excluding the bias unit.
        num_labels: number of output units.

    Returns:
        ``(theta1, theta2)`` with shapes ``(input_size + 1, hidden_size)``
        and ``(hidden_size + 1, num_labels)``; with the defaults that is
        (401, 25) and (26, 10), the orientation required by ``X @ theta1``
        in the forward pass.

    Raises:
        ValueError: (from ``reshape``) if ``seq`` has the wrong length.
    """
    n_theta1 = (input_size + 1) * hidden_size
    theta1 = seq[:n_theta1].reshape(input_size + 1, hidden_size)
    theta2 = seq[n_theta1:].reshape(hidden_size + 1, num_labels)
    return theta1, theta2
def feedforwardnn(theta, X):
    """Forward-propagate ``X`` through the 3-layer network.

    Args:
        theta: flat parameter vector, split by ``deserialize``.
        X: input matrix that already contains the constant (bias) column.

    Returns:
        Tuple ``(a1, z2, a2, z3, a3)``: input activations, hidden-layer
        pre-activations, hidden activations with a leading bias column,
        output pre-activations and output activations.
    """
    # Fixed: the original passed the misspelled name `thata` (NameError).
    theta1, theta2 = deserialize(theta)
    a1 = X                                    # input layer
    z2 = X @ theta1                           # hidden-layer input
    a2 = sigmoid(z2)                          # hidden-layer output
    a2 = np.insert(a2, 0, values=1, axis=1)   # prepend the bias column
    z3 = a2 @ theta2                          # output-layer input
    a3 = sigmoid(z3)                          # output-layer output
    return a1, z2, a2, z3, a3
2.修改输出y
y = data['y']  # class labels; y.shape=(5000,1) per the author's note; label "10" is kept as-is
def y_init(y):
    """Convert 1-based class labels into one-hot rows.

    Args:
        y: labels as an ``(m, 1)`` or ``(m,)`` array with values 1..K.

    Returns:
        Float array of shape ``(m, K)`` where K is the number of distinct
        labels in ``y``; row ``r`` has a 1 at column ``y[r] - 1``, so label
        "10" occupies the last column (shape (5000, 10) for the exercise
        data).
    """
    labels = np.asarray(y).ravel().astype(int)
    # Computed once; the original recomputed np.unique for every sample.
    n_classes = np.unique(labels).shape[0]
    onehot = np.zeros((labels.shape[0], n_classes))
    # Fixed: labels are 1-based, so the column is label - 1. The original
    # indexed with the raw label, which overflows for the largest label.
    onehot[np.arange(labels.shape[0]), labels - 1] = 1
    return onehot
即改成 one-hot 向量的形式:每个样本的标签变成一个长度为 10 的向量,只有标签对应的位置为 1,其余为 0(标签“10”对应最后一个位置)。
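3.代价函数:
$$J(\theta)=\frac{1}{m}\sum_{i=1}^{m}\sum_{k=1}^{K}\left[-y_k^{(i)}\log\left(h_\theta(x^{(i)})\right)_k-\left(1-y_k^{(i)}\right)\log\left(1-\left(h_\theta(x^{(i)})\right)_k\right)\right]$$
加上正则项:
$$J(\theta)=\frac{1}{m}\sum_{i=1}^{m}\sum_{k=1}^{K}\left[-y_k^{(i)}\log\left(h_\theta(x^{(i)})\right)_k-\left(1-y_k^{(i)}\right)\log\left(1-\left(h_\theta(x^{(i)})\right)_k\right)\right]+\frac{\lambda}{2m}\left[\sum_{j,k}\left(\Theta^{(1)}_{j,k}\right)^2+\sum_{j,k}\left(\Theta^{(2)}_{j,k}\right)^2\right]$$
其中 $h_\theta(x)$ 是由输出节点的取值组成的 $K$ 维向量($K$ 个输出节点);对全部 $m$ 个样本而言,它是一个 $m\times K$ 的矩阵。
正则项不包括每层偏置单元(常数项 1)对应的权重。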
def cost(theta, X, y, lam):
    """Regularized cross-entropy cost of the 3-layer network.

    Args:
        theta: flat parameter vector (theta1 followed by theta2).
        X: input matrix including the constant (bias) column.
        y: one-hot label matrix, one row per sample.
        lam: regularization strength (lambda).

    Returns:
        Scalar cost: the data term averaged over the samples plus
        ``lam / (2 m)`` times the squared non-bias weights.
    """
    a3 = feedforwardnn(theta, X)[-1]
    eps = 1e-5  # keeps log() finite when an activation saturates at 0 or 1
    data_term = np.sum(-y * np.log(a3 + eps) - (1 - y) * np.log(1 - a3 + eps))

    theta1, theta2 = deserialize(theta)
    # Exclude row 0 (weights of the bias unit) by slicing. The original
    # zeroed that row in place; deserialize returns reshaped slices of
    # `theta`, so that write wiped the bias weights in the caller's vector.
    reg_term = np.sum(np.power(theta1[1:, :], 2)) + np.sum(np.power(theta2[1:, :], 2))

    # Fixed: m is the number of samples (rows of X), not rows - 1.
    m = X.shape[0]
    return data_term / m + lam * reg_term / (2 * m)
初值验证:
# Sanity check: evaluate the cost with lam=1; the recorded run printed the value below.
# NOTE(review): `theta` is not defined in this snippet; presumably the
# pre-trained weights from ex4weights.mat. Confirm.
print(cost(theta,X,y,1))
0.38372982089246455
4.梯度函数
重点:
注意各矩阵的维度。
def gra_g(z):
    """Return the sigmoid derivative at ``z``: sigmoid(z) * (1 - sigmoid(z))."""
    activation = sigmoid(z)
    return activation * (1 - activation)
def gradient(theta, X, y, lam):
    """Regularized gradient of the cost via backpropagation.

    Args:
        theta: flat parameter vector (theta1 followed by theta2).
        X: input matrix including the constant (bias) column.
        y: one-hot label matrix, one row per sample.
        lam: regularization strength (lambda).

    Returns:
        Flat gradient vector laid out like ``theta`` (theta1 part first).
    """
    theta1, theta2 = deserialize(theta)
    a1, z2, a2, z3, a3 = feedforwardnn(theta, X)

    # Fixed: m is the number of samples (rows of X), not rows - 1.
    m = X.shape[0]

    delta3 = a3 - y                                      # output-layer error
    delta2 = (delta3 @ theta2[1:, :].T) * gra_g(z2)      # hidden-layer error (bias row dropped)
    grad2 = a2.T @ delta3 / m                            # same shape as theta2
    grad1 = a1.T @ delta2 / m                            # same shape as theta1

    # Regularize every row except row 0 (bias weights). The original zeroed
    # row 0 of theta1/theta2 in place, which wrote through the reshape view
    # into the caller's `theta`; grad1/grad2 are fresh arrays, so updating
    # them in place is safe.
    grad1[1:, :] += lam / m * theta1[1:, :]
    grad2[1:, :] += lam / m * theta2[1:, :]
    return serialize(grad1, grad2)
梯度算出来之后还需要与理论值进行比对(即梯度检验),这里先空着,后面再回来补 :)
5.随机化初值
def theta_init(a, b):
    """Return an ``(a, b)`` matrix of weights drawn uniformly from [-0.12, 0.12)."""
    bound = 0.12
    return np.random.uniform(low=-bound, high=bound, size=(a, b))
6.使用minimize函数
import random
from scipy.optimize import minimize
def NN(theta, X, y, lam):
    """Train the network by minimizing ``cost`` with SciPy's TNC solver.

    ``theta`` is the initial flat parameter vector; ``X``, ``y`` and ``lam``
    are forwarded unchanged to ``cost`` and ``gradient``. Returns the result
    object produced by ``scipy.optimize.minimize``.
    """
    solver_options = {'maxiter': 400}
    return minimize(
        fun=cost,
        x0=theta,
        args=(X, y, lam),
        method='TNC',
        jac=gradient,
        options=solver_options,
    )
# Random initial weights for both layers, flattened into one vector for the optimizer.
thetainit = serialize(theta_init(401,25), theta_init(26,10))
# NOTE(review): `lam` is not defined in this snippet; the sample output
# below shows it being read from a prompt.
R = NN(thetainit, X, y, lam)
print(R)  # dump the optimizer result
please input the lambda:1
fun: 0.8266844881847328
jac: array([-0.00322697, 0.00046125, 0.00155233, ..., -0.00089185,
0.0005899 , 0.00059701])
message: 'Converged (|f_n-f_(n-1)| ~= 0)'
nfev: 358
nit: 17
status: 1
success: True
x: array([ 0.24906532, -0.68048419, -0.47365931, ..., -1.83687721,
-2.32626506, -1.04057674])
7.预测结果
R = NN(thetainit, X, y, lam)  # train starting from the random initial weights
theta_final = R.x  # solution vector reported by the optimizer
def Prediction(theta_final, sample_x):
    """Predict 1-based class labels for ``sample_x``.

    ``sample_x`` is an ndarray of samples without the constant column; the
    column is inserted here before the forward pass. Returns a 1-D array
    holding, per sample, the index of the largest output activation plus 1.
    """
    with_bias = np.insert(sample_x, 0, values=1, axis=1)
    outputs = feedforwardnn(theta_final, with_bias)[-1]
    return np.argmax(outputs, axis=1) + 1
def Judgement(y1, predic):
    """Return the fraction of predictions that equal the true labels.

    ``predic`` is reshaped into a column so it lines up element-wise with
    ``y1`` (the script passes the ``(m, 1)`` label column).
    """
    predicted_column = predic.reshape(predic.shape[0], 1)
    hits = (y1 == predicted_column)  # True where the prediction is right
    return np.sum(hits) / y1.shape[0]
整合代码:
'''
三层神经网络
'''
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from scipy.io import loadmat
import random
import scipy.special
from scipy.optimize import minimize
def sigmoid(x):
    """Logistic function 1 / (1 + exp(-x)), evaluated with SciPy's ``expit``."""
    logistic = scipy.special.expit
    return logistic(x)
def y_init(y):
    """Convert 1-based class labels into one-hot rows.

    Args:
        y: labels as an ``(m, 1)`` or ``(m,)`` array with values 1..K.

    Returns:
        Float array of shape ``(m, K)`` where K is the number of distinct
        labels in ``y``; row ``r`` has a 1 at column ``y[r] - 1``, so label
        "10" ends up in the last column.
    """
    labels = np.asarray(y).ravel().astype(int)
    # Computed once; the original recomputed np.unique(y) for every sample.
    n_classes = np.unique(labels).shape[0]
    onehot = np.zeros((labels.shape[0], n_classes))
    onehot[np.arange(labels.shape[0]), labels - 1] = 1
    return onehot
def serialize(a, b):
    """Flatten two parameter matrices into one 1-D vector.

    Approach taken from https://blog.csdn.net/Cowry5/article/details/80399350

    Returns the row-major elements of ``a`` followed by those of ``b``.
    """
    flat_a = a.flatten()
    flat_b = b.flatten()
    return np.concatenate((flat_a, flat_b))
def deserialize(seq, input_size=400, hidden_size=25, num_labels=10):
    """Split a flat parameter vector back into the two weight matrices.

    Approach taken from https://blog.csdn.net/Cowry5/article/details/80399350

    Args:
        seq: 1-D array holding theta1 followed by theta2 (row-major).
        input_size: number of input features, excluding the bias unit.
        hidden_size: number of hidden units, excluding the bias unit.
        num_labels: number of output units.

    Returns:
        ``(theta1, theta2)`` with shapes ``(input_size + 1, hidden_size)``
        and ``(hidden_size + 1, num_labels)``; the defaults reproduce the
        original hard-coded (401, 25) and (26, 10).

    Raises:
        ValueError: (from ``reshape``) if ``seq`` has the wrong length.
    """
    n_theta1 = (input_size + 1) * hidden_size
    theta1 = seq[:n_theta1].reshape(input_size + 1, hidden_size)
    theta2 = seq[n_theta1:].reshape(hidden_size + 1, num_labels)
    return theta1, theta2
def theta_init(a, b):
    """Return an ``(a, b)`` matrix of weights drawn uniformly from [-0.12, 0.12)."""
    bound = 0.12
    return np.random.uniform(low=-bound, high=bound, size=(a, b))
def feedforwardnn(theta, X):
    """Forward-propagate ``X`` through the 3-layer network.

    Args:
        theta: flat parameter vector, split by ``deserialize``.
        X: input matrix that already contains the constant (bias) column.

    Returns:
        Tuple ``(a1, z2, a2, z3, a3)``: input activations, hidden-layer
        pre-activations, hidden activations with a leading bias column,
        output pre-activations and output activations.
    """
    w_hidden, w_output = deserialize(theta)

    a1 = X
    z2 = a1 @ w_hidden
    hidden = sigmoid(z2)

    # Prepend a column of ones so the bias row of w_output takes part.
    ones = np.ones((hidden.shape[0], 1), dtype=hidden.dtype)
    a2 = np.hstack((ones, hidden))

    z3 = a2 @ w_output
    a3 = sigmoid(z3)
    return a1, z2, a2, z3, a3
def cost(theta, X, y, lam):
    """Regularized cross-entropy cost of the 3-layer network.

    Args:
        theta: flat parameter vector (theta1 followed by theta2).
        X: input matrix including the constant (bias) column.
        y: one-hot label matrix, one row per sample.
        lam: regularization strength (lambda).

    Returns:
        Scalar cost: the data term averaged over the samples plus
        ``lam / (2 m)`` times the squared non-bias weights.
    """
    a3 = feedforwardnn(theta, X)[-1]
    eps = 1e-5  # keeps log() finite when an activation saturates at 0 or 1
    data_term = np.sum(-y * np.log(a3 + eps) - (1 - y) * np.log(1 - a3 + eps))

    theta1, theta2 = deserialize(theta)
    # Exclude row 0 (weights of the bias unit) by slicing. The original
    # zeroed that row in place; deserialize returns reshaped slices of
    # `theta`, so that write wiped the bias weights in the optimizer's vector.
    reg_term = np.sum(np.power(theta1[1:, :], 2)) + np.sum(np.power(theta2[1:, :], 2))

    # Fixed: m is the number of samples (rows of X), not rows - 1.
    m = X.shape[0]
    return data_term / m + lam * reg_term / (2 * m)
def gra_g(z):
    """Return the sigmoid derivative at ``z``: sigmoid(z) * (1 - sigmoid(z))."""
    activation = sigmoid(z)
    return activation * (1 - activation)
def gradient(theta, X, y, lam):
    """Regularized gradient of the cost via backpropagation.

    Args:
        theta: flat parameter vector (theta1 followed by theta2).
        X: input matrix including the constant (bias) column.
        y: one-hot label matrix, one row per sample.
        lam: regularization strength (lambda).

    Returns:
        Flat gradient vector laid out like ``theta`` (theta1 part first).
    """
    theta1, theta2 = deserialize(theta)
    a1, z2, a2, z3, a3 = feedforwardnn(theta, X)

    # Fixed: m is the number of samples (rows of X), not rows - 1.
    m = X.shape[0]

    delta3 = a3 - y                                      # output-layer error
    delta2 = (delta3 @ theta2[1:, :].T) * gra_g(z2)      # hidden-layer error (bias row dropped)
    grad2 = a2.T @ delta3 / m                            # same shape as theta2
    grad1 = a1.T @ delta2 / m                            # same shape as theta1

    # Regularize every row except row 0 (bias weights). The original zeroed
    # row 0 of theta1/theta2 in place, which wrote through the reshape view
    # into the optimizer's `theta`; grad1/grad2 are fresh arrays, so updating
    # them in place is safe.
    grad1[1:, :] += lam / m * theta1[1:, :]
    grad2[1:, :] += lam / m * theta2[1:, :]
    return serialize(grad1, grad2)
def NN(theta, X, y, lam):
    """Train the network by minimizing ``cost`` with SciPy's TNC solver.

    ``theta`` is the initial flat parameter vector; ``X``, ``y`` and ``lam``
    are forwarded unchanged to ``cost`` and ``gradient``. Returns the result
    object produced by ``scipy.optimize.minimize``.
    """
    solver_options = {'maxiter': 400}
    return minimize(
        fun=cost,
        x0=theta,
        args=(X, y, lam),
        method='TNC',
        jac=gradient,
        options=solver_options,
    )
def Prediction(theta_final, sample_x):
    """Predict 1-based class labels for ``sample_x``.

    ``sample_x`` is an ndarray of samples without the constant column; the
    column is inserted here before the forward pass. Returns a 1-D array
    holding, per sample, the index of the largest output activation plus 1.
    """
    with_bias = np.insert(sample_x, 0, values=1, axis=1)
    outputs = feedforwardnn(theta_final, with_bias)[-1]
    return np.argmax(outputs, axis=1) + 1
def Judgement(y1, predic):
    """Return the fraction of predictions that equal the true labels.

    ``predic`` is reshaped into a column so it lines up element-wise with
    ``y1`` (the script passes the ``(m, 1)`` label column).
    """
    predicted_column = predic.reshape(predic.shape[0], 1)
    hits = (y1 == predicted_column)  # True where the prediction is right
    return np.sum(hits) / y1.shape[0]
def PrintRandom100ImageWithPredict(sample_x, predic, percent):
    """Show up to 100 randomly chosen samples labelled with their prediction.

    Args:
        sample_x: 2-D array with one flattened 20x20 image (400 values) per row.
        predic: predicted label per row of ``sample_x``.
        percent: accuracy as a fraction; shown as a percentage in the title.

    The images are laid out on a 10x10 grid and displayed with ``plt.show()``.
    """
    # Draw indices from the actual number of rows; the original hard-coded
    # range(5000) and so only worked for the full exercise data set.
    n_samples = sample_x.shape[0]
    chosen = random.sample(range(n_samples), min(100, n_samples))

    fig = plt.figure(figsize=(6, 6))
    fig.suptitle('Prediction with the precision rate :' + str(percent*100) + '%')  # figure title
    font = {'family': 'Times New Roman', 'weight': 'normal', 'size': 8}  # label font

    for slot, row in enumerate(chosen, start=1):
        ax = fig.add_subplot(10, 10, slot)
        ax.imshow(sample_x[row, :].reshape((20, 20)), cmap=plt.cm.gray)
        ax.set_xticks([])
        ax.set_yticks([])
        ax.set_xlabel('this is:' + str(int(predic[row])), fontdict=font)
    # plt.subplots_adjust(wspace=0, hspace=-0.05)  # use this to remove the gaps between images
    plt.tight_layout()  # tidy the spacing between subplots
    plt.show()
# --- Driver: load the data, train the network, report accuracy, show samples ---
path = r'D:\Ninachen\wg_machinelearning\machine-learning-ex4\ex4\ex4data1.mat'
data = loadmat(path)  # dict
X1 = data['X']  # ndarray, training set
X = np.insert(X1, 0, values=1, axis=1)  # prepend the constant (bias) column
y1 = data['y']  # class labels, y1.shape=(5000,1)
# y[np.where(y==10)] = 0  # (disabled) change label 10 to 0
y = y_init(y1)  # convert the labels to one-hot vectors
# (disabled) load the pre-trained weights shipped with the exercise:
# path2 = r'D:\Ninachen\wg_machinelearning\machine-learning-ex4\ex4\ex4weights.mat'
# data2 = loadmat(path2) # dict
# theta1 = data2['Theta1'].T
# theta2 = data2['Theta2'].T
# theta = serialize(theta1, theta2)
# Random initial weights for both layers, flattened into one vector.
thetainit = serialize(theta_init(401,25), theta_init(26,10))
lam =float(input('please input the lambda:'))  # regularization strength, read from stdin
R = NN(thetainit, X, y, lam)
theta_final = R.x  # solution vector reported by the optimizer
sample_x = X1  # predict on the training samples themselves (no bias column)
predict = Prediction(theta_final, sample_x)
percent = Judgement(y1, predict)  # accuracy
predict[np.where(predict==10)] = 0  # display label 10 as 0 (done after accuracy is computed)
PrintRandom100ImageWithPredict(sample_x, predict, percent)
结果:
λ=1时:
初值不同,准确率略有不同。