吴恩达机器学习课后习题ex4神经网络（python实现）

最新推荐文章于 2022-09-10 17:22:34 发布

flowerfu

最新推荐文章于 2022-09-10 17:22:34 发布

阅读量1.1k

点赞数

文章标签：神经网络 python numpy 深度学习机器学习

本文链接：https://blog.csdn.net/weixin_41509677/article/details/105228042

版权

神经网络

神经网络
参考

神经网络

在这部分，我们要完成神经网络的反向传播。

#前面和之前一样
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.io import loadmat
import types
# Load the training set; loadmat returns the .mat contents as a dict
data=loadmat("ex4data1.mat")
raw_X,raw_y=data['X'],data['y']
# Prepend a column of ones to the features as the bias term
X=np.insert(raw_X,0,1,axis=1)
X.shape  # (5000,401)
# Load the provided network weights
theta=loadmat("ex4weights.mat")
theta1,theta2=theta['Theta1'],theta['Theta2']  # (25,401) and (10,26)

为了训练神经网络，对y预处理进行one-hot编码，比如有y[0]=10,转换为y[0]=[0,0,0,0,0,0,0,0,0,1],所以y也变成了(5000,10)

def one_hot(y,num_labels=10):
  """Convert 1-based integer class labels into one-hot rows.

  Args:
    y: Array-like of integer labels in ``1..num_labels``. Any shape is
      accepted (for example an ``(m, 1)`` column) and is flattened before
      encoding.
    num_labels: Number of classes, i.e. the width of each one-hot row.

  Returns:
    A float ndarray of shape ``(m, num_labels)`` where row ``r`` holds a 1
    in column ``y[r] - 1`` and 0 everywhere else.

  Raises:
    ValueError: If any label lies outside ``1..num_labels``.
  """
  # Cast to a signed integer type first: labels read from a .mat file may be
  # unsigned, and subtracting 1 from an unsigned 0 would wrap around.
  labels=np.asarray(y).reshape(-1).astype(np.int64)
  # Reject out-of-range labels explicitly; a label of 0 would otherwise be
  # silently encoded as the last class through negative indexing.
  if labels.size and (labels.min()<1 or labels.max()>num_labels):
    raise ValueError("labels must lie in 1..%d" % num_labels)
  result=np.zeros((labels.size,num_labels))
  # One fancy-indexed assignment sets the hot entry of every row at once
  result[np.arange(labels.size),labels-1]=1
  return result

# One-hot encode the raw labels so they can be compared with the network output
y=one_hot(raw_y)
y.shape #(5000,10)

之所以要把参数序列化成一维向量，而不直接使用 $\theta_1$ 和 $\theta_2$，是因为 minimize 函数要求待优化的参数 $\theta$ 是 (n,) 形式的一维数组

# Serialization
def serialize(theta1,theta2):
  """Pack two weight matrices into one flat 1-D parameter vector.

  The elements of theta1 come first (row-major order), followed by the
  elements of theta2.
  """
  flat_parts=(theta1.flatten(),theta2.flatten())
  return np.concatenate(flat_parts)
  
# Sanity check: the packed vector has 25*401 + 10*26 = 10285 entries
serialize(theta1,theta2) #(10285,)
# Deserialization
def deserialize(theta_seralize,input_size=400,hidden_size=25,num_labels=10):
  """Split a flat parameter vector back into the two weight matrices.

  Args:
    theta_seralize: 1-D array holding the elements of theta1 followed by
      the elements of theta2, as produced by serialize().
    input_size: Number of input features, not counting the bias column.
    hidden_size: Number of hidden units, not counting the bias unit.
    num_labels: Number of output units.

  Returns:
    A tuple ``(theta1, theta2)`` with shapes
    ``(hidden_size, input_size + 1)`` and ``(num_labels, hidden_size + 1)``.
    With the default sizes these are (25, 401) and (10, 26).

  Raises:
    ValueError: If the vector length does not match the requested sizes.
  """
  theta_seralize=np.asarray(theta_seralize)
  # The first hidden_size*(input_size+1) entries belong to theta1
  split=hidden_size*(input_size+1)
  expected=split+num_labels*(hidden_size+1)
  if theta_seralize.size!=expected:
    raise ValueError(
      "expected %d parameters, got %d" % (expected,theta_seralize.size))
  theta1=theta_seralize[:split].reshape(hidden_size,input_size+1)
  theta2=theta_seralize[split:].reshape(num_labels,hidden_size+1)
  return theta1,theta2

def sigmoid(z):
  """Logistic function 1 / (1 + e^(-z)), applied element-wise."""
  denominator=1+np.exp(-z)
  return 1/denominator

def feed_forward(theta_seralize,x):
  """Run one forward pass through the input -> hidden -> output network.

  Args:
    theta_seralize: Flat parameter vector; deserialize() splits it into
      theta1 (25,401) and theta2 (10,26).
    x: Input matrix that already contains the bias column, e.g. (5000,401).

  Returns:
    A tuple ``(a1, z2, a2, z3, h)``: the input activations, the hidden
    pre-activations, the hidden activations with a bias column prepended,
    the output pre-activations, and the network output.
  """
  # (25,401)  (10,26)
  theta1,theta2=deserialize(theta_seralize)
  a1=x  # (5000,401)
  z2=x@theta1.T  # (5000,25)
  a2=sigmoid(z2)
  # Prepend the bias unit to the hidden layer -> a2 is (5000,26)
  a2=np.insert(a2,0,1,axis=1)
  z3=a2@theta2.T
  h=sigmoid(z3)  # h is (5000,10)
  return a1,z2,a2,z3,h

代价函数计算
在这里插入图片描述
因为我们已经把 $y$ 通过 one-hot 编码转换成（5000，10）的矩阵了，公式中对类别的求和 $\sum_{k=1}^K$ 可以直接用逐元素相乘后整体求和来完成，所以计算和以前一样

# Unregularized cost function, same form as in the earlier exercises
def cost(theta_seralize,x,y):
  """Compute the unregularized cross-entropy cost of the network.

  Args:
    theta_seralize: Flat parameter vector passed on to feed_forward().
    x: Input matrix including the bias column.
    y: One-hot label matrix with the same shape as the network output.

  Returns:
    The cross-entropy summed over all samples and classes, divided by the
    number of samples ``len(x)``.
  """
  _,_,_,_,h=feed_forward(theta_seralize,x)
  # Element-wise products, (5000,10)*(5000,10); summing all entries
  # covers both the sum over samples and the sum over classes.
  first=y*np.log(h)
  second=(1-y)*np.log(1-h)
  J=-np.sum(first+second)/len(x)
  return J
 #其中theta第一列不做正则化
def reg_cost(theta_seralize,x,y,lambd):
  """Compute the L2-regularized cross-entropy cost of the network.

  Args:
    theta_seralize: Flat parameter vector being evaluated.
    x: Input matrix including the bias column.
    y: One-hot label matrix.
    lambd: Regularization strength.

  Returns:
    ``cost(theta_seralize, x, y)`` plus ``lambd / (2 * m)`` times the sum of
    squared non-bias weights, where ``m = len(x)``.
  """
  # Penalize the parameters under evaluation, not module-level weights,
  # so the penalty changes as the optimizer moves.
  theta1,theta2=deserialize(theta_seralize)
  J=cost(theta_seralize,x,y)
  # The first column of each matrix holds the bias weights and is not regularized
  reg1=np.sum(np.power(theta1[:,1:],2))
  reg2=np.sum(np.power(theta2[:,1:],2))
  reg=(reg1+reg2)*lambd/(2*len(x))
  return J+reg

在这里插入图片描述

#计算梯度
def gra_sigmoid(z):
  """Derivative of the sigmoid: g'(z) = g(z) * (1 - g(z)), element-wise."""
  activation=sigmoid(z)
  return activation*(1-activation)
def gradient(theta_seralize,x,y):
  """Compute the unregularized cost gradient by backpropagation.

  Args:
    theta_seralize: Flat parameter vector; theta1 is (25,401) and theta2
      is (10,26) after deserialize().
    x: Input matrix including the bias column.
    y: One-hot label matrix with the same shape as the network output.

  Returns:
    A flat vector with the gradient for theta1 followed by the gradient
    for theta2, in the same layout that serialize() produces.
  """
  # theta1 (25,401) theta2 (10,26)
  theta1,theta2=deserialize(theta_seralize)
  a1,z2,a2,z3,h=feed_forward(theta_seralize,x)
  # a2 (5000,26)  a1 (5000,401)  z2 (5000,25)
  d3=h-y  # output-layer error, (5000,10)
  # The bias unit receives no back-propagated error, so skip theta2's first column
  d2=d3@theta2[:,1:]*gra_sigmoid(z2)
  D2=(d3.T@a2)/len(x)  # (10,26)
  D1=(d2.T@a1)/len(x)  # (25,401)
  # Return one flat vector rather than the (D1, D2) pair, since the
  # optimizer works on a 1-D parameter vector.
  return serialize(D1,D2)

def reg_gradient(theta_seralize,x,y,lambd):
  """Compute the L2-regularized cost gradient.

  Args:
    theta_seralize: Flat parameter vector being evaluated.
    x: Input matrix including the bias column.
    y: One-hot label matrix.
    lambd: Regularization strength.

  Returns:
    A flat gradient vector: the result of gradient() with
    ``(lambd / m) * theta`` added for every non-bias weight, ``m = len(x)``.
  """
  # Take the weights from the vector under evaluation so this gradient
  # matches the penalty on those same parameters.
  theta1,theta2=deserialize(theta_seralize)
  D=gradient(theta_seralize,x,y)
  D1,D2=deserialize(D)
  # The first (bias) column is left unregularized
  D1[:,1:]=D1[:,1:]+(lambd/len(x))*theta1[:,1:]
  D2[:,1:]=D2[:,1:]+(lambd/len(x))*theta2[:,1:]
  return serialize(D1,D2)

from scipy.optimize import minimize
def train(x,y,lambd):
  """Fit the network weights with scipy's TNC optimizer.

  Starts from weights drawn uniformly from [-0.5, 0.5) and minimizes
  reg_cost, using reg_gradient as the analytic gradient.

  Returns:
    The result object returned by scipy.optimize.minimize.
  """
  # Random initialization of all 10285 parameters
  start=np.random.uniform(low=-0.5,high=0.5,size=10285)
  solver_kwargs=dict(method='TNC',jac=reg_gradient,options={'maxiter':300})
  return minimize(reg_cost,start,args=(x,y,lambd),**solver_kwargs)

# Train with lambda = 1, then measure accuracy on the training set
res=train(X,y,1)
_,_,_,_,h=feed_forward(res.x,X)
# argmax yields a 0-based column index, while the labels are 1-based
y_pre=np.argmax(h,axis=1)+1
# Flatten raw_y so the comparison is element-wise; comparing a (m,) vector
# with a (m,1) column would broadcast to an (m,m) matrix.
acc=np.mean(y_pre==raw_y.flatten())

隐藏层可视化

def vis_hidden_layer(theta):
  """Show each hidden unit's input weights as a 20x20 image in a 5x5 grid.

  Args:
    theta: Flat parameter vector; only theta1 (as returned by
      deserialize()) is used.
  """
  theta1,_=deserialize(theta)
  hidden_layer=theta1[:,1:]  # drop the bias column -> (25,400)
  # 5 rows by 5 columns, one 20x20-pixel image per hidden unit
  fig,ax=plt.subplots(ncols=5,nrows=5,figsize=(8,8))
  for i in range(5):
    for j in range(5):
      ax[i,j].imshow(hidden_layer[i*5+j].reshape(20,20).T,cmap='gray_r')
      # Hide the ticks on every subplot; plt.xticks/plt.yticks would only
      # affect the current axes.
      ax[i,j].set_xticks([])
      ax[i,j].set_yticks([])
  plt.show()

# Visualize the hidden layer learned by the optimizer
vis_hidden_layer(res.x)

越黑的点表示对数字识别的权重越大
在这里插入图片描述

参考

1、numpy.random模块用法总结

https://www.cnblogs.com/JetReily/p/9398148.html
np.random.uniform(low,high,size) 生成size个服从均匀分布的浮点数,取值范围为[low, high),默认取值范围为[0, 1.0)
np.random.rand(d0, d1, …, dn) 生成一个(d0, d1, …, dn)维的数组，数组的元素取自[0, 1)上的均匀分布
np.random.randint(low, high=None, size=None) 生成size个整数，取值区间为[low, high)
np.random.random(size=None) 产生[0.0, 1.0)之间的浮点数

2、ravel()与flatten()函数区别