文章目录
1.加载并处理数据
import numpy as np
import matplotlib.pyplot as plt
import matplotlib
from scipy.io import loadmat
import scipy.optimize as opt
from sklearn.metrics import classification_report
def load_data(path, transpose=True):
    """Load the hand-written-digit dataset from a MATLAB .mat file.

    Args:
        path: filesystem path of the .mat file containing 'X' and 'y'.
        transpose: when True, reshape each 400-element sample to a 20x20
            image, transpose it, and flatten it back — converting the
            MATLAB column-major pixel order to row-major.

    Returns:
        (X, y): feature matrix and flattened label vector.
    """
    mat = loadmat(path)
    X = mat['X']
    y = mat['y'].flatten()
    if transpose:
        fixed = [sample.reshape(20, 20).T.flatten() for sample in X]
        X = np.array(fixed)
    return X, y
def serialize(theta1, theta2):
    """Flatten both weight matrices and join them into a single 1-D vector.

    For 1-D inputs np.concatenate's axis argument is irrelevant, so the
    result is simply theta1's entries followed by theta2's.
    """
    return np.concatenate((theta1.ravel(), theta2.ravel()))
def deserialize(theta):
    """Split the flat parameter vector into the two layer weight matrices.

    Returns:
        theta1 of shape (25, 401) — input layer -> hidden layer weights,
        theta2 of shape (10, 26)  — hidden layer -> output layer weights.
    """
    split = 25 * 401
    first = theta[:split].reshape(25, 401)
    second = theta[split:].reshape(10, 26)
    return first, second
# NOTE(review): `path` must be defined earlier in the script (not visible in this chunk).
X,y=load_data(path,transpose=False)#the pretrained weights were fit on the raw (untransposed) data
y=np.array([(y==k) for k in range(1,11)]).T # one-hot encode labels 1..10 -> y.shape (5000, 10)
X=np.insert(X,0,np.ones(X.shape[0]),axis=1) # prepend a bias column of ones -> X.shape (5000, 401)
2.随机初始化
def random_init(size):
    """Draw `size` weights uniformly from [-0.12, 0.12].

    Random (rather than zero) initialization breaks the symmetry between
    hidden units so gradient descent can learn distinct features.
    """
    epsilon = 0.12
    return np.random.uniform(-epsilon, epsilon, size)

# Total parameter count: 25*401 + 10*26 = 10285.
theta_init = random_init(10285)
3.前向传播
def sigmoid(z):
    """Element-wise logistic function: 1 / (1 + e^(-z))."""
    return 1.0 / (1.0 + np.exp(-z))
def feed_forward(theta, X):
    """Propagate inputs through the 400-25-10 network.

    Args:
        theta: flat parameter vector (split by `deserialize`).
        X: design matrix with the bias column already inserted, (5000, 401).

    Returns:
        Every intermediate quantity backprop needs:
        a1 (inputs), z2, a2 (hidden activations with bias), z3, h3 (outputs).
    """
    theta1, theta2 = deserialize(theta)  # (25, 401) and (10, 26)
    a1 = X
    z2 = a1 @ theta1.T                   # (5000, 25)
    hidden = sigmoid(z2)
    a2 = np.insert(hidden, 0, np.ones(hidden.shape[0]), axis=1)  # bias -> (5000, 26)
    z3 = a2 @ theta2.T                   # (5000, 10)
    h3 = sigmoid(z3)
    return a1, z2, a2, z3, h3
4.代价函数
(1)无正则化代价函数
$J(\Theta)=\frac{1}{m}\sum_{i=1}^{m}\sum_{k=1}^{K}\left[-y_k^{(i)}\log\left(h_\Theta(x^{(i)})_k\right)-\left(1-y_k^{(i)}\right)\log\left(1-h_\Theta(x^{(i)})_k\right)\right]$，其中 $K$ 表示分类的个数。
def cost(theta, X, y):
    """Unregularized cross-entropy cost averaged over the m examples.

    Args:
        theta: flat parameter vector.
        X: design matrix with bias column, (m, 401).
        y: one-hot label matrix, (m, 10).
    """
    h = feed_forward(theta, X)[-1]  # network outputs, shape (m, 10)
    # '*' on arrays is element-wise (same as np.multiply).
    per_cell = -y * np.log(h) - (1 - y) * np.log(1 - h)
    return np.sum(per_cell) / len(X)
# Sanity check: evaluate the cost at the random initial weights.
cost(theta_init,X,y)
# observed value ~7.2375 — varies run to run since theta_init is random
(2)正则化代价函数
J ( Θ ) = 1 m [ ∑ i = 1 m ∑ k = 1 K − y k ( i ) l o g ( h Θ ( x ( i ) ) k ) − ( 1 − y k ( i ) ) l o g ( 1 − h Θ ( x ( i ) ) k ) ] + λ 2 m ∑ l = 1 L − 1 ∑ i = 1 s l ∑ j = 1 s l + 1 ( Θ j i ( i ) ) 2 J(\Theta)=\frac{1}{m}[\sum_{i=1}^{m}\sum_{k=1}^{K}-y_k^{(i)}log(h_{\Theta}(x^{(i)})_k)-(1-y_k^{(i)})log(1-h_{\Theta}(x^{(i)})_k)]+\frac{\lambda}{2m}\sum_{l=1}^{L-1}\sum_{i=1}^{s_l}\sum_{j=1}^{s_{l+1}}(\Theta_{ji}^{(i)})^2 J(Θ)=m1[i=1∑mk=1∑K−y