3.0 Logistic Regression
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scipy.io as sio
from scipy.optimize import minimize
data = sio.loadmat('ex3data1.mat')
raw_X = data['X']  # (5000, 400): 5000 flattened 20x20 grayscale digit images
raw_Y = data['y']  # (5000, 1): labels 1..10, where 10 stands for the digit 0
print(raw_Y)
def plot_100_image(X):
    # pick 100 distinct images at random and show them on a 10x10 grid
    sample_index = np.random.choice(len(X), 100, replace=False)
    images = X[sample_index, :]
    fig, ax = plt.subplots(ncols=10, nrows=10, figsize=(8, 8), sharex=True, sharey=True)
    for r in range(10):
        for c in range(10):
            # each row stores a 20x20 image column-major, hence the transpose
            ax[r, c].imshow(images[10 * r + c].reshape(20, 20).T, cmap='gray_r')
    plt.xticks([])
    plt.yticks([])
    plt.show()
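A quick way to verify the data loaded correctly is to call the helper on the raw pixel matrix (this call is an addition; the original script only defines the function):

plot_100_image(raw_X)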
def sigmoid(z):
    return 1 / (1 + np.exp(-z))
def computeCost(theta, X, y, lamda):
    # regularized cross-entropy cost; lamda is the regularization strength
    # and the bias parameter theta[0] is not penalized
    first = np.multiply(y, np.log(sigmoid(X @ theta)))
    second = np.multiply((1 - y), np.log(1 - sigmoid(X @ theta)))
    reg = np.sum(np.power(theta[1:], 2)) * (lamda / (2 * len(X)))
    return -(np.sum(first + second) / len(X)) + reg
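For reference, computeCost implements the regularized cross-entropy cost, with the bias parameter left out of the penalty:

J(\theta) = -\frac{1}{m}\sum_{i=1}^{m}\Big[y^{(i)}\log h_\theta(x^{(i)}) + (1-y^{(i)})\log\big(1-h_\theta(x^{(i)})\big)\Big] + \frac{\lambda}{2m}\sum_{j=1}^{n}\theta_j^2

where h_\theta(x) = g(\theta^T x) is the sigmoid hypothesis and m = len(X).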
def gradient_reg(theta, X, y, lamda):
    # regularization term for every parameter except the bias
    reg = theta[1:] * (lamda / len(X))
    reg = np.insert(reg, 0, values=0, axis=0)  # prepend 0 so theta[0] is unpenalized
    first = (X.T @ (sigmoid(X @ theta) - y)) / len(X)
    return first + reg
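gradient_reg is the matching analytic gradient; inserting the zero in front of the regularization vector keeps \theta_0 unpenalized:

\frac{\partial J}{\partial \theta_j} = \frac{1}{m}\sum_{i=1}^{m}\big(h_\theta(x^{(i)}) - y^{(i)}\big)x_j^{(i)} + \frac{\lambda}{m}\theta_j \qquad (j \geq 1)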
X = np.insert(raw_X, 0, values=1, axis=1)  # prepend a column of ones for the bias term
y = raw_Y.flatten()                        # (5000, 1) -> (5000,)
def one_vs_all(X, y, lamda, K):
    # train K binary classifiers, one per digit class (labels 1..10)
    n = X.shape[1]
    theta_all = np.zeros((K, n))
    for i in range(1, K + 1):
        theta_i = np.zeros(n)
        # y == i turns the multi-class labels into a binary target for class i
        res = minimize(fun=computeCost, x0=theta_i, args=(X, y == i, lamda),
                       method='TNC', jac=gradient_reg)
        theta_all[i - 1, :] = res.x
    return theta_all
lamda=1
K=10
theta_final=one_vs_all(X,y,lamda,K)
print(theta_final)
def predict(X, theta_final):
    h = sigmoid(X @ theta_final.T)   # (5000, 10): one probability per class
    h_argmax = np.argmax(h, axis=1)
    return h_argmax + 1              # shift from 0-based index back to labels 1..10
y_pred=predict(X,theta_final)
acc=np.mean(y_pred==y)
print(acc)
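The overall accuracy can mask weak classes, so a short per-class breakdown (an added sketch, not part of the original script) is worth printing as well:

for k in range(1, K + 1):
    mask = (y == k)
    print(k, np.mean(y_pred[mask] == k))  # accuracy restricted to class k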
3.1 Using a Neural Network
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scipy.io as sio
from scipy.optimize import minimize
data=sio.loadmat('ex3data1.mat')
raw_X=data['X']
raw_Y=data['y']
X=np.insert(raw_X,0,values=1,axis=1)
y=raw_Y.flatten()
theta = sio.loadmat('ex3weights.mat')
theta1 = theta['Theta1']  # (25, 401): input layer -> hidden layer
theta2 = theta['Theta2']  # (10, 26):  hidden layer -> output layer
def sigmoid(z):
    return 1 / (1 + np.exp(-z))
a1 = X                                   # (5000, 401)
z2 = a1 @ theta1.T                       # (5000, 25)
a2 = sigmoid(z2)
a2 = np.insert(a2, 0, values=1, axis=1)  # add the hidden-layer bias unit -> (5000, 26)
z3 = a2 @ theta2.T                       # (5000, 10)
a3 = sigmoid(z3)
y_pred = np.argmax(a3, axis=1) + 1       # back from 0-based index to labels 1..10
acc=np.mean(y_pred==y)
print(acc)
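Because every layer size is fixed by the loaded weights, a quick shape check (an added sketch) catches dimension mistakes before scoring:

print(a1.shape, theta1.shape)  # (5000, 401) (25, 401)
print(a2.shape, theta2.shape)  # (5000, 26)  (10, 26)
print(a3.shape)                # (5000, 10)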
4.0 Neural Network Backpropagation
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scipy.io as sio
from scipy.optimize import minimize
data=sio.loadmat('ex4data1.mat')
raw_X=data['X']
raw_Y=data['y']
X=np.insert(raw_X,0,values=1,axis=1)
def one_hot_encoder(raw_y):
    # map label k (1..10) to a 10-dim vector with a 1 in position k-1
    result = []
    for i in raw_y:
        y_temp = np.zeros(10)
        y_temp[i - 1] = 1
        result.append(y_temp)
    return np.array(result)
y=one_hot_encoder(raw_Y)
theta=sio.loadmat('ex4weights.mat')
theta1,theta2=theta['Theta1'],theta['Theta2']
def serialize(a, b):
    return np.append(a.flatten(), b.flatten())
theta_serialize=serialize(theta1,theta2)
def deserialize(theta_serialize):
    theta1 = theta_serialize[:25 * 401].reshape(25, 401)  # input -> hidden weights
    theta2 = theta_serialize[25 * 401:].reshape(10, 26)   # hidden -> output weights
    return theta1, theta2
def sigmoid(z):
    return 1 / (1 + np.exp(-z))
def feed_forward(theta_serialize, X):
    theta1, theta2 = deserialize(theta_serialize)
    a1 = X
    z2 = a1 @ theta1.T
    a2 = sigmoid(z2)
    a2 = np.insert(a2, 0, values=1, axis=1)
    z3 = a2 @ theta2.T
    h = sigmoid(z3)
    return a1, z2, a2, z3, h
def cost(theta_serialize, X, y):
    a1, z2, a2, z3, h = feed_forward(theta_serialize, X)
    J = -np.sum(y * np.log(h) + (1 - y) * np.log(1 - h)) / len(X)
    return J
def reg_cost(theta_serialize, X, y, lamda):
    # deserialize here so the penalty uses the current parameters,
    # not the theta1/theta2 globals loaded from ex4weights.mat
    theta1, theta2 = deserialize(theta_serialize)
    sum1 = np.sum(np.power(theta1[:, 1:], 2))
    sum2 = np.sum(np.power(theta2[:, 1:], 2))
    reg = (sum1 + sum2) * lamda / (2 * len(X))
    return reg + cost(theta_serialize, X, y)
def sigmoid_gradient(z):
    return sigmoid(z) * (1 - sigmoid(z))
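The identity used in sigmoid_gradient follows from the chain rule applied to g(z) = 1/(1+e^{-z}):

g'(z) = \frac{e^{-z}}{(1+e^{-z})^2} = g(z)\big(1 - g(z)\big)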
def gradient(theta_serialize, X, y):
    theta1, theta2 = deserialize(theta_serialize)
    a1, z2, a2, z3, h = feed_forward(theta_serialize, X)
    d3 = h - y                                      # output-layer error (5000, 10)
    d2 = d3 @ theta2[:, 1:] * sigmoid_gradient(z2)  # hidden-layer error, bias column dropped
    D2 = (d3.T @ a2) / len(X)
    D1 = (d2.T @ a1) / len(X)
    return serialize(D1, D2)
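In matrix form, gradient implements the standard backpropagation equations for this three-layer network (\circ is the elementwise product, and the bias column of \Theta_2 is skipped when propagating the error back):

\delta^{(3)} = h - y, \qquad \delta^{(2)} = \delta^{(3)}\,\Theta_2[:,1{:}] \circ g'(z^{(2)}), \qquad D^{(l)} = \frac{1}{m}\,(\delta^{(l+1)})^T a^{(l)}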
def reg_gradient(theta_serialize, X, y, lamda):
    D = gradient(theta_serialize, X, y)
    D1, D2 = deserialize(D)
    theta1, theta2 = deserialize(theta_serialize)
    # add the regularization term to every weight except the bias column
    D1[:, 1:] = D1[:, 1:] + theta1[:, 1:] * lamda / len(X)
    D2[:, 1:] = D2[:, 1:] + theta2[:, 1:] * lamda / len(X)
    return serialize(D1, D2)
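Before training it is worth validating the analytic gradient against a two-sided finite-difference estimate on a few random components. The helper below is an added sketch (gradient_checking is not part of the original exercise code) and is slow, so run it once on a handful of indices, e.g. gradient_checking(theta_serialize, X, y, 1) on the loaded weights:

def gradient_checking(theta_serialize, X, y, lamda, eps=1e-4, n_checks=5):
    analytic = reg_gradient(theta_serialize, X, y, lamda)
    for i in np.random.choice(len(theta_serialize), n_checks, replace=False):
        plus, minus = theta_serialize.copy(), theta_serialize.copy()
        plus[i] += eps
        minus[i] -= eps
        numeric = (reg_cost(plus, X, y, lamda) - reg_cost(minus, X, y, lamda)) / (2 * eps)
        print(i, numeric, analytic[i])  # the two values should agree to ~4 decimals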
lamda = 10
def nn_training(X, y):
    # random initialization breaks the symmetry between hidden units;
    # 10285 = 25*401 + 10*26 is the total number of parameters
    init_theta = np.random.uniform(-0.5, 0.5, 10285)
    res = minimize(fun=reg_cost, x0=init_theta, args=(X, y, lamda), method='TNC',
                   jac=reg_gradient, options={'maxiter': 300})
    return res
res = nn_training(X, y)
raw_Y = data['y'].reshape(5000,)  # integer labels for scoring (the one-hot y was used for training)
_, _, _, _, h = feed_forward(res.x, X)
y_pred = np.argmax(h, axis=1) + 1
acc = np.mean(y_pred == raw_Y)
print(acc)
def plot_hidden_layer(theta):
    # visualize the 25 hidden units: each row of theta1 (minus the bias weight)
    # is a 400-dim vector that can be rendered as a 20x20 image
    theta1, _ = deserialize(theta)
    hidden_layer = theta1[:, 1:]  # (25, 400)
    fig, ax = plt.subplots(ncols=5, nrows=5, figsize=(8, 8), sharex=True, sharey=True)
    for r in range(5):
        for c in range(5):
            ax[r, c].imshow(hidden_layer[5 * r + c].reshape(20, 20).T, cmap='gray_r')
    plt.xticks([])
    plt.yticks([])
    plt.show()
plot_hidden_layer(res.x)