这个部分需要你实现手写数字(0到9)的识别。你需要扩展之前的逻辑回归,并将其应用于一对多的分类。
ex3-神经网络-前向传播
import numpy as np
import scipy.io as sio
# Load the data set; the .mat file is read into a dict and the arrays stored
# under the keys 'X' and 'y' are used below.
data = sio.loadmat('ex3data1.mat')
raw_X = data['X']
raw_y = data['y']
# Prepend a constant column of ones (index 0) so the first weight column acts
# as the bias term.
X = np.insert(raw_X,0,values=1,axis=1)
X.shape
# recorded output: (5000,401)
# Collapse the label array to one dimension so it can be compared
# element-wise with the 1-D predictions later on.
y = raw_y.flatten()
y.shape
# recorded output: (5000,)
# Load the network weights (presumably pre-trained and shipped with the
# exercise -- TODO confirm).
theta = sio.loadmat('ex3weights.mat')
theta.keys()
# recorded output: dict_keys(['__header__', '__version__', '__globals__', 'Theta1', 'Theta2'])
theta1 = theta['Theta1']
theta2 = theta['Theta2']
theta1.shape,theta2.shape
# recorded output: ((25, 401), (10, 26))
def sigmoid(z):
    """Return the logistic function 1 / (1 + exp(-z)), evaluated element-wise.

    Args:
        z: Real scalar or NumPy array.

    Returns:
        Value(s) in [0, 1] with the same shape as ``z``.
    """
    # The textbook form evaluates exp(-z), which overflows (RuntimeWarning)
    # for large negative z.  log(1 + exp(-z)) computed by np.logaddexp keeps
    # every intermediate finite; exponentiating its negation gives the same
    # mathematical value.
    return np.exp(-np.logaddexp(0, -z))
# Forward propagation through the two weight matrices.
# Input layer: the design matrix, which already carries the bias column.
a1 = X
# Hidden layer: affine map followed by the sigmoid activation.
z2 = a1 @ theta1.T
a2 = sigmoid(z2)
a2.shape
# Prepend the bias column of ones before feeding the output layer.
a2 = np.insert(a2, 0, values=1, axis=1)
# Output layer: one activation per class.
z3 = a2 @ theta2.T
a3 = sigmoid(z3)
# argmax yields a 0-based column index; shift by one so predictions line up
# with the values stored in y (assumed to be 1-based labels -- TODO confirm).
y_pred = np.argmax(a3, axis=1) + 1
# Fraction of samples whose predicted label equals the true label.
acc = np.mean(y_pred == y)
acc
# recorded output: 0.9752
ex3-逻辑回归解决分类问题
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import scipy.io as sio
# Load the data set and pull out the arrays stored under the keys 'X' and 'y'.
data = sio.loadmat('ex3data1.mat')
raw_X = data['X']
raw_y = data['y']
显示其中的一张图片
def plot_an_image(X):
    """Draw one randomly chosen row of ``X`` as a 20x20 grayscale image.

    Args:
        X: 2-D array; each row must hold exactly 400 values so that it can be
            reshaped to 20x20.
    """
    # Sample the row index from the actual number of rows.  A hard-coded upper
    # bound of 5000 raises IndexError on smaller inputs and never selects rows
    # beyond the first 5000 on larger ones.
    pick_one = np.random.randint(len(X))
    image = X[pick_one, :]
    fig, ax = plt.subplots(figsize=(1, 1))
    ax.imshow(image.reshape(20, 20), cmap='gray_r')
    plt.xticks([])  # hide tick marks
    plt.yticks([])
# Display one random sample taken from the raw feature matrix (the array read
# from the file, without any added bias column).
plot_an_image(raw_X)
随机显示100张图片
def plot_100_image(X):
    """Show 100 randomly sampled rows of ``X`` on a 10x10 grid of 20x20 images.

    Indices are drawn with the defaults of ``np.random.choice`` (sampling with
    replacement), so the same row may appear more than once.

    Args:
        X: 2-D array; each row must hold exactly 400 values.
    """
    chosen_rows = np.random.choice(len(X), 100)
    images = X[chosen_rows, :]
    print(images.shape)
    _, ax = plt.subplots(nrows=10, ncols=10, figsize=(8, 8),
                         sharex=True, sharey=True)
    # ax.flat walks the grid row by row, so the k-th image lands on
    # ax[k // 10, k % 10].
    for axis, pixels in zip(ax.flat, images):
        axis.imshow(pixels.reshape(20, 20), cmap='gray_r')
    plt.xticks([])  # hide tick marks
    plt.yticks([])
    plt.show()
# Display a 10x10 grid of random samples from the raw feature matrix.
plot_100_image(raw_X)
def sigmoid(z):
    """Return the logistic function 1 / (1 + exp(-z)), evaluated element-wise.

    Args:
        z: Real scalar or NumPy array.

    Returns:
        Value(s) in [0, 1] with the same shape as ``z``.
    """
    # The textbook form evaluates exp(-z), which overflows (RuntimeWarning)
    # for large negative z.  log(1 + exp(-z)) computed by np.logaddexp keeps
    # every intermediate finite; exponentiating its negation gives the same
    # mathematical value.
    return np.exp(-np.logaddexp(0, -z))
def costFunction(theta, X, y, lamda):
    """Regularized logistic-regression cost (mean cross-entropy + L2 penalty).

    Args:
        theta: 1-D parameter vector with one entry per column of ``X``;
            ``theta[0]`` is excluded from the penalty.
        X: 2-D design matrix, one row per sample.
        y: 1-D targets, 0/1 or boolean, one per sample.
        lamda: L2 regularization strength.

    Returns:
        Mean cross-entropy over the ``m = len(X)`` samples plus
        ``lamda / (2 * m) * sum(theta[1:] ** 2)``.
    """
    m = len(X)
    z = X @ theta
    # With h = sigmoid(z):  -log(h) == logaddexp(0, -z)  and
    # -log(1 - h) == logaddexp(0, z).  Working on z directly avoids log(0)
    # when the sigmoid saturates to exactly 0.0 or 1.0 in floating point,
    # which would otherwise turn the cost into inf or nan.
    loss_positive = y * np.logaddexp(0, -z)
    loss_negative = (1 - y) * np.logaddexp(0, z)
    reg = (theta[1:] @ theta[1:]) * (lamda / (2 * m))
    return np.sum(loss_positive + loss_negative) / m + reg
def gradient_reg(theta, X, y, lamda):
    """Gradient of the regularized logistic-regression cost w.r.t. ``theta``.

    Args:
        theta: 1-D parameter vector with one entry per column of ``X``.
        X: 2-D design matrix, one row per sample.
        y: 1-D targets, one per sample.
        lamda: L2 regularization strength.

    Returns:
        1-D array shaped like ``theta``: the data-term gradient plus the
        penalty gradient, where the penalty contributes nothing to entry 0.
    """
    m = len(X)
    residual = sigmoid(X @ theta) - y
    data_term = (X.T @ residual) / m
    # Entry 0 is not regularized, so its penalty gradient is a literal zero
    # placed in front of the scaled remaining parameters.
    penalty = np.insert(theta[1:] * (lamda / m), 0, values=0, axis=0)
    return data_term + penalty
# Prepend a constant column of ones (index 0) so theta[0] acts as the
# intercept term.
X = np.insert(raw_X,0,values=1,axis=1)
# Collapse the label array to one dimension so `y == i` yields a 1-D mask.
y = raw_y.flatten()
from scipy.optimize import minimize
def one_vs_all(X, y, lamda, K):
    """Fit ``K`` regularized logistic-regression classifiers, one per label.

    For each label value ``1 .. K`` a binary problem "is this sample of that
    label?" is solved with ``scipy.optimize.minimize`` (method ``'TNC'``),
    starting from an all-zero parameter vector.

    Args:
        X: 2-D design matrix, one row per sample.
        y: 1-D label array compared against each label value.
        lamda: L2 regularization strength forwarded to the cost and gradient.
        K: Number of labels.

    Returns:
        Array of shape ``(K, X.shape[1])``; row ``i - 1`` holds the fitted
        parameters for label ``i``.
    """
    n_params = X.shape[1]
    classifiers = np.zeros((K, n_params))
    for row, label in enumerate(range(1, K + 1)):
        result = minimize(
            fun=costFunction,
            x0=np.zeros(n_params),
            args=(X, y == label, lamda),  # boolean mask as binary target
            method='TNC',
            jac=gradient_reg,
        )
        classifiers[row, :] = result.x
    return classifiers
# Regularization strength passed to the cost and gradient functions.
lamda = 1
# Number of labels; one classifier is trained per label value 1..K.
K = 10
# Row i-1 of theta_final holds the fitted parameters for label i.
theta_final = one_vs_all(X,y,lamda,K)
def predict(X, theta_final):
    """Predict a 1-based label for every row of ``X``.

    Args:
        X: 2-D design matrix of shape ``(m, n)``.
        theta_final: Classifier parameters of shape ``(K, n)``, one row per
            label.

    Returns:
        1-D integer array of length ``m``: for each sample, one plus the row
        index of the classifier with the highest score.
    """
    # (m, n) @ (n, K) -> (m, K): one linear score per sample and classifier.
    scores = X @ theta_final.T
    # The sigmoid is strictly increasing, so the arg-max of the raw scores is
    # the arg-max of the probabilities.  Skipping it also avoids float
    # saturation, where several large scores all map to exactly 1.0 and the
    # arg-max would fall back to the first of them.
    return np.argmax(scores, axis=1) + 1
# Predict a label for every training sample with the fitted classifiers.
y_pred = predict(X,theta_final)
# Fraction of samples whose predicted label equals the true label
# (measured on the same data the classifiers were fitted on).
acc = np.mean(y_pred==y)
acc
# recorded output: 0.9446