import numpy as np
import matplotlib.pyplot as plt
from scipy.io import loadmat
import scipy.optimize as opt
from sklearn.metrics import classification_report
path = 'C:/Users/liang/Desktop/Coursera-ML-AndrewNg-Notes/ex4data1.mat'
def load_mat(path):
    """Load the .mat data file and return X and the flattened labels y."""
    data = loadmat(path)
    X = data['X']
    y = data['y'].flatten()
    return X, y
def plot_100_image(X):
    """Randomly pick 100 of the 5000 images and draw them on a 10x10 grid."""
    index = np.random.choice(range(5000), 100)
    images = X[index]
    fig, ax_array = plt.subplots(10, 10, sharey=True, sharex=True, figsize=(8, 8))
    for r in range(10):
        for c in range(10):
            ax_array[r, c].matshow(images[r * 10 + c].reshape(20, 20), cmap='gray_r')
    plt.xticks([])
    plt.yticks([])
    plt.show()
X, y = load_mat(path)
plot_100_image(X)
def expand_y(y):
    """One-hot encode the labels: digit k (1-10) becomes a 10-vector with a 1 at index k-1."""
    result = []
    for i in y:
        y_array = np.zeros(10)
        y_array[i - 1] = 1
        result.append(y_array)
    return np.array(result)
raw_X, raw_y = load_mat(path)
X = np.insert(raw_X, 0, 1, axis=1)
y = expand_y(raw_y)
X.shape, y.shape
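# Expected shapes after adding the bias column: X is (5000, 401), y is (5000, 10).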
def load_weight(path):
    data = loadmat(path)
    return data['Theta1'], data['Theta2']
path2 = 'C:/Users/liang/Desktop/Coursera-ML-AndrewNg-Notes/ex4weights.mat'
t1, t2 = load_weight(path2)
t1.shape, t2.shape
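# Expected shapes: Theta1 is (25, 401) and Theta2 is (10, 26) for the
# 400-input, 25-hidden, 10-output network (each layer with a bias unit).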
def serialize(a, b):
    """Unroll both weight matrices into a single flat vector for the optimizer."""
    return np.r_[a.flatten(), b.flatten()]
theta = serialize(t1, t2)
theta.shape
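# 25*401 + 10*26 = 10285 parameters in total, matching random_init(10285) below.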
def deserialize(seq):
    """Recover Theta1 (25, 401) and Theta2 (10, 26) from the flat vector."""
    return seq[:25 * 401].reshape(25, 401), seq[25 * 401:].reshape(10, 26)
def sigmoid(z):
    return 1 / (1 + np.exp(-z))  # note the minus sign: 1/(1+exp(z)) would compute 1 - sigmoid(z)
def feed_forward(theta, X):
    """Forward propagation; X is assumed to already carry its bias column."""
    t1, t2 = deserialize(theta)
    a1 = X                                     # (5000, 401)
    z2 = a1 @ t1.T                             # (5000, 25)
    a2 = np.insert(sigmoid(z2), 0, 1, axis=1)  # (5000, 26), bias column added
    z3 = a2 @ t2.T                             # (5000, 10)
    a3 = sigmoid(z3)
    return a1, z2, a2, z3, a3
a1, z2, a2, z3, h = feed_forward(theta, X)
def cost(theta, X, y):
    """Unregularized cross-entropy cost, summed over classes and averaged over examples."""
    a1, z2, a2, z3, h = feed_forward(theta, X)
    J = 0
    for i in range(len(X)):
        first = -y[i] * np.log(h[i])
        second = (1 - y[i]) * np.log(1 - h[i])
        J = J + np.sum(first - second)
    J = J / len(X)
    return J
cost(theta, X, y)
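# A vectorized equivalent of the loop above, added here as a sketch
# (cost_vectorized is not part of the original notebook); both compute
# the same cross-entropy over the one-hot labels.
def cost_vectorized(theta, X, y):
    _, _, _, _, h = feed_forward(theta, X)
    return np.sum(-y * np.log(h) - (1 - y) * np.log(1 - h)) / len(X)

# With the pre-trained weights, both versions should print roughly 0.2876.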
def regularized_cost(theta, X, y, l=1):
    """Regularization skips each layer's bias term, i.e. the first column of each weight matrix."""
    t1, t2 = deserialize(theta)
    reg = np.sum(t1[:, 1:] ** 2) + np.sum(t2[:, 1:] ** 2)
    return l / (2 * len(X)) * reg + cost(theta, X, y)
regularized_cost(theta, X, y, 1)
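# Per the exercise, this should come out to roughly 0.3838 with the pre-trained weights and lambda=1.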
def sigmoid_gradient(z):
    return sigmoid(z) * (1 - sigmoid(z))
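# Quick sanity check: the sigmoid derivative peaks at z = 0, where it equals 0.25.
sigmoid_gradient(0)  # 0.25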
def random_init(size):
    """Return `size` values drawn uniformly from [-0.12, 0.12] to break symmetry."""
    return np.random.uniform(-0.12, 0.12, size)
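# The 0.12 bound follows the exercise's heuristic epsilon_init ~ sqrt(6) / sqrt(L_in + L_out).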
print('a1', a1.shape, 't1', t1.shape)
print('z2', z2.shape)
print('a2', a2.shape, 't2', t2.shape)
print('z3', z3.shape)
print('a3', h.shape)
def gradient(theta, X, y):
    '''
    Unregularized gradient. Notice there is no d1, since the input layer has no error term.
    Returns the gradients of all parameters, so each D(i) has the same shape as theta(i).
    '''
    t1, t2 = deserialize(theta)
    a1, z2, a2, z3, h = feed_forward(theta, X)
    d3 = h - y                                  # (5000, 10)
    d2 = d3 @ t2[:, 1:] * sigmoid_gradient(z2)  # (5000, 25)
    D2 = d3.T @ a2                              # (10, 26)
    D1 = d2.T @ a1                              # (25, 401)
    D = (1 / len(X)) * serialize(D1, D2)
    return D
def gradient_checking(theta, X, y, epsilon):
    """Compare the backprop gradient against a two-sided numerical approximation (very slow)."""
    numeric_grad = np.zeros(len(theta))
    for i in range(len(theta)):
        plus, minus = theta.copy(), theta.copy()
        plus[i] += epsilon
        minus[i] -= epsilon
        numeric_grad[i] = (cost(plus, X, y) - cost(minus, X, y)) / (2 * epsilon)
    analytic_grad = gradient(theta, X, y)
    diff = np.linalg.norm(numeric_grad - analytic_grad) / np.linalg.norm(numeric_grad + analytic_grad)
    print('If your backpropagation implementation is correct,\nthe relative difference will be smaller than 10e-9 (assume epsilon=0.0001).\nRelative Difference: {}\n'.format(diff))

# Warning: this evaluates the cost twice per parameter (2 * 10285 forward passes), so it is slow.
gradient_checking(theta, X, y, 0.0001)
def regularized_gradient(theta, X, y, l=1):
    """Do not penalize the bias parameters (the first column of each weight matrix)."""
    D1, D2 = deserialize(gradient(theta, X, y))
    t1, t2 = deserialize(theta)
    t1 = t1.copy()  # copy so the bias weights in theta are not zeroed through a view
    t2 = t2.copy()
    t1[:, 0] = 0
    t2[:, 0] = 0
    reg_D1 = D1 + (l / len(X)) * t1
    reg_D2 = D2 + (l / len(X)) * t2
    return serialize(reg_D1, reg_D2)
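# Zeroing the first columns means the bias gradients stay exactly the unregularized ones.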
def nn_training(X, y):
    init_theta = random_init(10285)  # 25*401 + 10*26
    res = opt.minimize(fun=regularized_cost,
                       x0=init_theta,
                       args=(X, y, 1),
                       method='TNC',
                       jac=regularized_gradient,
                       options={'maxiter': 400})
    return res
res = nn_training(X, y)
res
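# res.x holds the trained parameters and res.fun the final regularized cost;
# results differ from run to run because the initialization is random.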
def accuracy(theta, X, y):
    _, _, _, _, h = feed_forward(theta, X)  # use the theta argument, not the global res.x
    y_pred = np.argmax(h, axis=1) + 1       # map column index 0-9 back to labels 1-10
    print(classification_report(y, y_pred))
accuracy(res.x, X, raw_y)
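# With lambda=1 and 400 iterations, training-set accuracy typically lands around 99%,
# though the exact figure varies with the random initialization.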
def plot_hidden(theta):
    """Visualize the 25 hidden units: each row of Theta1 (minus the bias) is a 20x20 image."""
    t1, _ = deserialize(theta)
    t1 = t1[:, 1:]
    fig, ax_array = plt.subplots(5, 5, sharex=True, sharey=True, figsize=(6, 6))
    for r in range(5):
        for c in range(5):
            ax_array[r, c].matshow(t1[r * 5 + c].reshape(20, 20), cmap='gray_r')
    plt.xticks([])
    plt.yticks([])
    plt.show()
plot_hidden(res.x)
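# Each tile is the stroke-like input pattern that one hidden unit responds to most strongly.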