代码
import numpy as np
import matplotlib.pyplot as plt
from scipy.io import loadmat
import math
def displayData(X, *example_width):
    """Show every row of ``X`` as a grayscale image in a near-square grid.

    Args:
        X: 2-D array of shape (m, n); each row is one flattened image.
        *example_width: Optional single positional value giving the width in
            pixels of each image. When omitted, ``round(sqrt(n))`` is used.

    Raises:
        ValueError: If ``X`` has no rows, or if ``n`` cannot be split evenly
            into rows of ``example_width`` pixels.
    """
    m, n = X.shape
    if m == 0:
        raise ValueError('X must contain at least one example')

    # example_width is collected as a tuple, so unpack the first value.
    if example_width:
        width = int(example_width[0])
    else:
        width = int(round(math.sqrt(n)))
    if width <= 0 or n % width != 0:
        raise ValueError(
            'cannot reshape %d pixels into rows of width %d' % (n, width))
    height = n // width

    rows = math.floor(math.sqrt(m))  # number of grid rows
    cols = math.ceil(m / rows)       # number of grid columns

    # squeeze=False keeps `ax` two-dimensional even for a 1xN or Nx1 grid.
    _, ax = plt.subplots(
        nrows=rows, ncols=cols, sharey=True, sharex=True, figsize=(8, 8),
        squeeze=False)
    for row in range(rows):
        for column in range(cols):
            cell = ax[row, column]
            # Row-major position of this cell; the stride is the column count.
            index = row * cols + column
            if index >= m:
                # The grid may have more cells than there are examples.
                cell.axis('off')
                continue
            cell.matshow(X[index].reshape((height, width)), cmap='gray_r')
            cell.set_xticks([])
            cell.set_yticks([])
    plt.show()
def sigmoid(z):
    """Return the logistic function ``1 / (1 + exp(-z))`` element-wise.

    Args:
        z: Scalar or array-like of real numbers.

    Returns:
        Value(s) in [0, 1] with the same shape as ``z``.
    """
    # sigmoid(z) == exp(-log(1 + exp(-z))). np.logaddexp evaluates the log
    # term without overflowing, so large negative z no longer triggers a
    # RuntimeWarning from exp().
    return np.exp(-np.logaddexp(0, np.negative(z)))
def predict(Theta1, Theta2, X):
    """Run a forward pass through a 3-layer network and pick the best class.

    Args:
        Theta1: Weights from the input layer (plus bias) to the hidden layer.
        Theta2: Weights from the hidden layer (plus bias) to the output layer.
        X: Examples, one per row, without the bias column. A single 1-D
            example is also accepted and treated as one row.

    Returns:
        1-D integer array with, for each example, the zero-based index of the
        output unit with the highest activation.
    """
    X = np.atleast_2d(X)
    bias = np.ones((X.shape[0], 1))

    # Prepend the bias column before each layer. hstack takes ONE sequence
    # argument, hence the double parentheses.
    a1 = np.hstack((bias, X))
    a2 = sigmoid(a1.dot(Theta1.T))
    a2 = np.hstack((bias, a2))
    a3 = sigmoid(a2.dot(Theta2.T))
    return np.argmax(a3, axis=1)
if __name__ == '__main__':
    # Setup the parameters you will use for this exercise
    input_layer_size = 400   # 20x20 Input Images of Digits
    hidden_layer_size = 25   # 25 hidden units
    num_labels = 10          # 10 labels, from 1 to 10
    # (note that we have mapped '0' to label '10')
    print('='*18, 'Beginning', '='*18)

    # =========== Part 1: Loading and Visualizing Data =============
    # We start the exercise by first loading and visualizing the dataset.
    # You will be working with a dataset that contains handwritten digits.
    # load Training Data
    print('Loading and Visualizing Data ... ')
    file = 'ex3data1.mat'
    data = loadmat(file)  # loadmat returns a dict
    X = data['X']
    y = data['y']
    m, n = X.shape

    # Randomly select 100 data points to display.
    # The grid display is disabled here; pass X[index] to displayData to
    # view the sample.
    index = np.random.choice(m, size=100, replace=False)
    print('='*40)

    # ================ Part 2: Loading Parameters ================
    # In this part of the exercise, we load some pre-initialized
    # neural network parameters.
    # Load the weights into variables Theta1 and Theta2
    print('Loading Saved Neural Network Parameters ...')
    para = loadmat('ex3weights.mat')
    Theta1 = para['Theta1']  # (25, 401)
    Theta2 = para['Theta2']  # (10, 26)
    print('='*40)

    # ================= Part 3: Implement Predict =================
    # After training the neural network, we would like to use it to predict
    # the labels.
    # predict returns indices 0..9 which stand for labels 1..10, so add 1.
    pred = predict(Theta1, Theta2, X) + 1
    # y is (m, 1); flatten it to (m,) so it compares element-wise with pred.
    acc = np.mean(pred == y.flatten())
    print('Training Set Accuracy: ', acc)
    print('='*40)

    # To give you an idea of the network's output, you can also run
    # through the examples one at a time to see what it is predicting.
    # Randomly pick 10 distinct examples.
    rp = np.random.choice(m, size=10, replace=False)
    for example in rp:
        print('Displaying Example Image')
        x = X[example, :].reshape(1, n)  # (1, 400)
        pred = predict(Theta1, Theta2, x) + 1
        # Label 10 encodes the digit 0, so show both the prediction and the
        # ground truth as digits (mod 10) to keep them comparable.
        print('Neural Network Prediction:', np.mod(pred, 10))
        print('The Label is: ', np.mod(y[example], 10))
        plt.figure()
        plt.imshow(x.reshape(20, 20), cmap='binary')
        plt.show()
        s = input('输入q退出,回车继续:')
        if s == 'q':
            break
    print('='*40)
运行结果
踩到的坑
1、在定义损失函数:lrCostFunction时,theta[0]不应参与正则化。在处理时应注意:
theta = theta_t.copy()。此处如果不加 copy(),theta 只是 theta_t 的一个引用,二者共享同一块内存;此时如果修改 theta:
theta[0] = 0,则 theta_t[0] 也会变成 0。
2、在运行oneVsAll函数时,报错:“operands could not be broadcast together with shapes (401,5000) (401,) ”
报错位置:lrCostFunction_grad( )里的grad=...那行
报错原因:h.shape=(5000, )
解决办法:将h reshape为5000x1的
3、报错:“invalid gradient vector from minimized function”
报错位置:op.minimize()函数所在行
报错原因:在运行oneVsAll( ) 函数时,传入到 lrCostFunction( )的 theta_t 的shape发生变化,变为(401, ),应为(401, 1)
解决办法:将theta_t reshape为(401, 1) :theta_t = theta_t.reshape(theta_t.shape[0], 1)
应加深对numpy中参数广播的理解。
4、在执行优化op.minimize()时,由于是分别训练10个分类器的参数theta,在每次训练时忘记处理标签,即未把需要训练的分类器的标签置为1,其余类别置为0
导致训练出来的参数全为0。
method='TNC' 指的是用截断牛顿(truncated Newton)法进行优化
https://docs.scipy.org/doc/scipy/reference/optimize.minimize-tnc.html#optimize-minimize-tnc