After writing camelCase for so long I've grown a bit tired of it, so I've decided to use snake_case in Python and camelCase in C++.
This assignment trains a model on a handwritten-digit dataset; it is a multi-class classification task.
Reference: https://github.com/fengdu78/Coursera-ML-AndrewNg-Notes/tree/master/code
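One-vs-all reduces the 10-class problem to 10 binary logistic regressions: for each class $k$ we fit a classifier $h_\theta^{(k)}(x) = g(\theta^{(k)\top}x)$ that estimates $P(y = k \mid x)$, and at prediction time we take the class with the highest probability:

$$\hat{y} = \arg\max_k \, h_\theta^{(k)}(x)$$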
import matplotlib.pyplot as plt
import numpy as np
import scipy.io as sio
import matplotlib
import scipy.optimize as opt
from sklearn.metrics import classification_report
def load_data(path, transpose=True):
    '''Load the .mat data file and return the feature matrix X and label vector y.'''
    data = sio.loadmat(path)
    y = data.get('y')
    y = y.reshape(y.shape[0])  # flatten (5000, 1) into (5000,)
    X = data.get('X')
    if transpose:
        # each row is a 20x20 image stored column-major; transpose so it displays upright
        X = np.array([im.reshape((20, 20)).T for im in X])
        X = np.array([im.reshape(400) for im in X])
    return X, y
raw_X, raw_y = load_data('ex3data1.mat')
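A quick shape check (optional; the expected values assume the standard ex3data1.mat from the course, with 5000 images of 20x20 pixels):
#print(raw_X.shape)  # expected (5000, 400): each image flattened to 400 pixels
#print(raw_y.shape)  # expected (5000,): labels 1..10, where 10 stands for the digit 0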
#def plot_image(image):
#    '''Display a single image.'''
#    fig, ax = plt.subplots(figsize=(1, 1))
#    ax.matshow(image.reshape((20, 20)), cmap=matplotlib.cm.binary)
#    plt.xticks(np.array([]))
#    plt.yticks(np.array([]))
#pick_one = np.random.randint(0, 5000)  # pick a random index in [0, 5000)
#plot_image(raw_X[pick_one, :])
#plt.show()
#print("this should be {}".format(raw_y[pick_one]))
#def plot_100_image(X):
#    '''Display a 10x10 grid of 100 randomly sampled images.'''
#    size = int(np.sqrt(X.shape[1]))
#
#    sample_idx = np.random.choice(np.arange(X.shape[0]), 100)
#    sample_images = X[sample_idx, :]
#
#    fig, ax_array = plt.subplots(nrows=10, ncols=10, sharey=True, sharex=True, figsize=(8, 8))
#
#    for r in range(10):
#        for c in range(10):
#            ax_array[r, c].matshow(sample_images[10 * r + c].reshape((size, size)),
#                                   cmap=matplotlib.cm.binary)
#
#    plt.xticks(np.array([]))
#    plt.yticks(np.array([]))
# Prepare the data
X = np.insert(raw_X, 0, values=np.ones(raw_X.shape[0]), axis=1)  # insert a first column of ones (bias term)
y_matrix = []
for k in range(1, 11):
    y_matrix.append((raw_y == k).astype(int))  # indicator vector: 1 where the raw label equals k
# the raw label 10 stands for the digit 0, so move its indicator vector to the front
y_matrix = [y_matrix[-1]] + y_matrix[:-1]
y = np.array(y_matrix)
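To see what the reindexing does, here is a toy run with made-up labels (illustrative values only):
#demo_y = np.array([10, 1, 2, 10])   # raw labels; 10 means the digit 0
#print((demo_y == 10).astype(int))   # [1 0 0 1] -> indicator vector for the digit 0
# After moving that vector to the front, row k of y is exactly the detector for digit k.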
#print(y.shape)  # (10, 5000): one indicator row per class
# Train a single binary classifier first
def sigmoid(z):
    '''Sigmoid activation function.'''
    return 1 / (1 + np.exp(-z))
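This is the standard logistic function:

$$g(z) = \frac{1}{1 + e^{-z}}$$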
def gradient(theta, X, y):
    '''Gradient of the (unregularized) cost with respect to theta.'''
    return (1 / len(X)) * X.T @ (sigmoid(X @ theta) - y)
def cost(theta, X, y):
    '''Cross-entropy cost function.'''
    h = sigmoid(X @ theta)
    inner = y * np.log(h) + (1 - y) * np.log(1 - h)
    return -np.sum(inner) / len(X)
    #return np.mean(-y * np.log(sigmoid(X*theta.T)) - (1-y)*np.log(1-sigmoid(X*theta.T)))
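The function implements the cross-entropy cost:

$$J(\theta) = -\frac{1}{m} \sum_{i=1}^{m} \left[ y^{(i)} \log h_\theta(x^{(i)}) + (1 - y^{(i)}) \log\left(1 - h_\theta(x^{(i)})\right) \right]$$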
def regularized_cost(theta, X, y, l=1):
    '''Cost with L2 regularization; theta_0 (the bias) is not penalized.'''
    theta_j1_to_n = theta[1:]
    regularized_term = (l / (2 * len(X))) * np.power(theta_j1_to_n, 2).sum()  # note the factor l, which the original code dropped
    return cost(theta, X, y) + regularized_term
def regularized_gradient(theta, X, y, l=1):
    '''Gradient with L2 regularization; the bias component gets no penalty.'''
    theta_j1_to_n = theta[1:]  # theta[1:], not theta[:-1]: skip the bias term
    regularized_theta = (l / len(X)) * theta_j1_to_n
    regularized_term = np.concatenate([np.array([0]), regularized_theta])  # pad a zero back in for theta_0
    return gradient(theta, X, y) + regularized_term
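The regularized versions add an L2 penalty that skips $\theta_0$, which is why both functions slice with theta[1:] and the gradient is padded with a leading zero:

$$J_{reg}(\theta) = J(\theta) + \frac{\lambda}{2m} \sum_{j=1}^{n} \theta_j^2, \qquad \frac{\partial J_{reg}}{\partial \theta_j} = \frac{\partial J}{\partial \theta_j} + \frac{\lambda}{m}\theta_j \quad (j \geq 1)$$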
def logistic_regression(X, y, l=1):
    '''Train one regularized logistic-regression classifier with scipy's TNC solver.'''
    theta = np.zeros(X.shape[1])
    res = opt.minimize(fun=regularized_cost,
                       x0=theta,
                       args=(X, y, l),
                       method='TNC',
                       jac=regularized_gradient,  # supply the analytic gradient so TNC need not approximate it numerically
                       options={'disp': True})
    final_theta = res.x
    return final_theta
def predict(x, theta):
    '''Predict binary labels by thresholding the sigmoid output at 0.5.'''
    prob = sigmoid(x @ theta)  # use the argument x, not the global X
    return (prob >= 0.5).astype(int)
t0 = logistic_regression(X, y[0])
#print(t0.shape)
#y_pred = predict(X, t0)
#
#print('Accuracy = {}'.format(np.mean(y[0] == y_pred)))
# Train the k one-vs-all models
k_theta = np.array([logistic_regression(X, y[k]) for k in range(10)])
#print(k_theta.shape)  # (10, 401): one parameter vector per class
prob_matrix = sigmoid(X @ k_theta.T)  # shape (5000, 10): one probability per example per class
np.set_printoptions(suppress=True)
y_pred = np.argmax(prob_matrix, axis=1)  # index of the maximum along axis=1, i.e. the most probable class for each row
# Because the digit-0 classifier sits in row 0 of k_theta, the argmax indices 0-9 already
# correspond to the actual digits; remap the answers the same way (label 10 -> digit 0).
y_answer = raw_y.copy()
y_answer[y_answer == 10] = 0
print(classification_report(y_answer, y_pred))
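If you also want a single overall accuracy number next to the per-class report, one line is enough:
print('Accuracy = {}'.format(np.mean(y_answer == y_pred)))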