import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.io import loadmat
# Load the exercise dataset from a MATLAB .mat file.
# Assumed to contain keys 'X' (feature matrix) and 'y' (label column) — both are used below.
data = loadmat('ex3data1.mat')
# Display the loaded dict (notebook-style expression statement; no effect when run as a script).
data
# Inspect the dimensions: X is (num_examples, num_features), y is (num_examples, 1).
data['X'].shape, data['y'].shape
defsigmoid(z):return1/(1+ np.exp(-z))
def cost(theta, X, y, learningRate):
    """Regularized logistic-regression cost.

    Parameters
    ----------
    theta : array-like, shape (n+1,) — parameter vector (bias first).
    X : array-like, shape (m, n+1) — design matrix with intercept column.
    y : array-like, shape (m, 1) — binary labels (0/1).
    learningRate : float — regularization strength (lambda).

    Returns the scalar cross-entropy cost plus an L2 penalty on every
    parameter except the intercept (theta column 0).
    """
    # NOTE: np.matrix is deprecated in modern NumPy; kept here to preserve
    # the original `*` (matrix-multiply) semantics used throughout this file.
    theta = np.matrix(theta)
    X = np.matrix(X)
    y = np.matrix(y)
    first = np.multiply(-y, np.log(sigmoid(X * theta.T)))
    second = np.multiply((1 - y), np.log(1 - sigmoid(X * theta.T)))
    # Regularization term: skip theta[0] (the intercept is not penalized).
    reg = (learningRate / (2 * len(X))) * np.sum(np.power(theta[:, 1:theta.shape[1]], 2))
    return np.sum(first - second) / len(X) + reg
# Original (reference) implementation: gradient computed with an explicit for-loop.
def gradient_with_loop(theta, X, y, learningRate):
    """Regularized logistic-regression gradient, one parameter at a time.

    Same contract as `gradient` below; kept as the readable reference for
    the vectorized version. Returns a 1-D ndarray of shape (n+1,).
    """
    theta = np.matrix(theta)
    X = np.matrix(X)
    y = np.matrix(y)
    parameters = int(theta.ravel().shape[1])
    grad = np.zeros(parameters)

    # Prediction error for every training example, shape (m, 1).
    error = sigmoid(X * theta.T) - y

    for i in range(parameters):
        term = np.multiply(error, X[:, i])

        if i == 0:
            # Intercept gradient is not regularized.
            grad[i] = np.sum(term) / len(X)
        else:
            grad[i] = (np.sum(term) / len(X)) + ((learningRate / len(X)) * theta[:, i])

    return grad
# Vectorized gradient function.
def gradient(theta, X, y, learningRate):
    """Regularized logistic-regression gradient, fully vectorized.

    Parameters match `cost`; returns a flat ndarray of shape (n+1,),
    which is the format scipy.optimize.minimize expects from `jac`.
    """
    theta = np.matrix(theta)
    X = np.matrix(X)
    y = np.matrix(y)
    parameters = int(theta.ravel().shape[1])
    error = sigmoid(X * theta.T) - y

    grad = ((X.T * error) / len(X)).T + ((learningRate / len(X)) * theta)

    # Intercept gradient is not regularized: recompute column 0 without the penalty.
    grad[0, 0] = np.sum(np.multiply(error, X[:, 0])) / len(X)

    return np.array(grad).ravel()
# Now it is time to build the classifier. There are 10 possible classes, and since
# logistic regression can only discriminate between 2 classes at a time, we need a
# multi-class strategy: one-vs-all. With k distinct labels we train k classifiers,
# each deciding between "class i" and "not class i". The training is wrapped in one
# function that computes the final weights for all 10 classifiers and returns them
# as a k x (n + 1) array, where n is the number of features.
from scipy.optimize import minimize


def one_vs_all(X, y, num_labels, learning_rate):
    """Train `num_labels` regularized one-vs-all logistic classifiers.

    Parameters
    ----------
    X : ndarray, shape (m, n) — feature matrix WITHOUT intercept column.
    y : ndarray, shape (m, 1) — integer labels in 1..num_labels.
    num_labels : int — number of classes k.
    learning_rate : float — regularization strength passed to cost/gradient.

    Returns
    -------
    ndarray, shape (num_labels, n + 1) — one fitted theta row per class.
    """
    rows = X.shape[0]
    params = X.shape[1]

    # k x (n + 1) array for the parameters of each of the k classifiers.
    all_theta = np.zeros((num_labels, params + 1))

    # Insert a column of ones at the beginning for the intercept term.
    X = np.insert(X, 0, values=np.ones(rows), axis=1)

    # Labels are 1-indexed instead of 0-indexed.
    for i in range(1, num_labels + 1):
        theta = np.zeros(params + 1)
        # Binary target for classifier i: 1 where the label equals i, else 0.
        y_i = np.array([1 if label == i else 0 for label in y])
        y_i = np.reshape(y_i, (rows, 1))

        # Minimize the objective function for this classifier.
        fmin = minimize(fun=cost, x0=theta, args=(X, y_i, learning_rate),
                        method='TNC', jac=gradient)
        all_theta[i - 1, :] = fmin.x

    return all_theta
# Final step: use the trained classifiers to predict a label for each image.
# For every example we compute the probability of each of the k classes
# (vectorized, of course) and output the class with the highest probability.
def predict_all(X, all_theta):
    """Predict a 1-indexed class label for every row of X.

    Parameters
    ----------
    X : ndarray, shape (m, n) — feature matrix WITHOUT intercept column.
    all_theta : ndarray, shape (k, n + 1) — weights from `one_vs_all`.

    Returns
    -------
    matrix, shape (m, 1) — predicted label (1..k) per example.
    """
    rows = X.shape[0]
    params = X.shape[1]
    num_labels = all_theta.shape[0]

    # Same as before: insert a ones column so shapes match the trained thetas.
    X = np.insert(X, 0, values=np.ones(rows), axis=1)

    # Convert to matrices so `*` is matrix multiplication (file-wide convention).
    X = np.matrix(X)
    all_theta = np.matrix(all_theta)

    # Class probability for each class on each training instance, shape (m, k).
    h = sigmoid(X * all_theta.T)

    # Index of the maximum probability per row.
    h_argmax = np.argmax(h, axis=1)

    # The array is zero-indexed, so add one to get the true 1-indexed label.
    h_argmax = h_argmax + 1

    return h_argmax
# Now we can use predict_all to generate class predictions for every instance
# and see how well the classifier performs.
# BUGFIX: the original paste used `all_theta` without ever training it — the
# one_vs_all call was missing, which raised NameError at this point.
all_theta = one_vs_all(data['X'], data['y'], 10, 1)
y_pred = predict_all(data['X'], all_theta)
correct = [1 if a == b else 0 for (a, b) in zip(y_pred, data['y'])]
accuracy = sum(map(int, correct)) / float(len(correct))
print('accuracy = {0}%'.format(accuracy * 100))