这个项目包含了吴恩达机器学习ex3的python实现,主要知识点为多类别逻辑回归、神经网络
1.多分类
这个部分需要你实现手写数字(0到9)的识别。你需要扩展之前的逻辑回归,并将其应用于一对多的分类。
1.1读取数据
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib
from scipy.io import loadmat
from sklearn.metrics import classification_report
# Load the ex3 training set from a MATLAB .mat file.
# NOTE(review): the path is an absolute, machine-specific Windows path; adjust it before running.
data=loadmat(r'C:\Users\xxx\Desktop\机器学习\machine-learning-ex3\machine-learning-ex3\ex3\ex3data1.mat')
# Notebook echo of the loaded dict (keys 'X' and 'y' hold the features and labels).
data
{'__header__': b'MATLAB 5.0 MAT-file, Platform: GLNXA64, Created on: Sun Oct 16 13:09:09 2011',
'__version__': '1.0',
'__globals__': [],
'X': array([[0., 0., 0., ..., 0., 0., 0.],
[0., 0., 0., ..., 0., 0., 0.],
[0., 0., 0., ..., 0., 0., 0.],
...,
[0., 0., 0., ..., 0., 0., 0.],
[0., 0., 0., ..., 0., 0., 0.],
[0., 0., 0., ..., 0., 0., 0.]]),
'y': array([[10],
[10],
[10],
...,
[ 9],
[ 9],
[ 9]], dtype=uint8)}
# Notebook echo: shapes of the feature matrix and of the label column.
data['X'].shape,data['y'].shape
# Print the raw label column.
print(data['y'])
[[10]
[10]
[10]
...
[ 9]
[ 9]
[ 9]]
1.2数据可视化
# Pick 100 distinct example rows to display.
# np.random.choice samples WITH replacement by default, which can put the same
# image into the grid more than once, so replace=False is passed explicitly.
# (This raises ValueError if the data set has fewer than 100 rows.)
sample_idx = np.random.choice(data['X'].shape[0], 100, replace=False)
print(sample_idx)
# Rows of X selected by the sampled indices; each row is one flattened image.
sample_images = data['X'][sample_idx, :]
sample_images
[4104 3905 3710 3160 2934 2928 3564 2093 1751 2326 2314 3642 3020 2981
791 4597 158 2346 929 910 448 1866 2141 2041 501 4387 694 2070
3656 3075 1808 592 4226 1772 3231 2608 3786 1427 1765 4823 919 4628
3328 4337 620 1171 3258 2868 776 2603 4803 299 3174 1503 4149 620
3364 4578 2672 3010 1312 2127 2826 3632 4861 4683 1697 115 4043 869
4500 2089 4244 1806 1957 4211 1244 4427 3030 437 3805 2609 314 4338
1456 915 851 206 2436 992 2686 1482 881 3934 2274 4370 1702 572
2505 2799]
array([[0., 0., 0., ..., 0., 0., 0.],
[0., 0., 0., ..., 0., 0., 0.],
[0., 0., 0., ..., 0., 0., 0.],
...,
[0., 0., 0., ..., 0., 0., 0.],
[0., 0., 0., ..., 0., 0., 0.],
[0., 0., 0., ..., 0., 0., 0.]])
# Draw the 100 sampled digits on a 10x10 grid of subplots that share both axes.
fig, ax_array = plt.subplots(nrows=10, ncols=10, sharey=True, sharex=True, figsize=(12, 12))
for r in range(10):
    for c in range(10):
        # matshow renders a 2-D array as an image; cmap selects the colour map.
        # Each sampled row is reshaped to 20x20 and transposed before drawing.
        ax_array[r, c].matshow(
            np.array(sample_images[10 * r + c].reshape((20, 20))).T,
            cmap=matplotlib.cm.binary,
        )
        plt.xticks(np.array([]))  # remove the x tick marks
        plt.yticks(np.array([]))  # remove the y tick marks
1.3将逻辑回归向量化
定义sigmoid函数
def sigmoid(z):
    """Return the logistic function 1 / (1 + exp(-z)).

    ``z`` may be a scalar or a NumPy array/matrix; the function is applied
    element-wise through ``np.exp``.
    """
    denominator = 1 + np.exp(-z)
    return 1 / denominator
定义cost函数
def cost(theta, X, y, learningRate):
    """Regularized logistic-regression cost.

    Args:
        theta: parameter vector with one entry per column of ``X``; any shape
            that flattens to that length is accepted.
        X: design matrix, one row per sample. ``theta[0]`` is treated as the
            intercept, so the first column is expected to be the bias column.
        y: 0/1 targets, one per row of ``X``. Both a 1-D vector and a column
            vector are accepted; it is flattened internally so a 1-D ``y``
            can no longer broadcast into an m-by-m matrix.
        learningRate: despite the name, this is the regularization strength;
            it multiplies the sum of squares of ``theta[1:]``.

    Returns:
        The scalar cost: mean cross-entropy plus the L2 penalty (the intercept
        ``theta[0]`` is not penalized).
    """
    theta = np.asarray(theta, dtype=float).ravel()
    X = np.asarray(X, dtype=float)
    y = np.asarray(y, dtype=float).ravel()
    m = X.shape[0]

    z = X @ theta
    # log(sigmoid(z)) = -log(1 + exp(-z)) and log(1 - sigmoid(z)) = -log(1 + exp(z)).
    # Evaluating them with logaddexp keeps them finite when the sigmoid
    # saturates, where log(0) would otherwise turn the cost into nan/inf.
    log_h = -np.logaddexp(0, -z)
    log_one_minus_h = -np.logaddexp(0, z)

    data_term = -np.sum(y * log_h + (1 - y) * log_one_minus_h) / m
    reg = (learningRate / (2 * m)) * np.sum(theta[1:] ** 2)
    return data_term + reg
向量化梯度
def gradient(theta, X, y, learningRate):
    """Gradient of the regularized logistic-regression cost.

    Args:
        theta: parameter vector with one entry per column of ``X``.
        X: design matrix, one row per sample; ``theta[0]`` is treated as the
            intercept.
        y: 0/1 targets, one per row of ``X``; a 1-D vector or a column vector.
            It is flattened internally so a 1-D ``y`` cannot broadcast into
            an m-by-m error matrix.
        learningRate: despite the name, the regularization strength.

    Returns:
        1-D ``ndarray`` with the same length as ``theta``.
    """
    theta = np.asarray(theta, dtype=float).ravel()
    X = np.asarray(X, dtype=float)
    y = np.asarray(y, dtype=float).ravel()
    m = X.shape[0]

    z = X @ theta
    # sigmoid(z) written as exp(-log(1 + exp(-z))) so that large |z| does not
    # overflow inside exp.
    h = np.exp(-np.logaddexp(0, -z))
    error = h - y

    grad = (X.T @ error) / m
    # theta[0] (the intercept) is not regularized.
    grad[1:] += (learningRate / m) * theta[1:]
    return grad
1.4一对多分类器
定义训练模型
from scipy.optimize import minimize
def one_vs_all(X, y, num_labels, learning_rate):
    """Train one regularized logistic-regression classifier per label.

    Args:
        X: feature matrix WITHOUT the intercept column (it is added here).
        y: labels, one per row of ``X``; classifier ``k`` (1-based) is trained
            to separate ``y == k`` from everything else.
        num_labels: number of classifiers to train (labels ``1..num_labels``).
        learning_rate: forwarded unchanged to ``cost`` and ``gradient`` as
            their fourth argument.

    Returns:
        Array of shape ``(num_labels, n_features + 1)``; row ``k - 1`` holds
        the fitted parameters for label ``k``.
    """
    n_samples = X.shape[0]
    n_features = X.shape[1]

    # Prepend the intercept column of ones.
    X_aug = np.insert(X, 0, values=np.ones(n_samples), axis=1)

    # n_features + 1 columns because every classifier also has a theta0.
    all_theta = np.zeros((num_labels, n_features + 1))

    for label in range(1, num_labels + 1):
        initial_theta = np.zeros(n_features + 1)
        # Binary target for this classifier: 1 where y == label, else 0.
        is_label = np.where(np.asarray(y) == label, 1, 0)
        y_i = np.reshape(is_label, (n_samples, 1))
        result = minimize(
            fun=cost,
            x0=initial_theta,
            args=(X_aug, y_i, learning_rate),
            method='TNC',
            jac=gradient,
        )
        all_theta[label - 1, :] = result.x

    return all_theta
数据预处理,主要是处理y
# Dry run, for a single class, of the preprocessing that one_vs_all performs.
rows = data['X'].shape[0]
params = data['X'].shape[1]
all_theta = np.zeros((10, params + 1))
# Prepend the intercept column of ones.
X = np.insert(data['X'], 0, values=np.ones(rows), axis=1)
theta = np.zeros(params + 1)
# The labels in data['y'] run from 1 to 10 (see np.unique below), so comparing
# against 0 never matches and gives an all-zero target. Compare against 10
# instead -- presumably the label this data set uses for the digit 0.
y_0 = (data['y'].ravel() == 10).astype(int)
print(y_0.shape)
# Reshape to a column vector, the layout the training code passes to cost/gradient.
y_0 = np.reshape(y_0, (rows, 1))
X.shape, y_0.shape, theta.shape, all_theta.shape,
(5000,)
((5000, 401), (5000, 1), (401,), (10, 401))
# Notebook echo: the distinct label values present in y.
np.unique(data['y'])
array([ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10], dtype=uint8)
利用模型训练theta
# Train 10 one-vs-all classifiers; the last argument (1) is one_vs_all's learning_rate.
all_theta=one_vs_all(data['X'],data['y'],10,1)
# Notebook echo of the fitted parameter matrix (one row per label).
all_theta
array([[-2.38318550e+00, 0.00000000e+00, 0.00000000e+00, ...,
1.30405699e-03, -6.70953714e-10, 0.00000000e+00],
[-3.18325496e+00, 0.00000000e+00, 0.00000000e+00, ...,
4.45659505e-03, -5.08109220e-04, 0.00000000e+00],
[-4.79627526e+00, 0.00000000e+00, 0.00000000e+00, ...,
-2.87789088e-05, -2.48011286e-07, 0.00000000e+00],
...,
[-7.98901828e+00, 0.00000000e+00, 0.00000000e+00, ...,
-8.94589615e-05, 7.21263539e-06, 0.00000000e+00],
[-4.57343099e+00, 0.00000000e+00, 0.00000000e+00, ...,
-1.33555921e-03, 9.98155754e-05, 0.00000000e+00],
[-5.40070238e+00, 0.00000000e+00, 0.00000000e+00, ...,
-1.16422488e-04, 7.87937989e-06, 0.00000000e+00]])
定义预测函数
def predict_all(X, all_theta):
    """Predict a 1-based label for every row of ``X``.

    Args:
        X: feature matrix WITHOUT the intercept column (it is added here).
        all_theta: one row of fitted parameters per label, intercept first.

    Returns:
        ``np.matrix`` column with one predicted label per row of ``X``.
        The shape of the argmax result is printed as a side effect.
    """
    n_samples = X.shape[0]

    # Add the intercept column, then switch to np.matrix so that `*` below is
    # a matrix product.
    X_aug = np.matrix(np.insert(X, 0, values=np.ones(n_samples), axis=1))
    theta_mat = np.matrix(all_theta)

    # One column of scores per classifier.
    scores = sigmoid(X_aug * theta_mat.T)
    best = np.argmax(scores, axis=1)
    print(best.shape)

    # argmax is 0-based while the labels start at 1.
    return best + 1
(5000, 1)
预测结果
y_pred = predict_all(data['X'], all_theta)
# predict_all returns an np.matrix column and data['y'] is a column array.
# Both are flattened to plain 1-D ndarrays before scoring, because
# classification_report works on 1-D label arrays.
# NOTE(review): recent scikit-learn releases are understood to reject
# np.matrix input outright -- confirm against the installed version.
print(classification_report(np.asarray(data['y']).ravel(), np.asarray(y_pred).ravel()))
precision recall f1-score support
1 0.95 0.99 0.97 500
2 0.95 0.92 0.93 500
3 0.95 0.91 0.93 500
4 0.95 0.95 0.95 500
5 0.92 0.92 0.92 500
6 0.97 0.98 0.97 500
7 0.95 0.95 0.95 500
8 0.93 0.92 0.92 500
9 0.92 0.92 0.92 500
10 0.97 0.99 0.98 500
accuracy 0.94 5000
macro avg 0.94 0.94 0.94 5000
weighted avg 0.94 0.94 0.94 5000
support 表示每个类别在真实标签(y_true)中出现的样本数
2.神经网络
2.1读取数据和参数
# Load the network weights from a MATLAB .mat file.
# NOTE(review): the path is an absolute, machine-specific Windows path; adjust it before running.
weight=loadmat(r'C:\Users\xxx\Desktop\机器学习\machine-learning-ex3\machine-learning-ex3\ex3\ex3weights.mat')
# Theta1 feeds the hidden layer, Theta2 feeds the output layer (see the feed-forward code below).
theta1,theta2 =weight['Theta1'],weight['Theta2']
# Notebook echo of the two weight-matrix shapes.
theta1.shape,theta2.shape
((25, 401), (10, 26))
# Build the network input by prepending the bias column of ones to the raw features.
# The row count is taken from data['X'] itself rather than from the variable X
# created in section 1, so this cell does not depend on that earlier cell.
# np.matrix is kept because the feed-forward code below uses `*` as a matrix product.
X2 = np.matrix(np.insert(data['X'], 0, values=np.ones(data['X'].shape[0]), axis=1))
y2 = np.matrix(data['y'])
X2.shape, y2.shape
((5000, 401), (5000, 1))
2.2 前馈神经网络和预测
# Input-layer activations: X2, which already contains the bias column.
a1=X2
# Hidden-layer pre-activations; `*` is a matrix product because a1 is an np.matrix.
z2=a1*theta1.T
z2.shape
(5000, 25)
# Hidden-layer activations.
a2=sigmoid(z2)
a2.shape
(5000, 25)
# Prepend the bias unit (a column of ones) to the hidden-layer activations.
a2=np.insert(a2,0,values=np.ones(a2.shape[0]),axis=1)
# Output-layer pre-activations.
z3=a2*theta2.T
z3.shape
(5000, 10)
# Output-layer activations: one row per sample, one column per class.
a3=sigmoid(z3)
a3
matrix([[1.12661530e-04, 1.74127856e-03, 2.52696959e-03, ...,
4.01468105e-04, 6.48072305e-03, 9.95734012e-01],
[4.79026796e-04, 2.41495958e-03, 3.44755685e-03, ...,
2.39107046e-03, 1.97025086e-03, 9.95696931e-01],
[8.85702310e-05, 3.24266731e-03, 2.55419797e-02, ...,
6.22892325e-02, 5.49803551e-03, 9.28008397e-01],
...,
[5.17641791e-02, 3.81715020e-03, 2.96297510e-02, ...,
2.15667361e-03, 6.49826950e-01, 2.42384687e-05],
[8.30631310e-04, 6.22003774e-04, 3.14518512e-04, ...,
1.19366192e-02, 9.71410499e-01, 2.06173648e-04],
[4.81465717e-05, 4.58821829e-04, 2.15146201e-05, ...,
5.73434571e-03, 6.96288990e-01, 8.18576980e-02]])
# Predicted label = column index of the largest output activation, plus 1
# because argmax is 0-based while the labels start at 1.
y_pred2=np.argmax(a3,axis=1)+1
y_pred2.shape
(5000, 1)
# Score the neural network, so the predictions must be y_pred2 (the network's
# output), not y_pred (the logistic-regression predictions from section 1).
# NOTE: the report shown below in this document was produced with y_pred and is
# therefore identical to the one in section 1; re-run to get the network's scores.
# y2 and y_pred2 are np.matrix columns, so both are flattened to 1-D ndarrays first.
print(classification_report(np.asarray(y2).ravel(), np.asarray(y_pred2).ravel()))
precision recall f1-score support
1 0.95 0.99 0.97 500
2 0.95 0.92 0.93 500
3 0.95 0.91 0.93 500
4 0.95 0.95 0.95 500
5 0.92 0.92 0.92 500
6 0.97 0.98 0.97 500
7 0.95 0.95 0.95 500
8 0.93 0.92 0.92 500
9 0.92 0.92 0.92 500
10 0.97 0.99 0.98 500
accuracy 0.94 5000
macro avg 0.94 0.94 0.94 5000
weighted avg 0.94 0.94 0.94 5000
总结
- np.random.choice(arg1,arg2)函数从arg1中随机取出arg2个元素,返回一个一维数组;若arg2未指明,则只返回单个元素(标量)。注意默认是有放回抽样(replace=True),结果中可能出现重复元素,如需不重复应传入 replace=False;
- plt.subplots函数中sharex, sharey:设置为 True 或者 ‘all’ 时,所有子图共享 x 轴或者 y 轴,设置为 False or ‘none’ 时,所有子图的 x,y 轴均为独立;
- minimize中参数method:指定求解算法。选择 'TNC' 时使用截断牛顿法(Truncated Newton),与fmin_tnc()类似,可用于带边界约束的多元函数最小化;梯度信息通过 jac 参数传入;
- 宏平均 macro avg: 对每个类别的精准、召回和F1 加和求平均。
- 微平均 micro avg: 不区分样本类别,计算整体的精准、召回和F1
- 加权平均 weighted avg:是对宏平均的一种改进,考虑了每个类别样本数量在总样本中占比