import matplotlib.pyplot as plt
import matplotlib
import numpy as np
from scipy.io import loadmat
# Load the ex3 hand-written-digit data set (MATLAB .mat format).
filename1=r'F:\jypternotebook\吴恩达机器学习python作业代码\code\ex3-neural network\ex3data1.mat'
ex3data1=loadmat(filename1)
# X: feature matrix (later shown to be (5000, 400) — each row a flattened 20x20 image).
# y: labels, shape (5000, 1); values are 1..10 per the later np.unique output.
# NOTE(review): presumably label 10 encodes the digit 0 (ex3 convention) — confirm.
X=ex3data1.get('X')
y=ex3data1.get('y')
## 1 数据可视化
随机选取 100 个数字来进行可视化
# 1. Data visualisation: tile 100 randomly chosen digits into a 10x10 grid.
import random
# Sample 100 distinct row indices from the m training examples.
index=random.sample(list(range(X.shape[0])),100)
vis_data=X[index,:]
# Each digit is a 20x20 image, so a 10x10 grid of them is 200x200 pixels.
vis=np.zeros((200,200))
for i in range(10):
    for j in range(10):
        # Place sample (i*10 + j) into grid cell (i, j).
        vis[i*20:(i+1)*20,j*20:(j+1)*20]=vis_data[i*10+j].reshape((20,20))
plt.imshow(vis)
<matplotlib.image.AxesImage at 0x2a1260a8e88>
2向量化逻辑回归
2.1向量化代价函数
$$J(\theta)=\frac{1}{m} \sum_{i=1}^{m}\left[-y^{(i)} \log \left(h_{\theta}\left(x^{(i)}\right)\right)-\left(1-y^{(i)}\right) \log \left(1-h_{\theta}\left(x^{(i)}\right)\right)\right]$$
以上是没有正则化的代价函数,其中
$$h_{\theta}(x^{(i)})=g(\theta^{T}x^{(i)})$$
且
$$g(z)=\frac{1}{1+e^{-z}}$$
定义如下 $X$ 与 $\theta$:
$$X=\left[\begin{array}{c} -\left(x^{(1)}\right)^{T}- \\ -\left(x^{(2)}\right)^{T}- \\ \vdots \\ -\left(x^{(m)}\right)^{T}- \end{array}\right] \text { and } \theta=\left[\begin{array}{c} \theta_{0} \\ \theta_{1} \\ \vdots \\ \theta_{n} \end{array}\right]$$
def sigmod(z):
    """Logistic sigmoid g(z) = 1 / (1 + e^{-z}), applied element-wise."""
    # Works for scalars and ndarrays alike; very negative z saturates to 0.
    exp_neg_z = np.exp(-z)
    return 1.0 / (1.0 + exp_neg_z)
def cost_vector(theta, x, y):
    """Vectorised (unregularised) logistic-regression cost.

    theta : (n,) parameter vector
    x     : (m, n) design matrix
    y     : (m,) vector of 0/1 labels
    Returns the scalar mean cross-entropy J(theta).

    Fixes vs. original: the hypothesis sigmoid(x @ theta) was computed
    twice (once per log term) — it is now computed once; the unused
    locals m, n are removed. The sigmoid is inlined so the function is
    self-contained.
    """
    h = 1 / (1 + np.exp(-x.dot(theta)))  # h_theta(x) for every sample
    first = -(y * np.log(h))             # -y * log(h)
    second = (1 - y) * np.log(1 - h)     # (1-y) * log(1-h)
    return np.mean(first - second)
def cost_reg(theta, x, y, learningrate):
    """Regularised logistic-regression cost J(theta).

    theta        : (n,) parameter vector; theta[0] is the bias and is NOT penalised
    x            : (m, n) design matrix
    y            : (m,) vector of 0/1 labels
    learningrate : regularisation strength lambda

    Fixes vs. original: the sigmoid hypothesis was computed twice — now
    once; the redundant `.T` on a 1-D theta and the unused local `n` are
    removed. The sigmoid is inlined so the function is self-contained.
    """
    m = x.shape[0]
    h = 1 / (1 + np.exp(-x.dot(theta)))  # hypothesis h_theta(x)
    first = -(y * np.log(h))
    second = (1 - y) * np.log(1 - h)
    # lambda/(2m) * sum(theta_j^2) for j >= 1 (bias excluded).
    penalty = (learningrate / (2 * m)) * np.sum(theta[1:] ** 2)
    return np.sum(first - second) / m + penalty
2.2向量化梯度
对每个 $\theta_j$ 来说有,
$$\frac{\partial J}{\partial \theta_{j}}=\frac{1}{m} \sum_{i=1}^{m}\left(\left(h_{\theta}\left(x^{(i)}\right)-y^{(i)}\right) x_{j}^{(i)}\right)$$
将其写成向量化矩阵形式如下:
$$\left[\begin{array}{c} \frac{\partial J}{\partial \theta_{0}} \\ \frac{\partial J}{\partial \theta_{1}} \\ \frac{\partial J}{\partial \theta_{2}} \\ \vdots \\ \frac{\partial J}{\partial \theta_{n}} \end{array}\right]=\frac{1}{m}\left[\begin{array}{c} \sum_{i=1}^{m}\left(\left(h_{\theta}\left(x^{(i)}\right)-y^{(i)}\right) x_{0}^{(i)}\right) \\ \sum_{i=1}^{m}\left(\left(h_{\theta}\left(x^{(i)}\right)-y^{(i)}\right) x_{1}^{(i)}\right) \\ \sum_{i=1}^{m}\left(\left(h_{\theta}\left(x^{(i)}\right)-y^{(i)}\right) x_{2}^{(i)}\right) \\ \vdots \\ \sum_{i=1}^{m}\left(\left(h_{\theta}\left(x^{(i)}\right)-y^{(i)}\right) x_{n}^{(i)}\right) \end{array}\right]\\ =\frac{1}{m}\sum_{i=1}^{m}\left(\left(h_{\theta}\left(x^{(i)}\right)-y^{(i)}\right) x^{(i)}\right)\\ =\frac{1}{m}X^{T}(h_{\theta}(x)-y)$$
记 $\beta=h_{\theta}(x)-y$
def gradient_vector(theta, x, y):
    """Gradient of the unregularised logistic cost: (1/m) * X^T (h - y).

    theta : (n,) parameter vector
    x     : (m, n) design matrix
    y     : (m,) vector of 0/1 labels
    Returns an (n,) gradient vector.

    Fixes vs. original: the unused local `n` is removed; the sigmoid is
    inlined so the function is self-contained.
    """
    m = x.shape[0]
    error = 1 / (1 + np.exp(-x.dot(theta))) - y  # h_theta(x) - y
    return x.T.dot(error) / m
X.shape
(5000, 400)
y.shape
(5000, 1)
# Flatten y from (5000, 1) to (5000,) so element-wise operations against
# (m,)-shaped vectors broadcast as intended instead of producing (m, m) arrays.
y=y.flatten()
y.shape
(5000,)
初始化 $\theta$,计算初始代价函数
# Prepend a bias column of ones: X becomes (5000, 401).
X = np.insert(X, 0, values=np.ones(X.shape[0]), axis=1)# inserted a first column of all ones
theta=np.zeros(X.shape[1])
print(cost_reg(theta,X,y,1))
# NOTE(review): with theta = 0 the regularised cost should equal log(2) ≈ 0.693;
# the recorded output 160.39 suggests this cell was executed in a different
# session state — re-run to verify.
160.39425758157134
grad0=gradient_vector(theta,X,y)
# One gradient component per parameter: 400 pixels + 1 bias = 401.
grad0.size
401
## 2.3 向量化正则化逻辑回归
以下为正则化的代价函数
$$J(\theta)=\frac{1}{m} \sum_{i=1}^{m}\left[-y^{(i)} \log \left(h_{\theta}\left(x^{(i)}\right)\right)-\left(1-y^{(i)}\right) \log \left(1-h_{\theta}\left(x^{(i)}\right)\right)\right]+\frac{\lambda}{2 m} \sum_{j=1}^{n} \theta_{j}^{2}$$
其中 $\theta_0$ 是不需要正则化的
def cost_reg(theta, x, y, learningrate):
    """Regularised logistic-regression cost J(theta).

    theta        : (n,) parameter vector; theta[0] is the bias and is NOT penalised
    x            : (m, n) design matrix
    y            : (m,) vector of 0/1 labels
    learningrate : regularisation strength lambda

    BUG FIX: the penalty was written as ``(learningrate/2*m) * sum(...)``,
    which by operator precedence is (lambda/2)*m — i.e. it MULTIPLIES by m.
    The correct coefficient is lambda/(2m), as in the formula above and in
    the earlier cost_reg definition. The sigmoid hypothesis is also computed
    once instead of twice, and is inlined so the function is self-contained.
    """
    m = x.shape[0]
    h = 1 / (1 + np.exp(-x.dot(theta)))  # hypothesis h_theta(x)
    first = -(y * np.log(h))
    second = (1 - y) * np.log(1 - h)
    # lambda/(2m) * sum(theta_j^2) for j >= 1 (bias excluded).
    penalty = (learningrate / (2 * m)) * np.sum(theta[1:] ** 2)
    return np.sum(first - second) / m + penalty
cost_reg(theta,X,y,1)
160.39425758157134
梯度下降法:
$$\begin{aligned} \frac{\partial J(\theta)}{\partial \theta_{0}} &=\frac{1}{m} \sum_{i=1}^{m}\left(h_{\theta}\left(x^{(i)}\right)-y^{(i)}\right) x_{j}^{(i)} \quad \text { for } j=0 \\ \frac{\partial J(\theta)}{\partial \theta_{j}} &=\left(\frac{1}{m} \sum_{i=1}^{m}\left(h_{\theta}\left(x^{(i)}\right)-y^{(i)}\right) x_{j}^{(i)}\right)+\frac{\lambda}{m} \theta_{j} \quad \text { for } j \geq 1 \end{aligned}$$
def gradient_reg(theta, x, y, learningrate):
    """Gradient of the regularised logistic cost.

    theta        : (n,) parameter vector; theta[0] (bias) is NOT regularised
    x            : (m, n) design matrix
    y            : (m,) vector of 0/1 labels
    learningrate : regularisation strength lambda
    Returns an (n,) gradient: (1/m) X^T (h - y) + (lambda/m) * theta, with
    the regularisation term zeroed for theta[0].

    Fixes vs. original: the unused local `n` is removed; the sigmoid is
    inlined so the function is self-contained.
    """
    m = x.shape[0]
    error = 1 / (1 + np.exp(-x.dot(theta))) - y  # h_theta(x) - y
    grad = x.T.dot(error) / m
    reg = (learningrate / m) * theta  # new array; theta is not mutated
    reg[0] = 0                        # do not regularise the intercept
    return grad + reg
gradient_reg(theta,X,y,1).shape
(401,)
2.4 一对多分类
# One-vs-all setup: one classifier (one row of parameters) per distinct label.
label=np.unique(y)
label
array([ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10], dtype=uint8)
theta_label=np.zeros((label.size,X.shape[1]))
theta_label.shape# each row holds the 401 theta parameters of one class's classifier
(10, 401)
# NOTE(review): `temp` is never used again in the visible code — dead variable.
temp=theta_label[0,:]
from scipy.optimize import minimize
def one_vs_All(x, y, learningrate):
    """Train one regularised logistic classifier per class (one-vs-all).

    x            : (m, n) design matrix (bias column included)
    y            : (m,) integer labels
    learningrate : regularisation strength lambda
    Returns a (num_labels, n) matrix whose row k-1 holds the fitted
    parameters for label k.
    """
    classes = np.unique(y)
    theta_label = np.zeros((classes.size, x.shape[1]))
    for current in classes:
        # Binary target: 1 for this class, 0 for every other class.
        y_binary = (y == current).astype('int')
        fit = minimize(fun=cost_reg,
                       x0=theta_label[current - 1, :],
                       args=(x, y_binary, learningrate),
                       method='TNC',
                       jac=gradient_reg)
        theta_label[current - 1, :] = fit.x
    return theta_label
all_theat=one_vs_All(X,y,0.5)
2.5输出预测结果
def predict_all(all_theta, X):
    """Predict a 1-based class label for every row of X.

    all_theta : (num_labels, n) matrix of fitted parameters, one row per class
    X         : (m, n) design matrix (bias column included)
    Returns an (m,) array of labels in 1..num_labels.
    """
    scores = X.dot(all_theta.T)
    probabilities = 1 / (1 + np.exp(-scores))  # sigmoid, inlined
    # argmax over classes is 0-based; +1 restores the 1..K label convention.
    return np.argmax(probabilities, axis=1) + 1
predict_class=predict_all(all_theat,X)
# NOTE(review): despite the name, this is the raw COUNT of correct
# predictions (3730 of 5000), not an accuracy rate — divide by y.size
# to obtain the fraction.
accuracy=np.sum(predict_class==y)
accuracy
3730
3 神经网络
# 3. Neural network: load the pre-trained weight matrices (Theta1, Theta2).
filename2=r'F:\jypternotebook\吴恩达机器学习python作业代码\code\ex3-neural network\ex3weights.mat'
dict_weight=loadmat(filename2)
dict_weight
{'__header__': b'MATLAB 5.0 MAT-file, Platform: GLNXA64, Created on: Tue Oct 18 14:57:02 2011',
'__version__': '1.0',
'__globals__': [],
'Theta1': array([[-2.25623899e-02, -1.05624163e-08, 2.19414684e-09, ...,
-1.30529929e-05, -5.04175101e-06, 2.80464449e-09],
[-9.83811294e-02, 7.66168682e-09, -9.75873689e-09, ...,
-5.60134007e-05, 2.00940969e-07, 3.54422854e-09],
[ 1.16156052e-01, -8.77654466e-09, 8.16037764e-09, ...,
-1.20951657e-04, -2.33669661e-06, -7.50668099e-09],
...,
[-1.83220638e-01, -8.89272060e-09, -9.81968100e-09, ...,
2.35311186e-05, -3.25484493e-06, 9.02499060e-09],
[-7.02096331e-01, 3.05178374e-10, 2.56061008e-09, ...,
-8.61759744e-04, 9.43449909e-05, 3.83761998e-09],
[-3.50933229e-01, 8.85876862e-09, -6.57515140e-10, ...,
-1.80365926e-06, -8.14464807e-06, 8.79454531e-09]]),
'Theta2': array([[-0.76100352, -1.21244498, -0.10187131, -2.36850085, -1.05778129,
-2.20823629, 0.56383834, 1.21105294, 2.21030997, 0.44456156,
-1.18244872, 1.04289112, -1.60558756, 1.30419943, 1.37175046,
1.74825095, -0.23365648, -1.52014483, 1.15324176, 0.10368082,
-0.37207719, -0.61530019, -0.1256836 , -2.27193038, -0.71836208,
-1.29690315],
[-0.61785176, 0.61559207, -1.26550639, 1.85745418, -0.91853319,
-0.05502589, -0.38589806, 1.29520853, -1.56843297, -0.97026419,
-2.18334895, -2.85033578, -2.07733086, 1.63163164, 0.3490229 ,
1.82789117, -2.44174379, -0.8563034 , -0.2982564 , -2.07947873,
-1.2933238 , 0.89982032, 0.28306578, 2.31180525, -2.46444086,
1.45656548],
[-0.68934072, -1.94538151, 2.01360618, -3.12316188, -0.2361763 ,
1.38680947, 0.90982429, -1.54774416, -0.79830896, -0.65599834,
0.7353833 , -2.58593294, 0.47210839, 0.55349499, 2.51255453,
-2.4167454 , -1.63898627, 1.2027302 , -1.20245851, -1.83445959,
-1.88013027, -0.34056098, 0.23692483, -1.06137919, 1.02759232,
-0.47690832],
[-0.67832479, 0.46299226, 0.58492321, -0.1650184 , 1.93264192,
-0.22965765, -1.84731492, 0.49011768, 1.07146054, -3.31905643,
1.54113507, 0.37371947, -0.86484681, -2.58273522, 0.97062447,
-0.51021867, -0.68427897, -1.64713607, 0.21153145, -0.27422442,
1.72599755, 1.32418658, -2.63984479, -0.08055871, -2.03510803,
-1.46123776],
[-0.59664339, -2.04481799, 2.05698407, 1.95100909, 0.17637699,
-2.16141218, -0.40394736, 1.80157532, -1.56278739, -0.25253004,
0.23586497, 0.71656699, 1.07689092, -0.35457279, -1.67743058,
-0.12939255, -0.67488849, 1.14066535, 1.32431237, 3.21158484,
-2.15888898, -2.60164082, -3.2226466 , -1.89612906, -0.87488068,
2.51038628],
[-0.87794907, 0.4344112 , -0.93161049, 0.18390778, -0.36078216,
0.61958137, 0.38624948, -2.65150343, 2.29710773, -2.08818098,
-1.86382323, 1.06057836, 0.77562146, 2.1346861 , -1.14973702,
-0.52081426, 0.99743429, -1.48309353, -2.3139424 , 0.29517333,
-0.38704879, -2.20607697, 0.30702191, -1.17646114, -1.63462966,
-0.82467661],
[-0.52746527, 1.21564288, -1.50095981, -2.03195359, -1.52366734,
-2.43732079, -2.37570311, -1.39987277, -0.88735315, -0.63278873,
1.50450176, -1.580763 , 0.58599217, -0.77540416, 0.94257331,
2.10919653, 0.54479132, 0.43773612, -1.28024228, -0.04360994,
1.4774997 , -1.13276949, -0.72846904, 0.04734716, 1.6574566 ,
1.68540944],
[-0.7490154 , -0.72249056, -3.15228173, 0.36577778, 0.19811362,
-0.73059946, 1.65263918, -2.300357 , -1.87468162, 0.98095387,
-1.58825159, 1.35434142, 2.17895331, -1.99239762, -2.00371362,
-0.388613 , -2.33992976, -2.91719062, 0.99398645, -2.70476768,
-1.27139772, 1.86091461, -1.20519404, -0.38014194, 0.7087181 ,
-2.11014003],
[-0.6665468 , 0.53601845, 1.30307573, -1.03372714, -4.03084753,
0.58173469, -2.65717902, 0.80379994, -1.09241928, 2.49910058,
0.362008 , 0.66195337, -0.92160534, -0.83123666, -2.00200952,
-2.94897501, 0.64564202, -1.10114694, 0.74510309, 0.58506717,
-1.99545251, 0.62591105, 1.80596103, -0.22309315, -1.40442136,
-2.1319153 ],
[-0.46089119, -1.43944954, -1.21809509, 0.71093011, 0.45216919,
-0.35953381, 0.62284954, -0.67005297, -0.7069138 , 0.06311351,
-1.23199074, -1.74645233, -2.71960897, -2.21437178, -1.69307505,
-0.90927394, 0.87852311, 1.18664814, -1.87041262, 0.39796295,
1.72113872, -1.36934055, 0.8580668 , -0.24779579, 1.28009118,
-1.32752042]])}
theta1=dict_weight.get('Theta1')
theta2=dict_weight.get('Theta2')
# Theta1: (25, 401) — maps the 400-pixel input plus bias to 25 hidden units.
theta1.shape
(25, 401)
# Theta2: (10, 26) — maps the 25 hidden units plus bias to the 10 classes.
theta2.shape
(10, 26)
# Feed-forward pass through the pre-trained two-layer network.
# Hidden activations: (5000, 401) x (401, 25) -> (5000, 25).
a=sigmod(X.dot(theta1.T))
# Prepend the bias unit to the hidden activations -> (5000, 26).
a1=np.insert(a, 0, values=np.ones(a.shape[0]), axis=1)
# Output layer: (5000, 26) x (26, 10) -> one score per class per sample.
h_allclass=sigmod(a1.dot(theta2.T))
# argmax is 0-based; +1 maps back to the 1..10 label convention.
result_class_1=np.argmax(h_allclass,axis=1)+1
# Fraction of correctly classified samples (this one IS a rate).
accuracy_nn=np.sum(result_class_1==y)/y.size
accuracy_nn
0.9752