python实现吴恩达机器学习练习3(多元分类器和神经网络)

Programming Exercise 3:

Multi-class Classification and Neural Networks

吴恩达机器学习教程练习3,练习数据是5000个手写数字(0-9)图片,每个图片分辨率为20*20像素,本次练习有两个任务:(1)用课程数据建立一个基于逻辑回归的多元分类器模型,对应0-9数字。(2)用课程给的神经网络参数进行前向传播运算。

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import scipy.io as io
import scipy.misc
import scipy.optimize as opt

1 Multi-class Classification

1.1 dataset

# Load the exercise data set from a .mat file (the path is specific to the author's machine).
data = io.loadmat('D:/python/practise/sample/machine-learning-ex3/data/ex3data1.mat')
X, y = data['X'], data['y']
X = np.insert(X, 0, 1, axis = 1) # prepend the bias column x0 = 1
print('X shape : {}'.format(X.shape))
print('y shape : {}'.format(y.shape))
X shape : (5000, 401)
y shape : (5000, 1)
np.unique(y)
array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10], dtype=uint8)

1.2 visualizing the data

# Show a single digit first
def show_1_number(num):
    """Display one training sample as a 20x20 image.

    Reads row ``num`` of the module-level design matrix ``X``, drops the
    leading bias entry, and draws the remaining 400 values on a new
    3x3-inch figure.

    Args:
        num: Row index into ``X``.
    """
    # The 400 values are reshaped to 20x20 and transposed before display,
    # exactly as the original code did.
    pixels = X[num, 1:].reshape(20, 20).T
    plt.figure(figsize=(3, 3))
    # scipy.misc.toimage has been removed from SciPy, so the array is handed
    # to imshow directly; imshow rescales the value range itself.
    plt.imshow(pixels)
show_1_number(1251)

在这里插入图片描述

参考文章:https://blog.csdn.net/Cowry5/article/details/80367832

# Adapted directly from Cowry5's implementation.
def plot_100_image(X):
    """Draw 100 randomly chosen rows of ``X`` as a 10x10 grid of images.

    Args:
        X: 2-D array whose rows each hold 400 pixel values (no bias column).
    """
    # np.random.choice samples with replacement by default, so a row may repeat.
    chosen = np.random.choice(np.arange(X.shape[0]), 100)
    digits = X[chosen, :]

    fig, ax_array = plt.subplots(nrows=10, ncols=10, sharey=True, sharex=True, figsize=(8, 8))

    # ax_array.flat walks the grid row by row, matching index 10 * row + column.
    for axis, digit in zip(ax_array.flat, digits):
        axis.matshow(digit.reshape((20, 20)).T, cmap='gray_r')
    plt.xticks([])
    plt.yticks([])
    plt.show()
plot_100_image(X[:, 1:])

在这里插入图片描述

1.3 Vectorizing Logistic Regression

1.3.3 vectorizing regularized logistic regression

sigmoid函数的两种方法

def sigmoid(z):
    """Return the logistic function 1 / (1 + exp(-z)), element-wise for arrays."""
    return 1 / (1 + np.exp(-z))
from scipy.special import expit
def sigmoid_2(z):
    """Alternative sigmoid that simply delegates to ``scipy.special.expit``."""
    return expit(z)

逻辑回归(带正则化)的代价函数 cost function: $$J(\theta)=-\frac{1}{m}\sum_{i=1}^{m}\left[y^{(i)}\ln h_{\theta}(x^{(i)})+(1-y^{(i)})\ln\left(1-h_{\theta}(x^{(i)})\right)\right]+\frac{\lambda}{2m}\sum_{j=1}^{n}\theta_j^2$$

def J_function(theta, X, y):
    """Return the mean (unregularized) logistic-regression cost.

    Computes the average of
    ``-y * ln(h) - (1 - y) * ln(1 - h)`` with ``h = sigmoid(X . theta)``.

    The two log terms are evaluated through ``np.logaddexp`` using the
    identities ``-ln(sigmoid(z)) = ln(1 + e^-z)`` and
    ``-ln(1 - sigmoid(z)) = ln(1 + e^z)``. This is mathematically the same
    cost, but it stays finite when the sigmoid saturates, where the direct
    form evaluates ``log(0)`` and yields inf/NaN.

    Args:
        theta: Parameter vector with one entry per column of ``X``.
        X: Design matrix, one sample per row.
        y: 0/1 labels, one per row of ``X``.

    Returns:
        The scalar mean cost over all samples.
    """
    z = X.dot(theta.T)  # .T is a no-op for a 1-D theta; kept for 2-D row vectors
    cost = y * np.logaddexp(0, -z) + (1 - y) * np.logaddexp(0, z)
    return cost.mean()
def J_function_reg(theta, X, y, c=1):
    """Return the L2-regularized logistic-regression cost.

    Adds ``c / (2m) * sum(theta_j ** 2)`` for ``j >= 1`` to ``J_function``,
    where ``m = len(X)``. The bias term ``theta[0]`` is not penalized.

    Args:
        theta: Parameter vector; ``theta[0]`` is the bias term.
        X: Design matrix, one sample per row.
        y: 0/1 labels, one per row of ``X``.
        c: Regularization strength (lambda).

    Returns:
        The scalar regularized cost.
    """
    penalized = theta[1:]
    # Parenthesized so the factor is c / (2m). The previous `c/2*len(X)`
    # evaluated to (c / 2) * m, inflating the penalty by m**2 and making the
    # cost disagree with gradient_reg.
    reg = (c / (2 * len(X))) * penalized.dot(penalized.T)
    return J_function(theta, X, y) + reg

改变梯度下降公式 Gradient descent(因为 $\theta_0$ 不需要正则化,所以更新规则分成两部分): $$\text{repeat}\ \{\qquad \theta_0 := \theta_0-\alpha\frac{1}{m}\sum_{i=1}^{m}\left[h_{\theta}(x^{(i)})-y^{(i)}\right]x_0^{(i)}$$ $$\theta_j := \theta_j-\alpha\left\{\frac{1}{m}\sum_{i=1}^{m}\left[h_{\theta}(x^{(i)})-y^{(i)}\right]x_j^{(i)}+\frac{\lambda}{m}\theta_j\right\}\quad (j=1,2,\dots,n)\qquad \}$$
gradient项变为: $$\frac{\partial}{\partial\theta_j}J(\theta)=\frac{1}{m}\sum_{i=1}^{m}\left(h_{\theta}(x^{(i)})-y^{(i)}\right)x_j^{(i)}+\frac{\lambda}{m}\theta_j \qquad (j = 1,2,3,\dots,n)$$

def gradient(theta, X, y):
    """Return the gradient of the unregularized logistic cost.

    Args:
        theta: Parameter vector with one entry per column of ``X``.
        X: Design matrix, one sample per row.
        y: 0/1 labels, one per row of ``X``.

    Returns:
        ``X^T (sigmoid(X . theta) - y) / m`` with ``m = len(X)``; one entry
        per column of ``X``.
    """
    residual = sigmoid(X.dot(theta.T)) - y
    sample_count = len(X)
    return X.T.dot(residual) / sample_count
def gradient_reg(theta, X, y, c=1):
    """Return the gradient of the L2-regularized logistic cost.

    Adds ``(c / m) * theta`` to ``gradient(theta, X, y)``, with the entry
    for the bias term ``theta[0]`` zeroed so it is not regularized.

    Args:
        theta: Parameter vector; ``theta[0]`` is the bias term.
        X: Design matrix, one sample per row.
        y: 0/1 labels, one per row of ``X``.
        c: Regularization strength (lambda).
    """
    # The product is a new array, so zeroing its first entry leaves theta untouched.
    penalty = theta * (c / len(X))
    penalty[0] = 0
    return gradient(theta, X, y) + penalty

1.4 One-vs-all Classification

theta = np.zeros(401)  # initial parameter vector: 401 zeros, matching the 401 columns of X
lambda_01 = 0.8  # regularization strength used for the first set of classifiers

逻辑回归多元分类的核心:根据每一个类别分别创建分类器,即分别进行二元逻辑回归运算,(单独把每一类挑出来做正样本,其余九类全做负样本训练多个回归模型,再根据训练出来的十组参数计算每一种分类的概率,挑选概率最大者作为预测结果)

def tnc_resolver(J_function_reg, theta, gradient_reg, X, y, lambda_n):
    """Minimize a cost function with ``scipy.optimize.fmin_tnc``.

    Args:
        J_function_reg: Cost callable invoked as ``f(theta, X, y, lambda_n)``.
        theta: Starting point for the optimizer.
        gradient_reg: Gradient callable with the same signature as the cost.
        X: Design matrix forwarded to both callables.
        y: Labels forwarded to both callables.
        lambda_n: Regularization strength forwarded to both callables.

    Returns:
        The value returned by ``opt.fmin_tnc``; callers in this file read the
        optimized parameters from index 0.
    """
    extra_args = (X, y, lambda_n)
    return opt.fmin_tnc(J_function_reg, x0=theta, fprime=gradient_reg, args=extra_args)
def cg_resolver(J_function_reg, theta, gradient_reg, X, y, lambda_n):
    """Minimize a cost function with ``scipy.optimize.fmin_cg``.

    Runs at most 50 iterations, silently, and requests the full output tuple.

    Args:
        J_function_reg: Cost callable invoked as ``f(theta, X, y, lambda_n)``.
        theta: Starting point for the optimizer.
        gradient_reg: Gradient callable with the same signature as the cost.
        X: Design matrix forwarded to both callables.
        y: Labels forwarded to both callables.
        lambda_n: Regularization strength forwarded to both callables.

    Returns:
        The tuple returned by ``opt.fmin_cg`` with ``full_output=True``; the
        optimized parameters are at index 0.
    """
    extra_args = (X, y, lambda_n)
    return opt.fmin_cg(
        J_function_reg,
        x0=theta,
        fprime=gradient_reg,
        args=extra_args,
        maxiter=50,
        disp=False,
        full_output=True,
    )
y[y == 10] = 0  # relabel class 10 as 0 in place; in this data set the label 10 stands for the digit 0
_y = y.reshape(-1) # flatten y from a (m, 1) column vector into a 1-D array
def make_multiclassifier(lambda_01):
    """Train ten one-vs-all logistic classifiers, one per digit 0-9.

    Uses the module-level ``X``, ``_y`` and starting vector ``theta``. For
    each digit, the labels are rebuilt as 1 for that digit and 0 otherwise,
    and ``tnc_resolver`` fits a regularized classifier.

    Args:
        lambda_01: Regularization strength passed to the cost and gradient.

    Returns:
        A (10, 401) array; row ``i`` holds the parameters for digit ``i``.
    """
    ten_thetas = np.zeros((10, 401))  # one row of parameters per classifier
    for digit in range(10):
        is_digit = np.where(_y == digit, 1, 0)
        # Index 0 of the optimizer result holds the fitted parameters.
        ten_thetas[digit] = tnc_resolver(J_function_reg, theta, gradient_reg, X, is_digit, lambda_01)[0]
        print('Optimizing for handwritten number {}'.format(digit))
    print('Done!')
    return ten_thetas
ten_thetas_01 = make_multiclassifier(lambda_01)
Optimizing for handwritten number 0
Optimizing for handwritten number 1
Optimizing for handwritten number 2
Optimizing for handwritten number 3
Optimizing for handwritten number 4
Optimizing for handwritten number 5
Optimizing for handwritten number 6
Optimizing for handwritten number 7
Optimizing for handwritten number 8
Optimizing for handwritten number 9
Done!

1.4.1 one-vs-all prediction

def logistic_1vsAll_single(x, ten_thetas):
    """Predict the class of a single sample with the one-vs-all classifiers.

    Args:
        x: Feature vector of one sample, including the leading bias entry.
        ten_thetas: 2-D array with one row of parameters per classifier.

    Returns:
        A tuple ``(most_like, series_prob)``: the position of the largest
        probability, and a ``pd.Series`` holding every classifier's
        probability in row order.
    """
    series_prob = pd.Series([sigmoid(x.dot(theta_i.T)) for theta_i in ten_thetas])
    # NumPy's argmax on the underlying values gives the position of the maximum.
    return series_prob.values.argmax(), series_prob
def logistic_1vsAll_more(X, ten_thetas):
    """Predict the class of every row of ``X`` in one vectorized step.

    Args:
        X: Design matrix, one sample per row, including the bias column.
        ten_thetas: 2-D array with one row of parameters per classifier.

    Returns:
        A ``pd.Series`` giving, for each sample, the index of the classifier
        with the highest score.
    """
    # These are raw linear scores: no sigmoid is applied. The sigmoid is
    # monotonic, so the winning classifier is the same either way.
    scores = pd.DataFrame(X.dot(ten_thetas.T))
    return scores.idxmax(axis=1)
y_predict_01 = logistic_1vsAll_more(X,ten_thetas_01)
y_predict_01.value_counts()
8    1068
0     546
6     533
7     528
1     499
4     499
3     466
2     399
9     321
5     141
dtype: int64
(y_predict_01.values == _y).mean()
0.766

去掉正则化后,训练集上的准确率提高,但模型更容易过拟合,泛化能力可能下降

lambda_02 = 0
ten_thetas_02 = make_multiclassifier(lambda_02)
D:\Program Files (x86)\Anaconda3\lib\site-packages\ipykernel_launcher.py:2: RuntimeWarning: divide by zero encountered in log
  
D:\Program Files (x86)\Anaconda3\lib\site-packages\ipykernel_launcher.py:2: RuntimeWarning: invalid value encountered in multiply
  


Optimizing for handwritten number 0
Optimizing for handwritten number 1
Optimizing for handwritten number 2
Optimizing for handwritten number 3
Optimizing for handwritten number 4
Optimizing for handwritten number 5
Optimizing for handwritten number 6
Optimizing for handwritten number 7
Optimizing for handwritten number 8
Optimizing for handwritten number 9
Done!
y_predict_02 = logistic_1vsAll_more(X,ten_thetas_02)
y_predict_02.value_counts()
6    504
7    503
4    503
9    502
8    502
0    502
1    500
2    499
3    493
5    492
dtype: int64
(y_predict_02.values == _y).mean()
0.9736

2 Neural Networks

在这里插入图片描述
利用老师提供的计算好的theta矩阵,前向传播计算推导y

# Load the pre-trained theta matrices supplied with the course (instead of randomly initializing them)
theta_neurons = io.loadmat('D:/python/practise/sample/machine-learning-ex3/data/ex3weights.mat')
theta_neurons['Theta1'].shape
(25, 401)
theta_neurons['Theta2'].shape
(10, 26)
theta_neu_1 = theta_neurons['Theta1']
theta_neu_2 = theta_neurons['Theta2']
def forwardpropa_single(a_neu_1):
    """Run forward propagation for one sample and return the predicted digit.

    Uses the module-level weight matrices ``theta_neu_1`` and ``theta_neu_2``.

    Args:
        a_neu_1: Input-layer activations of one sample, including the bias entry.

    Returns:
        0 when the strongest output unit has index 9, otherwise that index
        plus one.
    """
    hidden = sigmoid(theta_neu_1.dot(a_neu_1))
    hidden_with_bias = np.insert(hidden, 0, 1)  # prepend the bias unit
    output = sigmoid(theta_neu_2.dot(hidden_with_bias))
    winner = output.argmax()
    # Output units are 0-based; the last one (index 9) is reported as digit 0.
    return 0 if winner == 9 else winner + 1
def forwardpropa_more(A_neu_1):
    """Run forward propagation for many samples at once.

    Uses the module-level weight matrices ``theta_neu_1`` and ``theta_neu_2``.

    Args:
        A_neu_1: Input activations, one sample per row, including the bias column.

    Returns:
        1-D array of predicted labels, with label 10 rewritten as 0.
    """
    # Hidden layer: (m, 401) . (401, 25) -> (m, 25)
    hidden = sigmoid(A_neu_1.dot(theta_neu_1.T))
    # Prepend a bias column of ones -> (m, 26)
    hidden_with_bias = np.insert(hidden, 0, 1, axis=1)
    # Output layer: (m, 26) . (26, 10) -> (m, 10)
    output = sigmoid(hidden_with_bias.dot(theta_neu_2.T))
    labels = output.argmax(axis=1) + 1  # shift 0-based positions to labels 1..10
    labels[labels == 10] = 0
    return labels
forwardpropa_single(X[3251])
6
show_1_number(3251) # 预测值是6 图像显示也是6

在这里插入图片描述

y_neu_pre = forwardpropa_more(X)
(y_neu_pre == y.T).mean()
0.9752

自己手写图片测试

from PIL import Image

用photoshop画一个黑底白色数字,分辨率20*20,保存为(.png或.jpg)

# Load the hand-drawn image, convert it to an array, and preview it
img = Image.open('D:/python/practise/sample/machine-learning-ex3/8.png')
# scipy.misc.toimage has been removed from SciPy; Pillow (already imported
# as Image) rebuilds the preview image from the array instead.
Image.fromarray(np.array(img))

在这里插入图片描述

# Flatten the image into a feature vector laid out like the training rows.
# The training samples are only upright after `.reshape(20, 20).T`, so an
# upright image must be transposed before flattening; a plain row-major
# ravel() would feed the models a transposed digit.
# convert('L') forces a single grayscale channel so a 20x20 image gives
# exactly 400 values, even if it was saved as RGB.
# NOTE(review): the value range of the training pixels is not visible here;
# confirm whether the 0-255 values should be rescaled to match it.
img_arr = np.array(img.convert('L')).T.ravel()
img_arr = np.insert(img_arr, 0, 1)  # prepend the bias entry x0 = 1
like = logistic_1vsAll_single(img_arr, ten_thetas_01)
like[0] # 用多元分类器的结果
8
forwardpropa_single(img_arr) # 用前向传播的结果
8

两个模型预测结果与实际一致,性能还可以


  • 1
    点赞
  • 4
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值