【机器学习】吴恩达作业3.0，python实现逻辑回归手写多分类问题

—Xi—

已于 2022-04-01 22:16:42 修改

阅读量4.1k

点赞数 11

分类专栏：机器学习文章标签：机器学习 python 逻辑回归大数据

于 2022-04-01 12:09:41 首次发布

本文链接：https://blog.csdn.net/m0_51933492/article/details/123892518

版权

机器学习专栏收录该内容

19 篇文章 42 订阅

订阅专栏

3.0 多元逻辑回归案例：手写多分类问题

使用逻辑回归和神经网络来识别手写数字(从0到9)。逻辑回归，并将其应用于one-vs-all分类。

数据：数据以.mat格式储存，mat格式是matlab的数据存储格式，按照矩阵保存，与numpy数据格式兼容，适合于各种数学运算，因此主要使用numpy进行运算。

ex3data1中有5000个训练样例，其中每个训练样例是一个20像素×20像素灰度图像的数字，每个像素由一个浮点数表示，该浮点数表示该位置的灰度强度。每个20×20像素的网格被展开成一个400维的向量。这些每个训练样例都变成数据矩阵X中的一行。这就得到了一个5000×400矩阵X，其中每一行都是手写数字图像的训练样例。

训练集的第二部分是一个包含训练集标签的5000维向量y，“0”的数字标记为“10”，而“1”到“9”的数字按自然顺序标记为“1”到“9”。

one-vs_all

补充

涉及python语法

1, a=np.insert(arr, obj, values, axis)

arr原始数组，可一可多，obj插入元素位置，values是插入内容，axis是按行按列插入（0：行、1：列）。

2, a.flatten() 降维

3, .shape[0]矩阵的行数,.shape[1]矩阵的列数

4, for i in range ()：

range()是一个函数， for i in range () 就是给i赋值：

range(start, stop[, step])，分别是起始、终止和步长，range（3）即：从0到3，不包含3，即0,1,2

5 ,np.argmax()

取出数组中元素最大值所对应的索引（索引值默认从0开始）

对二维矩阵来讲a[0][1]会有两个索引方向，第一个方向为a[0]，默认按列方向搜索最大

6,np.power(x, y) 函数，计算 x 的 y 次方。

7,from scipy.optimize import minimize 内置函数，传入其中的参数必须是一维

代码实现

1读取数据：sio.loadmat 读取mat后，为dict类型

import numpy as np
import matplotlib.pyplot as plt
import scipy.io as sio

# 读入数据
path = 'ex3data1.mat'
data = sio.loadmat(path)
print(data)
print(type(data))
print(data.keys())
raw_X = data['X']
raw_Y = data['y']
# (5000, 400)
print(raw_X.shape)
# (5000, 1)
print(raw_Y.shape)

2 画出数据集里的数字图片

def plot_an_image(X):
    pick_one = np.random.randint(5000)
    image = X[pick_one, :]
    fig, ax = plt.subplots(figsize=(1, 1))#设置图片尺寸
    ax.imshow(image.reshape(20, 20).T, cmap='gray_r')
    plt.xticks([])
    plt.yticks([])


plot_an_image(raw_X)
plt.show()


def plot_100_images(X):
    sample_index = np.random.choice(len(X), 100)#随机选取数据集里100个数据
    images = X[sample_index, :]
    print(images.shape)
    #定义10*10的子画布
    fig, ax = plt.subplots(ncols=10, nrows=10, figsize=(8, 8), sharex=True, sharey=True)
    #在每个子画布中画出一个数字
    for r in range(10):#行
        for c in range(10):#列
            ax[r, c].imshow(images[10 * r + c].reshape(20, 20).T, cmap='gray_r')
    #去掉坐标轴
    plt.xticks([])
    plt.yticks([])
    plt.show()

plot_100_images(raw_X)
plt.show()

3 损失函数梯度

# 损失函数，找出最小的损失函数
def sigmoid(z):
    return 1 / (1 + np.exp(-z))


def Cost_Function(theta, X, y, lamda):
    A = sigmoid(X @ theta)
    first = y * np.log(A)
    second = (1 - y) * np.log(1 - A)
    reg = np.sum(np.power(theta[1:], 2)) * (lamda / (2 * len(X)))
    return -np.sum(first + second) / len(X) + reg


def gradient_reg(theta, X, y, lamda):
    reg = theta[1:] * (lamda / len(X))
    reg = np.insert(reg, 0, values=0, axis=0)#插入第一行0
    first = (X.T @ (sigmoid(X @ theta) - y)) / len(X)
    return first + reg

4 数据处理

X = np.insert(raw_X, 0, values=1, axis=1)#在X中插入一列1
# (5000, 401)
print(X.shape)
y = raw_Y.flatten()#对y进行降维
# (5000,)
print(y.shape)

5 一对多分类

利用for循环对每种数字习得一个带正则的逻辑回归分类器，然后将10个分类器的参数组成一个参数矩阵theta_all返回

# 利用内置函数求最优化
from scipy.optimize import minimize

# K为标签个数
def one_vs_all(X, y, lamda, K):
    n = X.shape[1]#X的列数401
    theta_all = np.zeros((K, n))#（10，401）
   #第0列到第9列分别对应类别1到10
    for i in range(1, K + 1):#遍历到k 1-k 对应1-10
        
        theta_i = np.zeros(n, )#传入minimize的必须是一维（401，）

        res = minimize(fun=Cost_Function,
                       x0=theta_i,
                       args=(X, y == i, lamda),
                       method='TNC',
                       jac=gradient_reg
                       )
        theta_all[i - 1, :] = res.x #将字典中x（theta）的值赋给theta
        #[i-1,:]与索引对应（0，9）
    return theta_all


lamda = 1
K = 10
theta_final = one_vs_all(X, y, lamda, K)
print(theta_final)

6 预测

得到一个5000乘10的预测概率矩阵，找到每一行的概率最大的值位置，得到预测的类别，再和期望值y比较得到精度。

def predict(X, theta_final):
    # (5000,401) (10,401) => (5000,10)
    h = sigmoid(X @ theta_final.T)#假设函数，输出h为1的概率
    h_argmax = np.argmax(h, axis=1)#按行返回概率最大的数字索引
    return h_argmax + #索引+1对应数字


y_pred = predict(X, theta_final)
acc = np.mean(y_pred == y)
# 0.9446
print(acc)

完整代码

import numpy as np
import matplotlib.pyplot as plt
import scipy.io as sio

# 读入数据
path = 'ex3data1.mat'
data = sio.loadmat(path)
print(data)
print(type(data))
print(data.keys())
raw_X = data['X']
raw_Y = data['y']
# (5000, 400)
print(raw_X.shape)
# (5000, 1)
print(raw_Y.shape)


def plot_an_image(X):
    pick_one = np.random.randint(5000)
    image = X[pick_one, :]
    fig, ax = plt.subplots(figsize=(1, 1))#设置图片尺寸
    ax.imshow(image.reshape(20, 20).T, cmap='gray_r')
    plt.xticks([])
    plt.yticks([])


plot_an_image(raw_X)
plt.show()


def plot_100_images(X):
    sample_index = np.random.choice(len(X), 100)#随机选取数据集里100个数据
    images = X[sample_index, :]
    print(images.shape)
    #定义10*10的子画布
    fig, ax = plt.subplots(ncols=10, nrows=10, figsize=(8, 8), sharex=True, sharey=True)
    #在每个子画布中画出一个数字
    for r in range(10):#行
        for c in range(10):#列
            ax[r, c].imshow(images[10 * r + c].reshape(20, 20).T, cmap='gray_r')
    #去掉坐标轴
    plt.xticks([])
    plt.yticks([])
    plt.show()


plot_100_images(raw_X)
plt.show()


# 损失函数，找出最小的损失函数
def sigmoid(z):
    return 1 / (1 + np.exp(-z))


def Cost_Function(theta, X, y, lamda):
    A = sigmoid(X @ theta)
    first = y * np.log(A)
    second = (1 - y) * np.log(1 - A)
    reg = np.sum(np.power(theta[1:], 2)) * (lamda / (2 * len(X)))
    return -np.sum(first + second) / len(X) + reg


def gradient_reg(theta, X, y, lamda):
    reg = theta[1:] * (lamda / len(X))
    reg = np.insert(reg, 0, values=0, axis=0)#插入第一行0
    first = (X.T @ (sigmoid(X @ theta) - y)) / len(X)
    return first + reg


X = np.insert(raw_X, 0, values=1, axis=1)
# (5000, 401)
print(X.shape)
y = raw_Y.flatten()
# (5000,)
print(y.shape)

# 利用内置函数求最优化
from scipy.optimize import minimize


# K为标签个数
def one_vs_all(X, y, lamda, K):
    n = X.shape[1]#X的列数401
    theta_all = np.zeros((K, n))#（10，401）
   #第0列到第9列分别对应类别1到10
    for i in range(1, K + 1):#遍历到k 1-k 对应1-10
        
        theta_i = np.zeros(n, )#传入minimize的必须是一维（401，）

        res = minimize(fun=Cost_Function,
                       x0=theta_i,
                       args=(X, y == i, lamda),
                       method='TNC',
                       jac=gradient_reg
                       )
        theta_all[i - 1, :] = res.x #将字典中x（theta）的值赋给theta
        #[i-1,:]与索引对应（0，9）
    return theta_all


lamda = 1
K = 10
theta_final = one_vs_all(X, y, lamda, K)
print(theta_final)


def predict(X, theta_final):
    # (5000,401) (10,401) => (5000,10)
    h = sigmoid(X @ theta_final.T)#假设函数，输出h为1的概率
    h_argmax = np.argmax(h, axis=1)#按行返回概率最大的数字索引
    return h_argmax + #索引+1对应数字


y_pred = predict(X, theta_final)
acc = np.mean(y_pred == y)
# 0.9446
print(acc)

总结

读取数据——可视化数据集——损失函数——梯度——数据处理（X加偏置项，y降维）——一对多分类器——利用最优函数得到最优参数——预测