Learn Something New: Writing Classification Models by Hand


In this post I'd like to share my own Python code for classification models. It is a fairly long read, and if you spot any mistakes please point them out so we can learn together. Also, my hand-written models, the softmax one in particular, get very low accuracy, so I'm hoping for pointers on how to improve them; please don't hold back.
If you're a beginner in machine learning, I suggest first studying how the binary and multi-class classification algorithms work and getting the formulas straight; otherwise the Python code will make your head spin. I've laid out my own derivation of the formulas, which serves as a formula-level walkthrough of the code. In general, when reading someone else's code, first get familiar with the program's logic (the algorithm), then with how it defines its data; with those two clear, reading the actual code takes half the effort.
I feel these two programs sharpened my command of numpy and gave me a sense of vectorized programming.

Binary Classification Model

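The derivation boils down to the formulas below, which are exactly what the code implements (m is the number of samples, λ the L2 coefficient, σ the sigmoid):

$$Z = w^{T}X + b,\qquad A = \sigma(Z) = \frac{1}{1 + e^{-Z}}$$

$$J = -\frac{1}{m}\sum_{i=1}^{m}\left[y^{(i)}\log a^{(i)} + \left(1 - y^{(i)}\right)\log\left(1 - a^{(i)}\right)\right] + \frac{\lambda}{2m}\lVert w \rVert_{2}^{2}$$

$$\frac{\partial J}{\partial w} = \frac{1}{m}\left(X\left(A - y\right)^{T} + \lambda w\right),\qquad \frac{\partial J}{\partial b} = \frac{1}{m}\sum_{i=1}^{m}\left(a^{(i)} - y^{(i)}\right)$$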
This program has two different implementations.
The first is a simple version: plain numpy matrix code, adapted from the cat-image recognition problem.
You can commit it to memory as a template.

import numpy as np
import h5py
import matplotlib.pyplot as plt

train_data = h5py.File('./datasets/train_catvnoncat.h5', 'r')
test_data = h5py.File('./datasets/test_catvnoncat.h5', 'r')

train_data_img = train_data['train_set_x'][:]
train_data_labels = train_data['train_set_y'][:]
test_data_img = test_data['test_set_x'][:]
test_data_labels = test_data['test_set_y'][:]

print(train_data_img.shape)
print(train_data_labels.shape)
print(test_data_img.shape)
print(test_data_labels.shape)

# Get the number of images; it determines the number of columns of the sample matrix X
m_train = train_data_img.shape[0]
m_test = test_data_img.shape[0]
# Flatten each 3-channel image into a single column vector
# reshape() must keep the total number of elements unchanged; the target shape is passed as arguments
# if one dimension is given as -1, numpy computes that dimension automatically,
# e.g. an (m, 64, 64, 3) array reshaped with (m, -1) becomes (m, 12288); put the -1 where your data requires
train_data_img_one = train_data_img.reshape(m_train, -1).T
print(train_data_img_one.shape)
test_data_img_one = test_data_img.reshape(m_test, -1).T

# Scale the pixel values to [0, 1]
train_data_sta = train_data_img_one / 255
test_data_sta = test_data_img_one / 255


# Define the sigmoid function
def sigmoid(z):
    return 1.0 / (1 + np.exp(-z))


n_dim = train_data_sta.shape[0]
w = np.zeros((n_dim, 1))
b = 0


# Forward propagation and cost computation
def propagate(w, b, X, y, lambd):
    Z = np.dot(w.T, X) + b
    A = sigmoid(Z)

    m = X.shape[1]
    # The L2 regularization term is added here. Computing the squared norm of w with
    # np.dot(w.T, w) gives the right value but as a (1, 1) matrix, while J should be
    # a plain number, so np.sum(w ** 2) is used instead.
    J = - 1.0 / m * (np.sum(y * np.log(A) + (1 - y) * np.log(1 - A)) - lambd / 2 * np.sum(w ** 2))

    dw = 1.0 / m * (np.dot(X, (A - y).T) + lambd * w)
    db = 1.0 / m * np.sum(A - y)
    grands = {'dw': dw, 'db': db}

    return grands, J


# The gradient-descent loop
def iterprocess(w, b, X_train, y, alpha, iters, print_cost, lambd):
    cost = []
    for i in range(iters):
        grands, J = propagate(w, b, X_train, y, lambd)
        dw = grands['dw']
        db = grands['db']

        w = w - alpha * dw
        b = b - alpha * db

        if i % 100 == 0:
            cost.append(J)
            if print_cost:
                print('iters are ', i, ' cost is', J)

    grands = {'dw': dw, 'db': db}
    parameters = {'w': w, 'b': b}

    return grands, parameters, cost


# Use the trained parameters to make predictions
def predict(w, b, X_test):
    Z = np.dot(w.T, X_test) + b
    A = sigmoid(Z)

    m = X_test.shape[1]
    y_pred = np.zeros((1, m))

    for i in range(m):
        if A[:, i] > 0.5:
            y_pred[:, i] = 1
        else:
            y_pred[:, i] = 0

    return y_pred


# Wrap everything into a model
def model_simple(w, b, X_train, y_train, X_test, y_test, alpha, iters, print_cost, lambd):
    grands, parameters, cost = iterprocess(w, b, X_train, y_train, alpha, iters, print_cost, lambd)
    w = parameters['w']
    b = parameters['b']
    y_pred_train = predict(w, b, X_train)
    y_pred_test = predict(w, b, X_test)

    # mean() computes an average; combined with the boolean comparison it yields the fraction of correct predictions
    print('the train acc is ', np.mean(y_pred_train == y_train) * 100, '%')
    print('the test acc is ', np.mean(y_pred_test == y_test) * 100, '%')

    d = {
        'w': w,
        'b': b,
        'cost': cost,
        'y_pred_train': y_pred_train,
        'y_pred_test': y_pred_test,
        'alpha': alpha
    }

    return d


d = model_simple(w, b, train_data_sta, train_data_labels, test_data_sta, test_data_labels, alpha=0.005, iters=3000, print_cost=True, lambd=0.1)

print(d['cost'])

plt.plot(d['cost'])
plt.xlabel('per hundred iters')
plt.ylabel('cost')
plt.show()
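
Once trained, the returned parameters can also score a single image; a quick usage sketch continuing from the script above (the index 0 is arbitrary):

# classify one test image with the learned parameters
x = test_data_sta[:, 0].reshape(-1, 1)
p = sigmoid(np.dot(d['w'].T, x) + d['b'])[0, 0]
print('cat' if p > 0.5 else 'non-cat', p)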

For the second version I added a dev set: the input set is split at random by a given ratio, one part for training and one part for validation. Using the reported training accuracy, training cost, dev accuracy and dev cost as feedback you can adjust the hyperparameters and get a feel for the tuning process; all four curves are drawn on the same canvas.
The main point of this program is drawing the training samples at random, for which I used the shuffle function; if you haven't met it before, the toy example below shows the whole idea.
It really makes me sigh: life is short, I use Python.
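
A toy look at np.random.shuffle (the outputs vary from run to run):

import numpy as np

# shuffle permutes an index array in place; slicing the shuffled indices
# then yields a random train/dev split
idx = np.arange(6)
np.random.shuffle(idx)
print(idx)               # e.g. [3 0 5 1 4 2]
print(idx[:3], idx[3:])  # first part -> train indices, rest -> dev indices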

import numpy as np
import h5py
import matplotlib.pyplot as plt
import math

train_data = h5py.File('./datasets/train_catvnoncat.h5', 'r')
test_data = h5py.File('./datasets/test_catvnoncat.h5', 'r')

train_data_img = train_data['train_set_x'][:]
train_data_labels = train_data['train_set_y'][:]
test_data_img = test_data['test_set_x'][:]
test_data_labels = test_data['test_set_y'][:]

print(train_data_img.shape)
print(train_data_labels.shape)
print(test_data_img.shape)
print(test_data_labels.shape)

# Get the number of images; it determines the number of columns of the sample matrix X
m_train = train_data_img.shape[0]
m_test = test_data_img.shape[0]
# Flatten each 3-channel image into a single column vector
train_data_img_one = train_data_img.reshape(m_train, -1).T
print(train_data_img_one.shape)
test_data_img_one = test_data_img.reshape(m_test, -1).T

# Scale the pixel values to [0, 1]
train_data_sta = train_data_img_one / 255
test_data_sta = test_data_img_one / 255


# Define the sigmoid function
def sigmoid(z):
    return 1.0 / (1 + np.exp(-z))


n_dim = train_data_sta.shape[0]
w = np.zeros((n_dim, 1))
b = 0


# Forward propagation and cost computation
def propagate(w, b, X, y):
    Z = np.dot(w.T, X) + b
    A = sigmoid(Z)

    m = X.shape[1]
    J = - 1.0 / m * (np.sum(y * np.log(A) + (1 - y) * np.log(1 - A)))

    dw = 1.0 / m * np.dot(X, (A - y).T)
    db = 1.0 / m * np.sum(A - y)
    grands = {'dw': dw, 'db': db}

    return grands, J


# Use the trained parameters to make predictions
def predict(w, b, X_test):
    Z = np.dot(w.T, X_test) + b
    A = sigmoid(Z)

    m = X_test.shape[1]
    y_pred = np.zeros((1, m))

    for i in range(m):
        if A[:, i] > 0.5:
            y_pred[:, i] = 1
        else:
            y_pred[:, i] = 0

    return y_pred


# The training loop with a random train/dev split
def iterprocess(w, b, X_train, y, ratio, times, alpha, iters):

    train_cost = []
    dev_cost = []
    train_acc = []
    dev_acc = []
    m = X_train.shape[1]
    random_index = np.arange(m)
    dev_row = math.floor(m * ratio)   # round down
    train_row = m - dev_row

    for j in range(times):  # number of random-split (cross-validation) rounds

        np.random.shuffle(random_index)     # shuffle the indices in place
        train = X_train[:, random_index[0:train_row]]
        train_labels = y[random_index[0:train_row]]
        dev = X_train[:, random_index[train_row:m]]
        dev_labels = y[random_index[train_row:m]]

        for i in range(iters):  # gradient-descent iterations per round
            grands, J = propagate(w, b, train, train_labels)
            dw = grands['dw']
            db = grands['db']

            w = w - alpha * dw
            b = b - alpha * db

        y_pred_train = predict(w, b, train)
        y_pred_dev = predict(w, b, dev)
        train_acc.append(np.mean(y_pred_train == train_labels))
        dev_acc.append(np.mean(y_pred_dev == dev_labels))
        # misclassification rate: |pred - label| is 1 exactly on the wrongly classified samples
        train_cost.append(1.0 / train_row * np.sum(np.abs(y_pred_train - train_labels)))
        dev_cost.append(1.0 / dev_row * np.sum(np.abs(y_pred_dev - dev_labels)))

    grands = {'dw': dw, 'db': db}
    parameters = {'w': w, 'b': b}
    estimate = {'train_acc': train_acc,
                'train_cost': train_cost,
                'dev_acc': dev_acc,
                'dev_cost': dev_cost}

    return grands, parameters, estimate


# Wrap everything into a model
def model_complex(w, b, X_train, y_train, X_test, y_test, ratio, times, alpha, iters):
    grands, parameters, estimate = iterprocess(w, b, X_train, y_train, ratio, times, alpha, iters)
    w = parameters['w']
    b = parameters['b']

    y_pred_test = predict(w, b, X_test)
    print('the test acc is ', np.mean(y_pred_test == y_test) * 100, '%')

    d = {
        'w': w,
        'b': b,
        'estimate': estimate,
        'y_pred_test': y_pred_test,
        'alpha': alpha
    }

    return d


d = model_complex(w, b, train_data_sta, train_data_labels, test_data_sta, test_data_labels,
                  ratio=0.5, times=30, alpha=0.005, iters=100)

plt.title('Result Analysis')
plt.plot(d['estimate']['train_acc'], color='red', label='train_acc')
plt.plot(d['estimate']['dev_acc'], color='blue', label='dev_acc')
plt.plot(d['estimate']['train_cost'], color='red', linestyle='--', label='train_cost')
plt.plot(d['estimate']['dev_cost'], color='blue', linestyle='--', label='dev_cost')
plt.xlabel('times')
plt.ylabel('ratio')
plt.legend()    # draw the legend; after giving each curve a label this call is still required, otherwise no legend shows up
plt.show()
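
To get a feel for the tuning process, the whole model can simply be re-run over a few candidate learning rates and the reported accuracies compared; a sketch (the values are arbitrary examples):

# hypothetical sweep: compare runs under a few different learning rates
for alpha in (0.001, 0.005, 0.01):
    print('alpha =', alpha)
    d = model_complex(w, b, train_data_sta, train_data_labels,
                      test_data_sta, test_data_labels,
                      ratio=0.5, times=30, alpha=alpha, iters=100)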

Multi-class Classification Model

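In formulas, the model below is softmax regression over k classes (m samples; Y is the (m, k) one-hot matrix of the labels):

$$s = \operatorname{softmax}\left(w^{T}X + b\right),\qquad s_{j}^{(i)} = \frac{e^{z_{j}^{(i)}}}{\sum_{l=1}^{k} e^{z_{l}^{(i)}}}$$

$$J = -\frac{1}{m}\sum_{i=1}^{m}\log s_{y^{(i)}}^{(i)} + \frac{\lambda}{2m}\lVert w \rVert_{F}^{2}$$

$$\frac{\partial J}{\partial w} = \frac{1}{m}\,X\left(s^{T} - Y\right) + \frac{\lambda}{m}\,w,\qquad \frac{\partial J}{\partial b} = \frac{1}{m}\sum_{i=1}^{m}\left(s^{(i)} - Y^{(i)\,T}\right)$$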

This program's accuracy was dismal, only around 10%, so I still hope everyone can offer some suggestions; yours truly would be most grateful.
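
A handy sanity check for this kind of code is numerical gradient checking: compare the analytic gradient returned by propagate (defined in the listing below) against a finite-difference estimate of the loss. A minimal sketch, where X_small and y_small stand for any few columns of the training data and lambd is the same regularization coefficient:

# finite-difference check of one entry of dJ/dw on a tiny subset
eps = 1e-5
grands, _ = propagate(w, b, X_small, y_small, lambd)
w_try = w.copy()
w_try[0, 0] += eps
_, J_plus = propagate(w_try, b, X_small, y_small, lambd)
w_try[0, 0] -= 2 * eps
_, J_minus = propagate(w_try, b, X_small, y_small, lambd)
print((J_plus - J_minus) / (2 * eps), grands['dw'][0, 0])   # the two numbers should nearly agree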

import os
import struct
import numpy as np
import matplotlib.pyplot as plt


def load_mnist(path, kind):
    """Load MNIST data from `path`"""
    labels_path = os.path.join(path,
                               '%s-labels.idx1-ubyte'
                               % kind)
    images_path = os.path.join(path,
                               '%s-images.idx3-ubyte'
                               % kind)
    with open(labels_path, 'rb') as lbpath:
        magic, n = struct.unpack('>II',
                                 lbpath.read(8))
        labels = np.fromfile(lbpath,
                             dtype=np.uint8)

    with open(images_path, 'rb') as imgpath:
        magic, num, rows, cols = struct.unpack('>IIII',
                                               imgpath.read(16))
        images = np.fromfile(imgpath,
                             dtype=np.uint8).reshape(len(labels), 784)

    return images.T, labels


train_images, train_labels = load_mnist('../softmax/data_set/', 'train')
test_images, test_labels = load_mnist('../softmax/data_set/', 't10k')


def normalization(x):   # standardize each column (each sample) of the input matrix
    mu = np.mean(x, axis=0)     # keep axis=0, otherwise numpy averages the whole matrix instead of each column
    sigma = np.sqrt(np.mean((x - mu) ** 2, axis=0)) + 0.01  # standard deviation; the extra 0.01 keeps all-zero columns from yielding nan
    return (x - mu) / sigma  # the parentheses are essential, otherwise the division would be applied first


train_std_images = normalization(train_images)
test_std_images = normalization(test_images)

print(train_std_images.shape)
print(train_labels.shape)

k = 10
# random init scaled by sqrt(1/n) so the initial logits start small
w = np.random.randn(train_std_images.shape[0], k) * np.sqrt(1 / train_std_images.shape[0])
# w = np.zeros((train_std_images.shape[0], k))
b = np.random.randn(k, 1)
mini_batch = 1024

# snapshot the initial parameters to text files
np.savetxt("weight.txt", w, header="w", delimiter=",")
np.savetxt("b.txt", b, header="b", delimiter=",")


def onehot(y):
    # one-hot encode the labels: row i carries a 1 in column y[i]
    m = y.shape[0]
    one = np.zeros((m, k))
    one[np.arange(0, m), y] = 1
    return one


def try_softmax(z):
    # column-wise softmax; subtracting the per-column max avoids overflow in exp,
    # and the normalizing sum must be per column (axis=0), not over the whole matrix
    temp = np.exp(z - np.max(z, axis=0, keepdims=True))
    return temp / np.sum(temp, axis=0, keepdims=True)


def propagate(w, b, X, y, lambd):

    m = X.shape[1]

    s = try_softmax(np.dot(w.T, X) + b)     # the bias has to enter the forward pass here as well

    # cross-entropy: the average negative log-probability assigned to the true class,
    # plus the L2 penalty on w
    J = - 1.0 / m * np.sum(np.log(s[y, np.arange(m)])) + lambd / (2 * m) * np.sum(w ** 2)

    dw = 1.0 / m * np.dot(X, (s.T - onehot(y))) + lambd / m * w
    db = 1.0 / m * np.sum(s - onehot(y).T, axis=1, keepdims=True)

    grands = {'dw': dw, 'db': db}
    return grands, J


def iterprocess(w, b, X, y, iters, alpha, lambd, beta):

    times = X.shape[1] // mini_batch    # number of full mini-batches per epoch
    Vdw = Vdb = 0
    cost = []

    for i in range(iters):
        for j in range(times):

            start = j * mini_batch
            end = (j + 1) * mini_batch if (j + 1) * mini_batch < X.shape[1] else X.shape[1]    # samples beyond the last full batch are simply skipped

            grands, J = propagate(w, b, X[:, start:end], y[start:end], lambd)

            # momentum: exponentially weighted moving average of the gradients
            Vdw = beta * Vdw + (1 - beta) * grands["dw"]
            Vdb = beta * Vdb + (1 - beta) * grands["db"]

            w = w - alpha * Vdw
            b = b - alpha * Vdb

        if i % 10 == 0:
            cost.append(J)
            print("loss is %f" % J)

    return w, b, cost   # return the trained parameters, not just the loss curve


def predict(w, b, test, labels):

    z = try_softmax(np.dot(w.T, test) + b)
    test_l = np.argmax(z, axis=0)   # the most probable class in each column is the prediction
    print(np.mean(test_l == labels))


w, b, cost = iterprocess(w, b, train_std_images, train_labels, iters=100, alpha=0.001, lambd=100, beta=0.9)

plt.title("Loss")
plt.xlabel("iters")
plt.ylabel("loss")
plt.plot(np.arange(0, len(cost)), cost)
plt.show()

predict(w, b, test_std_images, test_labels)
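
When accuracy hovers around 10%, one quick diagnostic is the distribution of predicted classes; if a single class soaks up almost all predictions, the model has collapsed onto it (a small sketch, continuing from the script above):

# count how often each of the k classes is predicted on the test set
z = try_softmax(np.dot(w.T, test_std_images) + b)
pred = np.argmax(z, axis=0)
print(np.bincount(pred, minlength=k))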

Writing this program took some real effort, and plenty of errors cropped up along the way before I arrived at the current version. Two reminders for everyone (a small sketch of both follows the list):
1. Always save the parameter results of every training run: some run may just happen to produce a particularly good result, and if the parameters weren't saved the moment it appeared, the work might as well not have been done.
2. Watch out for slips during the computation, such as the program producing nan or inf; you need a sharp mathematical instinct there and should adjust the inputs promptly.
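
A minimal version of both habits (the file names are just examples):

# guard against divergence, then snapshot the parameters of this run
if np.isnan(cost[-1]) or np.isinf(cost[-1]):
    print('loss diverged; lower alpha or re-check the input scaling')
else:
    np.savetxt('w_final.txt', w, delimiter=',')
    np.savetxt('b_final.txt', b, delimiter=',')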
