In this post I want to share my own Python code for classification models. It is a long read; if you spot mistakes, please point them out so we can learn together. Also, my own models, and the softmax model in particular, reach very low accuracy, so I would be grateful for pointers on improving them.
If you are new to machine learning, I suggest first studying how binary and multiclass classification work and getting the formulas straight, otherwise the Python code will make your head spin. I have written out my own derivations, which serve as a mathematical walkthrough of the code. In general, when reading someone else's code, first get familiar with the program logic (the algorithm), then with how the data is prepared; with those two clear, reading the code itself takes half the effort.
I feel these two programs trained my use of NumPy and gave me a vectorized way of thinking about code.
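To illustrate what I mean by vectorization, here is a toy sketch of my own (not part of the models below): the same scores computed once with an explicit Python loop and once with a single NumPy matrix product.
import numpy as np
w = np.random.randn(3, 1)   # weights, shape (3, 1)
X = np.random.randn(3, 5)   # 5 samples, 3 features each, one sample per column
# Loop version: one dot product per sample
z_loop = np.array([[sum(w[k, 0] * X[k, i] for k in range(3)) for i in range(5)]])
# Vectorized version: one matrix product for all samples at once
z_vec = np.dot(w.T, X)      # shape (1, 5)
print(np.allclose(z_loop, z_vec))  # True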
Binary classification model
This program comes in two different implementations.
The first is a simple version: plain NumPy matrix code, adapted from the cat-image recognition problem.
You can memorize it as a template.
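For reference, these are the formulas the propagate function below implements (logistic regression with an L2 penalty; m is the number of samples, \sigma the sigmoid):
A = \sigma(w^{T}X + b), \qquad \sigma(z) = \frac{1}{1 + e^{-z}}
J = -\frac{1}{m}\sum_{i=1}^{m}\left[ y^{(i)}\log a^{(i)} + (1 - y^{(i)})\log(1 - a^{(i)}) \right] + \frac{\lambda}{2m}\lVert w \rVert_2^2
\frac{\partial J}{\partial w} = \frac{1}{m}\left( X(A - y)^{T} + \lambda w \right), \qquad \frac{\partial J}{\partial b} = \frac{1}{m}\sum_{i=1}^{m}\left( a^{(i)} - y^{(i)} \right)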
import numpy as np
import h5py
import matplotlib.pyplot as plt
train_data = h5py.File('./datasets/train_catvnoncat.h5', 'r')
test_data = h5py.File('./datasets/test_catvnoncat.h5', 'r')
train_data_img = train_data['train_set_x'][:]
train_data_labels = train_data['train_set_y'][:]
test_data_img = test_data['test_set_x'][:]
test_data_labels = test_data['test_set_y'][:]
print(train_data_img.shape)
print(train_data_labels.shape)
print(test_data_img.shape)
print(test_data_labels.shape)
# Get the number of images; this determines the number of columns of the sample matrix X
m_train = train_data_img.shape[0]
m_test = test_data_img.shape[0]
# Flatten each 3-channel image into a single column vector
# reshape() must keep the total element count unchanged; the target shape is passed as arguments
# If one dimension is -1, NumPy infers that dimension automatically,
# e.g. np.arange(6).reshape(2, -1) has shape (2, 3); place the -1 where your data layout requires
train_data_img_one = train_data_img.reshape(m_train, -1).T
print(train_data_img_one.shape)
test_data_img_one = test_data_img.reshape(m_test, -1).T
# Scale pixel values to [0, 1]
train_data_sta = train_data_img_one / 255
test_data_sta = test_data_img_one / 255
# Define the sigmoid function
def sigmoid(z):
return 1.0 / (1 + np.exp(-z))
n_dim = train_data_sta.shape[0]
w = np.zeros((n_dim, 1))
b = 0
# Forward propagation and cost computation
def propagate(w, b, X, y, lambd):
Z = np.dot(w.T, X) + b
A = sigmoid(Z)
m = X.shape[1]
    # Add the L2 regularization term here. np.dot(w.T, w) also gives the squared norm of w,
    # but as a (1, 1) matrix, while J must be a scalar, so np.sum(w ** 2) is the cleaner choice
    J = - 1.0 / m * np.sum(y * np.log(A) + (1 - y) * np.log(1 - A)) + lambd / (2 * m) * np.sum(w ** 2)
dw = 1.0 / m * (np.dot(X, (A - y).T) + lambd * w)
db = 1.0 / m * np.sum(A - y)
grands = {'dw': dw, 'db': db}
return grands, J
# The gradient descent loop
def iterprocess(w, b, X_train, y, alpha, iters, print_cost, lambd):
cost = []
for i in range(iters):
grands, J = propagate(w, b, X_train, y, lambd)
dw = grands['dw']
db = grands['db']
w = w - alpha * dw
b = b - alpha * db
if i % 100 == 0:
cost.append(J)
if print_cost:
print('iters are ', i, ' cost is', J)
grands = {'dw': dw, 'db': db}
parameters = {'w': w, 'b': b}
return grands, parameters, cost
# Use the learned model on the test set; threshold the sigmoid output at 0.5
def predict(w, b, X_test):
Z = np.dot(w.T, X_test) + b
A = sigmoid(Z)
m = X_test.shape[1]
y_pred = np.zeros((1, m))
for i in range(m):
if A[:, i] > 0.5:
y_pred[:, i] = 1
else:
y_pred[:, i] = 0
return y_pred
# Wrap everything into a model
def model_simple(w, b, X_train, y_train, X_test, y_test, alpha, iters, print_cost, lambd):
grands, parameters, cost = iterprocess(w, b, X_train, y_train, alpha, iters, print_cost, lambd)
w = parameters['w']
b = parameters['b']
y_pred_train = predict(w, b, X_train)
y_pred_test = predict(w, b, X_test)
    # mean() computes an average; combined with a boolean comparison it yields the fraction of
    # correct predictions, e.g. np.mean(np.array([1, 0]) == np.array([1, 1])) == 0.5
print('the train acc is ', np.mean(y_pred_train == y_train) * 100, '%')
print('the test acc is ', np.mean(y_pred_test == y_test) * 100, '%')
d = {
'w': w,
'b': b,
'cost': cost,
'y_pred_train': y_pred_train,
'y_pred_test': y_pred_test,
'alpha': alpha
}
return d
d = model_simple(w, b, train_data_sta, train_data_labels, test_data_sta, test_data_labels, alpha=0.005, iters=3000, print_cost=True, lambd=0.1)
print(d['cost'])
plt.plot(d['cost'])
plt.xlabel('per hundred iters')
plt.ylabel('cost')
plt.show()
In the second version I add a validation set (dev set): the input set is split randomly by a given ratio, one part for training and one for validation. The reported training accuracy, training cost, dev accuracy, and dev cost guide the hyperparameter adjustments, so this version gives a feel for the tuning process. All four curves are drawn on the same canvas.
The main point of this program is drawing the training samples at random; I use np.random.shuffle for that, and a minimal sketch of the idea follows below.
I cannot help but marvel: life is short, I use Python.
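Here is a toy sketch of my own of the shuffle-then-split idea (the array sizes and names are made up; the real split lives inside iterprocess below):
import math
import numpy as np
X = np.arange(20).reshape(2, 10)   # 10 toy samples, 2 features, one sample per column
y = np.arange(10)                  # toy labels
ratio = 0.3                        # fraction of samples reserved for the dev set
idx = np.arange(X.shape[1])
np.random.shuffle(idx)             # permute the sample indices in place
dev_n = math.floor(X.shape[1] * ratio)
train_X, train_y = X[:, idx[dev_n:]], y[idx[dev_n:]]
dev_X, dev_y = X[:, idx[:dev_n]], y[idx[:dev_n]]
print(train_X.shape, dev_X.shape)  # (2, 7) (2, 3)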
import numpy as np
import h5py
import matplotlib.pyplot as plt
import math
train_data = h5py.File('./datasets/train_catvnoncat.h5', 'r')
test_data = h5py.File('./datasets/test_catvnoncat.h5', 'r')
train_data_img = train_data['train_set_x'][:]
train_data_labels = train_data['train_set_y'][:]
test_data_img = test_data['test_set_x'][:]
test_data_labels = test_data['test_set_y'][:]
print(train_data_img.shape)
print(train_data_labels.shape)
print(test_data_img.shape)
print(test_data_labels.shape)
# Get the number of images; this determines the number of columns of the sample matrix X
m_train = train_data_img.shape[0]
m_test = test_data_img.shape[0]
# Flatten each 3-channel image into a single column vector
train_data_img_one = train_data_img.reshape(m_train, -1).T
print(train_data_img_one.shape)
test_data_img_one = test_data_img.reshape(m_test, -1).T
# Scale pixel values to [0, 1]
train_data_sta = train_data_img_one / 255
test_data_sta = test_data_img_one / 255
# Define the sigmoid function
def sigmoid(z):
return 1.0 / (1 + np.exp(-z))
n_dim = train_data_sta.shape[0]
w = np.zeros((n_dim, 1))
b = 0
# Forward propagation and cost computation
def propagate(w, b, X, y):
Z = np.dot(w.T, X) + b
A = sigmoid(Z)
m = X.shape[1]
J = - 1.0 / m * (np.sum(y * np.log(A) + (1 - y) * np.log(1 - A)))
dw = 1.0 / m * np.dot(X, (A - y).T)
db = 1.0 / m * np.sum(A - y)
grands = {'dw': dw, 'db': db}
return grands, J
# Use the learned model to predict; threshold the sigmoid output at 0.5
def predict(w, b, X_test):
Z = np.dot(w.T, X_test) + b
A = sigmoid(Z)
m = X_test.shape[1]
y_pred = np.zeros((1, m))
for i in range(m):
if A[:, i] > 0.5:
y_pred[:, i] = 1
else:
y_pred[:, i] = 0
return y_pred
# The training loop with a random train/dev split
def iterprocess(w, b, X_train, y, ratio, times, alpha, iters):
train_cost = []
dev_cost = []
train_acc = []
dev_acc = []
m = X_train.shape[1]
random_index = np.arange(m)
    dev_row = math.floor(m * ratio)  # round down
train_row = m - dev_row
    for j in range(times):  # number of validation rounds
        np.random.shuffle(random_index)  # shuffle the index array in place
train = X_train[:, random_index[0:train_row]]
train_labels = y[random_index[0:train_row]]
dev = X_train[:, random_index[train_row:m]]
dev_labels = y[random_index[train_row:m]]
for i in range(iters): # 每次训练迭代的次数
grands, J = propagate(w, b, train, train_labels)
dw = grands['dw']
db = grands['db']
w = w - alpha * dw
b = b - alpha * db
y_pred_train = predict(w, b, train)
y_pred_dev = predict(w, b, dev)
train_acc.append(np.mean(y_pred_train == train_labels))
dev_acc.append(np.mean(y_pred_dev == dev_labels))
        # The 'cost' tracked here is the misclassification rate; divide by the size of each split
        # (dividing both by m understates the rates), and |a - b| replaces sqrt((a - b)^2)
        train_cost.append(np.mean(np.abs(y_pred_train - train_labels)))
        dev_cost.append(np.mean(np.abs(y_pred_dev - dev_labels)))
grands = {'dw': dw, 'db': db}
parameters = {'w': w, 'b': b}
estimate = {'train_acc': train_acc,
'train_cost': train_cost,
'dev_acc': dev_acc,
'dev_cost': dev_cost}
return grands, parameters, estimate
# Wrap everything into a model
def model_complex(w, b, X_train, y_train, X_test, y_test, ratio, times, alpha, iters):
grands, parameters, estimate = iterprocess(w, b, X_train, y_train, ratio, times, alpha, iters)
w = parameters['w']
b = parameters['b']
y_pred_test = predict(w, b, X_test)
print('the test acc is ', np.mean(y_pred_test == y_test) * 100, '%')
d = {
'w': w,
'b': b,
'estimate': estimate,
'y_pred_test': y_pred_test,
'alpha': alpha
}
return d
d = model_complex(w, b, train_data_sta, train_data_labels, test_data_sta, test_data_labels,
ratio=0.5, times=30, alpha=0.005, iters=100)
plt.title('Result Analysis')
plt.plot(d['estimate']['train_acc'], color='red', label='train_acc')
plt.plot(d['estimate']['dev_acc'], color='blue', label='dev_acc')
# Use colors different from the accuracy curves, otherwise the two pairs are indistinguishable
plt.plot(d['estimate']['train_cost'], color='orange', label='train_cost')
plt.plot(d['estimate']['dev_cost'], color='green', label='dev_cost')
plt.xlabel('times')
plt.ylabel('ratio')
plt.legend()  # draw the legend; after assigning labels to the curves this call is required, otherwise no legend is shown
plt.show()
Multiclass classification model
The accuracy of this program is dismal, only around 10%, so I would be very grateful for any suggestions.
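For reference, here is the math this code is meant to implement (softmax regression with an L2 penalty; k classes, m samples, S the k-by-m matrix of class probabilities, Y = onehot(y)^T the k-by-m label matrix, 1_m a vector of ones):
Z = w^{T}X + b, \qquad s_{j}^{(i)} = \frac{e^{z_{j}^{(i)}}}{\sum_{l=1}^{k} e^{z_{l}^{(i)}}}
J = -\frac{1}{m}\sum_{i=1}^{m} \log s_{y^{(i)}}^{(i)} + \frac{\lambda}{2m}\lVert w \rVert_F^2
\frac{\partial J}{\partial w} = \frac{1}{m}\left( X(S - Y)^{T} + \lambda w \right), \qquad \frac{\partial J}{\partial b} = \frac{1}{m}(S - Y)\,\mathbf{1}_m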
import os
import struct
import numpy as np
import matplotlib.pyplot as plt
def load_mnist(path, kind):
"""Load MNIST data from `path`"""
labels_path = os.path.join(path,
'%s-labels.idx1-ubyte'
% kind)
images_path = os.path.join(path,
'%s-images.idx3-ubyte'
% kind)
with open(labels_path, 'rb') as lbpath:
magic, n = struct.unpack('>II',
lbpath.read(8))
labels = np.fromfile(lbpath,
dtype=np.uint8)
with open(images_path, 'rb') as imgpath:
magic, num, rows, cols = struct.unpack('>IIII',
imgpath.read(16))
images = np.fromfile(imgpath,
dtype=np.uint8).reshape(len(labels), 784)
return images.T, labels
train_images, train_labels = load_mnist('../softmax/data_set/', 'train')
test_images, test_labels = load_mnist('../softmax/data_set/', 't10k')
def normalization(x):  # Gaussian standardization of the input matrix
    mu = np.mean(x, axis=0)  # axis=0 is required, otherwise NumPy averages the whole matrix instead of each column
    sigma = np.sqrt(np.mean((x - mu) ** 2, axis=0) + 0.01)  # per-column std, not variance; the 0.01 keeps all-zero columns from producing nan
    return (x - mu) / sigma  # the parentheses are required, otherwise the division would bind first
train_std_images = normalization(train_images)
test_std_images = normalization(test_images)
print(train_std_images.shape)
print(train_labels.shape)
k = 10
w = np.random.randn(train_std_images.shape[0], k) * np.sqrt(1 / train_std_images.shape[0])
# w = np.zeros((train_std_images.shape[0], k))
b = np.random.randn(k, 1)
mini_batch = 1024
np.savetxt("weight.txt", w, header="w", delimiter=",")
np.savetxt("b.txt", b, header="b", delimiter=",")
def onehot(y):
m = y.shape[0]
one = np.zeros((m, k))
one[np.arange(0, m), y] = 1
return one
def try_softmax(z):
    # Normalize each column separately (summing np.exp(z) over the whole matrix was the original bug);
    # subtracting the per-column max first keeps np.exp from overflowing
    temp = np.exp(z - np.max(z, axis=0, keepdims=True))
    return temp / np.sum(temp, axis=0, keepdims=True)
def propagate(w, b, X, y, lambd):
    m = X.shape[1]
    s = try_softmax(np.dot(w.T, X) + b)  # b must enter the forward pass here as well
    # Cross-entropy averages the log-probability of each sample's correct class;
    # weighting those log-probabilities by the label values (np.dot(y, ...)) was wrong
    J = - 1.0 / m * np.sum(np.log(s[y, np.arange(m)])) + lambd / (2 * m) * np.sum(w ** 2)
    # Gradients of softmax cross-entropy: dZ = (S - Y_onehot) / m, so dw = X dZ^T and db sums dZ over samples
    dw = 1.0 / m * (np.dot(X, s.T - onehot(y)) + lambd * w)
    db = 1.0 / m * np.sum(s - onehot(y).T, axis=1, keepdims=True)
grands = {'dw': dw, 'db': db}
return grands, J
def iterprocess(w, b, X, y, iters, alpha, lambd, beta):
times = X.shape[1] // mini_batch
Vdw = Vdb = 0
cost = []
for i in range(iters):
for j in range(times):
start = j * mini_batch
end = (j + 1) * mini_batch if (j + 1) * mini_batch < X.shape[1] else X.shape[1]
grands, J = propagate(w, b, X[:, start:end], y[start:end], lambd)
Vdw = beta * Vdw + (1 - beta) * grands["dw"]
Vdb = beta * Vdb + (1 - beta) * grands["db"]
w = w - alpha * Vdw
b = b - alpha * Vdb
if i % 10 == 0:
cost.append(J)
print("loss is %f" % J)
    # Return the trained parameters; without them the caller keeps predicting with the untrained initial w and b
    return w, b, cost
def predict(w, b, test, labels):
z = try_softmax(np.dot(w.T, test) + b)
test_l = np.argmax(z, axis=0)
print(np.mean(test_l == labels))
w, b, cost = iterprocess(w, b, train_std_images, train_labels, iters=100, alpha=0.001, lambd=100, beta=0.9)
plt.title("Loss")
plt.xlabel("iters")
plt.ylabel("loss")
plt.plot(np.arange(0, len(cost)), cost)
plt.show()
predict(w, b, test_std_images, test_labels)
Writing this program took real effort, and many errors surfaced along the way before it reached its current form. Two reminders, with a small sketch after this list:
1. Always save the parameter values of every training run; any run might happen to produce an unusually good result, and if the parameters were not saved at that moment, the work counts for nothing.
2. Watch for numerical slips during training, such as the program producing nan or inf; when that happens, a sharp mathematical instinct helps you adjust the inputs promptly.
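A minimal sketch of both habits, as my own illustration (the file names, the helper names, and the checkpoint frequency are all made up):
import numpy as np

def checkpoint(w, b, epoch):
    # Habit 1: persist the parameters of every run so a lucky result is never lost
    np.savetxt('weight_epoch%d.txt' % epoch, w, delimiter=',')
    np.savetxt('b_epoch%d.txt' % epoch, b, delimiter=',')

def check_numerics(J, grands):
    # Habit 2: fail fast on nan/inf instead of silently training on garbage
    if not np.isfinite(J):
        raise ValueError('cost is nan or inf; check the inputs and the learning rate')
    for name, g in grands.items():
        if not np.all(np.isfinite(g)):
            raise ValueError('gradient %s contains nan or inf' % name)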