Preface:
This post works through the handwritten-digit-recognition example from *Python Machine Learning*,
showing how to implement the BP (backpropagation) algorithm with L2 regularization.
Contents
- Loss function
- Code implementation
- Test results
1. Loss function
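The cost minimized below is the cross-entropy loss with an L2 penalty on both weight matrices; written out (matching the _comput_cost implementation in the next section):

J(W) = -\sum_{i=1}^{m}\sum_{k=1}^{n_{out}}\left[ y_k^{(i)}\log a_k^{(i)} + \left(1-y_k^{(i)}\right)\log\left(1-a_k^{(i)}\right)\right] + \lambda\left(\lVert W_h\rVert_2^2 + \lVert W_{out}\rVert_2^2\right)

where a^{(i)} is the output-layer activation for sample i, y^{(i)} its one-hot label, and \lambda the L2 parameter (self.l2 in the code, default 0.01).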
2. Code implementation
Two scripts are used: first the data loader (imported later as LoadImg), then the network itself.
# -*- coding: utf-8 -*-
"""
Created on Thu Mar 12 17:13:34 2020
@author: chengxf2
"""
import os
import struct
import numpy as np
import matplotlib.pyplot as plt
"""
绘制图形
Args
x_train: 数据集
y_train: 标签集
"""
def draw(x_train, y_train):
    fig, ax = plt.subplots(nrows=2, ncols=5, sharex=True, sharey=True)
    ax = ax.flatten()
    for i in range(10):
        img = x_train[y_train == i][0].reshape(28, 28)
        ax[i].imshow(img, cmap='Greys')
    ax[0].set_xticks([])
    ax[0].set_yticks([])
    plt.tight_layout()
    plt.show()
def load_minist(kind='train'):
    label_path = os.path.abspath(kind + "_label")
    image_path = os.path.abspath(kind + "_image")
    with open(label_path, 'rb') as lbpath:
        # label file header: big-endian magic number and item count
        magic, n = struct.unpack('>II', lbpath.read(8))
        labels = np.fromfile(lbpath, dtype=np.uint8)
    with open(image_path, 'rb') as imgpath:
        # image file header: magic number, item count, rows, cols
        magic, n, rows, cols = struct.unpack(">IIII", imgpath.read(16))
        images = np.fromfile(imgpath, dtype=np.uint8).reshape(len(labels), 784)
    images = ((images / 255) - 0.5) * 2  ## rescale pixels from [0, 255] to [-1, 1]
    return images, labels
##m:60000, n 784, {0,1,2,3,4,5,6,7,8,9}
#x_train ,y_train = load_minist(kind='train')
#m,n = np.shape(x_train)
#cls = set(y_train)
#print(x_train[0])
#draw(x_train, y_train)
Next, the network itself. It imports the loader above, so save that first file as LoadImg.py:
# -*- coding: utf-8 -*-
"""
Created on Wed Mar 11 10:17:33 2020
@author: chengxf2
"""
import numpy as np
import sys
from LoadImg import *
import matplotlib.pyplot as plt
class NetBP():
    """
    Parameters:
        nHidden: number of hidden-layer neurons
        L2: regularization parameter
        epochs: number of training epochs
        eta: learning rate
        shuffle: shuffle the samples before each epoch
        minibatch_size: number of samples per mini-batch
        seed: random seed
    """
    def __init__(self, nHidden=30, L2=0.01, epochs=2000, eta=0.0005, shuffle=True, minibatch_size=200, seed=1):
        self.random = np.random.RandomState(seed)
        self.n_hidden = nHidden
        self.epochs = epochs
        self.eta = eta
        self.shuffle = shuffle
        self.minibatch_size = minibatch_size
        self.l2 = L2
        print("\n __init___")
"""
模型在输出层上面的输出
Args
y: 训练集上的输出
n_out 输出层输出的种类
return
[m, n_classes] m代表样本个数,n_class 输出层个数
"""
def _onehot(self, y, n_out):
m = y.shape[0]
onehot = np.zeros((n_out, m)) #[10, 55000]
for idx, val in enumerate(y.astype(int)):
onehot[val, idx] =1
return onehot.T
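    # A quick illustration of the encoding (values made up for this example):
    #   _onehot(np.array([2, 0]), 3)
    #   -> [[0., 0., 1.],
    #       [1., 0., 0.]]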
"""
神经元的激活函数
Args
z: 输入值
return
y: 输出值
"""
def _sigmoid(self,z):
y = 1.0/(1.0+np.exp(-np.clip(z,-250,250)))
return y
"""
前向传播
Args:
X: 样本 [m, dimension]
w_h: [输入层维度n_features, 隐藏层个数n_Hidden]
w_out: [隐藏层个数隐藏层个数n_Hidden, 输出层个数n_output]
return
z_h: 隐藏层的输入 [m, n_Hidden]
a_h: 隐藏层输出 [m, n_Hidden]
z_out 输出层输入 [m, n_out]
a_out_ 输出层输入 [m,n_out]
"""
def _forward(self,X):
#step1
z_h = np.dot(X, self.w_h)+self.b_h
#step2 激活函数
a_h = self._sigmoid(z_h)
#step3 输出层的输入
z_out = np.dot(a_h, self.w_out)+self.b_out
#step4 输出层激活函数
a_out = self._sigmoid(z_out)
return z_h, a_h, z_out, a_out
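    # Shape flow for one mini-batch with the default sizes assumed above
    # (minibatch_size=200, 30 hidden units, 10 classes):
    #   X[200, 784] -> z_h, a_h: [200, 30] -> z_out, a_out: [200, 10]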
"""
计算代价函数,这里以2为代价
Args
y_enc:[0,0,1,0,0,0,0,0,0,0]
output: 模型算出来的[0.0,0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9]
损失函数模型为ylog(a_out)+(1-y)log(1-a_out)+L2(w**2)
"""
def _comput_cost(self, y_enc, output):
L2_term = self.l2*(np.sum(self.w_h**2)+np.sum(self.w_out**2))
#print("\n sp ", np.shape(y_enc))
#print("\n sp2 ", output[0])
term1 = -y_enc*np.log(output)
term2 = (1-y_enc)*np.log(1-output)
cost = np.sum(term1-term2)+L2_term
return cost
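    # Why delta_out in fit() is simply (a_out - y): for this cost,
    #   dJ/da_out = (a_out - y) / (a_out * (1 - a_out))
    # and the sigmoid derivative is
    #   da_out/dz_out = a_out * (1 - a_out),
    # so their product, the output-layer error, collapses to
    #   delta_out = dJ/dz_out = a_out - y.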
"""
预测
"""
def predict(self, X):
z_h, a_h,z_out,a_out= self._forward(X)
y_pred = np.argmax(z_out, axis=1)
return y_pred,a_out
    def fit(self, x_train, y_train, x_valid, y_valid):
        output = np.unique(y_train)  # unique sorted labels -> the set of output classes
        n_output = output.shape[0]
        n_features = x_train.shape[1]
        print("\n x_train ", np.shape(x_train))  # [55000, 784]
        # weights: input layer -> hidden layer
        self.b_h = np.zeros(self.n_hidden)  # hidden-layer bias
        self.w_h = self.random.normal(loc=0.0, scale=0.1, size=(n_features, self.n_hidden))  # one column per hidden cell
        # weights: hidden layer -> output layer
        self.b_out = np.zeros(n_output)
        self.w_out = self.random.normal(loc=0.0, scale=0.1, size=(self.n_hidden, n_output))
        self.eval_ = {'cost': [], 'train_acc': [], 'valid_acc': []}
        y_train_enc = self._onehot(y_train, n_output)
        indices = np.arange(x_train.shape[0])
        n_iter = 0
        for i in range(self.epochs):
            if self.shuffle:  # reshuffle the sample order before each epoch
                self.random.shuffle(indices)
            m = indices.shape[0]
            n_iter += 1
            for start_idx in range(0, m - self.minibatch_size + 1, self.minibatch_size):
                batch_idx = indices[start_idx:start_idx + self.minibatch_size]
                z_h, a_h, z_out, a_out = self._forward(x_train[batch_idx])  # forward pass
                ###############
                # backward pass: compute the gradients
                ###############
                # output-layer error; for a sigmoid output with the cross-entropy
                # cost the sigmoid derivative cancels (see the note under _comput_cost)
                delta_out = a_out - y_train_enc[batch_idx]  # [200, 10]
                grad_w_out = np.dot(a_h.T, delta_out)  # a_h: [200, 30] -> grad_w_out: [30, 10]
                grad_b_out = np.sum(delta_out, axis=0)  # bias gradient is just the summed error
                # hidden-layer error: push delta_out back through w_out and
                # multiply by the hidden sigmoid derivative a_h * (1 - a_h)
                delta_h = np.dot(delta_out, self.w_out.T) * (a_h * (1.0 - a_h))  # [200, 30]
                grad_w_h = np.dot(x_train[batch_idx].T, delta_h)  # [784, 30]
                grad_b_h = np.sum(delta_h, axis=0)
                ## update the parameters (L2 term added to the weight gradients; biases are not regularized) ##
                delta_w_h = grad_w_h + self.l2 * self.w_h
                delta_b_h = grad_b_h
                self.w_h -= self.eta * delta_w_h
                self.b_h -= self.eta * delta_b_h
                delta_w_out = grad_w_out + self.l2 * self.w_out
                delta_b_out = grad_b_out
                self.w_out -= self.eta * delta_w_out
                self.b_out -= self.eta * delta_b_out
            y_train_pred, train_out = self.predict(x_train)
            y_valid_pred, valid_out = self.predict(x_valid)
            train_acc = np.sum(y_train == y_train_pred).astype(float) / x_train.shape[0]
            valid_acc = np.sum(y_valid == y_valid_pred).astype(float) / x_valid.shape[0]
            cost = self._comput_cost(y_train_enc, train_out)
            self.eval_['cost'].append(cost)
            self.eval_['train_acc'].append(train_acc)
            self.eval_['valid_acc'].append(valid_acc)
            print("\n iter: ", n_iter, "\t train_acc ", np.round(train_acc, 2), "\t valid_acc: ", np.round(valid_acc, 2))
print("\n ****************trainEnd****************\n")
x_train, y_train = load_minist(kind='train')
# dataset: 60000 samples, n = 784 features
print("y ", y_train.shape[0])
nn = NetBP()
nn.fit(x_train=x_train[0:55000], y_train=y_train[0:55000], x_valid=x_train[55000:], y_valid=y_train[55000:])
plt.plot(range(nn.epochs), nn.eval_['cost'], c='r')
plt.ylabel('Cost')
plt.xlabel('Epochs')
plt.show()
plt.plot(range(nn.epochs), nn.eval_['train_acc'], label='training', c='g')
plt.plot(range(nn.epochs), nn.eval_['valid_acc'], label='validation', linestyle='--', c='r')
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.legend()
plt.show()
3. Test results
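Beyond the cost and accuracy curves produced above, one could also score the trained network on the MNIST test split. The sketch below assumes the test files are saved under the same naming scheme the loader expects, i.e. as test_label and test_image next to the training files (these file names are an assumption, not part of the original setup):

# Hypothetical evaluation on the MNIST test split; assumes "test_label" and
# "test_image" exist, matching the kind + "_label" / kind + "_image"
# convention in load_minist.
x_test, y_test = load_minist(kind='test')
y_test_pred, _ = nn.predict(x_test)
test_acc = np.sum(y_test == y_test_pred) / x_test.shape[0]
print("test accuracy: %.2f%%" % (test_acc * 100))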