Preface:
This post works through the handwritten-digit-recognition example from *Python Machine Learning*,
showing how to implement the BP (backpropagation) algorithm with L2 regularization.
Contents
- Loss function
- Code implementation
- Test results
1. Loss function
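The cost minimized below is the cross-entropy loss with an L2 penalty on both weight matrices; written out (matching the _comput_cost implementation in the next section):

J(W) = -\sum_{i=1}^{m}\sum_{k=1}^{n_{out}}\left[ y_k^{(i)}\log a_k^{(i)} + \left(1-y_k^{(i)}\right)\log\left(1-a_k^{(i)}\right)\right] + \lambda\left(\lVert W_h\rVert_2^2 + \lVert W_{out}\rVert_2^2\right)

where a^{(i)} is the output-layer activation for sample i, y^{(i)} its one-hot label, and \lambda the L2 parameter (self.l2 in the code, default 0.01).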
2. Code implementation
Two scripts are used: first the data loader (imported later as LoadImg), then the network itself.
# -*- coding: utf-8 -*-
"""
Created on Thu Mar 12 17:13:34 2020
@author: chengxf2
"""
import os
import struct
import numpy as np
import matplotlib.pyplot as plt
"""
绘制图形
Args
x_train: 数据集
y_train: 标签集
"""
def draw(x_train, y_train):
    fig, ax = plt.subplots(nrows=2, ncols=5, sharex=True, sharey=True)
    ax = ax.flatten()
    for i in range(10):
        img = x_train[y_train == i][0].reshape(28, 28)
        ax[i].imshow(img, cmap='Greys')
    ax[0].set_xticks([])
    ax[0].set_yticks([])
    plt.tight_layout()
    plt.show()
def load_minist(kind='train'):
    label_path = os.path.abspath(kind + "_label")
    image_path = os.path.abspath(kind + "_image")
    with open(label_path, 'rb') as lbpath:
        # label file header: big-endian magic number and item count
        magic, n = struct.unpack('>II', lbpath.read(8))
        labels = np.fromfile(lbpath, dtype=np.uint8)
    with open(image_path, 'rb') as imgpath:
        # image file header: magic number, item count, rows, cols
        magic, n, rows, cols = struct.unpack(">IIII", imgpath.read(16))
        images = np.fromfile(imgpath, dtype=np.uint8).reshape(len(labels), 784)
    images = ((images / 255) - 0.5) * 2  ## rescale pixels from [0, 255] to [-1, 1]
    return images, labels
##m:60000, n 784, {0,1,2,3,4,5,6,7,8,9}
#x_train ,y_train = load_minist(kind='train')
#m,n = np.shape(x_train)
#cls = set(y_train)
#print(x_train[0])
#draw(x_train, y_train)
Next, the network itself. It imports the loader above, so save that first file as LoadImg.py:
# -*- coding: utf-8 -*-
"""
Created on Wed Mar 11 10:17:33 2020
@author: chengxf2
"""
import numpy as np
import sys
from LoadImg import *
import matplotlib.pyplot as plt
class NetBP():
    """
    Parameters:
        nHidden: number of hidden-layer neurons
        L2: regularization parameter
        epochs: number of training epochs
        eta: learning rate
        shuffle: shuffle the samples before each epoch
        minibatch_size: number of samples per mini-batch
        seed: random seed
    """
    def __init__(self, nHidden=30, L2=0.01, epochs=2000, eta=0.0005, shuffle=True, minibatch_size=200, seed=1):
        self.random = np.random.RandomState(seed)
        self.n_hidden = nHidden
        self.epochs = epochs
        self.eta = eta
        self.shuffle = shuffle
        self.minibatch_size = minibatch_size
        self.l2 = L2
        print("\n __init___")
"""
模型在输出层上面的输出
Args
y: 训练集上的输出
n_out 输出层输出的种类
return
[m, n_classes] m代表样本个数,n_class 输出层个数
"""
def _onehot(self, y, n_out):
m = y.shape[0]
onehot = np.zeros((n_out, m)) #[10, 55000]
for idx, val in enumerate(y.astype(int)):
onehot[val, idx] =1
return onehot.T
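    # A quick illustration of the encoding (values made up for this example):
    #   _onehot(np.array([2, 0]), 3)
    #   -> [[0., 0., 1.],
    #       [1., 0., 0.]]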
"""
神经元的激活函数
Args
z: 输入值
return
y: 输出值
"""
def _sigmoid(self,z):
y = 1.0/(1.0+np.exp(-np.clip(z,-250,250)))
return y
"""
前向传播
Args:
X: 样本 [m, dimension]
w_h: [输入层维度n_features, 隐藏层个数n_Hidden]
w_out: [隐藏层个数隐藏层个数n_Hidden, 输出层个数n_output]
return
z_h: 隐藏层的输入 [m, n_Hidden]
a_h: 隐藏层输出 [m, n_Hidden]
z_out 输出层输入 [m, n_out]
a_out_ 输出层输入 [m,n_out]
"""
def _forward(self,X):
#step1
z_h = np.dot(X, self.w_h)+self.b_h
#step2 激活函数
a_h = self._sigmoid(z_h)
#step3 输出层的输入
z_out = np.dot(a_h, self.w_out)+self.b_out
#step4 输出层激活函数
a_out = self._sigmoid(z_out)
return z_h, a_h, z_out, a_out
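    # Shape flow for one mini-batch with the default sizes assumed above
    # (minibatch_size=200, 30 hidden units, 10 classes):
    #   X[200, 784] -> z_h, a_h: [200, 30] -> z_out, a_out: [200, 10]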
"""
计算代价函数,这里以2为代价
Args
y_enc:[0,0,1,0,0,0,0,0,0,0]
output: 模型算出来的[0.0,0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9]
损失函数模型为ylog(a_out)+(1-y)log(1-a_out)+L2(w**2)
"""
def _comput_cost(self, y_enc, output):
L2_term = self.l2*(np.sum(self.w_h**2)+np.sum(self.w_out**2))
#print("\n sp ", np.shape(y_enc))
#print("\n sp2 ", output[0])
term1 = -y_enc*np.log(output)
term2 = (1-y_enc)*np.log(1-output)
cost = np.sum(term1-term2)+L2_term
return cost
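    # Why delta_out in fit() is simply (a_out - y): for this cost,
    #   dJ/da_out = (a_out - y) / (a_out * (1 - a_out))
    # and the sigmoid derivative is
    #   da_out/dz_out = a_out * (1 - a_out),
    # so their product, the output-layer error, collapses to
    #   delta_out = dJ/dz_out = a_out - y.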
"""
预测
"""
def predict(self, X):
z_h, a_h,z_out,a_out= self._forward(X)
y_pred = np.argmax(z_out, axis=1)
return y_pred,a_out
    def fit(self, x_train, y_train, x_valid, y_valid):
        output = np.unique(y_train)  # unique sorted labels -> the set of output classes
        n_output = output.shape[0]
        n_features = x_train.shape[1]
        print("\n x_train ", np.shape(x_train))  # [55000, 784]
        # weights: input layer -> hidden layer
        self.b_h = np.zeros(self.n_hidden)  # hidden-layer bias
        self.w_h = self.random.normal(loc=0.0, scale=0.1, size=(n_features, self.n_hidden))  # one column per hidden cell
        # weights: hidden layer -> output layer
        self.b_out = np.zeros(n_output)
        self.w_out = self.random.normal(loc=0.0, scale=0.1, size=(self.n_hidden, n_output))
        self.eval_ = {'cost': [], 'train_acc': [], 'valid_acc': []}
        y_train_enc = self._onehot(y_train, n_output)
        indices = np.arange(x_train.shape[0])
        n_iter = 0
        for i in range(self.epochs):
            if self.shuffle:  # reshuffle the sample order before each epoch
                self.random.shuffle(indices)
            m = indices.shape[0]
            n_iter += 1
            for start_idx in range(0, m - self.minibatch_size + 1, self.minibatch_size):
                batch_idx = indices[start_idx:start_idx + self.minibatch_size]
                z_h, a_h, z_out, a_out = self._forward(x_train[batch_idx])  # forward pass
                ###############
                # backward pass: compute the gradients
                ###############
                # output-layer error; for a sigmoid output with the cross-entropy
                # cost the sigmoid derivative cancels (see the note under _comput_cost)
                delta_out = a_out - y_train_enc[batch_idx]  # [200, 10]
                grad_w_out = np.dot(a_h.T, delta_out)  # a_h: [200, 30] -> grad_w_out: [30, 10]
                grad_b_out = np.sum(delta_out, axis=0)  # bias gradient is just the summed error
                # hidden-layer error: push delta_out back through w_out and
                # multiply by the hidden sigmoid derivative a_h * (1 - a_h)
                delta_h = np.dot(delta_out, self.w_out.T) * (a_h * (1.0 - a_h))  # [200, 30]
                grad_w_h = np.dot(x_train[batch_idx].T, delta_h)  # [784, 30]
                grad_b_h = np.sum(delta_h, axis=0)
                ## update the parameters (L2 term added to the weight gradients; biases are not regularized) ##
                delta_w_h = grad_w_h + self.l2 * self.w_h
                delta_b_h = grad_b_h
                self.w_h -= self.eta * delta_w_h
                self.b_h -= self.eta * delta_b_h
                delta_w_out = grad_w_out + self.l2 * self.w_out
                delta_b_out = grad_b_out
                self.w_out -= self.eta * delta_w_out
                self.b_out -= self.eta * delta_b_out
            y_train_pred, train_out = self.predict(x_train)
            y_valid_pred, valid_out = self.predict(x_valid)
            train_acc = np.sum(y_train == y_train_pred).astype(float) / x_train.shape[0]
            valid_acc = np.sum(y_valid == y_valid_pred).astype(float) / x_valid.shape[0]
            cost = self._comput_cost(y_train_enc, train_out)
            self.eval_['cost'].append(cost)
            self.eval_['train_acc'].append(train_acc)
            self.eval_['valid_acc'].append(valid_acc)
            print("\n iter: ", n_iter, "\t train_acc ", np.round(train_acc, 2), "\t valid_acc: ", np.round(valid_acc, 2))
print("\n ****************trainEnd****************\n")
x_train, y_train = load_minist(kind='train')
# dataset: 60000 samples, n = 784 features
print("y ", y_train.shape[0])
nn = NetBP()
nn.fit(x_train=x_train[0:55000], y_train=y_train[0:55000], x_valid=x_train[55000:], y_valid=y_train[55000:])
plt.plot(range(nn.epochs), nn.eval_['cost'], c='r')
plt.ylabel('Cost')
plt.xlabel('Epochs')
plt.show()
plt.plot(range(nn.epochs), nn.eval_['train_acc'], label='training', c='g')
plt.plot(range(nn.epochs), nn.eval_['valid_acc'], label='validation', linestyle='--', c='r')
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.legend()
plt.show()
3. Test results
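Beyond the cost and accuracy curves produced above, one could also score the trained network on the MNIST test split. The sketch below assumes the test files are saved under the same naming scheme the loader expects, i.e. as test_label and test_image next to the training files (these file names are an assumption, not part of the original setup):

# Hypothetical evaluation on the MNIST test split; assumes "test_label" and
# "test_image" exist, matching the kind + "_label" / kind + "_image"
# convention in load_minist.
x_test, y_test = load_minist(kind='test')
y_test_pred, _ = nn.predict(x_test)
test_acc = np.sum(y_test == y_test_pred) / x_test.shape[0]
print("test accuracy: %.2f%%" % (test_acc * 100))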