Neural Network Framework Code (Python)

This article builds a small neural network framework from scratch and uses it to implement handwritten-digit classification, so that beginners can get a clear picture of how a neural network works.

The code is organized into five modules; a minimal sketch of how they fit together follows this list:

layer module: contains the data input layer, the fully connected layer, the activation layer, and the loss layer

function_for_layer module: defines the activation functions and the loss functions

update_method module: defines the learning-rate schedule and the gradient update method

net module: defines the structure of the neural network

train module: the training script
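
Before diving into the full source, here is a minimal orientation sketch (not part of the framework itself) of how the five modules are intended to interact, assuming they are saved as layer.py, function_for_layer.py, update_method.py, net.py and train.py; the dummy random data and the small iteration count are placeholders for illustration only. The real entry point is the train module at the end of this article.

import numpy as np
import net   # the net module defined below; it imports layer, which imports the other modules

# dummy data standing in for MNIST: 200 flattened 28x28 images and one-hot labels
train_data = np.random.rand(200, 784)
train_label = np.eye(10)[np.random.randint(0, 10, 200)]

solver = net.net(100, 0.1, 0.001)                    # batch size, base learning rate, weight decay
solver.load_sample_and_label_train(train_data, train_label)
solver.initial()                                     # Xavier-initialize all weights
for i in range(10):
    net.layer.update_method.iteration = i            # the inv() schedule reads this counter
    solver.forward_train()                           # data -> fc1 -> ac1 -> fc2 -> ac2 -> fc3 -> loss
    solver.backward_train()                          # gradients flow back through the same layers
    solver.update()                                  # momentum SGD step on every fully connected layer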

Data source: the MNIST handwritten-digit dataset maintained by Yann LeCun et al., with 60,000 training samples and 10,000 test samples; it can be downloaded from http://yann.lecun.com/exdb/mnist/index.html
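
The train module at the end of this article reads the data from a data.mat file with the keys train_data, train_label, test_data and test_label. The original post does not show how that file was produced; the following is a hedged sketch of one way to build it from the raw MNIST files downloaded from the URL above, assuming images are flattened to 784 values scaled to [0, 1] and labels are one-hot encoded with shape (N, 10), which is what the rest of the code expects.

import numpy as np
import scipy.io

def read_idx_images(path):
    # MNIST image files start with a 16-byte header (magic, count, rows, cols)
    with open(path, 'rb') as f:
        f.read(16)
        pixels = np.frombuffer(f.read(), dtype=np.uint8)
    return pixels.reshape(-1, 784).astype(np.float64) / 255.0

def read_idx_labels(path):
    # MNIST label files start with an 8-byte header (magic, count)
    with open(path, 'rb') as f:
        f.read(8)
        labels = np.frombuffer(f.read(), dtype=np.uint8)
    return np.eye(10)[labels]        # one-hot encode

scipy.io.savemat('data.mat', {
    'train_data': read_idx_images('train-images-idx3-ubyte'),
    'train_label': read_idx_labels('train-labels-idx1-ubyte'),
    'test_data': read_idx_images('t10k-images-idx3-ubyte'),
    'test_label': read_idx_labels('t10k-labels-idx1-ubyte'),
})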

# layer.py
from __future__ import division  # true division
import numpy as np
import random
import update_method
import function_for_layer as ffl

# Module-level parameters; net overwrites these when the network is constructed.
update_function = update_method.batch_gradient_descent
weights_decay = 0.01  # weight decay (L2 penalty) coefficient
batch_size = 64

class data:
    """Data input layer."""
    def __init__(self):
        self.data_sample = 0
        self.data_label = 0
        self.output_sample = 0
        self.output_label = 0
        self.point = 0  # position from which the next mini-batch is pulled

    # Load the data set
    def get_data(self, sample, label):
        self.data_sample = sample  # samples
        self.data_label = label    # labels

    # Shuffle the data
    def shuffle(self):
        random_sequence = random.sample(np.arange(self.data_sample.shape[0]), self.data_sample.shape[0])  # random permutation of the sample indices
        self.data_sample = self.data_sample[random_sequence]    # samples after shuffling
        self.data_label = self.data_label[random_sequence]      # labels after shuffling

    # Pull the next mini-batch (wrapping around at the end of the data set)
    def pull_data(self):
        start = self.point
        end = start + batch_size
        output_index = np.arange(start, end)
        if end > self.data_sample.shape[0]:
            end = end - self.data_sample.shape[0]
            output_index = np.append(np.arange(start, self.data_sample.shape[0]), np.arange(0, end))
        self.output_sample = self.data_sample[output_index]
        self.output_label = self.data_label[output_index]
        self.point = end % self.data_sample.shape[0]


class fully_connected_layer:
    """Fully connected layer."""
    def __init__(self, num_neuron_inputs, num_neuron_outputs):
        self.num_neuron_inputs = num_neuron_inputs    # number of input neurons
        self.num_neuron_outputs = num_neuron_outputs  # number of neurons in this layer
        self.inputs = np.zeros((batch_size, num_neuron_inputs))    # inputs
        self.outputs = np.zeros((batch_size, num_neuron_outputs))  # outputs
        self.weights = np.zeros((num_neuron_inputs, num_neuron_outputs))  # weights
        self.bias = np.zeros(num_neuron_outputs)  # biases
        self.weights_previous_direction = np.zeros((num_neuron_inputs, num_neuron_outputs))  # previous weight update direction (momentum)
        self.bias_previous_direction = np.zeros(num_neuron_outputs)  # previous bias update direction (momentum)
        self.grad_weights = np.zeros((batch_size, num_neuron_inputs, num_neuron_outputs))  # weight gradients, one slice per sample
        self.grad_bias = np.zeros((batch_size, num_neuron_outputs))     # bias gradients
        self.grad_inputs = np.zeros((batch_size, num_neuron_inputs))   # gradients w.r.t. the inputs
        self.grad_outputs = np.zeros((batch_size, num_neuron_outputs))   # gradients w.r.t. the outputs

    # Initialize the weights with Xavier initialization
    def initialize_weights(self):
        self.weights = ffl.xavier(self.num_neuron_inputs, self.num_neuron_outputs)

    # Receive the inputs during the forward pass
    def get_inputs_for_forward(self, inputs):
        self.inputs = inputs

    # Forward pass: affine transformation
    def forward(self):
        self.outputs = self.inputs.dot(self.weights) + np.tile(self.bias, (batch_size, 1))

    # Receive the upstream gradients during the backward pass
    def get_inputs_for_backward(self, grad_outputs):
        self.grad_outputs = grad_outputs

    # Gradients of the weights and biases
    def backward(self):
        # Weight gradients form a 3-D array: one outer product of inputs and upstream
        # gradients per sample in the batch, plus the weight-decay term
        for i in np.arange(batch_size):
            self.grad_weights[i, :] = np.tile(self.inputs[i, :], (1, 1)).T.dot(np.tile(self.grad_outputs[i, :], (1, 1))) + self.weights * weights_decay
        # Bias gradients
        self.grad_bias = self.grad_outputs
        # Gradients w.r.t. the inputs
        self.grad_inputs = self.grad_outputs.dot(self.weights.T)

    # Update the weights and biases with the configured update rule
    def update(self):
        grad_weights_average = np.mean(self.grad_weights, 0)
        grad_bias_average = np.mean(self.grad_bias, 0)
        (self.weights, self.weights_previous_direction) = update_function(self.weights,
                                                                          grad_weights_average,
                                                                          self.weights_previous_direction)
        (self.bias, self.bias_previous_direction) = update_function(self.bias,
                                                                    grad_bias_average,
                                                                    self.bias_previous_direction)


class activation_layer:
    """激活函数层"""
    def __init__(self, activation_function_name):
        if activation_function_name == 'sigmoid':
            self.activation_function = ffl.sigmoid
            self.der_activation_function = ffl.der_sigmoid
        elif activation_function_name == 'tanh':
            self.activation_function = ffl.tanh
            self.der_activation_function = ffl.der_tanh
        elif activation_function_name == 'relu':
            self.activation_function = ffl.relu
            self.der_activation_function = ffl.der_relu
        else:
            raise ValueError('unsupported activation function: ' + activation_function_name)
        self.inputs = 0
        self.outputs = 0
        self.grad_inputs = 0
        self.grad_outputs = 0

    # Receive the inputs during the forward pass
    def get_inputs_for_forward(self, inputs):
        self.inputs = inputs

    # Forward pass: apply the activation function
    def forward(self):
        self.outputs = self.activation_function(self.inputs)

    # Receive the upstream gradients during the backward pass
    def get_inputs_for_backward(self, grad_outputs):
        self.grad_outputs = grad_outputs

    # Backward pass: multiply by the derivative of the activation
    def backward(self):
        self.grad_inputs = self.grad_outputs * self.der_activation_function(self.inputs)


class loss_layer:
    """损失函数层"""
    def __init__(self, loss_function_name):
        self.inputs = 0  # 输入
        self.loss = 0   # 误差
        self.accuracy = 0  # 正确率
        self.label = 0  # 标签
        self.grad_inputs = 0  # 输入梯度

        if loss_function_name == 'SoftmaxWithLoss':
            self.loss_function = ffl.softmaxwithloss
            self.der_loss_function = ffl.der_softmaxwithloss
        elif loss_function_name == 'LeastSquareError':
            self.loss_function = ffl.least_square_error
            self.der_loss_function = ffl.der_least_square_error
        else:
            raise ValueError('unsupported loss function: ' + loss_function_name)

    def get_label_for_loss(self, label):
        self.label = label

    # Receive the inputs (scores from the last fully connected layer)
    def get_inputs_for_loss(self, inputs):
        self.inputs = inputs

    # Compute the training loss and the accuracy
    def compute_loss_and_accuracy(self):
        # accuracy: fraction of samples whose predicted class matches the label
        if_equal = np.argmax(self.inputs, 1) == np.argmax(self.label, 1)
        self.accuracy = np.sum(if_equal) / batch_size
        # training loss
        self.loss = self.loss_function(self.inputs, self.label)

    # Gradient of the loss w.r.t. its inputs
    def compute_gradient(self):
        self.grad_inputs = self.der_loss_function(self.inputs, self.label)

# function_for_layer.py
from scipy import stats
import numpy as np

################################ Activation functions ################################
# 1. Sigmoid and its derivative
def sigmoid(x):
    """sigmoid函数"""
    return 1 / (1 + np.exp(-x))

def der_sigmoid(x):
    """Derivative of the sigmoid: sigmoid(x) * (1 - sigmoid(x))."""
    return sigmoid(x) * (1 - sigmoid(x))
# 2. Tanh and its derivative
def tanh(x):
    """Tanh function."""
    return ((np.exp(x) - np.exp(-x)) / (np.exp(x) + np.exp(-x)))

def der_tanh(x):
    """tanh函数的导数"""
    return 1 - tanh(x) * tanh(x)
# 3. ReLU and its derivative
def relu(x):
    """relu函数"""
    temp = np.zeros_like(x)
    if_bigger_zero = (x > temp)
    return x * if_bigger_zero

def der_relu(x):
    """relu函数的导数"""
    temp = np.zeros_like(x)
    if_bigger_equal_zero = (x >= temp)
    return if_bigger_equal_zero * np.ones_like(x)

################################## Loss functions ##################################
# Softmax cross-entropy loss and its derivative
def softmaxwithloss(inputs, label):
    """Mean cross-entropy of softmax(inputs) against one-hot labels."""
    temp1 = np.exp(inputs)
    probability = temp1 / (np.tile(np.sum(temp1, 1), (inputs.shape[1], 1))).T
    temp2 = np.argmax(label, 1)
    temp3 = [probability[i, j] for (i, j) in zip(np.arange(label.shape[0]), temp2)]
    loss = -1 * np.mean(np.log(temp3))
    return loss

def der_softmaxwithloss(inputs, label):
    """Gradient of the softmax cross-entropy loss: softmax(inputs) - label."""
    temp1 = np.exp(inputs)
    temp2 = np.sum(temp1, 1)
    probability = temp1 / (np.tile(temp2, (inputs.shape[1], 1))).T
    gradient = probability - label
    return gradient

# Xavier initialization: uniform on [-a, a] with a = sqrt(6) / sqrt(n_in + n_out + 1)
def xavier(num_neuron_inputs, num_neuron_outputs):
    temp1 = np.sqrt(6) / np.sqrt(num_neuron_inputs + num_neuron_outputs + 1)
    weights = stats.uniform.rvs(-temp1, 2 * temp1, (num_neuron_inputs, num_neuron_outputs))
    return weights

# update_method.py
import numpy as np

# Momentum factor for the weight updates
momentum = 0.9
# Base learning rate (set by net when the network is constructed)
base_lr = 0
# Current iteration number (set by the training script before every update; -1 means "not set")
iteration = -1

# Learning-rate schedules
# 'inv' policy: lr = base_lr * (1 + gama * iteration) ** (-power)
def inv(gama=0.0005, power=0.75):
    if iteration == -1:
        assert False, 'iteration must be set by the training script before calling inv()'
    return base_lr * np.power((1 + gama * iteration), -power)

# 'fixed' policy: constant learning rate
def fixed():
    return base_lr


# Mini-batch gradient descent with momentum
def batch_gradient_descent(weights, grad_weights, previous_direction):
    lr = inv()
    direction = momentum * previous_direction + lr * grad_weights
    weights_now = weights - direction
    return (weights_now, direction)

# net.py
import layer

class net:
    def __init__(self, batch_size, lr, weights_decay):
        layer.batch_size = batch_size
        layer.update_method.base_lr = lr
        layer.weights_decay = weights_decay

        # Build a four-layer neural network: 784 -> 50 -> 50 -> 10
        self.inputs_train = layer.data()  # input layer for the training samples
        self.inputs_test = layer.data()   # input layer for the test samples

        self.fc1 = layer.fully_connected_layer(784, 50)
        self.ac1 = layer.activation_layer('tanh')
        self.fc2 = layer.fully_connected_layer(50, 50)
        self.ac2 = layer.activation_layer('tanh')
        self.fc3 = layer.fully_connected_layer(50, 10)
        self.loss = layer.loss_layer('SoftmaxWithLoss')

    def load_sample_and_label_train(self, sample, label):
        self.inputs_train.get_data(sample, label)

    def load_sample_and_label_test(self, sample, label):
        self.inputs_test.get_data(sample, label)

    def initial(self):
        self.fc1.initialize_weights()
        self.fc2.initialize_weights()
        self.fc3.initialize_weights()

    def forward_train(self):
        self.inputs_train.pull_data()

        self.fc1.get_inputs_for_forward(self.inputs_train.output_sample)
        self.fc1.forward()
        self.ac1.get_inputs_for_forward(self.fc1.outputs)
        self.ac1.forward()

        self.fc2.get_inputs_for_forward(self.ac1.outputs)
        self.fc2.forward()
        self.ac2.get_inputs_for_forward(self.fc2.outputs)
        self.ac2.forward()

        self.fc3.get_inputs_for_forward(self.ac2.outputs)
        self.fc3.forward()

        self.loss.get_inputs_for_loss(self.fc3.outputs)
        self.loss.get_label_for_loss(self.inputs_train.output_label)
        self.loss.compute_loss_and_accuracy()

    # The batch size used at test time may differ from the training batch size,
    # so the two helpers below switch the module-level batch_size accordingly.
    def turn_to_test(self, batch_size_test):
        layer.batch_size = batch_size_test

    def turn_to_train(self, batch_size_train):
        layer.batch_size = batch_size_train

    def forward_test(self):
        self.inputs_test.pull_data()

        self.fc1.get_inputs_for_forward(self.inputs_test.output_sample)
        self.fc1.forward()
        self.ac1.get_inputs_for_forward(self.fc1.outputs)
        self.ac1.forward()

        self.fc2.get_inputs_for_forward(self.ac1.outputs)
        self.fc2.forward()
        self.ac2.get_inputs_for_forward(self.fc2.outputs)
        self.ac2.forward()

        self.fc3.get_inputs_for_forward(self.ac2.outputs)
        self.fc3.forward()

        self.loss.get_inputs_for_loss(self.fc3.outputs)
        self.loss.get_label_for_loss(self.inputs_test.output_label)
        self.loss.compute_loss_and_accuracy()

    def backward_train(self):
        self.loss.compute_gradient()
        self.fc3.get_inputs_for_backward(self.loss.grad_inputs)
        self.fc3.backward()
        self.ac2.get_inputs_for_backward(self.fc3.grad_inputs)
        self.ac2.backward()
        self.fc2.get_inputs_for_backward(self.ac2.grad_inputs)
        self.fc2.backward()
        self.ac1.get_inputs_for_backward(self.fc2.grad_inputs)
        self.ac1.backward()
        self.fc1.get_inputs_for_backward(self.ac1.grad_inputs)
        self.fc1.backward()

    def update(self):
        self.fc1.update()
        self.fc2.update()
        self.fc3.update()

# train.py
import scipy.io
import random
import net
import numpy as np
import matplotlib.pyplot as plt

# Load the data (train/test samples and one-hot labels stored in data.mat)
data = scipy.io.loadmat('data.mat')
train_label = data['train_label']
train_data = data['train_data']
test_label = data['test_label']
test_data = data['test_data']

# Hyperparameters
num_train = 800          # number of training iterations
lr = 0.1                 # base learning rate
weight_decay = 0.001     # weight decay coefficient
train_batch_size = 100
test_batch_size = 10000

# Build the network
solver = net.net(train_batch_size, lr, weight_decay)
# Load the samples
solver.load_sample_and_label_train(train_data, train_label)
solver.load_sample_and_label_test(test_data, test_label)
# Initialize the weights
solver.initial()

# Training loss recorded at every iteration
train_error = np.zeros(num_train)
# Training loop
for i in range(num_train):
    print('iteration', i)
    net.layer.update_method.iteration = i
    solver.forward_train()
    solver.backward_train()
    solver.update()
    train_error[i] = solver.loss.loss

plt.plot(train_error)
plt.show()
# Test
solver.turn_to_test(test_batch_size)
solver.forward_test()
print('accuracy on the test set:', solver.loss.accuracy)

This code is provided for study and reference only. It may not be reposted or used for other purposes without the author's consent; commercial use is prohibited, and violators bear the consequences themselves.
