Implementing a Two-Layer Neural Network in Python (HW3)
This post records a hand-written two-layer fully connected neural network, aimed at the 7-class classification problem in Hung-yi Lee's HW3. I originally wanted to draw the architecture with NN SVG, but the dimensions were too high and it froze (fine, I'm probably just not using the tool right), so a small stand-in structure is shown instead; it's just a simple FCNN.
I won't write out the backpropagation algorithm here; plenty of others have already explained it well.
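That said, for reference, these are the update rules the code below actually applies (the standard results for a sigmoid hidden layer feeding a softmax/cross-entropy output, written in the same notation as the code):

$$\delta^{(2)} = a^{(2)} - y,\qquad \delta^{(1)} = \sigma'(z^{(1)}) \odot \left(W_2^{\top}\,\delta^{(2)}\right)$$

$$W_k \leftarrow W_k - \eta\,\delta^{(k)}\,(a^{(k-1)})^{\top},\qquad b_k \leftarrow b_k - \eta\,\delta^{(k)}$$

Here $a^{(0)}$ is the input vector, $\odot$ is elementwise multiplication, and $\eta$ is the learning rate (lr in the code).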
Rough network structure:
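(In place of the figure, the actual dimensions used below: input 2304, the flattened 48*48 image → hidden layer, 3000 units, sigmoid → output layer, 7 units, softmax.)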
Source code:
import numpy as np
import matplotlib.pyplot as plt
from keras.utils import to_categorical
# Load the face image data
file_path = r'C:/Users/Desktop/2020kaoyan/ml/2017MLSpring_Hung-yi-Lee-master/2017MLSpring_Hung-yi-Lee-master/HW3/data.csv'
with open(file_path, encoding='utf8') as f:
    data = np.loadtxt(f, float, delimiter=",", skiprows=1)
label_path = r'C:/Users/Desktop/2020kaoyan/ml/2017MLSpring_Hung-yi-Lee-master/2017MLSpring_Hung-yi-Lee-master/HW3/label.csv'
with open(label_path, encoding='utf8') as labels_file:
    labels = np.loadtxt(labels_file, float, delimiter=",", skiprows=1)
# Prepare the corresponding matrices; labels are one-hot encoded for the 7 classes
data = np.array(data, dtype=float)
labels = to_categorical(np.array(labels, dtype=float), 7)
# Sigmoid activation
def sigmoid(input_x):
    return 1 / (1 + np.exp(-input_x))
# Derivative of the sigmoid, used in the backward pass
def sigmoid_gradient(x):
    return sigmoid(x) * (1 - sigmoid(x))
# Softmax function (shifting by the max keeps the exponentials from overflowing)
def softmax(x):
    exps = np.exp(x - np.max(x))
    return exps / np.sum(exps)
# Cross-entropy loss. With a softmax output and one-hot labels, the
# categorical form -sum(y * log(x)) is the loss whose gradient w.r.t. the
# pre-softmax input is simply (output - y), which is exactly what the
# backward pass below uses.
def cross_entropy(x, y):
    return np.sum(np.nan_to_num(-y * np.log(x)))
# Initialize the parameters; the network has two weight layers and a single hidden layer
def init_each_params(input_nums, hidden_nums, output_nums):
    # Biases for both layers
    bias_one = np.random.randint(-5, 5, (hidden_nums, 1)).astype(float)
    bias_two = np.random.randint(-5, 5, (output_nums, 1)).astype(float)
    # Weights for both layers (note: integer values this large can saturate
    # the sigmoid; small random floats are the more common choice)
    weight_one = np.random.randint(-5, 5, (hidden_nums, input_nums)).astype(float)
    weight_two = np.random.randint(-5, 5, (output_nums, hidden_nums)).astype(float)
    return bias_one, bias_two, weight_one, weight_two
# Train on the data, one sample at a time (plain SGD)
def training(dataset, labelset, weight1, weight2, bias1, bias2):
    # Learning rate
    lr = 0.02
    for i in range(len(dataset)):
        # Feedforward pass
        a_one = dataset[i, :].reshape(2304, 1)
        z_one = np.matmul(weight1, a_one) + bias1
        # Hidden layer output
        a_two = sigmoid(z_one)
        # Input to the output layer
        z_two = np.matmul(weight2, a_two) + bias2
        # Output layer output (class probabilities)
        outputset = softmax(z_two)
        # Reshape the one-hot label to a column so it lines up with the output
        y = labelset[i, :].reshape(7, 1)
        loss = cross_entropy(outputset, y)
        # Backpropagation pass
        # Error terms for both layers (kept as columns to avoid broadcasting issues)
        theta_out = outputset - y
        theta_first = sigmoid_gradient(z_one) * np.matmul(weight2.T, theta_out)
        # Update the second layer's weight and bias
        weight2 = weight2 - lr * np.matmul(theta_out, a_two.T)
        bias2 = bias2 - lr * theta_out
        # Update the first layer's weight and bias
        weight1 = weight1 - lr * np.matmul(theta_first, a_one.T)
        bias1 = bias1 - lr * theta_first
        print("The loss on sample %d is: %f" % (i, loss))
    return weight1, weight2, bias1, bias2
# Set up the initial parameters.
# The input is a flattened 48*48 grayscale image with no dimensionality
# reduction, so the input layer has 2304 dimensions.
# The hidden layer is given 3000 neurons, and the output layer has 7 neurons
# since this is a 7-class model.
# The whole network is a plain fully connected net.
bias_one, bias_two, weight_one, weight_two = init_each_params(2304, 3000, 7)
model_weight1, model_weight2, model_bias1, model_bias2 = training(data, labels, weight_one, weight_two, bias_one, bias_two)
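The script only prints the per-sample training loss. As a minimal sketch of how the trained parameters could be used afterwards (the predict helper and the accuracy loop are my additions, not part of the original HW3 code; they reuse the sigmoid and softmax defined above):

# Hypothetical helper: one forward pass, returning the argmax class
def predict(x, weight1, weight2, bias1, bias2):
    a_one = x.reshape(2304, 1)
    a_two = sigmoid(np.matmul(weight1, a_one) + bias1)
    probs = softmax(np.matmul(weight2, a_two) + bias2)
    return np.argmax(probs)

# Rough accuracy on the training set (labels is one-hot, so argmax recovers the class)
correct = sum(
    predict(data[i], model_weight1, model_weight2, model_bias1, model_bias2)
    == np.argmax(labels[i])
    for i in range(len(data))
)
print("training accuracy: %.3f" % (correct / len(data)))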