Hand-Writing a Neural Network to Fit the Function y = x1^2 + x2 + 100 (1)
Abstract
This article first generates data, then builds and trains a neural network with one hidden layer on that data, and finally evaluates the fit on a held-out test set. It does not cover the theory or derivations behind neural networks; it is intended for readers who have already studied the theory and want to implement one by hand.
Data Generation
import numpy as np
from matplotlib import pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
# y = x1^2 + x2 + 100
# Generate the training data
def generate_data(size=1000, seed_num=7):
    # Note: x1 and x2 are evenly spaced grids (np.linspace), not random samples,
    # so the seed_num parameter is not actually used here.
    x1 = np.linspace(10, 20, size)
    x2 = np.linspace(100, 400, size)
    y = x1 ** 2 + x2 + 100
    # Plot the 3D surface (optional)
    # figure = plt.figure()
    # ax = Axes3D(figure)
    # xx1, xx2 = np.meshgrid(x1, x2)
    # yy = xx1 ** 2 + xx2 + 100
    # ax.plot_surface(xx1, xx2, yy, cmap="rainbow")
    # plt.show()
    # Stack the columns into a (size, 3) array and return it
    data = np.vstack((x1, x2, y)).T
    return data
This generates 1000 triples (x1, x2, y) with y = x1^2 + x2 + 100. Note that x1 and x2 are sampled on evenly spaced grids rather than at random. The 3D distribution of the data is shown below:
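The later sections refer to train_data, test_data, maximums and minimums. These come from a normalization and train/test-split step that otherwise only appears in the complete listing at the end of the article; for readability, here it is on its own:
# Generate the data, shape = (1000, 3)
data = generate_data(1000)
# Min-max normalize each column to [0, 1]
maximums = np.max(data, axis=0)
minimums = np.min(data, axis=0)
for i in range(data.shape[1]):
    data[:, i] = (data[:, i] - minimums[i]) / (maximums[i] - minimums[i])
# Shuffle, then use the first 100 rows as the test set and the rest for training
np.random.shuffle(data)
test_data = data[:100, :]
train_data = data[100:, :]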
Building the Network Model
This article builds a 2-2-1 network (2 inputs, 2 hidden units, 1 output) to learn y = x1^2 + x2 + 100. Its structure is sketched in the diagram below:
The network uses mean squared error as the loss function and is trained in mini-batches. The code is as follows:
class Network(object):
    def __init__(self, seed_num=7):
        # Build a 2-2-1 network
        # Initialize the parameters randomly
        np.random.seed(seed_num)
        self.w1 = np.random.randn(2, 2)
        self.b1 = np.random.randn(1, 2)
        self.w2 = np.random.randn(2, 1)
        self.b2 = np.random.randn(1, 1)

    def forward(self, x):
        # Forward pass (no activation function)
        self.x2 = np.dot(x, self.w1) + self.b1
        self.x3 = np.dot(self.x2, self.w2) + self.b2
        return self.x3

    def loss(self, y):
        # Mean squared error loss
        error = self.x3 - y
        cost = np.power(error, 2)
        return np.mean(cost)

    def update(self, x, y, learn_rate):
        # Use numpy broadcasting to compute the partial derivative for every
        # component of w2 at once; note this is element-wise *, not dot
        gradient_w2 = (self.x3 - y) * self.x2 * 2
        # Average over the batch so every sample contributes to w2
        gradient_w2 = np.mean(gradient_w2, axis=0)
        gradient_w2 = gradient_w2[:, np.newaxis]
        gradient_b2 = (self.x3 - y) * 2
        gradient_b2 = np.mean(gradient_b2)
        # Chain rule: the first-layer gradients pick up a factor of w2
        gradient_w1 = (self.x3 - y) * 2 * x
        gradient_w1 = np.mean(gradient_w1, axis=0)
        gradient_w1 = gradient_w1[:, np.newaxis].T
        gradient_w1 = np.dot(self.w2, gradient_w1).T
        gradient_b1 = (self.x3 - y) * 2
        gradient_b1 = np.mean(gradient_b1)
        gradient_b1 = gradient_b1 * self.w2.T
        # Gradient-descent update
        self.w1 -= learn_rate * gradient_w1
        self.b1 -= learn_rate * gradient_b1
        self.w2 -= learn_rate * gradient_w2
        self.b2 -= learn_rate * gradient_b2

    def train(self, x, y, iterations=50, learn_rate=0.01):
        losses = []
        data_size = x.shape[0]
        batch_size = 50
        # Take batch_size samples at a time
        for i in range(iterations):
            for k in range(0, data_size, batch_size):
                mini_x = x[k: k + batch_size, :]
                mini_y = y[k: k + batch_size, :]
                self.forward(mini_x)
                l = self.loss(mini_y)
                self.update(mini_x, mini_y, learn_rate)
                losses.append(l)
        return losses
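The gradient formulas in update() follow from the chain rule applied to the mean squared error; since there is no activation function, every factor is just one of the forward-pass matrices. If you want to verify them, a quick finite-difference check helps. The check_gradients helper below is not part of the original article; it is a minimal sketch written against the Network class above:
def check_gradients(net, x, y, eps=1e-6):
    # Compare the analytic gradient of w1[0, 0] (same formula as Network.update)
    # with a central finite-difference estimate.
    net.forward(x)
    g = np.mean((net.x3 - y) * 2 * x, axis=0)[np.newaxis, :]
    analytic = np.dot(net.w2, g).T[0, 0]

    net.w1[0, 0] += eps
    net.forward(x)
    loss_plus = net.loss(y)
    net.w1[0, 0] -= 2 * eps
    net.forward(x)
    loss_minus = net.loss(y)
    net.w1[0, 0] += eps  # restore the original weight

    numeric = (loss_plus - loss_minus) / (2 * eps)
    print(analytic, numeric)  # the two numbers should agree closely

# Example call (uses train_data from the split above):
# check_gradients(Network(), train_data[:, :-1], train_data[:, -1:])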
Training and Plotting the Loss Curve
# Build the model
train_x = train_data[:, :-1]
train_y = train_data[:, -1:]
network = Network()
# Train for 20 iterations
iterations = 20
learn_rate = 0.01
losses = network.train(train_x, train_y, iterations, learn_rate)
# Plot the loss curve
plot_x = np.arange(len(losses))
plot_y = np.array(losses)
plt.plot(plot_x, plot_y)
plt.show()
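One thing worth noting about train() is that the mini-batches are identical in every iteration, since the data is only shuffled once up front. A small variation (not in the original code) is to reshuffle the training set at the start of each epoch; a minimal sketch reusing the Network class:
def train_shuffled(network, x, y, iterations=20, learn_rate=0.01, batch_size=50):
    # Same loop as Network.train, but with a fresh shuffle every epoch
    losses = []
    data_size = x.shape[0]
    for i in range(iterations):
        order = np.random.permutation(data_size)
        x, y = x[order], y[order]
        for k in range(0, data_size, batch_size):
            mini_x = x[k: k + batch_size, :]
            mini_y = y[k: k + batch_size, :]
            network.forward(mini_x)
            losses.append(network.loss(mini_y))
            network.update(mini_x, mini_y, learn_rate)
    return losses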
Testing on the Test Set
# Evaluate on the test set
test_x = test_data[:, :-1]
test_y = test_data[:, -1:]
predict_y = network.forward(test_x)
# Flatten and de-normalize
scale_factor = maximums[-1] - minimums[-1]
test_y = test_y.reshape(-1) * scale_factor + minimums[-1]
predict_y = predict_y.reshape(-1) * scale_factor + minimums[-1]
# Sort and plot
fig, ax = plt.subplots()
plot_test_x = np.arange(test_x.shape[0])
ax.plot(plot_test_x, np.sort(test_y), label='real')
ax.plot(plot_test_x, np.sort(predict_y), label='predict')
ax.legend()
plt.show()
The comparison between the model's predictions and the actual values is shown below:
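Beyond the visual comparison, a quick numeric summary makes the gap concrete. This short snippet is not in the original article; it reuses the de-normalized test_y and predict_y from above:
# Mean absolute error and worst-case error on the de-normalized test set
mae = np.mean(np.abs(predict_y - test_y))
max_err = np.max(np.abs(predict_y - test_y))
print(f"MAE = {mae:.2f}, max error = {max_err:.2f}")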
Remaining Problems
The model's predictions are not accurate and it generalizes poorly, most likely because no layer has an activation function: without a nonlinearity, (x·w1 + b1)·w2 + b2 collapses to a single linear function of x, so the network cannot represent the x1^2 term at all. A sketch of adding an activation function follows the prediction example below.
# Prediction function: takes x1, x2 and returns the predicted y
def predict_function(x1, x2):
    # Use a float array, otherwise the normalization below is truncated to integers
    input_x = np.array([x1, x2], dtype=float)
    input_x = input_x.reshape(1, 2)
    # Normalize the inputs with the training-set statistics
    for i in range(2):
        input_x[:, i] = (input_x[:, i] - minimums[i]) / (maximums[i] - minimums[i])
    output_y = network.forward(input_x)
    # De-normalize the output
    scale_factor = maximums[-1] - minimums[-1]
    output_y = output_y.reshape(-1)[0] * scale_factor + minimums[-1]
    return output_y
print(predict_function(20, 400))
# 288.02699335793426 != 300
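Following up on that diagnosis, below is a minimal sketch of the same 2-2-1 network with a sigmoid activation on the hidden layer. Only forward() and update() change; back-propagation now picks up the extra factor sigmoid'(z) = a * (1 - a). The SigmoidNetwork name and code are an illustration written for this article, not a tested drop-in replacement:
class SigmoidNetwork(Network):
    def forward(self, x):
        # Hidden layer with a sigmoid nonlinearity
        self.z1 = np.dot(x, self.w1) + self.b1
        self.a1 = 1.0 / (1.0 + np.exp(-self.z1))
        self.x3 = np.dot(self.a1, self.w2) + self.b2
        return self.x3

    def update(self, x, y, learn_rate):
        n = x.shape[0]
        # dLoss/doutput, shape (n, 1)
        d_out = 2 * (self.x3 - y) / n
        # Output-layer gradients
        gradient_w2 = np.dot(self.a1.T, d_out)                          # (2, 1)
        gradient_b2 = np.sum(d_out)                                     # scalar
        # Back-propagate through the sigmoid: a * (1 - a)
        d_hidden = np.dot(d_out, self.w2.T) * self.a1 * (1 - self.a1)   # (n, 2)
        gradient_w1 = np.dot(x.T, d_hidden)                             # (2, 2)
        gradient_b1 = np.sum(d_hidden, axis=0, keepdims=True)           # (1, 2)
        # Gradient-descent update
        self.w1 -= learn_rate * gradient_w1
        self.b1 -= learn_rate * gradient_b1
        self.w2 -= learn_rate * gradient_w2
        self.b2 -= learn_rate * gradient_b2
Training uses the same call, e.g. SigmoidNetwork().train(train_x, train_y, iterations, learn_rate). Since the inputs are already normalized to [0, 1], the sigmoid stays away from its flat saturation region.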
Complete Code
import numpy as np
from matplotlib import pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
# y = x1^2 + x2 + 100
# Generate the training data
def generate_data(size=1000, seed_num=7):
    # x1 and x2 are evenly spaced grids (np.linspace), not random samples
    x1 = np.linspace(10, 20, size)
    x2 = np.linspace(100, 400, size)
    y = x1 ** 2 + x2 + 100
    # Plot the 3D surface (optional)
    # figure = plt.figure()
    # ax = Axes3D(figure)
    # xx1, xx2 = np.meshgrid(x1, x2)
    # yy = xx1 ** 2 + xx2 + 100
    # ax.plot_surface(xx1, xx2, yy, cmap="rainbow")
    # plt.show()
    # Stack the columns into a (size, 3) array and return it
    data = np.vstack((x1, x2, y)).T
    return data
class Network(object):
    def __init__(self, seed_num=7):
        # Build a 2-2-1 network
        # Initialize the parameters randomly
        np.random.seed(seed_num)
        self.w1 = np.random.randn(2, 2)
        self.b1 = np.random.randn(1, 2)
        self.w2 = np.random.randn(2, 1)
        self.b2 = np.random.randn(1, 1)

    def forward(self, x):
        # Forward pass (no activation function)
        self.x2 = np.dot(x, self.w1) + self.b1
        self.x3 = np.dot(self.x2, self.w2) + self.b2
        return self.x3

    def loss(self, y):
        # Mean squared error loss
        error = self.x3 - y
        cost = np.power(error, 2)
        return np.mean(cost)

    def update(self, x, y, learn_rate):
        # Use numpy broadcasting to compute the partial derivative for every
        # component of w2 at once; note this is element-wise *, not dot
        gradient_w2 = (self.x3 - y) * self.x2 * 2
        # Average over the batch so every sample contributes to w2
        gradient_w2 = np.mean(gradient_w2, axis=0)
        gradient_w2 = gradient_w2[:, np.newaxis]
        gradient_b2 = (self.x3 - y) * 2
        gradient_b2 = np.mean(gradient_b2)
        # Chain rule: the first-layer gradients pick up a factor of w2
        gradient_w1 = (self.x3 - y) * 2 * x
        gradient_w1 = np.mean(gradient_w1, axis=0)
        gradient_w1 = gradient_w1[:, np.newaxis].T
        gradient_w1 = np.dot(self.w2, gradient_w1).T
        gradient_b1 = (self.x3 - y) * 2
        gradient_b1 = np.mean(gradient_b1)
        gradient_b1 = gradient_b1 * self.w2.T
        # Gradient-descent update
        self.w1 -= learn_rate * gradient_w1
        self.b1 -= learn_rate * gradient_b1
        self.w2 -= learn_rate * gradient_w2
        self.b2 -= learn_rate * gradient_b2

    def train(self, x, y, iterations=50, learn_rate=0.01):
        losses = []
        data_size = x.shape[0]
        batch_size = 50
        # Take batch_size samples at a time
        for i in range(iterations):
            for k in range(0, data_size, batch_size):
                mini_x = x[k: k + batch_size, :]
                mini_y = y[k: k + batch_size, :]
                self.forward(mini_x)
                l = self.loss(mini_y)
                self.update(mini_x, mini_y, learn_rate)
                losses.append(l)
        return losses
if __name__ == '__main__':
    # Generate the data, shape = (1000, 3)
    data = generate_data(1000)
    # Min-max normalize each column to [0, 1]
    maximums = np.max(data, axis=0)
    minimums = np.min(data, axis=0)
    for i in range(data.shape[1]):
        data[:, i] = (data[:, i] - minimums[i]) / (maximums[i] - minimums[i])
    # Shuffle; the first 100 rows are the test set, the rest the training set
    np.random.shuffle(data)
    test_data = data[:100, :]
    train_data = data[100:, :]
    # Build the model
    train_x = train_data[:, :-1]
    train_y = train_data[:, -1:]
    network = Network()
    iterations = 20
    learn_rate = 0.01
    losses = network.train(train_x, train_y, iterations, learn_rate)
    # Plot the loss curve
    # plot_x = np.arange(len(losses))
    # plot_y = np.array(losses)
    # plt.plot(plot_x, plot_y)
    # plt.show()
    # Evaluate on the test set
    test_x = test_data[:, :-1]
    test_y = test_data[:, -1:]
    predict_y = network.forward(test_x)
    # Flatten and de-normalize
    scale_factor = maximums[-1] - minimums[-1]
    test_y = test_y.reshape(-1) * scale_factor + minimums[-1]
    predict_y = predict_y.reshape(-1) * scale_factor + minimums[-1]
    # Sort and plot
    fig, ax = plt.subplots()
    plot_test_x = np.arange(test_x.shape[0])
    ax.plot(plot_test_x, np.sort(test_y), label='real')
    ax.plot(plot_test_x, np.sort(predict_y), label='predict')
    ax.legend()
    plt.show()
    # Prediction function: takes x1, x2 and returns the predicted y
    def predict_function(x1, x2):
        # Use a float array so the normalization below is not truncated to integers
        input_x = np.array([x1, x2], dtype=float)
        input_x = input_x.reshape(1, 2)
        # Normalize the inputs with the training-set statistics
        for i in range(2):
            input_x[:, i] = (input_x[:, i] - minimums[i]) / (maximums[i] - minimums[i])
        output_y = network.forward(input_x)
        # De-normalize the output
        scale_factor = maximums[-1] - minimums[-1]
        output_y = output_y.reshape(-1)[0] * scale_factor + minimums[-1]
        return output_y

    print(predict_function(20, 400))
    # 288.02699335793426 != 300