Hand-Writing a Two-Layer Neural Network to Fit y = x1^2 + x2 + 100 (1)

Abstract

This post first generates data, then builds a neural network with one hidden layer and trains it on that data, and finally evaluates the fit on held-out test data. It does not cover the theory behind neural networks or the derivations; it is meant for readers who have already studied the theory and want to implement one by hand.

Data Generation

import numpy as np
from matplotlib import pyplot as plt
from mpl_toolkits.mplot3d import Axes3D


# y = x1^2 + x2 + 100
# Generate the training data
def generate_data(size=1000):
    x1 = np.linspace(10, 20, size)
    x2 = np.linspace(100, 400, size)
    y = x1 ** 2 + x2 + 100
    # Plot the 3D surface
    # figure = plt.figure()
    # ax = Axes3D(figure)
    # xx1, xx2 = np.meshgrid(x1, x2)
    # yy = xx1 ** 2 + xx2 + 100
    # ax.plot_surface(xx1, xx2, yy, cmap="rainbow")
    # plt.show()
    # Return the stacked data
    data = np.vstack((x1, x2, y)).T
    return data

This generates 1,000 (x1, x2, y) triples, with x1 evenly spaced over [10, 20] and x2 evenly spaced over [100, 400] (note: evenly spaced, not random), where y = x1^2 + x2 + 100. The 3D surface of the target function looks like this:

[Figure: 3D surface of y = x1^2 + x2 + 100]
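If you would rather train on genuinely random samples, here is a minimal sketch over the same ranges. This is not part of the original code; generate_random_data is a hypothetical variant:

# Hypothetical variant: random sampling over the same input ranges
def generate_random_data(size=1000, seed_num=7):
    rng = np.random.default_rng(seed_num)  # seeded for reproducibility
    x1 = rng.uniform(10, 20, size)         # x1 drawn uniformly from [10, 20)
    x2 = rng.uniform(100, 400, size)       # x2 drawn uniformly from [100, 400)
    y = x1 ** 2 + x2 + 100
    return np.vstack((x1, x2, y)).T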

Building the Network Model

This post builds a 2-2-1 network (two inputs, two hidden units, one output) to learn y = x1^2 + x2 + 100. Its structure is shown below:

[Figure: diagram of the 2-2-1 network]

The network uses mean squared error as the loss function and is trained in mini-batches.
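Before reading update(), it helps to write the gradients out. With no activation, the forward pass is h = x @ w1 + b1 and y_hat = h @ w2 + b2, and the batch loss is L = mean((y_hat - y)^2). The chain rule gives, with the mean taken over the batch:

dL/dw2[j] = mean(2 * (y_hat - y) * h[j])
dL/db2 = mean(2 * (y_hat - y))
dL/dw1[i][j] = mean(2 * (y_hat - y) * x[i]) * w2[j]
dL/db1[j] = mean(2 * (y_hat - y)) * w2[j]

These are exactly the quantities update() computes in the code below: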

class Network(object):
    def __init__(self, seed_num=7):
        # Build a 2-2-1 network
        # Randomly initialize the parameters
        np.random.seed(seed_num)
        self.w1 = np.random.randn(2, 2)
        self.b1 = np.random.randn(1, 2)
        self.w2 = np.random.randn(2, 1)
        self.b2 = np.random.randn(1, 1)

    def forward(self, x):
        # Forward pass (no activation function on either layer)
        self.x2 = np.dot(x, self.w1) + self.b1  # hidden-layer output (not the input feature x2)
        self.x3 = np.dot(self.x2, self.w2) + self.b2
        return self.x3

    def loss(self, y):
        # Mean squared error loss
        error = self.x3 - y
        cost = np.power(error, 2)
        return np.mean(cost)

    def update(self, x, y, learn_rate):
        # Compute the partial derivative for every component of w2 in parallel;
        # note this is elementwise * rather than dot
        gradient_w2 = (self.x3 - y) * self.x2 * 2
        # Average over the batch so every sample contributes to w2's gradient
        gradient_w2 = np.mean(gradient_w2, axis=0)
        gradient_w2 = gradient_w2[:, np.newaxis]

        gradient_b2 = (self.x3 - y) * 2
        gradient_b2 = np.mean(gradient_b2)

        gradient_w1 = (self.x3 - y) * 2 * x
        gradient_w1 = np.mean(gradient_w1, axis=0)
        gradient_w1 = gradient_w1[:, np.newaxis].T
        gradient_w1 = np.dot(self.w2, gradient_w1).T

        gradient_b1 = (self.x3 - y) * 2
        gradient_b1 = np.mean(gradient_b1)
        gradient_b1 = gradient_b1 * self.w2.T
        # Update the parameters
        self.w1 -= learn_rate * gradient_w1
        self.b1 -= learn_rate * gradient_b1
        self.w2 -= learn_rate * gradient_w2
        self.b2 -= learn_rate * gradient_b2

    def train(self, x, y, iterations=50, learn_rate=0.01):
        losses = []
        data_size = x.shape[0]
        batch_size = 50
        # Train on batch_size samples at a time
        for i in range(iterations):
            for k in range(0, data_size, batch_size):
                mini_x = x[k: k + batch_size, :]
                mini_y = y[k: k + batch_size, :]
                self.forward(mini_x)
                batch_loss = self.loss(mini_y)
                self.update(mini_x, mini_y, learn_rate)
                losses.append(batch_loss)
        return losses
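The index gymnastics in update() are easy to get wrong, so a quick finite-difference check is worth having. The sketch below is not part of the original post; check_gradient_w1 is a hypothetical helper that perturbs w1[0, 0] and compares the numerical slope of the loss with the analytic gradient:

# Minimal finite-difference gradient check for w1[0, 0] (a sketch, not in the original post)
def check_gradient_w1(net, x, y, eps=1e-6):
    net.forward(x)
    base = net.loss(y)
    # Analytic gradient, recomputed the same way update() does
    g = (net.x3 - y) * 2 * x
    g = np.mean(g, axis=0)[:, np.newaxis].T
    analytic = np.dot(net.w2, g).T[0, 0]
    # Numerical gradient by perturbing w1[0, 0]
    net.w1[0, 0] += eps
    net.forward(x)
    numeric = (net.loss(y) - base) / eps
    net.w1[0, 0] -= eps  # restore the weight
    print(analytic, numeric)  # the two values should be close

Run on a freshly constructed Network with any batch of normalized data, the two printed numbers should agree to several significant digits.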

Training and Plotting the Loss Curve

# Build the model (train_data comes from the normalized, shuffled data; see the full code below)
train_x = train_data[:, :-1]
train_y = train_data[:, -1:]
network = Network()
# Train for 20 epochs
iterations = 20
learn_rate = 0.01
losses = network.train(train_x, train_y, iterations, learn_rate)

# Plot the loss curve
plot_x = np.arange(len(losses))
plot_y = np.array(losses)
plt.plot(plot_x, plot_y)
plt.show()

[Figure: training loss curve]
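The loss drops over several orders of magnitude early on, so a log-scaled y-axis (an optional tweak, not in the original post) often makes the curve easier to read:

plt.plot(plot_x, plot_y)
plt.yscale('log')  # log scale makes the initial steep drop readable
plt.xlabel('mini-batch update')
plt.ylabel('MSE loss')
plt.show()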

Evaluating on the Test Set

# Evaluate on the test set
test_x = test_data[:, :-1]
test_y = test_data[:, -1:]
predict_y = network.forward(test_x)
# Flatten and de-normalize (maximums/minimums are the per-column stats from normalization)
scale_factor = maximums[-1] - minimums[-1]
test_y = test_y.reshape(-1) * scale_factor + minimums[-1]
predict_y = predict_y.reshape(-1) * scale_factor + minimums[-1]
# Sort and plot
fig, ax = plt.subplots()
plot_test_x = np.arange(test_x.shape[0])
ax.plot(plot_test_x, np.sort(test_y), label='real')
ax.plot(plot_test_x, np.sort(predict_y), label='predict')
ax.legend()
plt.show()

The model's predictions are compared against the actual values below:

[Figure: sorted predictions vs. actual test values]
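Note that the two curves are sorted independently, so the plot compares value distributions rather than per-sample errors. For a single quantitative number, the RMSE on the de-normalized test arrays from above is easy to add (a small sketch, reusing test_y and predict_y):

# RMSE on the de-normalized test set
rmse = np.sqrt(np.mean((predict_y - test_y) ** 2))
print('test RMSE:', rmse)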

Remaining Problems

The model's predictions are not accurate and it generalizes poorly, most likely because neither layer has an activation function: without a nonlinearity, the two layers collapse into a single linear map, which cannot represent the x1^2 term at all.

# Prediction function: takes x1, x2 and returns the predicted y
def predict_function(x1, x2):
    # dtype=float matters: with an integer array, the normalized
    # values assigned below would be truncated toward zero
    input_x = np.array([x1, x2], dtype=float)
    input_x = input_x.reshape(1, 2)
    # Normalize
    for i in range(2):
        input_x[:, i] = (input_x[:, i] - minimums[i]) / (maximums[i] - minimums[i])
    output_y = network.forward(input_x)
    # De-normalize
    scale_factor = maximums[-1] - minimums[-1]
    output_y = output_y.reshape(-1)[0] * scale_factor + minimums[-1]
    return output_y

print(predict_function(20, 400))
# 288.02699335793426, while the true value is 20^2 + 400 + 100 = 900
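One way forward, sketched here as a hypothetical variant rather than part of the original post, is to add a sigmoid activation on the hidden layer. The forward pass gains the nonlinearity, and the hidden-layer gradients pick up an extra sigmoid-derivative factor x2 * (1 - x2):

# A sketch (not in the original post): the same 2-2-1 net with a sigmoid hidden layer
class SigmoidNetwork(Network):
    def forward(self, x):
        self.z = np.dot(x, self.w1) + self.b1
        self.x2 = 1.0 / (1.0 + np.exp(-self.z))  # sigmoid activation
        self.x3 = np.dot(self.x2, self.w2) + self.b2
        return self.x3

    def update(self, x, y, learn_rate):
        n = x.shape[0]
        # Output-layer gradients, averaged over the batch as before
        gradient_w2 = np.dot(self.x2.T, 2 * (self.x3 - y)) / n
        gradient_b2 = np.mean(2 * (self.x3 - y))
        # Backprop through the sigmoid: extra x2 * (1 - x2) factor
        delta = np.dot(2 * (self.x3 - y), self.w2.T) * self.x2 * (1 - self.x2)
        gradient_w1 = np.dot(x.T, delta) / n
        gradient_b1 = np.mean(delta, axis=0, keepdims=True)
        self.w1 -= learn_rate * gradient_w1
        self.b1 -= learn_rate * gradient_b1
        self.w2 -= learn_rate * gradient_w2
        self.b2 -= learn_rate * gradient_b2

Whether this tiny 2-2-1 net then fits the x1^2 term well is a separate question (a wider hidden layer usually helps), but the sketch shows where the activation enters the backward pass.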

Full Code

import numpy as np
from matplotlib import pyplot as plt
from mpl_toolkits.mplot3d import Axes3D


# y = x1^2 + x2 + 100
# Generate the training data
def generate_data(size=1000):
    x1 = np.linspace(10, 20, size)
    x2 = np.linspace(100, 400, size)
    y = x1 ** 2 + x2 + 100
    # Plot the 3D surface
    # figure = plt.figure()
    # ax = Axes3D(figure)
    # xx1, xx2 = np.meshgrid(x1, x2)
    # yy = xx1 ** 2 + xx2 + 100
    # ax.plot_surface(xx1, xx2, yy, cmap="rainbow")
    # plt.show()
    # Return the stacked data
    data = np.vstack((x1, x2, y)).T
    return data


class Network(object):
    def __init__(self, seed_num=7):
        # Build a 2-2-1 network
        # Randomly initialize the parameters
        np.random.seed(seed_num)
        self.w1 = np.random.randn(2, 2)
        self.b1 = np.random.randn(1, 2)
        self.w2 = np.random.randn(2, 1)
        self.b2 = np.random.randn(1, 1)

    def forward(self, x):
        # Forward pass (no activation function on either layer)
        self.x2 = np.dot(x, self.w1) + self.b1  # hidden-layer output (not the input feature x2)
        self.x3 = np.dot(self.x2, self.w2) + self.b2
        return self.x3

    def loss(self, y):
        # Mean squared error loss
        error = self.x3 - y
        cost = np.power(error, 2)
        return np.mean(cost)

    def update(self, x, y, learn_rate):
        # Compute the partial derivative for every component of w2 in parallel;
        # note this is elementwise * rather than dot
        gradient_w2 = (self.x3 - y) * self.x2 * 2
        # Average over the batch so every sample contributes to w2's gradient
        gradient_w2 = np.mean(gradient_w2, axis=0)
        gradient_w2 = gradient_w2[:, np.newaxis]

        gradient_b2 = (self.x3 - y) * 2
        gradient_b2 = np.mean(gradient_b2)

        gradient_w1 = (self.x3 - y) * 2 * x
        gradient_w1 = np.mean(gradient_w1, axis=0)
        gradient_w1 = gradient_w1[:, np.newaxis].T
        gradient_w1 = np.dot(self.w2, gradient_w1).T

        gradient_b1 = (self.x3 - y) * 2
        gradient_b1 = np.mean(gradient_b1)
        gradient_b1 = gradient_b1 * self.w2.T
        # Update the parameters
        self.w1 -= learn_rate * gradient_w1
        self.b1 -= learn_rate * gradient_b1
        self.w2 -= learn_rate * gradient_w2
        self.b2 -= learn_rate * gradient_b2

    def train(self, x, y, iterations=50, learn_rate=0.01):
        losses = []
        data_size = x.shape[0]
        batch_size = 50
        # Train on batch_size samples at a time
        for i in range(iterations):
            for k in range(0, data_size, batch_size):
                mini_x = x[k: k + batch_size, :]
                mini_y = y[k: k + batch_size, :]
                self.forward(mini_x)
                batch_loss = self.loss(mini_y)
                self.update(mini_x, mini_y, learn_rate)
                losses.append(batch_loss)
        return losses


if __name__ == '__main__':
    # Generate the data, shape = (1000, 3)
    data = generate_data(1000)
    # Normalize each column to [0, 1]
    # (note: the stats are computed over all rows, including the future test set)
    maximums = np.max(data, axis=0)
    minimums = np.min(data, axis=0)
    for i in range(data.shape[1]):
        data[:, i] = (data[:, i] - minimums[i]) / (maximums[i] - minimums[i])
    # Shuffle; the first 100 rows become the test set, the rest the training set
    np.random.shuffle(data)
    test_data = data[:100, :]
    train_data = data[100:, :]

    # Build the model
    train_x = train_data[:, :-1]
    train_y = train_data[:, -1:]
    network = Network()
    iterations = 20
    learn_rate = 0.01
    losses = network.train(train_x, train_y, iterations, learn_rate)

    # Plot the loss curve
    # plot_x = np.arange(len(losses))
    # plot_y = np.array(losses)
    # plt.plot(plot_x, plot_y)
    # plt.show()

    # Evaluate on the test set
    test_x = test_data[:, :-1]
    test_y = test_data[:, -1:]
    predict_y = network.forward(test_x)
    # Flatten and de-normalize
    scale_factor = maximums[-1] - minimums[-1]
    test_y = test_y.reshape(-1) * scale_factor + minimums[-1]
    predict_y = predict_y.reshape(-1) * scale_factor + minimums[-1]
    # Sort and plot
    fig, ax = plt.subplots()
    plot_test_x = np.arange(test_x.shape[0])
    ax.plot(plot_test_x, np.sort(test_y), label='real')
    ax.plot(plot_test_x, np.sort(predict_y), label='predict')
    ax.legend()
    plt.show()

    # Prediction function: takes x1, x2 and returns the predicted y
    def predict_function(x1, x2):
        # dtype=float matters: an integer array would truncate the normalized values
        input_x = np.array([x1, x2], dtype=float)
        input_x = input_x.reshape(1, 2)
        # Normalize
        for i in range(2):
            input_x[:, i] = (input_x[:, i] - minimums[i]) / (maximums[i] - minimums[i])
        output_y = network.forward(input_x)
        # De-normalize
        scale_factor = maximums[-1] - minimums[-1]
        output_y = output_y.reshape(-1)[0] * scale_factor + minimums[-1]
        return output_y

    print(predict_function(20, 400))
    # 288.02699335793426, while the true value is 20^2 + 400 + 100 = 900

References

Implementing a Neural Network with Python and NumPy (用python和numpy实现神经网络)
