使用循环神经网络(RNN)实现简易的二进制加法器

利用python实现简易的循环神经网络,并在一个小demo(8比特二进制加法器)上进行了验证,激活函数为logistic函数,利用反向传播算法进行训练。具体的算法原理以及公式推导可参考相关的文献:《深度学习》、Recurrent nets and LSTM等。

相关的源码以及数据集可在链接中进行下载。


rnn.py

# -*- coding: utf-8 -*-

"""
简易的RNN模型实现
"""
import numpy as np


class rnn():
    """Minimal simple RNN with logistic (sigmoid) activations, trained by
    backpropagation through time; used here as an 8-bit binary adder demo.

    NOTE(review): both fit() and predict() traverse the time steps from the
    END of the stored sequence towards the start, so the recurrent state
    flows backwards through the arrays (LSB-first for the adder data
    produced by datasetgen.py) — confirm this matches the data layout.
    """

    def __init__(self):
        # Hyper-parameters and model state; the weight matrices are
        # allocated lazily in fit() once the input dimensions are known.
        self.eb = 200           # error tolerance: training stops once total loss < eb
        self.eta = 0.0001       # learning rate
        self.maxiter = 2000     # maximum number of training iterations
        self.errlist = []       # training-loss history, one entry per iteration
        self.acclist = []       # test-set accuracy history, one entry per iteration
        self.data = None        # training data, shape (time, nSampNum, nSampDim)
        self.label = None       # training labels, shape (time, nSampNum)
        self.nSampNum = 0       # number of samples per time step
        self.nSampDim = 0       # input dimension per time step
        self.nHidden = 16       # number of hidden units
        self.nOut = 1           # number of output units
        self.iterator = 0       # iteration index at which training stopped
        self.hide_wb = None     # input->hidden weights, (nSampDim, nHidden)
        self.tseq_wb = None     # recurrent hidden->hidden weights, (nHidden, nHidden)
        self.out_wb = None      # hidden->output weights, (nHidden, nOut)
        self.time = 0           # sequence length (number of time steps)

    def fit(self, X, y, Xx, Yy):
        """Train the network on (X, y); (Xx, Yy) is a held-out set used only
        to report accuracy after each iteration.

        X: sequence of length `time`, each element (nSampNum, nSampDim)
        y: sequence of length `time`, each element (nSampNum,)
        """
        self.nSampNum, self.nSampDim = np.shape(X[0])
        self.time = len(y)
        self.data = np.copy(X)
        self.label = np.copy(y)
        # Weights initialised uniformly in [-1, 1).
        self.hide_wb = 2 * np.random.random((self.nSampDim, self.nHidden)) - 1
        self.out_wb = 2 * np.random.random((self.nHidden, self.nOut)) - 1
        self.tseq_wb = 2 * np.random.random((self.nHidden, self.nHidden)) - 1

        for iter in range(self.maxiter):
            value_out = np.zeros((self.time, self.nSampNum, self.nOut))             # output-layer activations per time step
            value_hide = np.zeros((self.time+1, self.nSampNum, self.nHidden))       # hidden activations; the extra last slot is the zero initial state
            value_loss = np.zeros((self.time, self.nSampNum))                       # per-time-step residual (label - prediction)

            # Forward pass.
            for _t in range(self.time):
                t = self.time - _t - 1       # walk from the last time step towards the first
                hide_output = self.sigmoid(self.data[t].dot(self.hide_wb) + value_hide[t+1, :, :].dot(self.tseq_wb))
                value_hide[t, :, :] = hide_output
                out_output = self.sigmoid(hide_output.dot(self.out_wb))
                value_out[t, :, :] = out_output

                loss = self.label[t] - out_output.T
                value_loss[t, :] = np.sum(loss, axis=0)
            # NOTE(review): breaking here means the final iteration's loss is
            # never appended to errlist/acclist, and at iter == maxiter-1 the
            # loop always stops regardless of the error tolerance.
            if self.eb > self.errorfunc(value_loss) or iter == self.maxiter-1:
                self.iterator = iter
                break
            self.errlist.append(self.errorfunc(value_loss))

            pre = self.predict(Xx)
            self.acclist.append(np.sum(pre == Yy) / np.size(pre))
            print('******** iter:', '%4i' % iter, '******** accuracy:', '%.5f' % self.acclist[iter],
                  '******** loss:', '%5f' % self.errlist[iter])
            # Backward pass (BPTT-style per-step updates).
            for t in range(self.time):
                # Output-layer delta: residual scaled by sigmoid derivative.
                delta_out = np.multiply(value_loss[t, :], self.dlogit(value_out[t, :, :]).T)
                # Hidden-layer delta, back-propagated through out_wb.
                delta_hide = np.multiply(self.out_wb.dot(delta_out), self.dlogit(value_hide[t, :, :]).T)
                # Recurrent-weight gradient, via the hidden state of the next stored step.
                delta_tseq = np.dot(delta_hide, value_hide[t+1, :, :])

                # Gradient-ascent sign is correct because value_loss holds (label - output).
                self.out_wb += self.eta * delta_out.dot(value_hide[t, :, :]).T
                self.hide_wb += self.eta * delta_hide.dot(self.data[t, :, :]).T
                self.tseq_wb += self.eta * delta_tseq.T

    def predict(self, X):
        """Run a forward pass over the sequence X and threshold the outputs.

        Returns an int array of shape (time, n): each column is one sample,
        the rows form its predicted bit sequence.
        """
        assert len(X) == self.time
        n, d = np.shape(X[0])
        value_out = np.zeros((self.time, n, self.nOut))  # output-layer activations per time step
        value_hide = np.zeros((self.time + 1, n, self.nHidden))  # hidden activations; extra last slot is the zero initial state
        for _t in range(self.time):
            t = self.time - _t - 1  # walk from the last time step towards the first
            hide_output = self.sigmoid(X[t].dot(self.hide_wb) + value_hide[t + 1, :, :].dot(self.tseq_wb))
            value_hide[t, :, :] = hide_output
            out_output = self.sigmoid(hide_output.dot(self.out_wb))
            value_out[t, :, :] = out_output
        # signbit(-x + 0.5) is True where x > 0.5; summing over the output
        # units and comparing with nOut/2 is a majority vote, cast to 0/1.
        pre = 1*(np.sum(np.signbit(-value_out+0.5), axis=2) > self.nOut/2)
        return pre      # each column is one sample; rows are the predicted sequence

    # Logistic (sigmoid) activation.
    def sigmoid(self, x):
        return 1 / (1 + np.exp(-x))

    # Derivative of the logistic function, expressed via its output value.
    def dlogit(self, output):
        return output * (1 - output)

    # Half the sum of squared elements (squared-error loss).
    def errorfunc(self, inX):
        return np.sum(np.power(inX, 2)) * 0.5

    def TrendLine(self, plt, color='r', type='loss'):
        """Plot the loss (log2 scale, red) or accuracy (green) history onto
        the current matplotlib axes."""
        X = np.linspace(0, self.iterator, self.iterator)
        if type == 'loss':
            Y = np.log2(self.errlist)
        else:
            Y = np.array(self.acclist)
            color = 'g'
        plt.xlabel("iteration")
        plt.ylabel(type)
        plt.plot(X, Y, color)

rnn_test.py

# -*- coding: utf-8 -*-

"""
利用simple-RNN实现简易的二进制加法器
具体功能为:输入两个数的二进制编码,输出两个数之和的二进制表示
"""

from rnn import rnn
import numpy as np
import matplotlib.pyplot as plt


if __name__ == '__main__':
    # Train the simple RNN adder on the generated dataset, evaluate on the
    # held-out test set, then plot the accuracy and loss curves.
    data = np.load("dataset.npz")
    X = data['data']
    y = data['label']
    sip_rnn = rnn()

    testset = np.load('testset.npz')
    sip_rnn.fit(X, y, testset['data'], testset['label'])

    pre = sip_rnn.predict(testset['data'])
    print('accuracy:', np.sum(pre == testset['label'])/np.size(pre))

    fig = plt.figure()
    fig.add_subplot(211)                       # top panel: accuracy curve
    sip_rnn.TrendLine(plt, type='accuracy')    # fixed typo: was 'accuray' (wrong y-axis label)
    fig.add_subplot(212)                       # bottom panel: loss curve
    sip_rnn.TrendLine(plt)
    plt.savefig('rnn_info.png')
    plt.show()

datasetgen.py

# -*- coding: utf-8 -*-

"""
生成训练样本
"""
import numpy as np


def binary_dict(dim=8):
    """Map every integer in [0, 2**dim) to its dim-bit big-endian code.

    Fix: the original always returned 8-bit codes regardless of `dim`
    (np.unpackbits on uint8 yields 8 bits) and silently overflowed for
    dim > 8.  Now the last `dim` bits are kept and out-of-range dims are
    rejected explicitly.

    :param dim: number of binary digits, 1..8 (uint8-based encoding)
    :return: dict {int: np.ndarray of dim bits, MSB first}
    :raises ValueError: if dim is outside [1, 8]
    """
    if not 1 <= dim <= 8:
        raise ValueError("dim must be in [1, 8] for uint8-based encoding")
    up_bound = pow(2, dim)
    # (up_bound, 8) bit matrix; keep only the dim least-significant columns.
    codes = np.unpackbits(np.arange(up_bound, dtype=np.uint8)[:, None], axis=1)
    return {i: codes[i, -dim:] for i in range(up_bound)}


def train_set_gen(n=10000, dim=8, name='dataset'):
    """Generate n binary-addition samples and save them to <name>.npz.

    Each sample is a pair of random integers in [0, 2**dim / 2), so the sum
    still fits in dim bits (no uint8 overflow).  The saved arrays are ordered
    most-significant bit first:
        data:  (dim, n, 2) -- bit i of both addends, for every sample
        label: (dim, n)    -- bit i of the sum

    Fix: `np.random.randint` was given the float `up_bound / 2` as its upper
    bound; use integer floor division instead.

    :param n: number of samples
    :param dim: number of binary digits (uint8/unpackbits based, so dim <= 8)
    :param name: output file stem (".npz" is appended)
    """
    up_bound = pow(2, dim)
    data, label = [], []
    x1 = np.random.randint(up_bound // 2, size=(n, 1), dtype=np.uint8)  # decimal addends
    x2 = np.random.randint(up_bound // 2, size=(n, 1), dtype=np.uint8)
    X1 = np.unpackbits(x1, axis=1)                                      # 8-bit binary codes
    X2 = np.unpackbits(x2, axis=1)
    Y = np.unpackbits(x1 + x2, axis=1)                                  # sum < up_bound, fits in uint8

    for i in range(dim):
        # Stack bit i of both addends side by side: shape (n, 2).
        data.append(np.stack((X1[:, i], X2[:, i]), axis=1))
        label.append(Y[:, i])
    np.savez(name + ".npz", data=data, label=label)

if __name__ == '__main__':
    # Guarded so importing this module no longer regenerates the datasets
    # as a side effect; run the file directly to (re)create them.
    train_set_gen()                          # 10000-sample training set -> dataset.npz
    train_set_gen(n=100, name='testset')     # 100-sample test set -> testset.npz

实验效果图
rnn_info

好的,我可以为你提供一个简单的 C# 实现 RNN 二进制加法的代码示例。在这个示例中,我们将使用 RNN 来学习如何将两个二进制数相加。 首先,我们需要导入以下命名空间: ```csharp using System; using System.Collections.Generic; using System.Linq; using System.Text; using System.Threading.Tasks; ``` 然后,我们定义一些常量和变量: ```csharp const int INPUT_SIZE = 2; const int HIDDEN_SIZE = 16; const int OUTPUT_SIZE = 1; const int ITERATIONS = 10000; const double LEARNING_RATE = 0.1; double[,] X = new double[,] { { 0, 0 }, { 0, 1 }, { 1, 0 }, { 1, 1 } }; double[,] Y = new double[,] { { 0 }, { 1 }, { 1 }, { 0 } }; double[,] Wxh = new double[INPUT_SIZE, HIDDEN_SIZE]; double[,] Whh = new double[HIDDEN_SIZE, HIDDEN_SIZE]; double[,] Why = new double[HIDDEN_SIZE, OUTPUT_SIZE]; double[,] bh = new double[1, HIDDEN_SIZE]; double[,] by = new double[1, OUTPUT_SIZE]; double[,] h = new double[1, HIDDEN_SIZE]; double[,] y = new double[1, OUTPUT_SIZE]; ``` 接下来,我们定义一些函数来执行矩阵乘法和激活函数: ```csharp static double[,] Dot(double[,] a, double[,] b) { int rows = a.GetLength(0); int cols = b.GetLength(1); double[,] c = new double[rows, cols]; for (int i = 0; i < rows; i++) { for (int j = 0; j < cols; j++) { double sum = 0.0; for (int k = 0; k < a.GetLength(1); k++) { sum += a[i, k] * b[k, j]; } c[i, j] = sum; } } return c; } static double[,] Sigmoid(double[,] a) { int rows = a.GetLength(0); int cols = a.GetLength(1); double[,] b = new double[rows, cols]; for (int i = 0; i < rows; i++) { for (int j = 0; j < cols; j++) { b[i, j] = 1.0 / (1.0 + Math.Exp(-a[i, j])); } } return b; } ``` 然后,我们初始化权重和偏差: ```csharp Random rand = new Random(); for (int i = 0; i < INPUT_SIZE; i++) { for (int j = 0; j < HIDDEN_SIZE; j++) { Wxh[i, j] = rand.NextDouble() - 0.5; } } for (int i = 0; i < HIDDEN_SIZE; i++) { for (int j = 0; j < HIDDEN_SIZE; j++) { Whh[i, j] = rand.NextDouble() - 0.5; } } for (int i = 0; i < HIDDEN_SIZE; i++) { for (int j = 0; j < OUTPUT_SIZE; j++) { Why[i, j] = rand.NextDouble() - 0.5; } } for (int i = 0; i < 1; i++) { for (int j = 0; j < HIDDEN_SIZE; j++) { bh[i, j] = rand.NextDouble() - 
0.5; } } for (int i = 0; i < 1; i++) { for (int j = 0; j < OUTPUT_SIZE; j++) { by[i, j] = rand.NextDouble() - 0.5; } } ``` 现在,我们可以开始训练 RNN 模型。在每次迭代中,我们将输入两个二进制数并计算输出结果。然后,我们使用反向传播算法来更新权重和偏差: ```csharp for (int iter = 0; iter < ITERATIONS; iter++) { double loss = 0.0; for (int i = 0; i < X.GetLength(0); i++) { // forward pass h = Sigmoid(Dot(X.GetRow(i), Wxh) + Dot(h, Whh) + bh); y = Sigmoid(Dot(h, Why) + by); // calculate loss loss += Math.Pow(Y[i, 0] - y[0, 0], 2); // backward pass double[,] dy = new double[1, OUTPUT_SIZE]; dy[0, 0] = y[0, 0] - Y[i, 0]; double[,] dh = Dot(dy, Why.Transpose()) * h * (1 - h); double[,] dWhy = Dot(h.Transpose(), dy); double[,] dWxh = Dot(X.GetRow(i).Transpose(), dh); double[,] dWhh = Dot(h.Transpose(), dh) + Dot(h.Transpose(), dh).Transpose(); double[,] dby = dy; double[,] dbh = dh; // update weights and biases Why -= LEARNING_RATE * dWhy; Wxh -= LEARNING_RATE * dWxh; Whh -= LEARNING_RATE * dWhh; by -= LEARNING_RATE * dby; bh -= LEARNING_RATE * dbh; } loss /= X.GetLength(0); if (iter % 1000 == 0) { Console.WriteLine($"Iteration: {iter}, Loss: {loss}"); } } ``` 最后,我们可以使用训练好的模型来计算两个二进制数的和: ```csharp double[,] x1 = new double[,] { { 0, 0 } }; double[,] x2 = new double[,] { { 0, 1 } }; double[,] x3 = new double[,] { { 1, 0 } }; double[,] x4 = new double[,] { { 1, 1 } }; double[,] h1 = new double[1, HIDDEN_SIZE]; double[,] h2 = new double[1, HIDDEN_SIZE]; double[,] h3 = new double[1, HIDDEN_SIZE]; double[,] h4 = new double[1, HIDDEN_SIZE]; double[,] y1 = new double[1, OUTPUT_SIZE]; double[,] y2 = new double[1, OUTPUT_SIZE]; double[,] y3 = new double[1, OUTPUT_SIZE]; double[,] y4 = new double[1, OUTPUT_SIZE]; h1 = Sigmoid(Dot(x1, Wxh) + Dot(h1, Whh) + bh); y1 = Sigmoid(Dot(h1, Why) + by); h2 = Sigmoid(Dot(x2, Wxh) + Dot(h2, Whh) + bh); y2 = Sigmoid(Dot(h2, Why) + by); h3 = Sigmoid(Dot(x3, Wxh) + Dot(h3, Whh) + bh); y3 = Sigmoid(Dot(h3, Why) + by); h4 = Sigmoid(Dot(x4, Wxh) + Dot(h4, Whh) + bh); y4 = Sigmoid(Dot(h4, Why) + by); 
Console.WriteLine($"0 + 0 = {Math.Round(y1[0, 0])}"); Console.WriteLine($"0 + 1 = {Math.Round(y2[0, 0])}"); Console.WriteLine($"1 + 0 = {Math.Round(y3[0, 0])}"); Console.WriteLine($"1 + 1 = {Math.Round(y4[0, 0])}"); ``` 这个程序的输出应该是: ``` 0 + 0 = 0 0 + 1 = 1 1 + 0 = 1 1 + 1 = 0 ``` 这就是使用 RNN 实现二进制加法的简单示例。希望这可以帮助你理解 RNN 的工作原理。
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值