Simulating binary addition with an RNN

Based on trask's article: https://iamtrask.github.io/2015/11/15/anyone-can-code-lstm

A recurrent neural network implemented from scratch in numpy, with binary addition as the example task.
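
Why a recurrent network? Bit t of a binary sum depends on the carry produced at bit t-1, so the model has to pass state from one time step to the next: at each bit position the hidden layer is computed from the current input bits and the previous hidden state. A quick plain-Python illustration of the carry:

# 42 + 52 = 94: both operands have bit 5 (value 32) set, so a carry is
# generated there and propagates to bit 6 of the sum; keeping track of
# this carry is exactly what the hidden state must learn to do
a, b = 42, 52
print(format(a, '08b'), '+', format(b, '08b'), '=', format(a + b, '08b'))
# 00101010 + 00110100 = 01011110

The full numpy implementation follows.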

"""
CreateTime    : 2019/3/5 21:53
Author        : Aaron
Filename      : RNN_binary_addition.py
"""
import copy
import numpy as np

np.random.seed(3)  # fix the seed so runs are reproducible


def sigmoid(x, derivative=False):
    """Non-linear activation; if derivative is True, x is assumed to
    already be a sigmoid output s, and the derivative s * (1 - s) is
    returned."""
    if derivative:
        return x * (1 - x)
    return 1 / (1 + np.exp(-x))


def train():
    # training dataset generation
    int2binary = {}
    binary_dim = 8
    largest_number = pow(2, binary_dim)
    binary = np.unpackbits(
        np.array([range(largest_number)], dtype=np.uint8).T, axis=1)
    for i in range(largest_number):
        int2binary[i] = binary[i]
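    # int2binary now maps each integer 0..255 to its 8-bit pattern,
    # most-significant bit first (e.g. int2binary[5] -> [0 0 0 0 0 1 0 1]);
    # the loops below index bits from the right (binary_dim - position - 1)
    # to walk from the least-significant bit upward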

    # input variables
    alpha = 0.1
    input_dim = 2
    hidden_dim = 16
    output_dim = 1

    # initialize neural network weights and update
    synapse_0 = 2 * np.random.random((input_dim, hidden_dim)) - 1
    synapse_1 = 2 * np.random.random((hidden_dim, output_dim)) - 1
    synapse_h = 2 * np.random.random((hidden_dim, hidden_dim)) - 1
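    # synapse_0: input -> hidden, synapse_h: hidden -> hidden (the
    # recurrent connection), synapse_1: hidden -> output; all are
    # initialized uniformly in [-1, 1)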

    synapse_0_update = np.zeros_like(synapse_0)
    synapse_1_update = np.zeros_like(synapse_1)
    synapse_h_update = np.zeros_like(synapse_h)

    # training logic
    for j in range(10000):

        # generate a simple addition problem (a + b = c); operands are
        # capped at half the range so the sum still fits in 8 bits
        a_int = np.random.randint(largest_number // 2)  # int version
        a = int2binary[a_int]  # binary encoding

        b_int = np.random.randint(largest_number // 2)  # int version
        b = int2binary[b_int]  # binary encoding

        # true answer
        c_int = a_int + b_int
        c = int2binary[c_int]

        # where we'll store our best guess (binary encoded)
        d = np.zeros_like(c)

        overall_error = 0

        layer_2_deltas = list()
        layer_1_values = list()
        layer_1_values.append(np.zeros(hidden_dim))
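        # the zero vector just appended serves as the "previous hidden
        # state" for the first time step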

        # moving along the positions in the binary encoding
        for position in range(binary_dim):
            # generate input and output
            X = np.array([[a[binary_dim - position - 1],
                           b[binary_dim - position - 1]]])
            y = np.array([[c[binary_dim - position - 1]]]).T

            # hidden layer: current input combined with the previous
            # hidden state
            layer_1 = sigmoid(np.dot(X, synapse_0) +
                              np.dot(layer_1_values[-1], synapse_h))

            # output layer (new binary representation)
            layer_2 = sigmoid(np.dot(layer_1, synapse_1))

            # did we miss?... if so, by how much?
            layer_2_error = y - layer_2
            layer_2_deltas.append(layer_2_error * sigmoid(layer_2, True))
            overall_error += np.abs(layer_2_error[0])

            # decode estimate so we can print it out
            d[binary_dim - position - 1] = np.round(layer_2[0][0])

            # store hidden layer so we can use it in the next time_step
            layer_1_values.append(copy.deepcopy(layer_1))

        future_layer_1_delta = np.zeros(hidden_dim)
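        # future_layer_1_delta carries the hidden-layer delta from the
        # time step *after* the current one (zero at the last step); the
        # loop below is backpropagation through time, walking the bits in
        # reverse order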

        for position in range(binary_dim):
            X = np.array([[a[position], b[position]]])
            layer_1 = layer_1_values[-position - 1]
            prev_layer_1 = layer_1_values[-position - 2]

            # error at output layer
            layer_2_delta = layer_2_deltas[-position - 1]
            # error at hidden layer: this step's output error plus the
            # error flowing back from the next time step's hidden layer
            layer_1_delta = (future_layer_1_delta.dot(synapse_h.T) +
                             layer_2_delta.dot(synapse_1.T)) * sigmoid(layer_1,
                                                                       True)

            # let's update all our weights so we can try again
            synapse_1_update += np.atleast_2d(layer_1).T.dot(layer_2_delta)
            synapse_h_update += np.atleast_2d(prev_layer_1).T.dot(layer_1_delta)
            synapse_0_update += X.T.dot(layer_1_delta)

            future_layer_1_delta = layer_1_delta

        # apply the accumulated updates with learning rate alpha, then
        # zero the accumulators for the next training example
        synapse_0 += synapse_0_update * alpha
        synapse_1 += synapse_1_update * alpha
        synapse_h += synapse_h_update * alpha

        synapse_0_update *= 0
        synapse_1_update *= 0
        synapse_h_update *= 0

        # print out progress
        if j % 1000 == 0:
            print("Error:" + str(overall_error))
            print("Pred:" + str(d))
            print("True:" + str(c))
            out = 0
            for index, value in enumerate(reversed(d)):
                out += value * pow(2, index)
            print(str(a_int) + " + " + str(b_int) + " = " + str(out))
            print("------------")


if __name__ == '__main__':
    train()

The training output, printed every 1000 iterations, is shown below; the error falls as training proceeds and the final checkpoints predict the sum exactly:

Error:[3.67776144]
Pred:[0 1 0 0 0 0 1 0]
True:[0 1 1 0 0 0 0 0]
66 + 30 = 66
------------
Error:[4.01601049]
Pred:[1 1 1 1 1 1 1 1]
True:[0 0 1 1 0 0 1 1]
50 + 1 = 255
------------
Error:[4.16581017]
Pred:[0 0 0 0 0 0 0 0]
True:[1 1 0 1 1 1 1 0]
118 + 104 = 0
------------
Error:[3.97075811]
Pred:[0 0 0 0 0 0 0 0]
True:[0 1 0 1 0 1 0 1]
29 + 56 = 0
------------
Error:[4.64590095]
Pred:[1 1 1 1 0 1 0 0]
True:[1 0 0 0 1 0 1 0]
123 + 15 = 244
------------
Error:[2.15875271]
Pred:[0 1 0 1 0 1 0 0]
True:[0 1 0 1 0 1 0 0]
32 + 52 = 84
------------
Error:[3.11152336]
Pred:[0 1 1 1 1 1 0 1]
True:[0 1 0 0 0 1 0 1]
62 + 7 = 125
------------
Error:[0.81364924]
Pred:[0 1 1 1 0 1 1 0]
True:[0 1 1 1 0 1 1 0]
83 + 35 = 118
------------
Error:[1.00536035]
Pred:[1 0 0 0 1 0 1 0]
True:[1 0 0 0 1 0 1 0]
95 + 43 = 138
------------
Error:[0.52282946]
Pred:[1 1 0 0 0 1 0 0]
True:[1 1 0 0 0 1 0 0]
82 + 114 = 196
------------
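
Once trained, the same forward pass can be reused for inference. Below is a minimal sketch of a hypothetical predict helper (it reuses numpy and the sigmoid above, and assumes train() is modified to return synapse_0, synapse_h, and synapse_1, which the listing above does not do):

def predict(a_int, b_int, synapse_0, synapse_h, synapse_1, binary_dim=8):
    """Hypothetical helper (not in the original post): forward pass only,
    adding two integers bit by bit with the trained weights."""
    a = np.unpackbits(np.array([a_int], dtype=np.uint8))
    b = np.unpackbits(np.array([b_int], dtype=np.uint8))
    d = np.zeros(binary_dim, dtype=np.uint8)
    hidden = np.zeros((1, synapse_h.shape[0]))
    for position in range(binary_dim):
        X = np.array([[a[binary_dim - position - 1],
                       b[binary_dim - position - 1]]])
        hidden = sigmoid(np.dot(X, synapse_0) + np.dot(hidden, synapse_h))
        out = sigmoid(np.dot(hidden, synapse_1))
        d[binary_dim - position - 1] = np.round(out[0][0])
    # bits are stored most-significant first, so join and parse base 2
    return int(''.join(str(bit) for bit in d), 2)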

Reposted from: https://www.cnblogs.com/kangyuqi/p/10480414.html
