C# 手搓多分类BP神经网络_c# bp 神经网络-CSDN博客

本文链接：https://blog.csdn.net/qq_41099210/article/details/142712598

ps: 样本太少容易导致训练结果不稳定，预测数据的特征介于两种样本类型的中间时，也容易导致预测结果不稳定

public static class BP多分类神经网络
{
    /// <summary>
    /// Demo driver: trains a network on six flattened 3x3 bitmaps (three labelled 1,
    /// three labelled 2), then writes the prediction for one extra bitmap to the
    /// debug output.
    /// </summary>
    public static void 测试()
    {
        const int inputLength = 9; // every sample row below has 9 values (a flattened 3x3 grid)
        const int classCount = 2;

        // One training bitmap per row.
        double[,] trainingSamples =
        {
            // digit 1
            { 0, 1, 0,   0, 1, 0,   0, 1, 0 },
            { 1, 1, 0,   0, 1, 0,   0, 1, 0 },
            { 0, 1, 0,   0, 1, 0,   1, 1, 1 },
            // digit 2
            { 1, 0, 0,   0, 1, 0,   0, 1, 1 },
            { 1, 1, 0,   0, 1, 0,   1, 1, 1 },
            { 1, 1, 0,   0, 1, 0,   0, 1, 1 },
        };
        int[] labels = [1, 1, 1, 2, 2, 2];

        // 3 hidden layers with 10 neurons each.
        List<Matrix<double>> network = Create模型(inputLength, classCount, 3, 10);
        训练(trainingSamples, labels.map样本(classCount), network);

        // Bitmap to classify.
        double[,] unseen =
        {
            { 1, 0, 0,   0, 0, 0,   0, 1, 1 },
        };
        Matrix<double> unseenInput = Matrix<double>.Build.DenseOfArray(unseen);
        Matrix<double> result = 预测(unseenInput, network);

        // Print the prediction.
        Debug.WriteLine("Prediction:");
        Debug.WriteLine(result.ToString());
    }
    /// <summary>
    /// Runs a forward pass through the model: ELU after every weight matrix except
    /// the last one, softmax after the last one.
    /// </summary>
    /// <param name="input">Input matrix; it is multiplied by the first weight matrix, so its column count must equal that matrix's row count.</param>
    /// <param name="模型">Weight matrices in layer order. At least two are required.</param>
    /// <returns>The softmax output; it has as many columns as the last weight matrix.</returns>
    /// <exception cref="ArgumentOutOfRangeException">
    /// <paramref name="模型"/> contains fewer than two matrices.
    /// </exception>
    public static Matrix<double> 预测(Matrix<double> input, List<Matrix<double>> 模型)
    {
        int last = 模型.Count - 1;
        // Fail up front instead of running into an index error part-way through the pass.
        if (last < 1)
            throw new ArgumentOutOfRangeException(nameof(模型), "模型至少需要两个权重矩阵");

        Matrix<double> activation = input.Multiply(模型[0]).ELU();
        for (int layer = 1; layer < last; layer++)
            activation = activation.Multiply(模型[layer]).ELU();

        return activation.Multiply(模型[last]).Softmax();
    }
    /// <summary>
    /// Turns 1-based class labels into a one-hot matrix: row i has a 1 in column
    /// (label - 1) and 0 everywhere else.
    /// </summary>
    /// <param name="类别s">Class labels, expected in the range 1..类别数量.</param>
    /// <param name="类别数量">Number of classes, i.e. the column count of the result.</param>
    /// <returns>A matrix with one row per label and 类别数量 columns.</returns>
    /// <exception cref="ArgumentException">A label falls outside 1..类别数量.</exception>
    private static double[,] map样本(this int[] 类别s, int 类别数量)
    {
        double[,] oneHot = new double[类别s.Length, 类别数量];
        int row = 0;
        foreach (int label in 类别s)
        {
            int column = label - 1;
            if (column < 0 || column >= 类别数量)
                throw new ArgumentException("类别索引超出范围");

            oneHot[row, column] = 1;
            row++;
        }
        return oneHot;
    }
    // Time-seeded random generator. No code visible in this class reads it.
    private static Random rand = new((int)DateTime.Now.Ticks);

    /// <summary>
    /// Trains the weight matrices of <paramref name="模型"/> in place. Every iteration
    /// runs a forward pass over all samples (ELU on every layer but the last, softmax
    /// on the last), back-propagates the residual, and applies one update scaled by a
    /// fixed learning rate. Training stops after 100000 iterations or once the error
    /// drops to the threshold, whichever comes first.
    /// </summary>
    /// <param name="样本">Training inputs, one sample per row.</param>
    /// <param name="类别">Target matrix with one row per sample; the softmax output is subtracted from it.</param>
    /// <param name="模型">Weight matrices in layer order; each entry is replaced by its updated matrix.</param>
    /// <exception cref="ArgumentOutOfRangeException">
    /// <paramref name="模型"/> contains fewer than two matrices.
    /// </exception>
    public static void 训练(double[,] 样本, double[,] 类别, List<Matrix<double>> 模型)
    {
        const double eluAlpha = 1.0;
        const double 学习率 = 0.01;
        const double 误差阈值 = 0.001;
        const int 最大迭代次数 = 100000;

        Matrix<double> MatX = Matrix<double>.Build.DenseOfArray(样本);
        Matrix<double> MatY = Matrix<double>.Build.DenseOfArray(类别);

        int Lcount = 模型.Count;
        // The propagation helpers index layer (i - 1), so they need at least two matrices.
        if (Lcount < 2)
            throw new ArgumentOutOfRangeException(nameof(模型), "模型至少需要两个权重矩阵");

        // Per-layer activations and deltas, overwritten on every iteration.
        List<Matrix<double>> Ls = new(new Matrix<double>[Lcount]);
        List<Matrix<double>> L_Deltas = new(new Matrix<double>[Lcount]);

        double 当前误差 = double.MaxValue;
        for (int 迭代次数 = 0; 迭代次数 < 最大迭代次数 && 当前误差 > 误差阈值; 迭代次数++)
        {
            Ls.前向传播(模型,
                (模型item) => MatX.Multiply(模型item).ELU(eluAlpha),
                (集合item, 模型item) => 集合item.Multiply(模型item).ELU(eluAlpha),
                (集合item, 模型item) => 集合item.Multiply(模型item).Softmax());

            // Ls holds ELU outputs, not pre-activations. For an output y = ELU(x) the
            // slope with respect to x is 1 when y > 0 and (y + alpha) otherwise, because
            // alpha * e^x == y + alpha on that branch. The slope is therefore rebuilt
            // from y; feeding y to DerivativeELU would evaluate alpha * e^y instead.
            L_Deltas.反向传播(Ls, 模型,
                (L_item) => MatY - L_item,
                (L_Delta_item, L_item, 模型item) =>
                    L_Delta_item.Multiply(模型item.Transpose())
                        .PointwiseMultiply(L_item.Map(y => y > 0 ? 1.0 : y + eluAlpha)));

            // Error of this forward pass, measured before the weights are updated.
            当前误差 = (MatY - Ls[^1]).L2Norm();

            模型.权重更新(Ls, L_Deltas,
                (L_Delta_item, L_item, 模型item) => 模型item + L_item.Transpose().Multiply(L_Delta_item) * 学习率,
                (L_Delta_item, 模型item) => 模型item + MatX.Transpose().Multiply(L_Delta_item) * 学习率);
        }
    }
    // Update rule for the first weight matrix: receives its delta and its current value.
    private delegate Matrix<double> 权重更新1(Matrix<double> L_Delta_item, Matrix<double> 模型item);
    // Update rule for every other weight matrix: receives its delta, the previous layer's activation and its current value.
    private delegate Matrix<double> 权重更新2(Matrix<double> L_Delta_item, Matrix<double> L_item, Matrix<double> 模型item);

    /// <summary>
    /// Replaces every weight matrix with the value returned by the matching delegate,
    /// walking from the last matrix down to index 1 with <paramref name="中间"/> and
    /// finishing index 0 with <paramref name="尾"/>.
    /// </summary>
    private static void 权重更新(this List<Matrix<double>> 模型, List<Matrix<double>> Ls, List<Matrix<double>> L_Deltas,
        权重更新2 中间, 权重更新1 尾)
    {
        int layer = 模型.Count - 1;
        // The body always runs at least once, so a model with a single matrix fails on Ls[layer - 1].
        do
        {
            Matrix<double> updated = 中间(L_Deltas[layer], Ls[layer - 1], 模型[layer]);
            模型[layer] = updated;
        }
        while (--layer >= 1);

        模型[0] = 尾(L_Deltas[0], 模型[0]);
    }
    // Produces the delta of the last layer from that layer's activation.
    private delegate Matrix<double> 反向传播委托1(Matrix<double> L_item);
    // Produces the delta of layer (i - 1) from the delta of layer i, the activation of layer (i - 1) and weight matrix i.
    private delegate Matrix<double> 反向传播委托2(Matrix<double> L_Delta_item, Matrix<double> L_item, Matrix<double> 模型item);

    /// <summary>
    /// Fills <paramref name="L_Deltas"/> from the last layer backwards: the last entry
    /// comes from <paramref name="首"/>, every earlier entry from <paramref name="中间"/>.
    /// </summary>
    private static void 反向传播(this List<Matrix<double>> L_Deltas, List<Matrix<double>> Ls, List<Matrix<double>> 模型,
        反向传播委托1 首, 反向传播委托2 中间)
    {
        int layer = 模型.Count - 1;
        L_Deltas[layer] = 首(Ls[layer]);

        // The body always runs at least once, so a model with a single matrix fails on Ls[layer - 1].
        do
        {
            int previous = layer - 1;
            L_Deltas[previous] = 中间(L_Deltas[layer], Ls[previous], 模型[layer]);
        }
        while (--layer >= 1);
    }
    // Computes the first layer's activation from the first weight matrix.
    private delegate Matrix<double> 前向传播委托1(Matrix<double> 模型item);
    // Computes a layer's activation from the previous layer's activation and this layer's weight matrix.
    private delegate Matrix<double> 前向传播委托2(Matrix<double> L_item, Matrix<double> 模型item);
    /// <summary>
    /// Runs the forward pass. The value returned by each delegate is stored in the
    /// matching element of <paramref name="Ls"/>: <paramref name="首"/> fills index 0,
    /// <paramref name="中间"/> fills the indices between the first and the last, and
    /// <paramref name="尾"/> fills the last index.
    /// </summary>
    private static void 前向传播(this List<Matrix<double>> Ls, List<Matrix<double>> 模型, 前向传播委托1 首, 前向传播委托2 中间, 前向传播委托2 尾)
    {
        Ls[0] = 首(模型[0]);

        int last = 模型.Count - 1;
        int layer = 1;
        while (layer < last)
        {
            Ls[layer] = 中间(Ls[layer - 1], 模型[layer]);
            layer++;
        }

        Ls[last] = 尾(Ls[last - 1], 模型[last]);
    }
    /// <summary>
    /// Builds the list of randomly initialised weight matrices: one from the input to
    /// the first hidden layer, (隐藏层 - 1) between hidden layers, and one from the
    /// last hidden layer to the output. A 隐藏层 value below 1 yields the same two
    /// matrices as 1.
    /// </summary>
    /// <param name="样本向量长度">Row count of the first matrix.</param>
    /// <param name="类别数量">Column count of the last matrix.</param>
    /// <param name="隐藏层">Number of hidden layers.</param>
    /// <param name="神经元">Width of every hidden layer.</param>
    private static List<Matrix<double>> Create模型(int 样本向量长度, int 类别数量, int 隐藏层, int 神经元)
    {
        int hiddenToHidden = 隐藏层 - 1;

        var layers = new List<Matrix<double>>();
        layers.Add(RandXavier(样本向量长度, 神经元));

        int added = 0;
        while (added < hiddenToHidden)
        {
            layers.Add(RandXavier(神经元, 神经元));
            added++;
        }

        layers.Add(RandXavier(神经元, 类别数量));
        return layers;
    }
    /// <summary>
    /// Maps every entry x to 1 when x is positive and to alpha * e^x otherwise, which
    /// is the slope of ELU evaluated at x.
    /// </summary>
    /// <param name="matrix">Values at which the slope is evaluated.</param>
    /// <param name="alpha">ELU scale applied on the non-positive branch.</param>
    public static Matrix<double> DerivativeELU(this Matrix<double> matrix, double alpha = 1.0)
    {
        return matrix.Map(x =>
        {
            if (x > 0)
                return 1.0;
            return alpha * Math.Exp(x);
        });
    }
    /// <summary>
    /// Treats every entry s as a sigmoid output and returns a new dense matrix holding
    /// s * (1 - s), the sigmoid derivative expressed through its output.
    /// </summary>
    /// <param name="sigmoidOutput">Matrix of sigmoid outputs.</param>
    public static Matrix<double> DerivativeSigmoid(this Matrix<double> sigmoidOutput)
    {
        return DenseMatrix.Create(
            sigmoidOutput.RowCount,
            sigmoidOutput.ColumnCount,
            (row, column) =>
            {
                double s = sigmoidOutput[row, column];
                return s * (1 - s);
            });
    }
    /// <summary>
    /// Returns a new dense matrix in which every entry x is replaced by the logistic
    /// function 1 / (1 + e^(-x)).
    /// </summary>
    /// <param name="matrix">Input values.</param>
    public static Matrix<double> Sigmoid(this Matrix<double> matrix)
    {
        return DenseMatrix.Create(
            matrix.RowCount,
            matrix.ColumnCount,
            (row, column) => 1.0 / (1.0 + Math.Exp(-matrix[row, column])));
    }
    /// <summary>
    /// Applies softmax to every row independently: each entry becomes
    /// e^x divided by the sum of e^x over its row, so every row sums to 1.
    /// </summary>
    /// <param name="matrix">Input scores, one row per sample. The matrix is not modified.</param>
    /// <returns>A new matrix of the same shape holding the row-wise softmax.</returns>
    public static Matrix<double> Softmax(this Matrix<double> matrix)
    {
        var softmaxMatrix = matrix.Clone();
        int columns = softmaxMatrix.ColumnCount;
        for (int i = 0; i < softmaxMatrix.RowCount; i++)
        {
            // Softmax is unchanged when a constant is subtracted from a whole row.
            // Subtracting the row maximum keeps every exponent <= 0, so e^x cannot
            // overflow to Infinity and turn the division into NaN.
            double rowMax = double.NegativeInfinity;
            for (int j = 0; j < columns; j++)
                rowMax = Math.Max(rowMax, matrix[i, j]);

            double rowSum = 0.0;
            for (int j = 0; j < columns; j++)
            {
                double shiftedExp = Math.Exp(matrix[i, j] - rowMax);
                softmaxMatrix[i, j] = shiftedExp;
                rowSum += shiftedExp;
            }

            for (int j = 0; j < columns; j++)
                softmaxMatrix[i, j] /= rowSum;
        }
        return softmaxMatrix;
    }
    /// <summary>
    /// Applies the ELU activation element-wise: positive entries pass through
    /// unchanged, all other entries become alpha * (e^x - 1).
    /// </summary>
    /// <param name="matrix">Input values.</param>
    /// <param name="alpha">Scale applied on the non-positive branch.</param>
    public static Matrix<double> ELU(this Matrix<double> matrix, double alpha = 1.0)
    {
        Func<double, double> activate = x =>
        {
            if (x > 0)
                return x;
            return alpha * (Math.Exp(x) - 1);
        };
        return matrix.Map(activate);
    }
    /// <summary>
    /// Builds an m-by-n dense matrix whose entries are drawn independently from a
    /// normal distribution with mean 0 and variance 2 / (m + n).
    /// </summary>
    /// <param name="m">Row count.</param>
    /// <param name="n">Column count.</param>
    public static Matrix<double> RandXavier(long m, long n)
    {
        double[,] Mat = new double[m, n];

        // Normal.Sample takes a standard deviation, so take the square root once here
        // rather than once per element.
        double stdDev = Math.Sqrt(2.0 / (m + n));

        // Reuse the process-wide generator instead of constructing a new Random per call.
        Random rng = Random.Shared;

        for (int i = 0; i < m; i++)
            for (int j = 0; j < n; j++)
                Mat[i, j] = MathNet.Numerics.Distributions.Normal.Sample(rng, 0, stdDev);
        return Matrix<double>.Build.DenseOfArray(Mat);
    }
}