// 注:样本太少容易导致训练结果不稳定;当待预测数据的特征介于两种样本类型之间时,预测结果也容易不稳定。
public static class BP多分类神经网络
{
/// <summary>
/// Demo entry point: builds a six-sample training set of flattened 3x3 bitmaps
/// (three labelled as digit 1, three as digit 2), trains a freshly created model
/// on it, then writes the prediction for one unseen bitmap to the debug output.
/// </summary>
public static void 测试()
{
    // One row per sample; each row is a 3x3 bitmap flattened to 9 values.
    var trainingSamples = new double[,]
    {
        // digit 1
        { 0, 1, 0,   0, 1, 0,   0, 1, 0 },
        { 1, 1, 0,   0, 1, 0,   0, 1, 0 },
        { 0, 1, 0,   0, 1, 0,   1, 1, 1 },
        // digit 2
        { 1, 0, 0,   0, 1, 0,   0, 1, 1 },
        { 1, 1, 0,   0, 1, 0,   1, 1, 1 },
        { 1, 1, 0,   0, 1, 0,   0, 1, 1 },
    };

    // 1-based class label of each training row, in the same order as the rows above.
    var labels = new[] { 1, 1, 1, 2, 2, 2 };

    // 9 inputs, 2 classes, 3 hidden layers of 10 neurons each.
    List<Matrix<double>> network = Create模型(9, 2, 3, 10);
    训练(trainingSamples, labels.map样本(2), network);

    // A single unseen bitmap to classify.
    var unseen = Matrix<double>.Build.DenseOfArray(new double[,]
    {
        { 1, 0, 0,   0, 0, 0,   0, 1, 1 },
    });
    var probabilities = 预测(unseen, network);

    // Print the prediction.
    Debug.WriteLine("Prediction:");
    Debug.WriteLine(probabilities.ToString());
}
/// <summary>
/// Runs a forward pass through the network and returns the output of the last layer.
/// </summary>
/// <param name="input">Input matrix; it is multiplied on the left of the first weight matrix.</param>
/// <param name="模型">
/// Weight matrices in layer order. Every layer except the last is followed by ELU;
/// the last layer is followed by a row-wise softmax.
/// </param>
/// <returns>The softmax output of the final layer.</returns>
/// <exception cref="ArgumentNullException">
/// <paramref name="input"/> or <paramref name="模型"/> is null.
/// </exception>
/// <exception cref="ArgumentOutOfRangeException">
/// <paramref name="模型"/> holds fewer than two weight matrices.
/// </exception>
public static Matrix<double> 预测(Matrix<double> input, List<Matrix<double>> 模型)
{
    ArgumentNullException.ThrowIfNull(input);
    ArgumentNullException.ThrowIfNull(模型);
    if (模型.Count < 2)
    {
        // Same exception type the list indexer used to raise for short models,
        // but thrown up front with a message that names the actual problem.
        throw new ArgumentOutOfRangeException(
            nameof(模型), 模型.Count, "The model must contain at least two weight matrices.");
    }

    int last = 模型.Count - 1;
    Matrix<double> activation = input;

    // Hidden layers: linear transform followed by ELU.
    for (int i = 0; i < last; i++)
    {
        activation = activation.Multiply(模型[i]).ELU();
    }

    // Output layer: linear transform followed by softmax.
    return activation.Multiply(模型[last]).Softmax();
}
/// <summary>
/// Converts 1-based class labels into a one-hot matrix with one row per label.
/// </summary>
/// <param name="类别s">Class labels; each must lie in the range 1..<paramref name="类别数量"/>.</param>
/// <param name="类别数量">Number of classes, i.e. the number of columns in the result.</param>
/// <returns>
/// A matrix of size (label count) x <paramref name="类别数量"/> in which row i has a 1
/// in column (label - 1) and 0 everywhere else.
/// </returns>
/// <exception cref="ArgumentException">A label falls outside 1..<paramref name="类别数量"/>.</exception>
private static double[,] map样本(this int[] 类别s, int 类别数量)
{
    var oneHot = new double[类别s.Length, 类别数量];

    int row = 0;
    foreach (int label in 类别s)
    {
        // Labels are 1-based, columns are 0-based.
        int column = label - 1;
        if (column < 0 || column >= 类别数量)
        {
            throw new ArgumentException("类别索引超出范围");
        }

        oneHot[row, column] = 1;
        row++;
    }

    return oneHot;
}
// Class-level random number generator. It is seeded by the runtime instead of from
// (int)DateTime.Now.Ticks: the tick count never fits in an int, so that cast throws
// from the type initializer when the project is compiled with overflow checking.
private static readonly Random rand = new();
/// <summary>
/// Trains the network in place on the whole sample set at once. Each iteration runs a
/// forward pass (ELU on every layer except the last, softmax on the last), back-propagates
/// the residual (target - output), and adds the scaled gradient to every weight matrix.
/// Training stops once the error drops to the threshold or the iteration cap is reached.
/// </summary>
/// <param name="样本">Training inputs, one sample per row.</param>
/// <param name="类别">Target outputs, one row per sample (e.g. one-hot class rows).</param>
/// <param name="模型">Weight matrices in layer order; its elements are replaced with the updated weights.</param>
/// <param name="学习率">Factor applied to every weight update.</param>
/// <param name="误差阈值">Training stops when the error is no longer greater than this value.</param>
/// <param name="最大迭代次数">Upper bound on the number of training iterations.</param>
/// <exception cref="ArgumentNullException">
/// <paramref name="样本"/>, <paramref name="类别"/> or <paramref name="模型"/> is null.
/// </exception>
/// <exception cref="ArgumentOutOfRangeException">
/// <paramref name="模型"/> holds fewer than two weight matrices.
/// </exception>
public static void 训练(double[,] 样本, double[,] 类别, List<Matrix<double>> 模型,
    double 学习率 = 0.01, double 误差阈值 = 0.001, int 最大迭代次数 = 100000)
{
    ArgumentNullException.ThrowIfNull(样本);
    ArgumentNullException.ThrowIfNull(类别);
    ArgumentNullException.ThrowIfNull(模型);
    if (模型.Count < 2)
    {
        // The propagation helpers index layer (i - 1), so they need at least two layers.
        throw new ArgumentOutOfRangeException(
            nameof(模型), 模型.Count, "The model must contain at least two weight matrices.");
    }

    Matrix<double> MatX = Matrix<double>.Build.DenseOfArray(样本);
    Matrix<double> MatY = Matrix<double>.Build.DenseOfArray(类别);

    int layerCount = 模型.Count;
    // Per-layer activations and deltas; slots are filled by the propagation helpers.
    List<Matrix<double>> Ls = new(new Matrix<double>[layerCount]);
    List<Matrix<double>> L_Deltas = new(new Matrix<double>[layerCount]);

    // Ls stores ELU *outputs* y = ELU(x), not the pre-activations x. With alpha = 1
    // (the value used in the forward pass) the derivative is 1 for x > 0 and
    // exp(x) = y + 1 otherwise, so it has to be computed from y this way rather than
    // by feeding y into the pre-activation formula exp(.).
    static Matrix<double> EluDerivativeFromOutput(Matrix<double> activation) =>
        activation.Map(y => y > 0 ? 1.0 : y + 1.0);

    double 当前误差 = double.MaxValue;
    for (int 迭代次数 = 0; 迭代次数 < 最大迭代次数 && 当前误差 > 误差阈值; 迭代次数++)
    {
        Ls.前向传播(模型,
            (模型item) => MatX.Multiply(模型item).ELU(),
            (集合item, 模型item) => 集合item.Multiply(模型item).ELU(),
            (集合item, 模型item) => 集合item.Multiply(模型item).Softmax());

        L_Deltas.反向传播(Ls, 模型,
            (L_item) => MatY - L_item,
            (L_Delta_item, L_item, 模型item) =>
                L_Delta_item.Multiply(模型item.Transpose()).PointwiseMultiply(EluDerivativeFromOutput(L_item)));

        // Current error: L2 norm of the residual between targets and network output.
        当前误差 = (MatY - Ls[^1]).L2Norm();

        模型.权重更新(Ls, L_Deltas,
            (L_Delta_item, L_item, 模型item) => (模型item + (L_item.Transpose().Multiply(L_Delta_item)) * 学习率),
            (L_Delta_item, 模型item) => (模型item + (MatX.Transpose().Multiply(L_Delta_item)) * 学习率));
    }
}
/// <summary>Computes the new weights of the first layer from its delta and current weights.</summary>
private delegate Matrix<double> 权重更新1(Matrix<double> L_Delta_item, Matrix<double> 模型item);
/// <summary>
/// Computes the new weights of a later layer from its delta, the previous layer's
/// activation and its current weights.
/// </summary>
private delegate Matrix<double> 权重更新2(Matrix<double> L_Delta_item, Matrix<double> L_item, Matrix<double> 模型item);
/// <summary>
/// Replaces every weight matrix in <paramref name="模型"/>, walking from the last layer
/// down to the first. Layers 1..last are updated through <paramref name="中间"/>,
/// layer 0 through <paramref name="尾"/>.
/// </summary>
private static void 权重更新(this List<Matrix<double>> 模型, List<Matrix<double>> Ls, List<Matrix<double>> L_Deltas,
    权重更新2 中间, 权重更新1 尾)
{
    int layer = 模型.Count - 1;

    // The body always runs for the starting layer first, then continues down to layer 1.
    do
    {
        模型[layer] = 中间(L_Deltas[layer], Ls[layer - 1], 模型[layer]);
        layer--;
    }
    while (layer >= 1);

    // The first layer has no preceding activation, so it uses the two-argument delegate.
    模型[0] = 尾(L_Deltas[0], 模型[0]);
}
/// <summary>Computes the delta of the last layer from that layer's activation.</summary>
private delegate Matrix<double> 反向传播委托1(Matrix<double> L_item);
/// <summary>
/// Computes the delta of a layer from the next layer's delta, this layer's activation
/// and the next layer's weights.
/// </summary>
private delegate Matrix<double> 反向传播委托2(Matrix<double> L_Delta_item, Matrix<double> L_item, Matrix<double> 模型item);
/// <summary>
/// Fills <paramref name="L_Deltas"/> from the last layer backwards: the last slot comes
/// from <paramref name="首"/>, every earlier slot (i - 1) from <paramref name="中间"/>
/// applied to the delta and weights of layer i.
/// </summary>
private static void 反向传播(this List<Matrix<double>> L_Deltas, List<Matrix<double>> Ls, List<Matrix<double>> 模型,
    反向传播委托1 首, 反向传播委托2 中间)
{
    int layer = 模型.Count - 1;
    L_Deltas[layer] = 首(Ls[layer]);

    // Propagate towards the input; the final pass (layer == 1) writes slot 0.
    do
    {
        L_Deltas[layer - 1] = 中间(L_Deltas[layer], Ls[layer - 1], 模型[layer]);
        layer--;
    }
    while (layer >= 1);
}
/// <summary>Computes the activation of the first layer from its weights.</summary>
private delegate Matrix<double> 前向传播委托1(Matrix<double> 模型item);
/// <summary>Computes the activation of a later layer from the previous activation and its weights.</summary>
private delegate Matrix<double> 前向传播委托2(Matrix<double> L_item, Matrix<double> 模型item);
/// <summary>
/// Runs the layers in order and stores each delegate's return value in the matching
/// slot of <paramref name="Ls"/>: slot 0 via <paramref name="首"/>, the slots strictly
/// between first and last via <paramref name="中间"/>, and the last slot via <paramref name="尾"/>.
/// </summary>
private static void 前向传播(this List<Matrix<double>> Ls, List<Matrix<double>> 模型, 前向传播委托1 首, 前向传播委托2 中间, 前向传播委托2 尾)
{
    Ls[0] = 首(模型[0]);

    int last = 模型.Count - 1;
    int layer = 1;
    while (layer < last)
    {
        Ls[layer] = 中间(Ls[layer - 1], 模型[layer]);
        layer++;
    }

    Ls[last] = 尾(Ls[last - 1], 模型[last]);
}
/// <summary>
/// Builds the list of randomly initialised weight matrices for a network:
/// one (input x neurons) matrix, then (隐藏层 - 1) square (neurons x neurons) matrices,
/// then one (neurons x classes) matrix. Every matrix comes from <c>RandXavier</c>.
/// </summary>
/// <param name="样本向量长度">Row count of the first weight matrix.</param>
/// <param name="类别数量">Column count of the last weight matrix.</param>
/// <param name="隐藏层">Hidden-layer count; values below 2 add no square matrices.</param>
/// <param name="神经元">Width shared by all hidden layers.</param>
/// <returns>The weight matrices in layer order.</returns>
private static List<Matrix<double>> Create模型(int 样本向量长度, int 类别数量, int 隐藏层, int 神经元)
{
    // The first matrix already accounts for one hidden layer.
    int squareLayers = 隐藏层 - 1;

    var layers = new List<Matrix<double>>();
    layers.Add(RandXavier(样本向量长度, 神经元));

    for (int remaining = squareLayers; remaining > 0; remaining--)
    {
        layers.Add(RandXavier(神经元, 神经元));
    }

    layers.Add(RandXavier(神经元, 类别数量));
    return layers;
}
/// <summary>
/// Maps every element x of <paramref name="matrix"/> to 1 when x is positive and to
/// <paramref name="alpha"/> * exp(x) otherwise, i.e. the ELU derivative evaluated at x.
/// </summary>
/// <param name="matrix">Values at which the derivative is evaluated.</param>
/// <param name="alpha">ELU scale for the non-positive branch.</param>
/// <returns>A new matrix holding the element-wise results.</returns>
public static Matrix<double> DerivativeELU(this Matrix<double> matrix, double alpha = 1.0)
{
    double Slope(double x)
    {
        if (x > 0)
        {
            return 1.0;
        }

        return alpha * Math.Exp(x);
    }

    return matrix.Map(Slope);
}
/// <summary>
/// Computes the sigmoid derivative from values that are already sigmoid outputs:
/// every element s becomes s * (1 - s).
/// </summary>
/// <param name="sigmoidOutput">Matrix of sigmoid outputs.</param>
/// <returns>A new dense matrix of the same size holding the element-wise derivative.</returns>
public static Matrix<double> DerivativeSigmoid(this Matrix<double> sigmoidOutput)
{
    return DenseMatrix.Create(
        sigmoidOutput.RowCount,
        sigmoidOutput.ColumnCount,
        (row, column) =>
        {
            double s = sigmoidOutput[row, column];
            // Derivative of the sigmoid expressed through its own output.
            return s * (1 - s);
        });
}
/// <summary>
/// Applies the logistic function 1 / (1 + exp(-x)) to every element of <paramref name="matrix"/>.
/// </summary>
/// <param name="matrix">Input values.</param>
/// <returns>A new dense matrix of the same size holding the element-wise results.</returns>
public static Matrix<double> Sigmoid(this Matrix<double> matrix)
{
    return DenseMatrix.Create(
        matrix.RowCount,
        matrix.ColumnCount,
        (row, column) => 1.0 / (1.0 + Math.Exp(-matrix[row, column])));
}
/// <summary>
/// Applies softmax independently to every row of <paramref name="matrix"/>: each element
/// becomes exp(x) divided by the sum of exp over its row, so every row sums to 1.
/// </summary>
/// <param name="matrix">Input values; it is not modified.</param>
/// <returns>A new matrix of the same size holding the row-wise softmax.</returns>
public static Matrix<double> Softmax(this Matrix<double> matrix)
{
    // Start from a copy so the result has the same size as the input; every entry is overwritten below.
    var softmaxMatrix = matrix.Clone();
    int rows = matrix.RowCount;
    int columns = matrix.ColumnCount;

    for (int i = 0; i < rows; i++)
    {
        // Subtracting the row maximum leaves the softmax value unchanged but keeps
        // every exponent <= 0, so exp() cannot overflow to infinity and produce NaN.
        double rowMax = double.NegativeInfinity;
        for (int j = 0; j < columns; j++)
        {
            rowMax = Math.Max(rowMax, matrix[i, j]);
        }

        double rowSum = 0.0;
        for (int j = 0; j < columns; j++)
        {
            double shiftedExp = Math.Exp(matrix[i, j] - rowMax);
            softmaxMatrix[i, j] = shiftedExp;
            rowSum += shiftedExp;
        }

        for (int j = 0; j < columns; j++)
        {
            softmaxMatrix[i, j] /= rowSum;
        }
    }

    return softmaxMatrix;
}
/// <summary>
/// Applies the ELU activation to every element of <paramref name="matrix"/>: positive
/// values pass through unchanged, all others become <paramref name="alpha"/> * (exp(x) - 1).
/// </summary>
/// <param name="matrix">Input values.</param>
/// <param name="alpha">Scale of the non-positive branch.</param>
/// <returns>A new matrix holding the element-wise results.</returns>
public static Matrix<double> ELU(this Matrix<double> matrix, double alpha = 1.0)
{
    double Activate(double x)
    {
        if (x > 0)
        {
            return x;
        }

        return alpha * (Math.Exp(x) - 1);
    }

    return matrix.Map(Activate);
}
/// <summary>
/// Creates an m-by-n dense matrix whose entries are sampled from a normal distribution
/// with mean 0 and variance 2 / (m + n).
/// </summary>
/// <param name="m">Number of rows.</param>
/// <param name="n">Number of columns.</param>
/// <returns>The randomly initialised matrix.</returns>
public static Matrix<double> RandXavier(long m, long n)
{
    double[,] weights = new double[m, n];

    // The sampler takes a standard deviation, so convert the variance once up front
    // instead of recomputing the square root for every element.
    double standardDeviation = Math.Sqrt(2.0 / (m + n));

    // Random.Shared avoids allocating and seeding a new generator on every call.
    Random generator = Random.Shared;

    for (int i = 0; i < m; i++)
    {
        for (int j = 0; j < n; j++)
        {
            weights[i, j] = MathNet.Numerics.Distributions.Normal.Sample(generator, 0, standardDeviation);
        }
    }

    return Matrix<double>.Build.DenseOfArray(weights);
}
}