Supervised Training Methods: From Theoretical Foundations to Gesture Recognition in Unity
Supervised learning is one of the main branches of machine learning and is widely used in game AI development. This chapter takes a close look at its key concepts and methods, including the backpropagation algorithm, the implementation of a gesture recognition system, and a range of optimization techniques, demonstrating each with C# code examples in Unity.
9.1 The XOR Function
The exclusive-OR (XOR) function is a classic example for understanding the capabilities of neural networks. It is a simple but nonlinear problem that demonstrates a network's ability to learn very well.
The XOR function is defined as follows: for two binary inputs, the output is 1 if and only if an odd number of the inputs is 1 (for two inputs, exactly one of them); otherwise the output is 0.
| Input A | Input B | Output |
|---|---|---|
| 0 | 0 | 0 |
| 0 | 1 | 1 |
| 1 | 0 | 1 |
| 1 | 1 | 0 |
The key property of the XOR problem is that it is not linearly separable, meaning no single linear classifier (such as a single-layer perceptron) can solve it. This is precisely where the multi-layer structure of neural networks proves its worth, as the short argument below makes concrete.
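To see why, suppose a single linear unit output 1 exactly when $w_1 x_1 + w_2 x_2 + b > 0$. The four rows of the truth table would then impose the following constraints (the notation here is introduced just for this argument):

```latex
\begin{aligned}
(0,0) \mapsto 0 &\;\Rightarrow\; b < 0 \\
(0,1) \mapsto 1 &\;\Rightarrow\; w_2 + b > 0 \\
(1,0) \mapsto 1 &\;\Rightarrow\; w_1 + b > 0 \\
(1,1) \mapsto 0 &\;\Rightarrow\; w_1 + w_2 + b < 0
\end{aligned}
```

Adding the second and third constraints gives $w_1 + w_2 + 2b > 0$, and since $b < 0$ it follows that $w_1 + w_2 + b > -b > 0$, which contradicts the fourth constraint. No weights and bias satisfy all four rows, so at least one hidden layer is needed.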
The following example implements a simple neural network in Unity that learns the XOR function:
```csharp
using UnityEngine;
using System.Collections.Generic;
/// <summary>
/// A neural network solution to the XOR problem
/// </summary>
public class XORNeuralNetwork : MonoBehaviour
{
[Header("网络结构")]
public int inputSize = 2; // 输入层神经元数量
public int hiddenSize = 3; // 隐藏层神经元数量
public int outputSize = 1; // 输出层神经元数量
[Header("训练参数")]
public float learningRate = 0.1f; // 学习率
public int epochs = 10000; // 训练轮数
public bool trainingComplete = false;
// Network weights and biases
private float[,] weightsInputHidden;
private float[] biasesHidden;
private float[,] weightsHiddenOutput;
private float[] biasesOutput;
// Training data
private float[][] inputs = new float[][] {
new float[] {0, 0},
new float[] {0, 1},
new float[] {1, 0},
new float[] {1, 1}
};
private float[][] expectedOutputs = new float[][] {
new float[] {0},
new float[] {1},
new float[] {1},
new float[] {0}
};
// Runtime state and diagnostics
private float currentError = 1.0f;
private int currentEpoch = 0;
void Start()
{
InitializeNetwork();
StartTraining();
}
/// <summary>
/// Initializes the network weights and biases
/// </summary>
void InitializeNetwork()
{
// Initialize weights to small random values
weightsInputHidden = new float[inputSize, hiddenSize];
biasesHidden = new float[hiddenSize];
weightsHiddenOutput = new float[hiddenSize, outputSize];
biasesOutput = new float[outputSize];
for (int i = 0; i < inputSize; i++)
{
for (int h = 0; h < hiddenSize; h++)
{
weightsInputHidden[i, h] = Random.Range(-0.5f, 0.5f);
}
}
for (int h = 0; h < hiddenSize; h++)
{
biasesHidden[h] = Random.Range(-0.5f, 0.5f);
for (int o = 0; o < outputSize; o++)
{
weightsHiddenOutput[h, o] = Random.Range(-0.5f, 0.5f);
}
}
for (int o = 0; o < outputSize; o++)
{
biasesOutput[o] = Random.Range(-0.5f, 0.5f);
}
Debug.Log("神经网络初始化完成");
}
/// <summary>
/// Starts the training process
/// </summary>
void StartTraining()
{
Debug.Log("开始训练...");
// 在后台协程中训练网络,避免阻塞主线程
StartCoroutine(TrainNetwork());
}
/// <summary>
/// Coroutine that trains the network
/// </summary>
System.Collections.IEnumerator TrainNetwork()
{
for (int epoch = 0; epoch < epochs; epoch++)
{
currentEpoch = epoch;
float totalError = 0;
// For each training sample
for (int sample = 0; sample < inputs.Length; sample++)
{
// Forward pass
float[] hiddenOutputs = new float[hiddenSize];
float[] finalOutputs = new float[outputSize];
// Input layer to hidden layer
for (int h = 0; h < hiddenSize; h++)
{
float sum = biasesHidden[h];
for (int i = 0; i < inputSize; i++)
{
sum += inputs[sample][i] * weightsInputHidden[i, h];
}
hiddenOutputs[h] = Sigmoid(sum);
}
// Hidden layer to output layer
for (int o = 0; o < outputSize; o++)
{
float sum = biasesOutput[o];
for (int h = 0; h < hiddenSize; h++)
{
sum += hiddenOutputs[h] * weightsHiddenOutput[h, o];
}
finalOutputs[o] = Sigmoid(sum);
}
// Compute the error
float[] outputErrors = new float[outputSize];
for (int o = 0; o < outputSize; o++)
{
outputErrors[o] = expectedOutputs[sample][o] - finalOutputs[o];
totalError += Mathf.Abs(outputErrors[o]);
}
// Backpropagation
// Output-layer error gradient
float[] outputDeltas = new float[outputSize];
for (int o = 0; o < outputSize; o++)
{
outputDeltas[o] = outputErrors[o] * SigmoidDerivative(finalOutputs[o]);
}
// Hidden-layer error gradient
float[] hiddenDeltas = new float[hiddenSize];
for (int h = 0; h < hiddenSize; h++)
{
float sum = 0;
for (int o = 0; o < outputSize; o++)
{
sum += outputDeltas[o] * weightsHiddenOutput[h, o];
}
hiddenDeltas[h] = sum * SigmoidDerivative(hiddenOutputs[h]);
}
// Update weights and biases
// Hidden layer to output layer
for (int o = 0; o < outputSize; o++)
{
biasesOutput[o] += learningRate * outputDeltas[o];
for (int h = 0; h < hiddenSize; h++)
{
weightsHiddenOutput[h, o] += learningRate * hiddenOutputs[h] * outputDeltas[o];
}
}
// Input layer to hidden layer
for (int h = 0; h < hiddenSize; h++)
{
biasesHidden[h] += learningRate * hiddenDeltas[h];
for (int i = 0; i < inputSize; i++)
{
weightsInputHidden[i, h] += learningRate * inputs[sample][i] * hiddenDeltas[h];
}
}
}
// Update the current error
currentError = totalError / inputs.Length;
// Log progress every 1000 epochs
if (epoch % 1000 == 0 || epoch == epochs - 1)
{
Debug.Log($"轮数: {epoch}, 平均误差: {currentError}");
yield return null; // 让出控制权,避免卡顿
}
// Stop early once the error is small enough
if (currentError < 0.01f)
{
Debug.Log($"训练在第 {epoch} 轮收敛,平均误差: {currentError}");
break;
}
}
trainingComplete = true;
Debug.Log("训练完成!");
// 测试网络
TestNetwork();
}
/// <summary>
/// Tests the trained network
/// </summary>
void TestNetwork()
{
Debug.Log("测试网络:");
for (int i = 0; i < inputs.Length; i++)
{
float[] output = Predict(inputs[i]);
Debug.Log($"输入: [{inputs[i][0]}, {inputs[i][1]}], 预测: {output[0]:F4}, 期望: {expectedOutputs[i][0]}");
}
}
/// <summary>
/// Runs a prediction with the trained network
/// </summary>
public float[] Predict(float[] input)
{
// Validate the input size
if (input.Length != inputSize)
{
Debug.LogError($"输入大小不匹配。期望: {inputSize}, 实际: {input.Length}");
return new float[outputSize];
}
// Forward pass
float[] hiddenOutputs = new float[hiddenSize];
float[] finalOutputs = new float[outputSize];
// Input layer to hidden layer
for (int h = 0; h < hiddenSize; h++)
{
float sum = biasesHidden[h];
for (int i = 0; i < inputSize; i++)
{
sum += input[i] * weightsInputHidden[i, h];
}
hiddenOutputs[h] = Sigmoid(sum);
}
// Hidden layer to output layer
for (int o = 0; o < outputSize; o++)
{
float sum = biasesOutput[o];
for (int h = 0; h < hiddenSize; h++)
{
sum += hiddenOutputs[h] * weightsHiddenOutput[h, o];
}
finalOutputs[o] = Sigmoid(sum);
}
return finalOutputs;
}
/// <summary>
/// Sigmoid activation function
/// </summary>
float Sigmoid(float x)
{
return 1.0f / (1.0f + Mathf.Exp(-x));
}
/// <summary>
/// Derivative of the sigmoid, expressed in terms of the sigmoid's output
/// (callers pass activations such as hiddenOutputs, not raw weighted sums)
/// </summary>
float SigmoidDerivative(float x)
{
return x * (1 - x);
}
/// <summary>
/// Displays the current status as an overlay in the Game view
/// </summary>
void OnGUI()
{
GUI.Label(new Rect(10, 10, 300, 20), $"Training status: {(trainingComplete ? "complete" : "in progress...")}");
GUI.Label(new Rect(10, 30, 300, 20), $"Current epoch: {currentEpoch} / {epochs}");
GUI.Label(new Rect(10, 50, 300, 20), $"Current error: {currentError:F6}");
if (trainingComplete)
{
GUI.Label(new Rect(10, 80, 300, 20), "Test results:");
for (int i = 0; i < inputs.Length; i++)
{
float[] output = Predict(inputs[i]);
GUI.Label(new Rect(10, 100 + i * 20, 300, 20),
$"Input: [{inputs[i][0]}, {inputs[i][1]}], output: {output[0]:F4}, expected: {expectedOutputs[i][0]}");
}
}
}
}
```
This example shows a complete neural-network implementation for solving the XOR problem. It covers network initialization, the forward pass, backpropagation, and prediction, along with detailed debug output.
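As a usage note, other scripts can query the trained network through the public `Predict` method. The following is a minimal sketch; the `XORQueryExample` class name, the component lookup, and the key binding are illustrative assumptions, not part of the listing above:

```csharp
using UnityEngine;

/// Queries a trained XORNeuralNetwork somewhere in the scene (illustrative sketch).
public class XORQueryExample : MonoBehaviour
{
    private XORNeuralNetwork net;

    void Start()
    {
        // Cache the reference once instead of searching every frame.
        net = FindObjectOfType<XORNeuralNetwork>();
    }

    void Update()
    {
        // Query only after training has converged.
        if (net != null && net.trainingComplete && Input.GetKeyDown(KeyCode.Space))
        {
            float[] result = net.Predict(new float[] { 1f, 0f });
            Debug.Log($"XOR(1, 0) ≈ {result[0]:F4}"); // should approach 1
        }
    }
}
```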
How Backpropagation Works
Backpropagation is the core algorithm for training neural networks. It applies the chain rule to compute the gradient of the loss function with respect to each network parameter, then updates those parameters by gradient descent.
The basic steps of backpropagation are as follows (the corresponding update rules are written out after the list):
- Forward pass: propagate the input through the network, computing each layer's output.
- Error computation: compare the network's output with the expected output to obtain the error.
- Backward pass: propagate the error from the output layer back toward the input layer, computing gradients layer by layer.
- Weight update: apply gradient descent with the computed gradients to update the network's weights and biases.
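Written out for the sigmoid networks used in this chapter, with weighted inputs $z^l$, activations $a^l = \sigma(z^l)$, learning rate $\eta$, and $\odot$ denoting element-wise multiplication, these steps correspond to the standard update rules (stated in the same form the code below computes them):

```latex
\begin{aligned}
\delta^{L} &= (y - a^{L}) \odot \sigma'(z^{L}) && \text{output-layer error} \\
\delta^{l} &= \big( W^{l+1} \big)^{\mathsf{T}} \delta^{l+1} \odot \sigma'(z^{l}) && \text{error propagated backward} \\
w^{l}_{ij} &\leftarrow w^{l}_{ij} + \eta \, a^{l}_{i} \, \delta^{l+1}_{j}, \qquad
b^{l+1}_{j} \leftarrow b^{l+1}_{j} + \eta \, \delta^{l+1}_{j} && \text{parameter update}
\end{aligned}
```

Because $\delta$ is built from the error $y - a$ rather than $a - y$, the updates are added rather than subtracted, which matches the `+=` updates in both listings. The identity $\sigma'(z) = \sigma(z)\,(1 - \sigma(z))$ is also why the code can evaluate the derivative from activations alone.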
The Unity implementation below follows this derivation step by step:
```csharp
using UnityEngine;
using System.Collections.Generic;
/// <summary>
/// A detailed demonstration of the backpropagation algorithm
/// </summary>
public class BackpropagationDemo : MonoBehaviour
{
[Header("网络结构")]
public int[] layerSizes = new int[] { 2, 4, 1 }; // 输入层、隐藏层、输出层的神经元数量
[Header("训练参数")]
public float learningRate = 0.1f;
public int maxEpochs = 10000;
public float convergenceThreshold = 0.001f;
[Header("可视化")]
public bool showDetailedLogs = false;
public bool visualizeGradients = true;
// Network parameters
private List<float[,]> weights; // weights between consecutive layers
private List<float[]> biases; // per-layer biases
private List<float[]> activations; // per-layer activations
private List<float[]> zValues; // weighted inputs, one array per non-input layer
// Training data - the XOR problem
private float[][] trainingInputs = new float[][] {
new float[] {0, 0},
new float[] {0, 1},
new float[] {1, 0},
new float[] {1, 1}
};
private float[][] trainingOutputs = new float[][] {
new float[] {0},
new float[] {1},
new float[] {1},
new float[] {0}
};
// Visualization of the learning process
private List<float> errorHistory = new List<float>();
private List<Vector2> weightUpdates = new List<Vector2>();
void Start()
{
InitializeNetwork();
TrainNetwork();
}
/// <summary>
/// Initializes the network parameters
/// </summary>
void InitializeNetwork()
{
int layerCount = layerSizes.Length;
weights = new List<float[,]>();
biases = new List<float[]>();
activations = new List<float[]>();
zValues = new List<float[]>();
// Initialize weights and biases
for (int l = 0; l < layerCount - 1; l++)
{
int currentLayerSize = layerSizes[l];
int nextLayerSize = layerSizes[l + 1];
// Weight initialization - Xavier/Glorot
float stdDev = Mathf.Sqrt(2.0f / (currentLayerSize + nextLayerSize));
float[,] layerWeights = new float[currentLayerSize, nextLayerSize];
for (int i = 0; i < currentLayerSize; i++)
{
for (int j = 0; j < nextLayerSize; j++)
{
// Draw random weights from a standard normal via the Box-Muller transform
float u1 = Mathf.Max(Random.value, 1e-6f); // clamp away from 0 so Log(u1) stays finite
float u2 = Random.value;
float randStdNormal = Mathf.Sqrt(-2.0f * Mathf.Log(u1)) * Mathf.Sin(2.0f * Mathf.PI * u2);
layerWeights[i, j] = randStdNormal * stdDev;
}
}
weights.Add(layerWeights);
// Biases are initialized to zero
float[] layerBiases = new float[nextLayerSize];
biases.Add(layerBiases);
}
// Allocate storage for each layer's activations and weighted inputs
for (int l = 0; l < layerCount; l++)
{
activations.Add(new float[layerSizes[l]]);
if (l < layerCount - 1) // 最后一层没有加权输入
zValues.Add(new float[layerSizes[l + 1]]);
}
if (showDetailedLogs)
LogNetworkDetails("初始化网络");
}
/// <summary>
/// 训练网络
/// </summary>
void TrainNetwork()
{
Debug.Log("开始训练网络...");
float currentError = float.MaxValue;
int epoch = 0;
while (epoch < maxEpochs && currentError > convergenceThreshold)
{
currentError = 0;
// For each training sample
for (int sample = 0; sample < trainingInputs.Length; sample++)
{
// Forward pass
ForwardPass(trainingInputs[sample]);
// Compute the error
float[] output = activations[activations.Count - 1];
float[] target = trainingOutputs[sample];
float sampleError = 0;
for (int i = 0; i < output.Length; i++)
{
float error = target[i] - output[i];
sampleError += error * error;
}
currentError += sampleError / output.Length;
// Backward pass
Backpropagate(target);
}
// Average the error over the samples
currentError /= trainingInputs.Length;
errorHistory.Add(currentError);
// Log training progress
if (epoch % 1000 == 0 || epoch == maxEpochs - 1 || currentError <= convergenceThreshold)
{
Debug.Log($"轮数: {epoch}, 误差: {currentError}");
}
epoch++;
}
Debug.Log($"训练完成! 轮数: {epoch}, 最终误差: {currentError}");
// 测试网络
TestNetwork();
}
/// <summary>
/// Forward pass
/// </summary>
void ForwardPass(float[] input)
{
// Set the input-layer activations
for (int i = 0; i < input.Length; i++)
{
activations[0][i] = input[i];
}
// Propagate forward, layer by layer
for (int l = 0; l < weights.Count; l++)
{
int currentLayerSize = layerSizes[l];
int nextLayerSize = layerSizes[l + 1];
// Compute the weighted input
for (int j = 0; j < nextLayerSize; j++)
{
float sum = biases[l][j];
for (int i = 0; i < currentLayerSize; i++)
{
sum += activations[l][i] * weights[l][i, j];
}
zValues[l][j] = sum;
activations[l + 1][j] = Sigmoid(sum);
}
}
if (showDetailedLogs)
LogActivations("前向传播");
}
/// <summary>
/// The backpropagation step
/// </summary>
void Backpropagate(float[] target)
{
int layerCount = layerSizes.Length;
// Compute the output-layer deltas
float[] outputDeltas = new float[layerSizes[layerCount - 1]];
for (int i = 0; i < outputDeltas.Length; i++)
{
float error = target[i] - activations[layerCount - 1][i];
outputDeltas[i] = error * SigmoidDerivative(zValues[layerCount - 2][i]);
}
// Propagate the error backward, layer by layer
List<float[]> deltas = new List<float[]>();
deltas.Add(outputDeltas);
for (int l = layerCount - 3; l >= 0; l--)
{
float[] currentDeltas = new float[layerSizes[l + 1]];
float[] nextDeltas = deltas[0]; // the most recently computed deltas
for (int i = 0; i < currentDeltas.Length; i++)
{
float sum = 0;
for (int j = 0; j < layerSizes[l + 2]; j++)
{
sum += nextDeltas[j] * weights[l + 1][i, j];
}
currentDeltas[i] = sum * SigmoidDerivative(zValues[l][i]);
}
deltas.Insert(0, currentDeltas);
}
// Update weights and biases
for (int l = 0; l < weights.Count; l++)
{
for (int i = 0; i < layerSizes[l]; i++)
{
for (int j = 0; j < layerSizes[l + 1]; j++)
{
float weightGradient = activations[l][i] * deltas[l][j];
float weightUpdate = learningRate * weightGradient;
// Record the weight update (for visualization)
if (visualizeGradients && Random.value < 0.01f) // sample only 1% of updates to limit data volume
{
weightUpdates.Add(new Vector2(weights[l][i, j], weights[l][i, j] + weightUpdate));
}
weights[l][i, j] += weightUpdate;
}
}
for (int j = 0; j < layerSizes[l + 1]; j++)
{
biases[l][j] += learningRate * deltas[l][j];
}
}
if (showDetailedLogs)
LogGradients("反向传播", deltas);
}
/// <summary>
/// Sigmoid activation function
/// </summary>
float Sigmoid(float x)
{
return 1.0f / (1.0f + Mathf.Exp(-x));
}
/// <summary>
/// Derivative of the sigmoid (here the argument is the raw weighted input z)
/// </summary>
float SigmoidDerivative(float x)
{
float sigmoid = Sigmoid(x);
return sigmoid * (1 - sigmoid);
}
/// <summary>
/// Runs a prediction
/// </summary>
float[] Predict(float[] input)
{
// Run a forward pass
ForwardPass(input);
// Return a copy of the output-layer activations
return (float[])activations[activations.Count - 1].Clone();
}
/// <summary>
/// 测试网络性能
/// </summary>
void TestNetwork()
{
Debug.Log("测试网络性能:");
for (int i = 0; i < trainingInputs.Length; i++)
{
float[] input = trainingInputs[i];
float[] output = Predict(input);
float[] target = trainingOutputs[i];
string inputStr = string.Join(", ", System.Array.ConvertAll(input, x => x.ToString("F1")));
string outputStr = string.Join(", ", System.Array.ConvertAll(output, x => x.ToString("F4")));
string targetStr = string.Join(", ", System.Array.ConvertAll(target, x => x.ToString("F1")));
Debug.Log($"输入: [{inputStr}], 输出: [{outputStr}], 目标: [{targetStr}]");
// 二分类决策(阈值为0.5)
if (output[0] >= 0.5f && target[0] == 1.0f || output[0] < 0.5f && target[0] == 0.0f)
{
Debug.Log("预测正确 ✓");
}
else
{
Debug.Log("预测错误 ✗");
}
}
}
/// <summary>
/// 输出网络详细信息(用于调试)
/// </summary>
void LogNetworkDetails(string title)
{
Debug.Log($"=== {title} ===");
for (int l = 0; l < weights.Count; l++)
{
Debug.Log($"第 {l} 层权重:");
for (int i = 0; i < layerSizes[l]; i++)
{
string row = "";
for (int j = 0; j < layerSizes[l + 1]; j++)
{
row += $"{weights[l][i, j]:F4} ";
}
Debug.Log(row);
}
Debug.Log($"第 {l} 层偏置: {string.Join(", ", System.Array.ConvertAll(biases[l], x => x.ToString("F4")))}");
}
}
/// <summary>
/// Logs activations (for debugging)
/// </summary>
void LogActivations(string title)
{
Debug.Log($"=== {title} ===");
for (int l = 0; l < activations.Count; l++)
{
Debug.Log($"第 {l} 层激活值: {string.Join(", ", System.Array.ConvertAll(activations[l], x => x.ToString("F4")))}");
if (l < zValues.Count)
{
Debug.Log($"第 {l} 层加权输入: {string.Join(", ", System.Array.ConvertAll(zValues[l], x => x.ToString("F4")))}");
}
}
}
/// <summary>
/// Logs gradient information (for debugging)
/// </summary>
void LogGradients(string title, List<float[]> deltas)
{
Debug.Log($"=== {title} ===");
for (int l = 0; l < deltas.Count; l++)
{
Debug.Log($"第 {l} 层误差: {string.Join(", ", System.Array.ConvertAll(deltas[l], x => x.ToString("F4")))}");
}
}
/// <summary>
/// Visualizes the training history as gizmos in the Scene view
/// </summary>
void OnDrawGizmos()
{
if (!Application.isPlaying || !visualizeGradients)
return;
// Plot the error history
Gizmos.color = Color.red;
Vector3 graphPos = transform.position + Vector3.right * 5;
float graphScale = 5.0f;
for (int i = 1; i < errorHistory.Count; i++)
{
Vector3 start = graphPos + new Vector3((i - 1) / (float)maxEpochs, errorHistory[i - 1], 0) * graphScale;
Vector3 end = graphPos + new Vector3(i / (float)maxEpochs, errorHistory[i], 0) * graphScale;
Gizmos.DrawLine(start, end);
}
// Plot the recorded weight updates
Gizmos.color = Color.green;
Vector3 weightGraphPos = transform.position + Vector3.right * 15;
foreach (Vector2 update in weightUpdates)
{
Vector3 start = weightGraphPos + new Vector3(update.x, 0, 0) * graphScale;
Vector3 end = weightGraphPos + new Vector3(update.y, 0, 0) * graphScale;
Gizmos.DrawLine(start, end);
}
}
/// <summary>
/// Displays the training results in the Game view
/// </summary>
void OnGUI()
{
if (!Application.isPlaying)
return;
int margin = 10;
int width = 300;
int height = 20;
GUI.Label(new Rect(margin, margin, width, height), "Backpropagation demo on the XOR problem");
if (errorHistory.Count > 0)
{
GUI.Label(new Rect(margin, margin + height, width, height),
$"最终误差: {errorHistory[errorHistory.Count - 1]:F6}");
}
GUI.Label(new Rect(margin, margin + height * 2, width, height), "Test results:");
for (int i = 0; i < trainingInputs.Length; i++)
{
float[] input = trainingInputs[i];
float[] output = Predict(input);
float[] target = trainingOutputs[i];
string result = (output[0] >= 0.5f && target[0] == 1.0f) || (output[0] < 0.5f && target[0] == 0.0f) ? "✓" : "✗";
GUI.Label(new Rect(margin, margin + height * (3 + i), width, height),
$"输入: [{input[0]}, {input[1]}], 输出: {output[0]:F4}, 目标: {target[0]} {result}");
}
}
}
```
This backpropagation demo implements the complete algorithm and adds visualization and debugging hooks that help expose a neural network's inner workings. It walks through the whole pipeline: weight initialization, the forward pass, error computation, the backward pass, and parameter updates.
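When experimenting with code like this, a finite-difference gradient check is a useful safeguard against sign and indexing mistakes in `Backpropagate`. The sketch below shows two helper methods one might add to `BackpropagationDemo` for that purpose; `ComputeLoss` and `NumericWeightGradient` are illustrative additions, not part of the original listing:

```csharp
// Squared error for a single sample, reusing the class's own ForwardPass.
float ComputeLoss(float[] input, float[] target)
{
    ForwardPass(input);
    float[] output = activations[activations.Count - 1];
    float loss = 0f;
    for (int i = 0; i < output.Length; i++)
    {
        float e = target[i] - output[i];
        loss += 0.5f * e * e;
    }
    return loss;
}

// Central-difference estimate of dLoss/dWeight for one weight.
// Note: Backpropagate's weightGradient equals minus dLoss/dw, because its
// deltas are built from (target - output); after a correct backward pass this
// estimate should therefore be close to -activations[l][i] * deltas[l][j].
float NumericWeightGradient(float[] input, float[] target, int l, int i, int j, float eps = 1e-3f)
{
    float original = weights[l][i, j];

    weights[l][i, j] = original + eps;
    float lossPlus = ComputeLoss(input, target);

    weights[l][i, j] = original - eps;
    float lossMinus = ComputeLoss(input, target);

    weights[l][i, j] = original; // restore the weight
    return (lossPlus - lossMinus) / (2f * eps);
}
```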
9.2 RecognizeIt: Recognizing Mouse Gestures
Gesture recognition is a natural application of supervised learning.