Supervised Training Methods: From Theoretical Foundations to Gesture Recognition in Unity
Supervised learning is one of the main branches of machine learning and is widely used in game AI development. This chapter takes a close look at its key concepts and methods, including the backpropagation algorithm, the implementation of a gesture recognition system, and a range of optimization techniques, demonstrating each with C# code examples in Unity.
9.1 The XOR Function
The exclusive-OR (XOR) function is a classic example for understanding the capabilities of neural networks. It is a simple but nonlinear problem that demonstrates a network's ability to learn very well.
The XOR function is defined as follows: for two binary inputs, the output is 1 if and only if an odd number of the inputs is 1 (for two inputs, exactly one of them); otherwise the output is 0.
| Input A | Input B | Output |
|---|---|---|
| 0 | 0 | 0 |
| 0 | 1 | 1 |
| 1 | 0 | 1 |
| 1 | 1 | 0 |
The key property of the XOR problem is that it is not linearly separable, meaning no single linear classifier (such as a single-layer perceptron) can solve it. This is precisely where the multi-layer structure of neural networks proves its worth, as the short argument below makes concrete.
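To see why, suppose a single linear unit output 1 exactly when $w_1 x_1 + w_2 x_2 + b > 0$. The four rows of the truth table would then impose the following constraints (the notation here is introduced just for this argument):

```latex
\begin{aligned}
(0,0) \mapsto 0 &\;\Rightarrow\; b < 0 \\
(0,1) \mapsto 1 &\;\Rightarrow\; w_2 + b > 0 \\
(1,0) \mapsto 1 &\;\Rightarrow\; w_1 + b > 0 \\
(1,1) \mapsto 0 &\;\Rightarrow\; w_1 + w_2 + b < 0
\end{aligned}
```

Adding the second and third constraints gives $w_1 + w_2 + 2b > 0$, and since $b < 0$ it follows that $w_1 + w_2 + b > -b > 0$, which contradicts the fourth constraint. No weights and bias satisfy all four rows, so at least one hidden layer is needed.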
The following example implements a simple neural network in Unity that learns the XOR function:
```csharp
using UnityEngine;
using System.Collections.Generic;
/// <summary>
/// A neural network solution to the XOR problem
/// </summary>
public class XORNeuralNetwork : MonoBehaviour
{
[Header("网络结构")]
public int inputSize = 2; // 输入层神经元数量
public int hiddenSize = 3; // 隐藏层神经元数量
public int outputSize = 1; // 输出层神经元数量
[Header("训练参数")]
public float learningRate = 0.1f; // 学习率
public int epochs = 10000; // 训练轮数
public bool trainingComplete = false;
// Network weights and biases
private float[,] weightsInputHidden;
private float[] biasesHidden;
private float[,] weightsHiddenOutput;
private float[] biasesOutput;
// Training data
private float[][] inputs = new float[][] {
new float[] {0, 0},
new float[] {0, 1},
new float[] {1, 0},
new float[] {1, 1}
};
private float[][] expectedOutputs = new float[][] {
new float[] {0},
new float[] {1},
new float[] {1},
new float[] {0}
};
// Runtime state and diagnostics
private float currentError = 1.0f;
private int currentEpoch = 0;
void Start()
{
InitializeNetwork();
StartTraining();
}
/// <summary>
/// Initializes the network weights and biases
/// </summary>
void InitializeNetwork()
{
// Initialize weights to small random values
weightsInputHidden = new float[inputSize, hiddenSize];
biasesHidden = new float[hiddenSize];
weightsHiddenOutput = new float[hiddenSize, outputSize];
biasesOutput = new float[outputSize];
for (int i = 0; i < inputSize; i++)
{
for (int h = 0; h < hiddenSize; h++)
{
weightsInputHidden[i, h] = Random.Range(-0.5f, 0.5f);
}
}
for (int h = 0; h < hiddenSize; h++)
{
biasesHidden[h] = Random.Range(-0.5f, 0.5f);
for (int o = 0; o < outputSize; o++)
{
weightsHiddenOutput[h, o] = Random.Range(-0.5f, 0.5f);
}
}
for (int o = 0; o < outputSize; o++)
{
biasesOutput[o] = Random.Range(-0.5f, 0.5f);
}
Debug.Log("神经网络初始化完成");
}
/// <summary>
/// Starts the training process
/// </summary>
void StartTraining()
{
Debug.Log("开始训练...");
// 在后台协程中训练网络,避免阻塞主线程
StartCoroutine(TrainNetwork());
}
/// <summary>
/// Coroutine that trains the network
/// </summary>
System.Collections.IEnumerator TrainNetwork()
{
for (int epoch = 0; epoch < epochs; epoch++)
{
currentEpoch = epoch;
float totalError = 0;
// For each training sample
for (int sample = 0; sample < inputs.Length; sample++)
{
// Forward pass
float[] hiddenOutputs = new float[hiddenSize];
float[] finalOutputs = new float[outputSize];
// Input layer to hidden layer
for (int h = 0; h < hiddenSize; h++)
{
float sum = biasesHidden[h];
for (int i = 0; i < inputSize; i++)
{
sum += inputs[sample][i] * weightsInputHidden[i, h];
}
hiddenOutputs[h] = Sigmoid(sum);
}
// Hidden layer to output layer
for (int o = 0; o < outputSize; o++)
{
float sum = biasesOutput[o];
for (int h = 0; h < hiddenSize; h++)
{
sum += hiddenOutputs[h] * weightsHiddenOutput[h, o];
}
finalOutputs[o] = Sigmoid(sum);
}
// Compute the error
float[] outputErrors = new float[outputSize];
for (int o = 0; o < outputSize; o++)
{
outputErrors[o] = expectedOutputs[sample][o] - finalOutputs[o];
totalError += Mathf.Abs(outputErrors[o]);
}
// Backpropagation
// Output-layer error gradient
float[] outputDeltas = new float[outputSize];
for (int o = 0; o < outputSize; o++)
{
outputDeltas[o] = outputErrors[o] * SigmoidDerivative(finalOutputs[o]);
}
// Hidden-layer error gradient
float[] hiddenDeltas = new float[hiddenSize];
for (int h = 0; h < hiddenSize; h++)
{
float sum = 0;
for (int o = 0; o < outputSize; o++)
{
sum += outputDeltas[o] * weightsHiddenOutput[h, o];
}
hiddenDeltas[h] = sum * SigmoidDerivative(hiddenOutputs[h]);
}
// Update weights and biases
// Hidden layer to output layer
for (int o = 0; o < outputSize; o++)
{
biasesOutput[o] += learningRate * outputDeltas[o];
for (int h = 0; h < hiddenSize; h++)
{
weightsHiddenOutput[h, o] += learningRate * hiddenOutputs[h] * outputDeltas[o];
}
}
// Input layer to hidden layer
for (int h = 0; h < hiddenSize; h++)
{
biasesHidden[h] += learningRate * hiddenDeltas[h];
for (int i = 0; i < inputSize; i++)
{
weightsInputHidden[i, h] += learningRate * inputs[sample][i] * hiddenDeltas[h];
}
}
}
// Update the current error
currentError = totalError / inputs.Length;
// Log progress every 1000 epochs
if (epoch % 1000 == 0 || epoch == epochs - 1)
{
Debug.Log($"轮数: {epoch}, 平均误差: {currentError}");
yield return null; // 让出控制权,避免卡顿
}
// Stop early once the error is small enough
if (currentError < 0.01f)
{
Debug.Log($"训练在第 {epoch} 轮收敛,平均误差: {currentError}");
break;
}
}
trainingComplete = true;
Debug.Log("训练完成!");
// 测试网络
TestNetwork();
}
/// <summary>
/// Tests the trained network
/// </summary>
void TestNetwork()
{
Debug.Log("测试网络:");
for (int i = 0; i < inputs.Length; i++)
{
float[] output = Predict(inputs[i]);
Debug.Log($"输入: [{inputs[i][0]}, {inputs[i][1]}], 预测: {output[0]:F4}, 期望: {expectedOutputs[i][0]}");
}
}
/// <summary>
/// Runs a prediction with the trained network
/// </summary>
public float[] Predict(float[] input)
{
// Validate the input size
if (input.Length != inputSize)
{
Debug.LogError($"输入大小不匹配。期望: {inputSize}, 实际: {input.Length}");
return new float[outputSize];
}
// Forward pass
float[] hiddenOutputs = new float[hiddenSize];
float[] finalOutputs = new float[outputSize];
// Input layer to hidden layer
for (int h = 0; h < hiddenSize; h++)
{
float sum = biasesHidden[h];
for (int i = 0; i < inputSize; i++)
{
sum += input[i] * weightsInputHidden[i, h];
}
hiddenOutputs[h] = Sigmoid(sum);
}
// Hidden layer to output layer
for (int o = 0; o < outputSize; o++)
{
float sum = biasesOutput[o];
for (int h = 0; h < hiddenSize; h++)
{
sum += hiddenOutputs[h] * weightsHiddenOutput[h, o];
}
finalOutputs[o] = Sigmoid(sum);
}
return finalOutputs;
}
/// <summary>
/// Sigmoid activation function
/// </summary>
float Sigmoid(float x)
{
return 1.0f / (1.0f + Mathf.Exp(-x));
}
/// <summary>
/// Derivative of the sigmoid, expressed in terms of the sigmoid's output
/// (callers pass activations such as hiddenOutputs, not raw weighted sums)
/// </summary>
float SigmoidDerivative(float x)
{
return x * (1 - x);
}
/// <summary>
/// Displays the current status as an overlay in the Game view
/// </summary>
void OnGUI()
{
GUI.Label(new Rect(10, 10, 300, 20), $"Training status: {(trainingComplete ? "complete" : "in progress...")}");
GUI.Label(new Rect(10, 30, 300, 20), $"Current epoch: {currentEpoch} / {epochs}");
GUI.Label(new Rect(10, 50, 300, 20), $"Current error: {currentError:F6}");
if (trainingComplete)
{
GUI.Label(new Rect(10, 80, 300, 20), "Test results:");
for (int i = 0; i < inputs.Length; i++)
{
float[] output = Predict(inputs[i]);
GUI.Label(new Rect(10, 100 + i * 20, 300, 20),
$"Input: [{inputs[i][0]}, {inputs[i][1]}], output: {output[0]:F4}, expected: {expectedOutputs[i][0]}");
}
}
}
}
```
This example shows a complete neural-network implementation for solving the XOR problem. It covers network initialization, the forward pass, backpropagation, and prediction, along with detailed debug output.
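As a usage note, other scripts can query the trained network through the public `Predict` method. The following is a minimal sketch; the `XORQueryExample` class name, the component lookup, and the key binding are illustrative assumptions, not part of the listing above:

```csharp
using UnityEngine;

/// Queries a trained XORNeuralNetwork somewhere in the scene (illustrative sketch).
public class XORQueryExample : MonoBehaviour
{
    private XORNeuralNetwork net;

    void Start()
    {
        // Cache the reference once instead of searching every frame.
        net = FindObjectOfType<XORNeuralNetwork>();
    }

    void Update()
    {
        // Query only after training has converged.
        if (net != null && net.trainingComplete && Input.GetKeyDown(KeyCode.Space))
        {
            float[] result = net.Predict(new float[] { 1f, 0f });
            Debug.Log($"XOR(1, 0) ≈ {result[0]:F4}"); // should approach 1
        }
    }
}
```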
How Backpropagation Works
Backpropagation is the core algorithm for training neural networks. It applies the chain rule to compute the gradient of the loss function with respect to each network parameter, then updates those parameters by gradient descent.
The basic steps of backpropagation are as follows (the corresponding update rules are written out after the list):
- Forward pass: propagate the input through the network, computing each layer's output.
- Error computation: compare the network's output with the expected output to obtain the error.
- Backward pass: propagate the error from the output layer back toward the input layer, computing gradients layer by layer.
- Weight update: apply gradient descent with the computed gradients to update the network's weights and biases.
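Written out for the sigmoid networks used in this chapter, with weighted inputs $z^l$, activations $a^l = \sigma(z^l)$, learning rate $\eta$, and $\odot$ denoting element-wise multiplication, these steps correspond to the standard update rules (stated in the same form the code below computes them):

```latex
\begin{aligned}
\delta^{L} &= (y - a^{L}) \odot \sigma'(z^{L}) && \text{output-layer error} \\
\delta^{l} &= \big( W^{l+1} \big)^{\mathsf{T}} \delta^{l+1} \odot \sigma'(z^{l}) && \text{error propagated backward} \\
w^{l}_{ij} &\leftarrow w^{l}_{ij} + \eta \, a^{l}_{i} \, \delta^{l+1}_{j}, \qquad
b^{l+1}_{j} \leftarrow b^{l+1}_{j} + \eta \, \delta^{l+1}_{j} && \text{parameter update}
\end{aligned}
```

Because $\delta$ is built from the error $y - a$ rather than $a - y$, the updates are added rather than subtracted, which matches the `+=` updates in both listings. The identity $\sigma'(z) = \sigma(z)\,(1 - \sigma(z))$ is also why the code can evaluate the derivative from activations alone.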
The Unity implementation below follows this derivation step by step:
```csharp
using UnityEngine;
using System.Collections.Generic;
/// <summary>
/// A detailed demonstration of the backpropagation algorithm
/// </summary>
public class BackpropagationDemo : MonoBehaviour
{
[Header("网络结构")]
public int[] layerSizes = new int[] { 2, 4, 1 }; // 输入层、隐藏层、输出层的神经元数量
[Header("训练参数")]
public float learningRate = 0.1f;
public int maxEpochs = 10000;
public float convergenceThreshold = 0.001f;
[Header("可视化")]
public bool showDetailedLogs = false;
public bool visualizeGradients = true;
// Network parameters
private List<float[,]> weights; // weights between consecutive layers
private List<float[]> biases; // per-layer biases
private List<float[]> activations; // per-layer activations
private List<float[]> zValues; // weighted inputs, one array per non-input layer
// Training data - the XOR problem
private float[][] trainingInputs = new float[][] {
new float[] {0, 0},
new float[] {0, 1},
new float[] {1, 0},
new float[] {1, 1}
};
private float[][] trainingOutputs = new float[][] {
new float[] {0},
new float[] {1},
new float[] {1},
new float[] {0}
};
// Visualization of the learning process
private List<float> errorHistory = new List<float>();
private List<Vector2> weightUpdates = new List<Vector2>();
void Start()
{
InitializeNetwork();
TrainNetwork();
}
/// <summary>
/// Initializes the network parameters
/// </summary>
void InitializeNetwork()
{
int layerCount = layerSizes.Length;
weights = new List<float[,]>();
biases = new List<float[]>();
activations = new List<float[]>();
zValues = new List<float[]>();
// Initialize weights and biases
for (int l = 0; l < layerCount - 1; l++)
{
int currentLayerSize = layerSizes[l];
int nextLayerSize = layerSizes[l + 1];
// Weight initialization - Xavier/Glorot
float stdDev = Mathf.Sqrt(2.0f / (currentLayerSize + nextLayerSize));
float[,] layerWeights = new float[currentLayerSize, nextLayerSize];
for (int i = 0; i < currentLayerSize; i++)
{
for (int j = 0; j < nextLayerSize; j++)
{
// Draw random weights from a standard normal via the Box-Muller transform
float u1 = Mathf.Max(Random.value, 1e-6f); // clamp away from 0 so Log(u1) stays finite
float u2 = Random.value;
float randStdNormal = Mathf.Sqrt(-2.0f * Mathf.Log(u1)) * Mathf.Sin(2.0f * Mathf.PI * u2);
layerWeights[i, j] = randStdNormal * stdDev;
}
}
weights.Add(layerWeights);
// Biases are initialized to zero
float[] layerBiases = new float[nextLayerSize];
biases.Add(layerBiases);
}
// Allocate storage for each layer's activations and weighted inputs
for (int l = 0; l < layerCount; l++)
{
activations.Add(new float[layerSizes[l]]);
if (l < layerCount - 1) // 最后一层没有加权输入
zValues.Add(new float[layerSizes[l + 1]]);
}
if (showDetailedLogs)
LogNetworkDetails("初始化网络");
}
/// <summary>
/// 训练网络
/// </summary>
void TrainNetwork()
{
Debug.Log("开始训练网络...");
float currentError = float.MaxValue;
int epoch = 0;
while (epoch < maxEpochs && currentError > convergenceThreshold)
{
currentError = 0;
// For each training sample
for (int sample = 0; sample < trainingInputs.Length; sample++)
{
// Forward pass
ForwardPass(trainingInputs[sample]);
// Compute the error
float[] output = activations[activations.Count - 1];
float[] target = trainingOutputs[sample];
float sampleError = 0;
for (int i = 0; i < output.Length; i++)
{
float error = target[i] - output[i];
sampleError += error * error;
}
currentError += sampleError / output.Length;
// Backward pass
Backpropagate(target);
}
// Average the error over the samples
currentError /= trainingInputs.Length;
errorHistory.Add(currentError);
// Log training progress
if (epoch % 1000 == 0 || epoch == maxEpochs - 1 || currentError <= convergenceThreshold)
{
Debug.Log($"轮数: {epoch}, 误差: {currentError}");
}
epoch++;
}
Debug.Log($"训练完成! 轮数: {epoch}, 最终误差: {currentError}");
// 测试网络
TestNetwork();
}
/// <summary>
/// Forward pass
/// </summary>
void ForwardPass(float[] input)
{
// Set the input-layer activations
for (int i = 0; i < input.Length; i++)
{
activations[0][i] = input[i];
}
// Propagate forward, layer by layer
for (int l = 0; l < weights.Count; l++)
{
int currentLayerSize = layerSizes[l];
int nextLayerSize = layerSizes[l + 1];
// Compute the weighted input
for (int j = 0; j < nextLayerSize; j++)
{
float sum = biases[l][j];
for (int i = 0; i < currentLayerSize; i++)
{
sum += activations[l][i] * weights[l][i, j];
}
zValues[l][j] = sum;
activations[l + 1][j] = Sigmoid(sum);
}
}
if (showDetailedLogs)
LogActivations("前向传播");
}
/// <summary>
/// The backpropagation step
/// </summary>
void Backpropagate(float[] target)
{
int layerCount = layerSizes.Length;
// Compute the output-layer deltas
float[] outputDeltas = new float[layerSizes[layerCount - 1]];
for (int i = 0; i < outputDeltas.Length; i++)
{
float error = target[i] - activations[layerCount - 1][i];
outputDeltas[i] = error * SigmoidDerivative(zValues[layerCount - 2][i]);
}
// Propagate the error backward, layer by layer
List<float[]> deltas = new List<float[]>();
deltas.Add(outputDeltas);
for (int l = layerCount - 3; l >= 0; l--)
{
float[] currentDeltas = new float[layerSizes[l + 1]];
float[] nextDeltas = deltas[0]; // the most recently computed deltas
for (int i = 0; i < currentDeltas.Length; i++)
{
float sum = 0;
for (int j = 0; j < layerSizes[l + 2]; j++)
{
sum += nextDeltas[j] * weights[l + 1][i, j];
}
currentDeltas[i] = sum * SigmoidDerivative(zValues[l][i]);
}
deltas.Insert(0, currentDeltas);
}
// Update weights and biases
for (int l = 0; l < weights.Count; l++)
{
for (int i = 0; i < layerSizes[l]; i++)
{
for (int j = 0; j < layerSizes[l + 1]; j++)
{
float weightGradient = activations[l][i] * deltas[l][j];
float weightUpdate = learningRate * weightGradient;
// Record the weight update (for visualization)
if (visualizeGradients && Random.value < 0.01f) // sample only 1% of updates to limit data volume
{
weightUpdates.Add(new Vector2(weights[l][i, j], weights[l][i, j] + weightUpdate));
}
weights[l][i, j] += weightUpdate;
}
}
for (int j = 0; j < layerSizes[l + 1]; j++)
{
biases[l][j] += learningRate * deltas[l][j];
}
}
if (showDetailedLogs)
LogGradients("反向传播", deltas);
}
/// <summary>
/// Sigmoid activation function
/// </summary>
float Sigmoid(float x)
{
return 1.0f / (1.0f + Mathf.Exp(-x));
}
/// <summary>
/// Derivative of the sigmoid (here the argument is the raw weighted input z)
/// </summary>
float SigmoidDerivative(float x)
{
float sigmoid = Sigmoid(x);
return sigmoid * (1 - sigmoid);
}
/// <summary>
/// Runs a prediction
/// </summary>
float[] Predict(float[] input)
{
// Run a forward pass
ForwardPass(input);
// Return a copy of the output-layer activations
return (float[])activations[activations.Count - 1].Clone();
}
/// <summary>
/// 测试网络性能
/// </summary>
void TestNetwork()
{
Debug.Log("测试网络性能:");
for (int i = 0; i < trainingInputs.Length; i++)
{
float[] input = trainingInputs[i];
float[] output = Predict(input);
float[] target = trainingOutputs[i];
string inputStr = string.Join(", ", System.Array.ConvertAll(input, x => x.ToString("F1")));
string outputStr = string.Join(", ", System.Array.ConvertAll(output, x => x.ToString("F4")));
string targetStr = string.Join(", ", System.Array.ConvertAll(target, x => x.ToString("F1")));
Debug.Log($"输入: [{inputStr}], 输出: [{outputStr}], 目标: [{targetStr}]");
// 二分类决策(阈值为0.5)
if (output[0] >= 0.5f && target[0] == 1.0f || output[0] < 0.5f && target[0] == 0.0f)
{
Debug.Log("预测正确 ✓");
}
else
{
Debug.Log("预测错误 ✗");
}
}
}
/// <summary>
/// 输出网络详细信息(用于调试)
/// </summary>
void LogNetworkDetails(string title)
{
Debug.Log($"=== {title} ===");
for (int l = 0; l < weights.Count; l++)
{
Debug.Log($"第 {l} 层权重:");
for (int i = 0; i < layerSizes[l]; i++)
{
string row = "";
for (int j = 0; j < layerSizes[l + 1]; j++)
{
row += $"{weights[l][i, j]:F4} ";
}
Debug.Log(row);
}
Debug.Log($"第 {l} 层偏置: {string.Join(", ", System.Array.ConvertAll(biases[l], x => x.ToString("F4")))}");
}
}
/// <summary>
/// Logs activations (for debugging)
/// </summary>
void LogActivations(string title)
{
Debug.Log($"=== {title} ===");
for (int l = 0; l < activations.Count; l++)
{
Debug.Log($"第 {l} 层激活值: {string.Join(", ", System.Array.ConvertAll(activations[l], x => x.ToString("F4")))}");
if (l < zValues.Count)
{
Debug.Log($"第 {l} 层加权输入: {string.Join(", ", System.Array.ConvertAll(zValues[l], x => x.ToString("F4")))}");
}
}
}
/// <summary>
/// Logs gradient information (for debugging)
/// </summary>
void LogGradients(string title, List<float[]> deltas)
{
Debug.Log($"=== {title} ===");
for (int l = 0; l < deltas.Count; l++)
{
Debug.Log($"第 {l} 层误差: {string.Join(", ", System.Array.ConvertAll(deltas[l], x => x.ToString("F4")))}");
}
}
/// <summary>
/// Visualizes the training history as gizmos in the Scene view
/// </summary>
void OnDrawGizmos()
{
if (!Application.isPlaying || !visualizeGradients)
return;
// Plot the error history
Gizmos.color = Color.red;
Vector3 graphPos = transform.position + Vector3.right * 5;
float graphScale = 5.0f;
for (int i = 1; i < errorHistory.Count; i++)
{
Vector3 start = graphPos + new Vector3((i - 1) / (float)maxEpochs, errorHistory[i - 1], 0) * graphScale;
Vector3 end = graphPos + new Vector3(i / (float)maxEpochs, errorHistory[i], 0) * graphScale;
Gizmos.DrawLine(start, end);
}
// Plot the recorded weight updates
Gizmos.color = Color.green;
Vector3 weightGraphPos = transform.position + Vector3.right * 15;
foreach (Vector2 update in weightUpdates)
{
Vector3 start = weightGraphPos + new Vector3(update.x, 0, 0) * graphScale;
Vector3 end = weightGraphPos + new Vector3(update.y, 0, 0) * graphScale;
Gizmos.DrawLine(start, end);
}
}
/// <summary>
/// Displays the training results in the Game view
/// </summary>
void OnGUI()
{
if (!Application.isPlaying)
return;
int margin = 10;
int width = 300;
int height = 20;
GUI.Label(new Rect(margin, margin, width, height), "Backpropagation demo on the XOR problem");
if (errorHistory.Count > 0)
{
GUI.Label(new Rect(margin, margin + height, width, height),
$"最终误差: {errorHistory[errorHistory.Count - 1]:F6}");
}
GUI.Label(new Rect(margin, margin + height * 2, width, height), "Test results:");
for (int i = 0; i < trainingInputs.Length; i++)
{
float[] input = trainingInputs[i];
float[] output = Predict(input);
float[] target = trainingOutputs[i];
string result = (output[0] >= 0.5f && target[0] == 1.0f) || (output[0] < 0.5f && target[0] == 0.0f) ? "✓" : "✗";
GUI.Label(new Rect(margin, margin + height * (3 + i), width, height),
$"输入: [{input[0]}, {input[1]}], 输出: {output[0]:F4}, 目标: {target[0]} {result}");
}
}
}
```
This backpropagation demo implements the complete algorithm and adds visualization and debugging hooks that help expose a neural network's inner workings. It walks through the whole pipeline: weight initialization, the forward pass, error computation, the backward pass, and parameter updates.
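When experimenting with code like this, a finite-difference gradient check is a useful safeguard against sign and indexing mistakes in `Backpropagate`. The sketch below shows two helper methods one might add to `BackpropagationDemo` for that purpose; `ComputeLoss` and `NumericWeightGradient` are illustrative additions, not part of the original listing:

```csharp
// Squared error for a single sample, reusing the class's own ForwardPass.
float ComputeLoss(float[] input, float[] target)
{
    ForwardPass(input);
    float[] output = activations[activations.Count - 1];
    float loss = 0f;
    for (int i = 0; i < output.Length; i++)
    {
        float e = target[i] - output[i];
        loss += 0.5f * e * e;
    }
    return loss;
}

// Central-difference estimate of dLoss/dWeight for one weight.
// Note: Backpropagate's weightGradient equals minus dLoss/dw, because its
// deltas are built from (target - output); after a correct backward pass this
// estimate should therefore be close to -activations[l][i] * deltas[l][j].
float NumericWeightGradient(float[] input, float[] target, int l, int i, int j, float eps = 1e-3f)
{
    float original = weights[l][i, j];

    weights[l][i, j] = original + eps;
    float lossPlus = ComputeLoss(input, target);

    weights[l][i, j] = original - eps;
    float lossMinus = ComputeLoss(input, target);

    weights[l][i, j] = original; // restore the weight
    return (lossPlus - lossMinus) / (2f * eps);
}
```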
9.2 RecognizeIt: Recognizing Mouse Gestures
Gesture recognition is a natural application of supervised learning.