本文改编自Mat Buckland的游戏开发中的人工智能技术中的Chapter 9 手势的识别,C++代码重新用python来实现(本文所有遗传算法/神经网络相关代码均改编自Mat的C++代码,如有雷同,纯属巧合)。
废话不多说,先上效果图:
从效果图可以看出来,我们的程序可以识别出来我们鼠标所画的手势是否是我们预定义的手势,如果不能确定的话,也会给出可能性最大的那个手势。
在本章的实现里,没有再用到遗传算法,而是通过反向传播的神经网络,训练出一个识别模型,通过这个模型来判断我们输入的手势。
手势的矢量化
本文中所有的手势,都被矢量化成了12个点。如下是所预定义的手势的矢量:
# Predefined gesture patterns: each gesture is vectorised into 12 unit
# direction vectors, stored flat as 24 numbers (x0,y0, x1,y1, ..., x11,y11).
# These rows are the network's training inputs, index-aligned with
# self._vecNames below.
self._vecPatterns = [[1.0,0, 1.0,0, 1.0,0, 1.0,0, 1.0,0, 1.0,0, 1.0,0, 1.0,0, 1.0,0, 1.0,0, 1.0,0, 1.0,0],
                     [-1.0,0, -1.0,0, -1.0,0, -1.0,0, -1.0,0, -1.0,0, -1.0,0, -1.0,0, -1.0,0, -1.0,0, -1.0,0, -1.0,0],
                     [0,1.0, 0,1.0, 0,1.0, 0,1.0, 0,1.0, 0,1.0, 0,1.0, 0,1.0, 0,1.0, 0,1.0, 0,1.0, 0,1.0],
                     [0,-1.0, 0,-1.0, 0,-1.0, 0,-1.0, 0,-1.0, 0,-1.0, 0,-1.0, 0,-1.0, 0,-1.0, 0,-1.0, 0,-1.0, 0,-1.0],
                     [1.0,0, 1.0,0, 1.0,0, 0,1.0, 0,1.0, 0,1.0, -1.0,0, -1.0,0, -1.0,0, 0,-1.0, 0,-1.0, 0,-1.0],
                     [-1.0,0, -1.0,0, -1.0,0, 0,1.0, 0,1.0, 0,1.0, 1.0,0, 1.0,0, 1.0,0, 0,-1.0, 0,-1.0, 0,-1.0],
                     [1.0,0, 1.0,0, 1.0,0, 1.0,0, 1.0,0, 1.0,0, 1.0,0, 1.0,0, 1.0,0, -0.45,0.9, -0.9, 0.45, -0.9,0.45],
                     [-1.0,0, -1.0,0, -1.0,0, -1.0,0, -1.0,0, -1.0,0, -1.0,0, -1.0,0, -1.0,0, 0.45,0.9, 0.9, 0.45, 0.9,0.45],
                     [-0.7,0.7, -0.7,0.7, -0.7,0.7, -0.7,0.7, -0.7,0.7, -0.7,0.7, -0.7,0.7, -0.7,0.7, -0.7,0.7, -0.7,0.7, -0.7,0.7, -0.7,0.7],
                     [0.7,0.7, 0.7,0.7, 0.7,0.7, 0.7,0.7, 0.7,0.7, 0.7,0.7, 0.7,0.7, 0.7,0.7, 0.7,0.7, 0.7,0.7, 0.7,0.7, 0.7,0.7],
                     [1.0,0, 1.0,0, 1.0,0, 1.0,0, -0.72,0.69,-0.7,0.72,0.59,0.81, 1.0,0, 1.0,0, 1.0,0, 1.0,0, 1.0,0]]
# Human-readable name for each gesture, one per pattern row.
# FIX: "Sout East" -> "South East" (typo in the displayed gesture name).
self._vecNames = ["Right", "Left", "Down", "Up", "Clockwise Square", "Anti-Clockwise Square", "Right Arrow", "Left Arrow", "South West", "South East", "Zorro"]
从上面的预定义手势的值可以看出来,Pattern中每两个数为一组(x,y), Right就是一系列的[1.0, 0], Left就是一系列的[-1.0, 0]。
这些预定义的Pattern就是神经网络的训练集,因为有11个手势,我们用不同的bit位来表示我们所期望的训练输出,所以我们神经网络的输出层就有11个outputs
def CreateTrainingSetFromData(self):
    """Build the training set from the predefined gesture patterns.

    Each of the first ``_iNumPatterns`` pattern vectors becomes one input
    sample; the matching expected output is a one-hot vector of length
    ``_iNumPatterns`` with bit ``i`` set for pattern ``i``.

    FIX: ``xrange`` (Python-2-only) replaced with ``range`` so the code
    also runs on Python 3; behaviour is unchanged.
    """
    # Inputs: the pattern vectors themselves.  As in the original, the
    # inner lists are shared with _vecPatterns (no copy is made).
    self._SetIn = [self._vecPatterns[i] for i in range(self._iNumPatterns)]
    # Outputs: one-hot encoding of each pattern index.
    self._SetOut = [[1 if j == i else 0 for j in range(self._iNumPatterns)]
                    for i in range(self._iNumPatterns)]
    return
反向传播神经网络
这个算法具体不多描述,总体的意思就是利用神经网络的输出与预期的输出的偏差,来不断调整神经网络中各个神经元的权重,有兴趣的话可以参看《一文弄懂神经网络中的反向传播算法》一文
之前的文章里,我们都是通过遗传算法不断进化来调整神经网络的权重,而反向传播神经网络就不再需要遗传算法了,在一次一次的迭代过程中利用输出与预期的差来不断的指导神经网络权重的调整。
这里只贴下从原文中转化过来的根据ErrorSum来更新权重的函数:
def NetworkTrainingEpoch(self, SetIn, SetOut):
    """Run one backpropagation epoch over the whole training set.

    For every training sample: feed it forward with ``self.Update``,
    compute the output-layer deltas and adjust the output weights, then
    propagate the error back to the hidden layer and adjust its weights.
    The squared error of the whole epoch is accumulated in
    ``self._ErrorSum`` (used by ``Train`` as the stop condition).

    Returns False if the forward pass fails (bad input size), else True.
    """
    self._ErrorSum = 0
    for vec in range(len(SetIn)):
        # Forward pass; an empty result signals a wrong-sized input.
        outputs = self.Update(SetIn[vec])
        if len(outputs) == 0:
            return False

        # --- output layer (self._Layers[1]) ---
        for op in range(self._NumOutputs):
            # Delta rule with the sigmoid derivative o * (1 - o).
            err = (SetOut[vec][op] - outputs[op]) * outputs[op] * (1 - outputs[op])
            self._Layers[1]._Neurons[op]._Error = err
            # Sum of squared errors drives the training stop condition.
            self._ErrorSum += (SetOut[vec][op] - outputs[op]) * (SetOut[vec][op] - outputs[op])
            # Adjust every input weight; the last weight slot is the bias.
            for i in range(len(self._Layers[1]._Neurons[op]._Weights) - 1):
                self._Layers[1]._Neurons[op]._Weights[i] += \
                    err * self._LearningRate * self._Layers[0]._Neurons[i]._Activation
            # Bias weight (dBias is the module-level bias constant).
            self._Layers[1]._Neurons[op]._Weights[-1] += err * self._LearningRate * dBias

        # --- hidden layer (self._Layers[0]) ---
        for i in range(len(self._Layers[0]._Neurons)):
            # Back-propagated error: weighted sum of output-layer deltas.
            err = 0
            for j in range(len(self._Layers[1]._Neurons)):
                err += self._Layers[1]._Neurons[j]._Error * self._Layers[1]._Neurons[j]._Weights[i]
            err *= self._Layers[0]._Neurons[i]._Activation * (1 - self._Layers[0]._Neurons[i]._Activation)
            for w in range(self._NumInputs):
                self._Layers[0]._Neurons[i]._Weights[w] += err * self._LearningRate * SetIn[vec][w]
            # Bias weight of the hidden neuron.
            # FIX: the learning rate was dropped here in the transcription;
            # every other weight update in this function applies it.
            self._Layers[0]._Neurons[i]._Weights[self._NumInputs] += err * self._LearningRate * dBias
    return True
在ErrorSum没有达到我们要求前,我们会持续的训练这个神经网络:
def Train(self, data, screen, errorThreshold=0.003):
    """Train the network until the epoch error sum drops below a threshold.

    data           -- object exposing GetInputSet() / GetOutputSet().
    screen         -- display surface; kept for the caller that refreshes
                      the window between epochs.
    errorThreshold -- stop training once self._ErrorSum falls below this
                      (default 0.003, the original hard-coded value).

    Returns True on success; False on invalid set sizes or a failed
    training epoch.  Sets self._Trained on success.
    """
    SetIn = data.GetInputSet()
    SetOut = data.GetOutputSet()
    # The sets must be the same length and match the network topology.
    if len(SetIn) != len(SetOut) or len(SetIn[0]) != self._NumInputs \
            or len(SetOut[0]) != self._NumOutputs:
        print("Inputs/Outputs length is invalid.")
        return False
    self.InitializeNetwork()
    # InitializeNetwork is expected to leave _ErrorSum above the threshold
    # so the loop runs at least once -- TODO confirm against its definition.
    while self._ErrorSum > errorThreshold:
        if not self.NetworkTrainingEpoch(SetIn, SetOut):
            return False
        self._NumEpochs += 1
        # The window should be refreshed here to keep the UI responsive.
        print("Epoch: %d, ErrorSum: %f" % (self._NumEpochs, self._ErrorSum))
    self._Trained = True
    return True
当神经网络训练好了,每个神经元的权重会被保存下来,用来判断我们输入的手势。
手势的识别
当鼠标移动时,会不断的保存鼠标的坐标,并将其在screen上画出来:
# Mouse-motion handler: while a gesture is being drawn, append the
# current cursor position (wrapped in an SVector2D) to the controller's
# recorded path.
if event.type == MOUSEMOTION:
    if g_pController.IsDrawing():
        pos = pygame.mouse.get_pos()
        g_pController.AddPoint(SVector2D(pos[0], pos[1]))

# Rendering of the recorded path -- note the switch from g_pController
# to `self`: this part presumably lives inside the controller's own
# draw routine (TODO confirm; the blog splices two scopes together).
# Draws the path as a green poly-line, 1 pixel wide, not closed.
drawPoints = []
for i in xrange(len(self._Path)):
    drawPoints.append([int(self._Path[i]._x), int(self._Path[i]._y)])
pygame.draw.lines(screen, [0, 255, 0], False, drawPoints, 1)
根据这些鼠标移动过程中的点,我们将其平滑成12个点:
def Smooth(self):
    """Reduce the raw mouse path to exactly _NumSmoothPoints points.

    Repeatedly finds the closest pair of neighbouring points (endpoints
    excluded) and replaces the pair with its midpoint, until the path is
    short enough.  The result is stored in self._SmoothPath; the raw
    path in self._Path is left untouched.

    Returns False if the raw path is already shorter than the target.

    FIX: ``xrange``/print-statement replaced with Python-3-compatible
    forms; the algorithm itself is unchanged.
    """
    if len(self._Path) < self._NumSmoothPoints:
        print("Length of Path not correct: %d, expected: %d" % (len(self._Path), self._NumSmoothPoints))
        return False
    # Work on a deep copy so the original raw path keeps its points.
    self._SmoothPath = copy.deepcopy(self._Path)
    while len(self._SmoothPath) > self._NumSmoothPoints:
        ShortestSoFar = 99999999.9
        PointMarker = 0
        # Scan adjacent pairs; starting at index 2 and stopping one short
        # of the end keeps the gesture's first and last points intact.
        for SpanFront in range(2, len(self._SmoothPath) - 1):
            xTmp = self._SmoothPath[SpanFront - 1]._x - self._SmoothPath[SpanFront]._x
            yTmp = self._SmoothPath[SpanFront - 1]._y - self._SmoothPath[SpanFront]._y
            length = sqrt(xTmp * xTmp + yTmp * yTmp)
            if length < ShortestSoFar:
                ShortestSoFar = length
                PointMarker = SpanFront
        # Merge the closest pair into its midpoint and drop one point.
        newPoint = SVector2D((self._SmoothPath[PointMarker - 1]._x + self._SmoothPath[PointMarker]._x) / 2,
                             (self._SmoothPath[PointMarker - 1]._y + self._SmoothPath[PointMarker]._y) / 2)
        self._SmoothPath[PointMarker - 1] = newPoint
        self._SmoothPath = self._SmoothPath[:PointMarker] + self._SmoothPath[(PointMarker + 1):]
    return True
这样,我们就可以利用这12点作为已经训练好的神经网络的输入,得到一个输出:
def Update(self, inputs):
    """Feed *inputs* forward through the network.

    Returns the list of output-layer activations, or an empty list if
    the input vector does not match the network's input size.

    FIX: the per-layer ``copy.deepcopy(outputs)`` was replaced with a
    plain rebinding -- the old list is only read after that point and
    ``outputs`` is immediately rebound to a fresh list, so the deep copy
    was pure overhead.  Also made Python-3-compatible (range / print()).
    """
    outputs = []
    if len(inputs) != self._NumInputs:
        print("Input Size not correct, current %d, expected: %d" % (len(inputs), self._NumInputs))
        return outputs
    # One pass per layer: all hidden layers, then the output layer.
    for i in range(self._NumHiddenLayers + 1):
        if i > 0:
            # The previous layer's activations become this layer's inputs.
            inputs = outputs
        outputs = []
        Weight = 0
        for n in range(self._Layers[i]._NumNeurons):
            netinput = 0.0
            NumInputs = self._Layers[i]._Neurons[n]._NumInputs
            # Weighted sum of the neuron's inputs; the last weight slot
            # is reserved for the bias.
            for k in range(NumInputs - 1):
                netinput += self._Layers[i]._Neurons[n]._Weights[k] * inputs[Weight]
                Weight += 1
            netinput += self._Layers[i]._Neurons[n]._Weights[NumInputs - 1] * dBias
            # Squash through the sigmoid (ACTIVATION_RESPONSE = 1.0).
            self._Layers[i]._Neurons[n]._Activation = self.Sigmoid(netinput, 1.0)
            outputs.append(self._Layers[i]._Neurons[n]._Activation)
            # Every neuron of a layer reads the full input vector, so the
            # running input index restarts for the next neuron.
            Weight = 0
    return outputs
这个输出里最优的结果就是我们最终判断出来的手势啦~~~