前言
1 待解决的问题
本文的任务是设计一个手势识别系统。输入数据是包含有手势的视频流,输出是手势的分类结果。本文采用的手势有以下8种。
2 解决方案
数据处理流程如下:
3 算法与实现
普通摄像头获取的图像是RGB模式的,不利于肤色分割,所以考虑将读取的每一帧图像进行颜色空间转换,得到其HSV空间的数据,之后再进行图像分割。
普通的 RGB图像转换到HSV空间如下:
经过参数设定,获取每个通道图像中数值在某一特定区间的区域,进行轮廓提取。
将获取到的指尖位置,存储到一个list中,同时我们还可以获取手掌重心的位置。主要识别方法:
(1)对于“stone”、“1”、“3”、“4”、“5”五种手势只需要根据指尖的数量就可识别。
(2)对于“2”、“6”、“8”,必须再根据指尖与手心的相对位置进行二次分类才能得出结果。具体做法是:由手心位置分别指向两个指尖位置构成两个向量,根据这两个向量夹角的大小即可区分这3种手势。
4 实验结果
5 分析与改进
目前可识别的手势有8种,但都比较简单。在背景复杂的情况下,识别的稳定性会下降,尤其是当背景中与肤色相近的区域和手部图像连在一起时。此外,由于程序的参数设置是固定的,对图像的缩放不够鲁棒,可识别的距离有限:实验中手距离摄像头20-50cm时可以获得较好的效果。
改进方案:
(1)考虑采用深度摄像机,减少背景的干扰,同时可以获得更加精确的手势信息。
(2)仅仅采用指尖和手心的位置信息来判断手势,仍然不能满足要求,还需要加入其它的特征描述子来增加系统的实用性和准确性。
附录:(源代码)
#include "stdafx.h"
#include "math.h"
#include <iostream>
#include <string>
#include <iomanip>
#include <sstream>
#include <opencv2/imgproc/imgproc.hpp>
#include <opencv2/core/core.hpp>
#include <opencv2/highgui/highgui.hpp>
// Multithreading (Win32 thread API)
#include <windows.h>
using namespace cv;
using namespace std;
// Worker-thread entry point; defined later in this file and started by StartDevice().
DWORD WINAPI myThread(char* _addr);
// Returns the angle (acos result, i.e. radians) between the vectors (x1, y1) and (x2, y2);
// defined later in this file.
float ComputeAngle(float , float , float , float );
// Recognition results shared between myThread (writer) and GetGesture (reader).
// NOTE(review): no lock or atomic is used around these accesses anywhere in this
// file, so a reader may observe a partially updated set of values.
// Fingertip (x, y) pixel coordinates of the most recently processed hand, one row per fingertip.
int fingerSignal[10][2] = {0};
// Palm-centre (x, y) of the most recently processed hand.
int centerToUnity[2] = {0};
// Number of fingertips found for the most recently processed hand.
int fingerNum = 0;
// Current gesture code. Starts at -1; myThread assigns 0, 1, 3, 4, 5 for those fingertip
// counts, and 21 / 22 / 23 for the two-fingertip gestures it labels "2" / "8" / "6".
int gesture = -1;
// Copies the most recent recognition result into caller-provided buffers.
// Called from the Unity C# script.
//
//   _signal         out: 10 rows of fingertip (x, y); rows at or beyond _fingerNum are zero.
//   _centerToUnity  out: palm-centre (x, y).
//   _fingerNum      out: number of valid rows written to _signal, always in [0, 10].
//   _gesture        out: current gesture code (see the `gesture` global).
extern "C" _declspec(dllexport) void _stdcall GetGesture(int _signal[10][2],int _centerToUnity[2],int &_fingerNum,int &_gesture)
{
    const int kMaxFingers = 10; // row count of both fingerSignal and _signal

    // Read the shared counter exactly once: the worker thread may change it while
    // this function runs, and the value is used as a loop bound below.
    int count = fingerNum;

    // Clamp to the buffer capacity. The detector can find more fingertips than
    // the arrays can hold; an unclamped count would index past both arrays.
    if (count < 0)
    {
        count = 0;
    }
    if (count > kMaxFingers)
    {
        count = kMaxFingers;
    }
    _fingerNum = count;

    for (int row = 0; row < kMaxFingers; row++)
    {
        const bool valid = row < count;
        _signal[row][0] = valid ? fingerSignal[row][0] : 0;
        _signal[row][1] = valid ? fingerSignal[row][1] : 0;
    }

    _centerToUnity[0] = centerToUnity[0];
    _centerToUnity[1] = centerToUnity[1];
    _gesture = gesture;
}
// Starts the gesture-recognition worker thread (myThread).
// addr is handed to the thread unchanged as its argument.
extern "C" _declspec(dllexport) void _stdcall StartDevice(char* addr)
{
    HANDLE myHandle = CreateThread(NULL, 0, reinterpret_cast<LPTHREAD_START_ROUTINE>(myThread), addr, 0, NULL);
    if (myHandle == NULL)
    {
        // The function has no return value, so the failure can only be logged.
        cerr << "StartDevice: CreateThread failed, error " << GetLastError() << endl;
        return;
    }
    // Nothing in this file waits on or signals the thread through its handle.
    // Closing the handle releases it without stopping the running thread;
    // leaving it open would leak one handle per call.
    CloseHandle(myHandle);
}
// Gesture-recognition worker thread.
//
// Grabs frames from camera 0, segments skin-coloured regions in HSV space,
// locates fingertips on the hand contour and publishes the result through the
// file-level globals fingerSignal, centerToUnity, fingerNum and gesture.
//
//   addr  not used by this function.
//
// Returns (DWORD)-1 when the camera cannot be opened, and 0 when the loop ends
// (an empty frame was read, or the space key was pressed in an OpenCV window).
DWORD WINAPI myThread(char* addr)
{
    const int delay = 1;        // cvWaitKey timeout in milliseconds
    const int kMaxFingers = 10; // row count of the fingerSignal global

    VideoCapture captRefrnc(0);
    if (!captRefrnc.isOpened())
    {
        // Print before returning; with the statements the other way round the
        // message could never be shown.
        cout << "Opening camera failed!";
        return static_cast<DWORD>(-1);
    }
    captRefrnc.set(CV_CAP_PROP_FRAME_WIDTH, 640);
    captRefrnc.set(CV_CAP_PROP_FRAME_HEIGHT, 480);

    const char* WIN_SRC = "Source";
    const char* WIN_RESULT = "Result";
    namedWindow(WIN_SRC, CV_WINDOW_AUTOSIZE);
    namedWindow(WIN_RESULT, CV_WINDOW_AUTOSIZE);

    Mat frame;    // current camera frame
    Mat frameHSV; // frame converted to HSV
    Mat mask;     // binary skin mask
    Mat dst;      // masked output image
    vector< vector<Point> > contours;       // all contours found in the mask
    vector< vector<Point> > filterContours; // contours that passed the area filter
    vector< Vec4i > hierarchy;              // contour hierarchy from findContours

    while (true)
    {
        captRefrnc >> frame;
        if (frame.empty())
        {
            cout << " < < < Game over! > > > ";
            break;
        }
        imshow(WIN_SRC, frame);

        // Black debug canvas for contour points, palm centre, fingertips and label.
        Mat MYcontours(frame.rows, frame.cols, CV_8UC3, Scalar(0, 0, 0));

        // Median filter to remove salt-and-pepper noise.
        medianBlur(frame, frame, 5);
        // Convert to HSV, which is used for the skin segmentation below.
        cvtColor(frame, frameHSV, CV_BGR2HSV);

        // Threshold two hue bands (0-40 and 156-180) and merge them into one
        // binary image in which the bright part is the hand.
        Mat dstTemp1;
        Mat dstTemp2;
        inRange(frameHSV, Scalar(0, 30, 30), Scalar(40, 170, 256), dstTemp1);
        inRange(frameHSV, Scalar(156, 30, 30), Scalar(180, 170, 256), dstTemp2);
        bitwise_or(dstTemp1, dstTemp2, mask);

        // Morphological clean-up of the mask.
        Mat element = getStructuringElement(MORPH_RECT, Size(3, 3));
        erode(mask, mask, element);
        morphologyEx(mask, mask, MORPH_OPEN, element);
        dilate(mask, mask, element);
        morphologyEx(mask, mask, MORPH_CLOSE, element);

        // Release first so that copyTo allocates a fresh, zero-filled image;
        // otherwise pixels outside the mask would keep the previous frame's content.
        dst.release();
        frame.copyTo(dst, mask);

        contours.clear();
        hierarchy.clear();
        filterContours.clear();
        // Extract the outer contours of the mask.
        findContours(mask, contours, hierarchy, CV_RETR_EXTERNAL, CV_CHAIN_APPROX_SIMPLE);
        // Drop small contours; 3000 is the area threshold for "a hand entered the view".
        for (size_t i = 0; i < contours.size(); i++)
        {
            if (fabs(contourArea(Mat(contours[i]))) > 3000)
            {
                filterContours.push_back(contours[i]);
            }
        }
        drawContours(dst, filterContours, -1, Scalar(0, 0, 255), 3/*, 8, hierarchy*/);

        // When no contour survives, the hand analysis is skipped but the windows are
        // still refreshed and the keyboard is still polled at the bottom of the loop.
        // The published globals are left untouched in that case.
        if (!filterContours.empty())
        {
            // Mark the points of every surviving contour on the debug canvas.
            for (size_t i = 0; i < filterContours.size(); i++)
            {
                for (size_t j = 0; j < filterContours[i].size(); j++)
                {
                    const Point& p = filterContours[i][j];
                    MYcontours.at<Vec3b>(p.y, p.x) = Vec3b(255, 255, 255);
                }
            }

            // The last surviving contour is the one analysed as the hand.
            const vector<Point>& couPoint = filterContours.back();

            // Palm centre = mean of that contour's points. The sum starts from zero
            // for this contour only and is divided by the number of points.
            Point2f center(0, 0);
            for (size_t j = 0; j < couPoint.size(); j++)
            {
                center.x += couPoint[j].x;
                center.y += couPoint[j].y;
            }
            const float pointCount = static_cast<float>(couPoint.size());
            center.x = center.x / pointCount;
            center.y = center.y / pointCount;

            const Point centerINT(static_cast<int>(center.x), static_cast<int>(center.y));
            circle(MYcontours, centerINT, 15, Scalar(0, 0, 255), CV_FILLED);

            // Fingertip search: walk along the contour tracking the point farthest
            // from the palm centre; after more than 30 consecutive points that are
            // not a new maximum, evaluate the tracked point as a fingertip candidate.
            vector<Point> fingerTips;
            int _max = 0;           // largest squared distance in the current run
            int count = 0;          // points seen since the last candidate evaluation
            int notice = 0;         // contour index of the current farthest point
            int fingerCount = -100; // contour index of the last accepted fingertip
            const int contourLen = static_cast<int>(couPoint.size());
            for (int i = 0; i < contourLen; i++)
            {
                const Point tmp = couPoint[i];
                const int dist = static_cast<int>((tmp.x - center.x) * (tmp.x - center.x) + (tmp.y - center.y) * (tmp.y - center.y));
                if (dist > _max)
                {
                    _max = dist;
                    notice = i;
                }
                if (dist != _max)
                {
                    count++;
                    if (count > 30)
                    {
                        count = 0;
                        _max = 0;
                        // Reject candidates more than 40 px below the palm centre.
                        if (center.y + 40 < couPoint[notice].y)
                            continue;
                        // Accept only candidates farther than 100 px from the centre...
                        const double dx = couPoint[notice].x - center.x;
                        const double dy = couPoint[notice].y - center.y;
                        if (sqrt(dx * dx + dy * dy) > 100)
                        {
                            // ...and at least 40 contour points away from the previous tip.
                            const int fingerDistance = abs(notice - fingerCount);
                            if (fingerDistance >= 40)
                            {
                                fingerTips.push_back(couPoint[notice]);
                                circle(MYcontours, couPoint[notice], 6, Scalar(0, 255, 0), CV_FILLED);
                                //line(MYcontours, center, couPoint[notice], Scalar(0, 255, 255), 2);
                                fingerCount = notice;
                            }
                        }
                    }
                }
            }

            // Publish the result for GetGesture().
            for (int i = 0; i < kMaxFingers; i++)
            {
                fingerSignal[i][0] = 0;
                fingerSignal[i][1] = 0;
            }
            const int tipCount = static_cast<int>(fingerTips.size());
            const int published = tipCount < kMaxFingers ? tipCount : kMaxFingers;
            if (tipCount > kMaxFingers)
            {
                cout << "Out of range about array signal!" << endl;
            }
            for (int k = 0; k < published; k++)
            {
                fingerSignal[k][0] = fingerTips[k].x;
                fingerSignal[k][1] = fingerTips[k].y;
            }
            // Publish the number of rows actually stored, never more than the
            // array holds, so readers cannot be led past the end of fingerSignal.
            fingerNum = published;
            centerToUnity[0] = center.x;
            centerToUnity[1] = center.y;

            // Classify the gesture from the fingertip count; two fingertips are
            // further split by the angle between the centre-to-tip vectors.
            putText(MYcontours, "Current Gesture: ", Point(30, 30), FONT_HERSHEY_COMPLEX, 0.6, Scalar(0, 255, 0), 1);
            const char* label = NULL;
            switch (fingerTips.size())
            {
            case 0:
                label = "Stone ";
                gesture = 0;
                break;
            case 1:
                label = "1 ";
                gesture = 1;
                break;
            case 2:
            {
                const float angle = ComputeAngle(fingerTips[0].x - center.x, fingerTips[0].y - center.y, fingerTips[1].x - center.x, fingerTips[1].y - center.y);
                if (angle < 1.1)
                {
                    label = "2 ";
                    gesture = 21;
                }
                else if (angle < 1.75)
                {
                    label = "8 ";
                    gesture = 22;
                }
                else
                {
                    label = "6 ";
                    gesture = 23;
                }
                break;
            }
            case 3:
                label = "3 ";
                gesture = 3;
                break;
            case 4:
                label = "4 ";
                gesture = 4;
                break;
            case 5:
                label = "5";
                gesture = 5;
                break;
            default:
                // More than five fingertips: no label is drawn and gesture keeps its value.
                break;
            }
            if (label != NULL)
            {
                putText(MYcontours, label, Point(30, 100), FONT_HERSHEY_COMPLEX, 2, Scalar(0, 255, 255), 1);
            }
        }

        imshow("show_img", MYcontours);
        imshow(WIN_RESULT, dst);

        // cvWaitKey also drives the HighGUI windows; 32 is the space key.
        const char c = static_cast<char>(cvWaitKey(delay));
        if (c == 32) break;
    }
    return 0;
}
// Returns the angle, in radians within [0, pi], between the 2-D vectors
// (x1, y1) and (x2, y2).
//
// If either vector has zero length (or the length product is not a positive
// number) the angle is undefined; 0 is returned in that case instead of NaN.
float ComputeAngle(float x1, float y1, float x2, float y2)
{
    const float dot = x1 * x2 + y1 * y2;
    const float lengths = sqrt(x1 * x1 + y1 * y1) * sqrt(x2 * x2 + y2 * y2);
    if (!(lengths > 0.0f))
    {
        return 0.0f;
    }

    // Rounding can push the cosine slightly outside [-1, 1] for (anti)parallel
    // vectors, which would make acos return NaN; clamp it back into range.
    float cosine = dot / lengths;
    if (cosine > 1.0f)
    {
        cosine = 1.0f;
    }
    else if (cosine < -1.0f)
    {
        cosine = -1.0f;
    }
    return acos(cosine);
}
//主函数,在生成dll的过程中做测试使用。如果生成的是exe,也可以单独执行。
int main(int argc, char *argv[])
{
char* cc = "HelloWorld";
StartDevice(cc);
int Tsignal[10][2] = { 0 };
int TcenterToUnity[2] = { 0 };
int TfingerNum = 0;
int Tgesture = -1;
while (1)
{
GetGesture(Tsignal, TcenterToUnity, TfingerNum, Tgesture);
cout << endl << endl;
cout << "Tgesture:" << Tgesture << endl;
}
return 0;
}