一、背景
由于公司业务需求,需要通过摄像头识别手部姿态,并将姿态同步到手部模型。为方便驱动模型,确定通过Unity实现该功能。
二、目前效果
三、实现思路
开源识别框架有mediapipe、yolo等。由于首先调研的MediaPipeUnityPlugin框架基本能满足需求,就没有再去调研yolo。mediapipe除了手部姿态识别,还支持人体姿态识别、人脸姿态识别等。
模型原来用的Unity的优妮蒂模型,奈何mediapipe的数据点数和模型的骨骼数不对应,而且手部模型有些糙。所以选用Oculus Hand模型,该模型精度相对比较高,和mediapipe也正好能匹配上。
剩下的工作就是考虑如何将识别的数据去驱动三维模型。首先看下MediaPipeUnityPlugin获取数据的方法。在MediaPipeUnityPlugin中HandTrackingSolution.cs文件中,OnHandLandmarksOutput 方法中Landmark数组大小为21 对应着手部21个数据识别点。
模型驱动部分参考ThreeDPoseUnityBarracuda的VNectModel.cs。该类是驱动人体模型的具体实现,直接用于手部模型驱动存在一定的问题,需要进一步调整。下面的代码为VNectModel.cs的原始代码。
// Movement and rotation of the center (hip) joint.
// forward is produced by TriangleNormal from the Pos3D of the hip and the two thigh joints.
var forward = TriangleNormal(jointPoints[PositionIndex.hip.Int()].Pos3D, jointPoints[PositionIndex.lThighBend.Int()].Pos3D, jointPoints[PositionIndex.rThighBend.Int()].Pos3D);
// Hip position: the hip Pos3D scaled by 0.005, offset by initPosition (with dz added on the z axis).
jointPoints[PositionIndex.hip.Int()].Transform.position = jointPoints[PositionIndex.hip.Int()].Pos3D * 0.005f + new Vector3(initPosition.x, initPosition.y, initPosition.z + dz);
// Hip rotation: look along forward, then apply the hip's stored InverseRotation.
jointPoints[PositionIndex.hip.Int()].Transform.rotation = Quaternion.LookRotation(forward) * jointPoints[PositionIndex.hip.Int()].InverseRotation;
// Rotate each of the bones.
foreach (var jointPoint in jointPoints)
{
// Joint with a parent: the up hint (fv) is the vector from this joint to its parent.
if (jointPoint.Parent != null)
{
var fv = jointPoint.Parent.Pos3D - jointPoint.Pos3D;
// Look direction is the vector from the child to this joint.
// NOTE(review): Child is dereferenced here without a null check — confirm every joint that has a Parent also has a Child.
jointPoint.Transform.rotation = Quaternion.LookRotation(jointPoint.Pos3D - jointPoint.Child.Pos3D, fv) * jointPoint.InverseRotation;
}
// Joint without a parent but with a child: same look direction, with forward as the up hint.
else if (jointPoint.Child != null)
{
jointPoint.Transform.rotation = Quaternion.LookRotation(jointPoint.Pos3D - jointPoint.Child.Pos3D, forward) * jointPoint.InverseRotation;
}
}
该部分是手部模型驱动需要参考的关键代码,包含两部分:模型整体的位置与旋转驱动,以及每根骨骼的旋转驱动。
四、关键代码
修改后的VNectModel.cs,适配手部模型驱动
using System.Collections;
using System.Collections.Generic;
using Unity.VisualScripting.Antlr3.Runtime.Tree;
using Unity.VisualScripting;
using UnityEngine;
using UnityEngine.UIElements;
using static LeftCtrl;
using static LeftCtrlDemo;
using System;
/// <summary>
/// Index of each tracked hand joint inside the joint-point array.
/// </summary>
/// <remarks>
/// Hand plus four entries per finger (Thumb, Index, Mid, Ring, Lit) gives 21 joints,
/// numbered 0..20. Count is not a joint: it equals the number of joints (21) and is
/// used to size the joint-point array.
/// NOTE(review): "Lit" presumably stands for the little finger — confirm against the rig.
/// </remarks>
public enum PositionIndex : int
{
    // Legacy full-body indices from the original body-tracking enum.
    // Kept commented out for reference only; none of them are part of this enum.
    //rShldrBend = 0, // right shoulder bend
    //rForearmBend,   // right forearm bend
    //rHand,          // hand
    //rThumb2,        // thumb
    //rMid1,          // middle finger
    //lShldrBend,
    //lForearmBend,
    //lHand,
    //lThumb2,
    //lMid1,
    //lEar,
    //lEye,
    //rEar,
    //rEye,
    //Nose,
    //rThighBend,     // thigh
    //rShin,          // shin
    //rFoot,          // foot
    //rToe,           // toe
    //lThighBend,
    //lShin,
    //lFoot,
    //lToe,
    //abdomenUpper,   // upper abdomen
    // Calculated coordinates
    //hip,            // hip
    //head,           // head
    //neck,           // neck
    //spine,          // spine
    //Count,          // number of entries
    //None,

    // Hand joints. Values are implicit and sequential starting at 0.
    Hand = 0,
    Thumb0,
    Thumb1,
    Thumb2,
    Thumb3,
    Index0,
    Index1,
    Index2,
    Index3,
    Mid0,
    Mid1,
    Mid2,
    Mid3,
    Ring0,
    Ring1,
    Ring2,
    Ring3,
    Lit0,
    Lit1,
    Lit2,
    Lit3,

    // Number of joints above; must stay last.
    Count
}
/// <summary>
/// Extension helpers for converting enum values to numbers.
/// </summary>
public static partial class EnumExtend
{
    /// <summary>
    /// Returns the underlying integer value of a <see cref="PositionIndex"/> member.
    /// </summary>
    /// <param name="i">The enum member to convert.</param>
    /// <returns>The member cast to <c>int</c>.</returns>
    public static int Int(this PositionIndex i) => (int)i;
}
public class LeftCtrl : MonoBehaviour
{
// Unity callback invoked before the first frame update; intentionally does nothing.
void Start() { }
/// <summary>
/// Per-joint state: estimated positions and scores, the bone transform the joint
/// is bound to, links to neighbouring joints, and Kalman-filter working values.
/// </summary>
public class JointPoint
{
    // 2D estimate and its score.
    public Vector2 Pos2D = Vector2.zero;
    public float score2D;

    // 3D estimates and their score; PrevPos3D holds six slots.
    public Vector3 Pos3D = Vector3.zero;
    public Vector3 Now3D = Vector3.zero;
    public Vector3[] PrevPos3D = new Vector3[6];
    public float score3D;

    // Bones: the bound transform and the rotations stored for it.
    public Transform Transform;
    public Quaternion InitRotation;
    public Quaternion Inverse;
    public Quaternion InverseRotation;

    // Neighbouring joints; both links start out unset (null).
    public JointPoint Child;
    public JointPoint Parent;

    // For Kalman filter
    public Vector3 P = Vector3.zero;
    public Vector3 X = Vector3.zero;
    public Vector3 K = Vector3.zero;
}
// Joint state array; Init allocates it with PositionIndex.Count entries.
private JointPoint[] jointPoints;
private Vector3 initPosition; // Initial center position
// Read-only accessor for the joint array (null until jointPoints has been assigned).
public JointPoint[] JointPoints { get { return jointPoints; } }
// NOTE(review): centerTall, tall and prevTall are not referenced in the visible part of this file — confirm they are still needed.
private float centerTall = 224 * 0.75f;
private float tall = 224 * 0.75f;
private float prevTall = 224 * 0.75f;
// NOTE(review): ZScale is public with a default of 0.8 and is not used in the visible code — presumably a depth scale; confirm.
public float ZScale = 0.8f;
// NOTE(review): anim is only referenced from commented-out code in the visible part of Init — confirm it is still needed.
private Animator anim;
// Unity per-frame callback: drives the pose once the joint array exists.
void Update()
{
    // Nothing to drive until jointPoints has been assigned.
    if (jointPoints == null)
    {
        return;
    }

    PoseUpdate();
}
/// <summary>
/// Initialize joint points
/// </summary>
/// <returns></returns>
public JointPoint[] Init()
{
jointPoints = new JointPoint[PositionIndex.Count.Int()];
for (var i = 0; i < PositionIndex.Count.Int(); i++) jointPoints[i] = new JointPoint();
List<SkinnedMeshRenderer> skinnedRenderers = new List<SkinnedMeshRenderer>();
SkinnedMeshRenderer[] renderers = gameObject.GetComponentsInChildren<SkinnedMeshRenderer>(true);
foreach (var renderer in renderers)
{
if (renderer != null && renderer.bones != null)
{
skinnedRenderers.Add(renderer);
}
}
//anim =gameObject.GetComponent<Animator>();
//anim.GetBoneTransform();
// 输出每个 Skinned Mesh Renderer 上的骨骼信息
jointPoints[PositionIndex.Hand.Int()].Transform = skinnedRenderers[0].bones[1].transform;
jointPoints[PositionIndex.Thumb0.Int()].Transform = skinnedRenderers[0].bones[2].transform;
//jointPoints[PositionIndex.Thumb0.Int()].Transform = skinnedRenderers[0].bones[1].GetChild(6).transform;
jointPoints[PositionIndex.Thumb1.Int()].Transform = skinnedRenderers[0].bones[3].transform;
jointPoints[PositionIndex.Thumb2.Int()].Transform = skinnedRenderers[0].bones[5].transform;
jointPoints[PositionIndex.Thumb3.Int()].Transform = skinnedRenderers[0].bones[6].transform;
jointPoints[PositionIndex.Index0.Int()].Transform = skinnedRenderers[0].bones[7].transform;
jointPoints[PositionIndex.Index1.Int()].Transform = skinnedRenderers[0].bones[8].transform;
jointPoints[PositionIndex.Index2.Int()].Transform = skinnedRenderers[0].bones[9].transform;
jointPoints[PositionIndex.Index3.Int()].Transform = skinnedRenderers[0].bones[10].transform;
jointPoints[PositionIndex.Mid0.Int()].Transform = skinnedRenderers[0].bones[11].transform;
jointPoints[PositionIndex.Mid1.Int()].Transform = skinnedRenderers[0].bones[12].transform;
jointPoints[PositionIndex.Mid2.Int()].Transform = skinnedRenderers[0].bones[13].transform;
jointPoints[PositionIndex.Mid3.Int()].Transform = skinnedRenderers[0].bones[14].transform;
jointPoints[PositionIndex.Ring0.Int()].Transform = skinnedRenderers[0].bones[15].transform;
jointPoints[PositionIndex.Ring1.Int()].Transform = skinnedRenderers[0].bones[16].transform;
jointPoints[PositionIndex.Ring2.Int()].Transform = skinnedRenderers[0].bones[17].transform;
jointPoints[PositionIndex.Ring3.Int()].Transform = skinnedRenderers[0].bones[18].transform;
jointPoints[PositionIndex.Lit0.Int()].Transform =