使用 Unity Sentis 和 Compute Shader，det_10g.onnx 进行高效人脸五官定位

大俊哥

已于 2024-05-31 17:27:14 修改

阅读量1.8k

点赞数 30

文章标签： unity 游戏引擎

于 2024-05-30 16:06:22 首次发布

本文链接：https://blog.csdn.net/m0_55632444/article/details/139326330

版权

前言

在计算机视觉领域，人脸五官定位是一个重要的任务。本文将介绍如何使用 Unity Sentis 和 Compute Shader，结合 det_10g.onnx 模型，实现高效的人脸五官定位。我们将详细讲解每一步骤，并提供完整的代码示例。

模型分析

输入值：

模型的输入尺寸我这里选择的是 1x3x640x640。

输出值：步长8, 16, 32 的三组数据

（448，451，454）：这是步长为8的一组数据，448（12800x1=>1x80x80x2 ）,步长为8，高宽640，640/8=80，每行80个预测框，共80行。通过insightface的源码可以看到，num_anchors = 2，每个位置的目标框是两组,正常来说是黑白图两种，既然是同一个位置，那么可以合并一起，意思就是有2张图，每张图大小是80x80，有这么多分值。

451：bboxes：1x8x80x80，每一个分数对应一个矩形框的四个值 (x1, y1, x2, y2)。*注意这些值是相对于锚点中心的距离，还需要换算成实际坐标。这里 1x8 中第 1~4 个通道是一个矩形框的值，第 5~8 个通道是另一张图的矩形框的值，就是黑白图。

454：kps：1x20x80x80，每一个分数对应的五官坐标点 (x, y)。*注意这些值同样是相对于锚点中心的偏移，还需要换算成实际坐标。这里第 1~10 个通道是一组坐标点，第 11~20 个通道是另外一张图的一组坐标点，分开计算就行。

（471，474，477）：这是步长为16的一组数据，与上同理。

（494，497，500）：这是步长为32的一组数据，与上同理。

代码示例（待优化）：

using System;
using System.Collections.Generic;
using Unity.Mathematics;
using Unity.Sentis;
using UnityEngine;
using System.Linq;

/// <summary>
/// Runs a RetinaFace-style detector (det_10g.onnx) through Unity Sentis and
/// post-processes the result with two compute shaders.
///
/// The ONNX model is wrapped in a functional graph that decodes the raw
/// per-stride predictions into scores, boxes and five 2D keypoints.
/// The decoded tensors are then converted to textures, filtered by
/// <see cref="postprocess1"/> / <see cref="postprocess2"/>, and the first
/// surviving record is pushed into <see cref="testMat"/> for visualisation.
///
/// NOTE(review): the pipeline is hard-wired to a 640x640 network input
/// (16800 anchors, laid out as 210x80); <see cref="textureWidth"/> and
/// <see cref="textureHeight"/> are not consulted by the code below.
/// </summary>
public class Retinaface : MonoBehaviour
{
    public ModelAsset modelAsset;
    public Model model;
    private IWorker worker;
    private GPUComputeBackend gpu;

    public int textureWidth = 640, textureHeight = 640;

    // Feature-map strides of the three detection heads.
    private int[] feat_stride_fpn = new int[] {8, 16, 32};
    // Anchor-centre tensors keyed by (height, width, stride).
    public Dictionary<(int, int, int), FunctionalTensor> center_cache = new Dictionary<(int, int, int), FunctionalTensor>();
    private int _num_anchors = 2;
    private FunctionalTensor anchor_centers;

    // Decoded boxes/keypoints are divided by this value.
    // NOTE(review): 2.5 == 640 / 256, which matches the "/256" normalisation
    // in Detect — presumably the source image is 256 px wide; confirm.
    private float det_scale = 2.5f;

    public ComputeShader postprocess1;
    private RenderTexture scoresRT;
    private RenderTexture boxesRT;
    private RenderTexture kpssRT1;
    private RenderTexture kpssRT2;
    private RenderTexture kpssRT3;
    private RenderTexture kpssRT4;
    private RenderTexture kpssRT5;
    private ComputeBuffer post1;

    public ComputeShader postprocess2;
    private ComputeBuffer post2;
    private ComputeBuffer counter;

    // The four members below were referenced but never declared in the
    // original listing, which prevented the class from compiling.
    // Image fed to the detector every frame and shown through testMat.
    public Texture t2d;
    // NOTE(review): assumed to be a Shader because it is only used as the
    // argument of `new Material(...)`; switch to Material if that was intended.
    public Shader _visualizer;
    private Material _material;
    private ComputeBuffer _drawArgs;

    /// <summary>
    /// Allocates GPU buffers, loads the model, compiles the decoding graph
    /// around it, creates the worker and runs a first detection.
    /// </summary>
    private void Start()
    {
        InitBuffer();

        model = ModelLoader.Load(modelAsset);
        gpu = new GPUComputeBackend();

        var model2 = Functional.Compile(input =>
            {
                List<FunctionalTensor> scores_list = new List<FunctionalTensor>();
                List<FunctionalTensor> bboxes_list = new List<FunctionalTensor>();
                List<FunctionalTensor> kpss_list = new List<FunctionalTensor>();
                var outputs = model.Forward(input);

                // Iterate over the strides; each stride owns three consecutive
                // outputs: scores, box distances, keypoint distances.
                for (int i = 0; i < feat_stride_fpn.Length; i++)
                {
                    int stride = feat_stride_fpn[i];

                    var scores = outputs[i * 3];
                    // Predictions are expressed in stride units; scale to pixels.
                    var bbox_preds = outputs[i * 3 + 1] * stride;
                    var kps_preds = outputs[i * 3 + 2] * stride;

                    int height = 640 / stride;
                    int width = 640 / stride;

                    var key = (height, width, stride);
                    if (center_cache.TryGetValue(key, out var cachedCenters))
                    {
                        anchor_centers = cachedCenters;
                    }
                    else
                    {
                        // Build the coordinate grid.
                        var range_X = Functional.ARange(0, height);

                        var range_640_x = range_X.Unsqueeze(-1).BroadcastTo(new[] {height});
                        var range_640_y = range_640_x.Transpose(0, 1);

                        // (n, n, 2)
                        anchor_centers = Functional.Stack(new[] {range_640_y, range_640_x}, 2);
                        // (n*n, 2)
                        anchor_centers = (anchor_centers * stride).Reshape(new[] {-1, 2});
                        // Duplicate every centre for the two anchors per cell:
                        // (n*n*2, 2) -> (12800, 2) / (3200, 2) / (800, 2)
                        anchor_centers = Functional.Concat(new[] {anchor_centers, anchor_centers}, 1).Reshape(new[] {-1, 2});

                        if (center_cache.Count < 100)
                        {
                            center_cache[key] = anchor_centers;
                        }
                    }

                    // bbox_preds holds the distances from the anchor centre to the
                    // left / top / right / bottom edges; the result is
                    // (n, 4) = (x1, y1, x2, y2).
                    var bboxes = distance2bbox(anchor_centers, bbox_preds);

                    scores_list.Add(scores);
                    bboxes_list.Add(bboxes);

                    // (n, 10), e.g. (12800, 10) for stride 8.
                    var kpss = distance2kps(anchor_centers, kps_preds);

                    kpss_list.Add(kpss);
                }

                // (n, 1)
                var scores_vstack = Functional.Concat(scores_list.ToArray(), 0);
                // (n, 4)
                var boxes_vstack = Functional.Concat(bboxes_list.ToArray(), 0) / det_scale;
                // (n, 10)
                var kpss_vstack = Functional.Concat(kpss_list.ToArray(), 0) / det_scale;

                // Non-maximum suppression inside the graph had no visible effect
                // (reason unknown), so it stays disabled and suppression is left
                // to the compute-shader passes.
                /*var indices = Functional.NMS(boxes_vstack, scores_vstack.Transpose(0, 1), 0.5f);

                var output_scores = Functional.Gather(scores_vstack ,0,indices);
                var output_boxes  = Functional.Gather(boxes_vstack,0,indices);
                var output_kpss   = Functional.Gather(kpss_vstack,0,indices); */

                return (scores_vstack, boxes_vstack, kpss_vstack);
            },
            InputDef.FromModel(model)[0]
            );

        worker = WorkerFactory.CreateWorker(BackendType.GPUCompute, model2);

        Detect(t2d);
    }

    /// <summary>Runs the detector on <see cref="t2d"/> every frame.</summary>
    private void Update()
    {
        Detect(t2d);
    }

    /// <summary>
    /// Allocates the compute buffers and the visualiser material.
    /// The render textures are not allocated here: Detect replaces every one
    /// of them with the texture returned by TextureConverter.ToTexture before
    /// first use, so pre-allocating them only leaked GPU memory.
    /// </summary>
    void InitBuffer()
    {
        // One record is 15 floats (see tempBox); room for 512 records.
        post1 = new ComputeBuffer(512, 4 * 15);
        post2 = new ComputeBuffer(512, 4 * 15, ComputeBufferType.Append);
        counter = new ComputeBuffer(1, sizeof(uint), ComputeBufferType.Counter);

        _material = new Material(_visualizer);

        _drawArgs = new ComputeBuffer(4, sizeof(uint),
            ComputeBufferType.IndirectArguments);
        _drawArgs.SetData(new int[] {6, 0, 0, 0});
    }

    /// <summary>
    /// Converts edge distances into corner coordinates.
    /// </summary>
    /// <param name="points">Anchor centres; column 0 is x, column 1 is y.</param>
    /// <param name="distance">Distances to the left, top, right and bottom edges (columns 0..3).</param>
    /// <returns>Tensor with (x1, y1, x2, y2) stacked along the last axis.</returns>
    FunctionalTensor distance2bbox(FunctionalTensor points, FunctionalTensor distance)
    {
        FunctionalTensor x1 = points[.., 0] - distance[.., 0];
        FunctionalTensor y1 = points[.., 1] - distance[.., 1];
        FunctionalTensor x2 = points[.., 0] + distance[.., 2];
        FunctionalTensor y2 = points[.., 1] + distance[.., 3];

        return Functional.Stack(new[] {x1, y1, x2, y2}, -1);
    }

    /// <summary>
    /// Converts five (dx, dy) keypoint offsets into absolute coordinates.
    /// </summary>
    /// <param name="points">Anchor centres; column 0 is x, column 1 is y.</param>
    /// <param name="distance">Ten columns: (dx, dy) for each of the five keypoints.</param>
    /// <returns>Tensor with the ten absolute coordinates stacked along the last axis.</returns>
    FunctionalTensor distance2kps(FunctionalTensor points, FunctionalTensor distance)
    {
        List<FunctionalTensor> preds = new List<FunctionalTensor>();
        int[] range = new[] {0, 2, 4, 6, 8};
        foreach (var i in range)
        {
            // i is always even, so i % 2 == 0 selects x and i % 2 + 1 selects y.
            FunctionalTensor px = points[.., i % 2] + distance[.., i];
            FunctionalTensor py = points[.., i % 2 + 1] + distance[.., i + 1];
            preds.Add(px);
            preds.Add(py);
        }

        return Functional.Stack(preds.ToArray(), -1);
    }

    // Scratch array for one record read back from post2:
    // [0..3] box corners, [5..14] five keypoints; index 4 is not read here.
    private float[] tempBox = new float[15];

    public Material testMat;

    /// <summary>
    /// Runs the network on <paramref name="source"/>, filters the detections on
    /// the GPU and writes the first surviving box and keypoints to
    /// <see cref="testMat"/>.
    /// </summary>
    /// <param name="source">Image to analyse; it is resampled to 640x640x3.</param>
    void Detect(Texture source)
    {
        using (var input = TextureConverter.ToTensor(source, 640, 640, 3))
        {
            worker.Execute(input);
        }

        // NOTE(review): these tensors come from PeekOutput, so the worker may
        // own them; disposing them here is kept from the original code but
        // should be checked against the Sentis ownership rules.
        using var scores = worker.PeekOutput("output_0") as TensorFloat;
        using var boxes = worker.PeekOutput("output_1") as TensorFloat;
        using var kpss = worker.PeekOutput("output_2") as TensorFloat;

        // 16800 anchors (12800 + 3200 + 800) viewed as a 210x80 image.
        scores.Reshape(new TensorShape(1, 210, 80, 1));
        boxes.Reshape(new TensorShape(1, 210, 80, 4));
        kpss.Reshape(new TensorShape(1, 210, 80, 10));

        // Split the keypoint channels into five tensors (one per keypoint).
        // These are per-call allocations and must be disposed, otherwise
        // five GPU tensors leak on every frame.
        using TensorFloat kpss_1 = TensorFloat.AllocNoData(new TensorShape(1, 210, 80, 2));
        using TensorFloat kpss_2 = TensorFloat.AllocNoData(new TensorShape(1, 210, 80, 2));
        using TensorFloat kpss_3 = TensorFloat.AllocNoData(new TensorShape(1, 210, 80, 2));
        using TensorFloat kpss_4 = TensorFloat.AllocNoData(new TensorShape(1, 210, 80, 2));
        using TensorFloat kpss_5 = TensorFloat.AllocNoData(new TensorShape(1, 210, 80, 2));
        gpu.Slice(kpss, kpss_1, new[] {0}, new[] {3}, new[] {1});
        gpu.Slice(kpss, kpss_2, new[] {2}, new[] {3}, new[] {1});
        gpu.Slice(kpss, kpss_3, new[] {4}, new[] {3}, new[] {1});
        gpu.Slice(kpss, kpss_4, new[] {6}, new[] {3}, new[] {1});
        gpu.Slice(kpss, kpss_5, new[] {8}, new[] {3}, new[] {1});

        // Move the channel axis to position 1 before the texture conversion.
        using TensorFloat tagetT = TensorFloat.AllocNoData(new TensorShape(1, 1, 80, 210));
        gpu.Transpose(scores, tagetT, new int[] {0, 3, 1, 2});

        using TensorFloat tagetT1 = TensorFloat.AllocNoData(new TensorShape(1, 4, 80, 210));
        gpu.Transpose(boxes, tagetT1, new int[] {0, 3, 1, 2});

        using TensorFloat tagetT2 = TensorFloat.AllocNoData(new TensorShape(1, 2, 80, 210));
        gpu.Transpose(kpss_1, tagetT2, new int[] {0, 3, 1, 2});

        using TensorFloat tagetT3 = TensorFloat.AllocNoData(new TensorShape(1, 2, 80, 210));
        gpu.Transpose(kpss_2, tagetT3, new int[] {0, 3, 1, 2});

        using TensorFloat tagetT4 = TensorFloat.AllocNoData(new TensorShape(1, 2, 80, 210));
        gpu.Transpose(kpss_3, tagetT4, new int[] {0, 3, 1, 2});

        using TensorFloat tagetT5 = TensorFloat.AllocNoData(new TensorShape(1, 2, 80, 210));
        gpu.Transpose(kpss_4, tagetT5, new int[] {0, 3, 1, 2});

        using TensorFloat tagetT6 = TensorFloat.AllocNoData(new TensorShape(1, 2, 80, 210));
        gpu.Transpose(kpss_5, tagetT6, new int[] {0, 3, 1, 2});

        // ToTexture hands back a new RenderTexture on every call, so the
        // texture from the previous frame is released before being replaced.
        ReplaceTexture(ref scoresRT, TextureConverter.ToTexture(tagetT, 210, 80, 1));
        ReplaceTexture(ref boxesRT, TextureConverter.ToTexture(tagetT1, 210, 80, 4));
        ReplaceTexture(ref kpssRT1, TextureConverter.ToTexture(tagetT2, 210, 80, 2));
        ReplaceTexture(ref kpssRT2, TextureConverter.ToTexture(tagetT3, 210, 80, 2));
        ReplaceTexture(ref kpssRT3, TextureConverter.ToTexture(tagetT4, 210, 80, 2));
        ReplaceTexture(ref kpssRT4, TextureConverter.ToTexture(tagetT5, 210, 80, 2));
        ReplaceTexture(ref kpssRT5, TextureConverter.ToTexture(tagetT6, 210, 80, 2));

        post2.SetCounterValue(0);
        counter.SetCounterValue(0);

        // Pass 1: score thresholding; candidates are written to post1.
        postprocess1.SetTexture(0, "Scores", scoresRT);
        postprocess1.SetTexture(0, "Boxes", boxesRT);
        postprocess1.SetTexture(0, "kpss_1", kpssRT1);
        postprocess1.SetTexture(0, "kpss_2", kpssRT2);
        postprocess1.SetTexture(0, "kpss_3", kpssRT3);
        postprocess1.SetTexture(0, "kpss_4", kpssRT4);
        postprocess1.SetTexture(0, "kpss_5", kpssRT5);

        postprocess1.SetInts("InputSize", 210, 80);
        postprocess1.SetFloat("Threshold", 0.3f);
        postprocess1.SetBuffer(0, "Output", post1);
        postprocess1.SetBuffer(0, "OutputCount", counter);
        // NOTE(review): group counts presumably match a (16, 4, 1) numthreads
        // declaration in the shader — confirm against postprocess1.
        postprocess1.Dispatch(0, (boxesRT.width + 14) / 16, boxesRT.height / 4, 1);

        // Pass 2: consumes post1 and appends the surviving records to post2.
        postprocess2.SetFloat("Threshold", 0.5f);
        postprocess2.SetBuffer(0, "Input", post1);
        postprocess2.SetBuffer(0, "InputCount", counter);
        postprocess2.SetBuffer(0, "Output", post2);
        postprocess2.Dispatch(0, 1, 1, 1);

        // Synchronous readback into the 15-float scratch array.
        post2.GetData(tempBox);

        // Normalise by 256 and flip y for the material.
        testMat.SetTexture("_MainTex", t2d);
        testMat.SetVector("_leftPos", new Vector2(tempBox[0] / 256, 1 - tempBox[1] / 256));
        testMat.SetVector("_rightPos", new Vector2(tempBox[2] / 256, 1 - tempBox[3] / 256));

        testMat.SetVector("_rightEye", new Vector2(tempBox[5] / 256, 1 - tempBox[6] / 256));
        testMat.SetVector("_leftEye", new Vector2(tempBox[7] / 256, 1 - tempBox[8] / 256));
        testMat.SetVector("_nose", new Vector2(tempBox[9] / 256, 1 - tempBox[10] / 256));
        testMat.SetVector("_rightMouse", new Vector2(tempBox[11] / 256, 1 - tempBox[12] / 256));
        testMat.SetVector("_leftMouse", new Vector2(tempBox[13] / 256, 1 - tempBox[14] / 256));
    }

    /// <summary>
    /// Stores <paramref name="replacement"/> in <paramref name="slot"/> after
    /// releasing whatever texture the slot held before.
    /// </summary>
    private static void ReplaceTexture(ref RenderTexture slot, RenderTexture replacement)
    {
        ReleaseTexture(slot);
        slot = replacement;
    }

    /// <summary>Releases the GPU memory of a render texture and destroys the object; null is ignored.</summary>
    private static void ReleaseTexture(RenderTexture texture)
    {
        if (texture == null)
        {
            return;
        }

        texture.Release();
        Destroy(texture);
    }

    /// <summary>
    /// Releases every GPU resource owned by this component. All members are
    /// null-checked so a partially failed Start does not throw here.
    /// </summary>
    private void OnDestroy()
    {
        gpu?.Dispose();
        worker?.Dispose();

        post1?.Dispose();
        post2?.Dispose();
        counter?.Dispose();
        _drawArgs?.Dispose();

        ReleaseTexture(scoresRT);
        ReleaseTexture(boxesRT);
        ReleaseTexture(kpssRT1);
        ReleaseTexture(kpssRT2);
        ReleaseTexture(kpssRT3);
        ReleaseTexture(kpssRT4);
        ReleaseTexture(kpssRT5);

        if (_material != null)
        {
            Destroy(_material);
        }
    }
}