使用TensorFlow.js进行人脸触摸检测第2部分：使用BodyPix

最新推荐文章于 2024-08-31 08:57:22 发布

寒冰屋

最新推荐文章于 2024-08-31 08:57:22 发布

阅读量1k

点赞数

分类专栏：人工智能前端文章标签： TensorFlow.js BodyPix

原文链接：https://www.codeproject.com/Articles/5272775/Face-Touch-Detection-with-TensorFlow-js-Part-2-Usi

版权

人工智能同时被 2 个专栏收录

573 篇文章 58 订阅

订阅专栏

前端

238 篇文章 4 订阅

订阅专栏

下一步是什么？我们可以使用TensorFlow.js做更多的事情吗？

下载TensorFlowJS示例-6.1 MB

TensorFlow + JavaScript。现在，最流行，最先进的AI框架支持地球上使用最广泛的编程语言，因此，让我们在我们的web浏览器中通过深度学习实现奇迹，通过TensorFlow.js的WebGL GPU加速！

这是我们六篇系列文章中的第五篇：

在上一篇文章中，我们使用TensorFlow.js训练了一个AI以模拟donottouchyourface.com应用程序，该应用程序旨在通过帮助人们学会不再触摸面部来降低患病的风险。在本文中，我们将使用BodyPix（身体部位检测和分割库）来尝试省去面部触摸检测的训练步骤。

起点

对于此项目，我们需要：

导入TensorFlow.js和BodyPix
添加视频元素
添加画布进行调试
为“触摸与无触摸”状态添加文本元素
添加网络摄像头设置功能
每200毫秒运行一次模型预测，而不是选择图像，但仅在模型首次训练之后

这是我们的起点：

<html>
    <head>
        <title>Face Touch Detection with TensorFlow.js Part 2: Using BodyPix</title>
        <script src="https://cdn.jsdelivr.net/npm/@tensorflow/tfjs@2.0.0/dist/tf.min.js"></script>
        <script src="https://cdn.jsdelivr.net/npm/@tensorflow-models/body-pix@2.0"></script>
        <style>
            img, video {
                object-fit: cover;
            }
        </style>
    </head>
    <body>
        <video autoplay playsinline muted id="webcam" width="224" height="224"></video>
        <canvas id="canvas" width="224" height="224"></canvas>
        <h1 id="status">Loading...</h1>
        <script>
        /**
         * Requests a video-only camera stream from the browser.
         *
         * Uses the promise-based navigator.mediaDevices.getUserMedia() when it is
         * available and only falls back to the deprecated, vendor-prefixed
         * callback API otherwise. The global navigator object is never modified.
         *
         * @returns {Promise<MediaStream>} Resolves with the camera stream; rejects
         *     with an Error when no camera API exists or access is refused.
         */
        function requestWebcamStream() {
            if( navigator.mediaDevices && navigator.mediaDevices.getUserMedia ) {
                return navigator.mediaDevices.getUserMedia( { video: true } );
            }
            const legacyGetUserMedia = navigator.getUserMedia ||
                navigator.webkitGetUserMedia || navigator.mozGetUserMedia ||
                navigator.msGetUserMedia;
            if( !legacyGetUserMedia ) {
                return Promise.reject( new Error( "getUserMedia is not supported by this browser" ) );
            }
            return new Promise( ( resolve, reject ) => {
                // The legacy functions must be invoked with navigator as their receiver
                legacyGetUserMedia.call( navigator, { video: true }, resolve,
                    error => reject( error instanceof Error ?
                        error : new Error( `Unable to access the webcam: ${ error }` ) ) );
            });
        }

        /**
         * Attaches the webcam stream to the #webcam video element.
         *
         * @returns {Promise<Event>} Resolves with the video element's 'loadeddata'
         *     event once the first frame is available; rejects with the reason
         *     the camera could not be opened.
         */
        async function setupWebcam() {
            const webcamElement = document.getElementById( "webcam" );
            const stream = await requestWebcamStream();
            webcamElement.srcObject = stream;
            return new Promise( resolve => {
                // once: true drops the listener after the first frame has loaded
                webcamElement.addEventListener( 'loadeddata', resolve, { once: true } );
            });
        }

        // Startup sequence: open the webcam, then run predictions repeatedly.
        (async () => {
            try {
                await setupWebcam();
            }
            catch( error ) {
                // Report the failure instead of leaving the page on "Loading..."
                console.error( "Unable to start the webcam:", error );
                document.getElementById( "status" ).innerText = "Unable to start";
                return;
            }

            const PREDICTION_DELAY_MS = 200;
            // Each run schedules the next one only after it has finished, so slow
            // predictions can never overlap the way setInterval callbacks can.
            const predictLoop = async () => {
                try {
                    await predictImage();
                }
                catch( error ) {
                    console.error( "Prediction failed:", error );
                }
                setTimeout( predictLoop, PREDICTION_DELAY_MS );
            };
            setTimeout( predictLoop, PREDICTION_DELAY_MS );
        })();

        /**
         * Placeholder for the per-frame prediction step of the starter template.
         *
         * @returns {Promise<void>} Resolves immediately; nothing is computed yet.
         */
        async function predictImage() {
            // Prediction Code Goes Here
        }
        </script>
    </body>
</html>

设置BodyPix

在加载时，BodyPix需要几个参数——您可能会认出其中一些。它支持两种不同架构的预训练模型：MobileNetV1和ResNet50。所需参数可能会因您选择的模型而异。我们将使用MobileNet并通过以下代码初始化BodyPix：

// BodyPix model instance; assigned once loading finishes and read by predictImage().
let model = null;

// Startup sequence: load BodyPix, open the webcam, then run predictions repeatedly.
(async () => {
    try {
        model = await bodyPix.load({
            architecture: 'MobileNetV1',
            outputStride: 16,
            multiplier: 0.50,
            quantBytes: 2
        });
        await setupWebcam();
    }
    catch( error ) {
        // Report the failure instead of leaving the page on "Loading..."
        console.error( "Face touch detection failed to start:", error );
        document.getElementById( "status" ).innerText = "Unable to start";
        return;
    }

    const PREDICTION_DELAY_MS = 200;
    // Each run schedules the next one only after it has finished, so slow
    // BodyPix predictions can never overlap the way setInterval callbacks can.
    const predictLoop = async () => {
        try {
            await predictImage();
        }
        catch( error ) {
            console.error( "Prediction failed:", error );
        }
        setTimeout( predictLoop, PREDICTION_DELAY_MS );
    };
    setTimeout( predictLoop, PREDICTION_DELAY_MS );
})();

检测脸部触摸

通过身体部位分割，我们从BodyPix获得了两条数据：

用二维屏幕像素坐标表示的身体部位的关键点，例如鼻子、耳朵、手腕、肘部等
以一维数组格式存储的二维分割像素数据

经过简短测试，我发现检索到的鼻子和耳朵的关键点坐标相当可靠，而一个人的手腕的点还不够准确，无法确定手是否在触摸脸部。因此，我们将使用分割像素来确定面部触摸。

因为鼻子和耳朵的关键点似乎是可靠的，所以我们可以使用它们来估计人脸的圆形区域。使用该圆形区域，我们可以确定左手或右手的分割像素是否与该区域重叠——如果重叠，则将状态标记为面部触摸。

这是我从起点模板编写predictImage()函数的方式，使用距离公式检查面部区域是否重叠：

/**
 * Decides whether any hand pixel of a BodyPix part segmentation lies inside
 * the estimated face circle. The circle is centered on the nose keypoint and
 * its radius is the larger of the two nose-to-ear distances.
 *
 * @param {Object} segmentation - Result of model.segmentPersonParts(); the
 *     fields read are `allPoses`, `data` and `width`.
 * @returns {boolean} true when a left_hand(10) or right_hand(11) pixel is
 *     strictly inside the face circle; false otherwise, including when no
 *     pose was detected.
 */
function isHandTouchingFace( segmentation ) {
    if( segmentation.allPoses.length === 0 ) {
        return false;
    }
    const keypoints = segmentation.allPoses[ 0 ].keypoints;
    const nose = keypoints[ 0 ].position;
    const earL = keypoints[ 3 ].position;
    const earR = keypoints[ 4 ].position;

    // Squared distances give the same ordering, so no square root is needed per pixel
    const squaredDistance = ( ax, ay, bx, by ) => ( ax - bx ) ** 2 + ( ay - by ) ** 2;
    const faceRadiusSquared = Math.max(
        squaredDistance( nose.x, nose.y, earL.x, earL.y ),
        squaredDistance( nose.x, nose.y, earR.x, earR.y )
    );

    // The mask is a row-major 1D array; take its row length from the result
    // itself rather than assuming a 224x224 input.
    const { data, width } = segmentation;
    for( let i = 0; i < data.length; i++ ) {
        // Check if any of the left_hand(10) or right_hand(11) pixels are within the nose to faceRadius
        if( data[ i ] !== 10 && data[ i ] !== 11 ) {
            continue;
        }
        const x = i % width;
        const y = Math.floor( i / width );
        if( squaredDistance( nose.x, nose.y, x, y ) < faceRadiusSquared ) {
            return true;
        }
    }
    return false;
}

/**
 * Runs BodyPix part segmentation on the #webcam element and writes "Touch" or
 * "Not Touch" into the #status element. When no pose is detected the status
 * is set to "Not Touch" rather than being left at its previous value.
 *
 * @returns {Promise<void>} Resolves once the status text has been updated.
 */
async function predictImage() {
    const img = document.getElementById( "webcam" );
    const segmentation = await model.segmentPersonParts( img );
    document.getElementById( "status" ).innerText =
        isHandTouchingFace( segmentation ) ? "Touch" : "Not Touch";

    // --- Uncomment the following to view the BodyPix mask ---
    // const canvas = document.getElementById( "canvas" );
    // bodyPix.drawMask(
    //     canvas, img,
    //     bodyPix.toColoredPartMask( segmentation ),
    //     0.7,
    //     0,
    //     false
    // );
}

如果您想查看BodyPix预测的像素，可以取消该功能的底部注释。

我的方法predictImage()是使用手像素的接近度进行非常粗略的估算。一个有趣的挑战可能是找到一种更准确的方法来检测人的手何时触摸了面部！

技术脚注

使用BodyPix进行面部触摸检测的一个优点是，用户无需使用不良行为示例来训练AI。
BodyPix的另一个优点是，当人的手隐藏在其后面时，它可以将前面的脸分割。
与上一篇文章中使用的方法相比，这种方法及其预测更专门针对人脸触摸动作的识别；但是，如果有足够的样本数据，第一种方法可能会得到更准确的预测
由于BodyPix的计算开销很大，预计会出现性能问题

终点线

供您参考，这是此项目的完整代码：

<html>
    <head>
        <title>Face Touch Detection with TensorFlow.js Part 2: Using BodyPix</title>
        <script src="https://cdn.jsdelivr.net/npm/@tensorflow/tfjs@2.0.0/dist/tf.min.js"></script>
        <script src="https://cdn.jsdelivr.net/npm/@tensorflow-models/body-pix@2.0"></script>
        <style>
            img, video {
                object-fit: cover;
            }
        </style>
    </head>
    <body>
        <video autoplay playsinline muted id="webcam" width="224" height="224"></video>
        <canvas id="canvas" width="224" height="224"></canvas>
        <h1 id="status">Loading...</h1>
        <script>
        /**
         * Requests a video-only camera stream from the browser.
         *
         * Uses the promise-based navigator.mediaDevices.getUserMedia() when it is
         * available and only falls back to the deprecated, vendor-prefixed
         * callback API otherwise. The global navigator object is never modified.
         *
         * @returns {Promise<MediaStream>} Resolves with the camera stream; rejects
         *     with an Error when no camera API exists or access is refused.
         */
        function requestWebcamStream() {
            if( navigator.mediaDevices && navigator.mediaDevices.getUserMedia ) {
                return navigator.mediaDevices.getUserMedia( { video: true } );
            }
            const legacyGetUserMedia = navigator.getUserMedia ||
                navigator.webkitGetUserMedia || navigator.mozGetUserMedia ||
                navigator.msGetUserMedia;
            if( !legacyGetUserMedia ) {
                return Promise.reject( new Error( "getUserMedia is not supported by this browser" ) );
            }
            return new Promise( ( resolve, reject ) => {
                // The legacy functions must be invoked with navigator as their receiver
                legacyGetUserMedia.call( navigator, { video: true }, resolve,
                    error => reject( error instanceof Error ?
                        error : new Error( `Unable to access the webcam: ${ error }` ) ) );
            });
        }

        /**
         * Attaches the webcam stream to the #webcam video element.
         *
         * @returns {Promise<Event>} Resolves with the video element's 'loadeddata'
         *     event once the first frame is available; rejects with the reason
         *     the camera could not be opened.
         */
        async function setupWebcam() {
            const webcamElement = document.getElementById( "webcam" );
            const stream = await requestWebcamStream();
            webcamElement.srcObject = stream;
            return new Promise( resolve => {
                // once: true drops the listener after the first frame has loaded
                webcamElement.addEventListener( 'loadeddata', resolve, { once: true } );
            });
        }

        // BodyPix model instance; assigned once loading finishes and read by predictImage().
        let model = null;

        // Startup sequence: load BodyPix, open the webcam, then run predictions repeatedly.
        (async () => {
            try {
                model = await bodyPix.load({
                    architecture: 'MobileNetV1',
                    outputStride: 16,
                    multiplier: 0.50,
                    quantBytes: 2
                });
                await setupWebcam();
            }
            catch( error ) {
                // Report the failure instead of leaving the page on "Loading..."
                console.error( "Face touch detection failed to start:", error );
                document.getElementById( "status" ).innerText = "Unable to start";
                return;
            }

            const PREDICTION_DELAY_MS = 200;
            // Each run schedules the next one only after it has finished, so slow
            // BodyPix predictions can never overlap the way setInterval callbacks can.
            const predictLoop = async () => {
                try {
                    await predictImage();
                }
                catch( error ) {
                    console.error( "Prediction failed:", error );
                }
                setTimeout( predictLoop, PREDICTION_DELAY_MS );
            };
            setTimeout( predictLoop, PREDICTION_DELAY_MS );
        })();

        /**
         * Decides whether any hand pixel of a BodyPix part segmentation lies inside
         * the estimated face circle. The circle is centered on the nose keypoint and
         * its radius is the larger of the two nose-to-ear distances.
         *
         * @param {Object} segmentation - Result of model.segmentPersonParts(); the
         *     fields read are `allPoses`, `data` and `width`.
         * @returns {boolean} true when a left_hand(10) or right_hand(11) pixel is
         *     strictly inside the face circle; false otherwise, including when no
         *     pose was detected.
         */
        function isHandTouchingFace( segmentation ) {
            if( segmentation.allPoses.length === 0 ) {
                return false;
            }
            const keypoints = segmentation.allPoses[ 0 ].keypoints;
            const nose = keypoints[ 0 ].position;
            const earL = keypoints[ 3 ].position;
            const earR = keypoints[ 4 ].position;

            // Squared distances give the same ordering, so no square root is needed per pixel
            const squaredDistance = ( ax, ay, bx, by ) => ( ax - bx ) ** 2 + ( ay - by ) ** 2;
            const faceRadiusSquared = Math.max(
                squaredDistance( nose.x, nose.y, earL.x, earL.y ),
                squaredDistance( nose.x, nose.y, earR.x, earR.y )
            );

            // The mask is a row-major 1D array; take its row length from the result
            // itself rather than assuming a 224x224 input.
            const { data, width } = segmentation;
            for( let i = 0; i < data.length; i++ ) {
                // Check if any of the left_hand(10) or right_hand(11) pixels are within the nose to faceRadius
                if( data[ i ] !== 10 && data[ i ] !== 11 ) {
                    continue;
                }
                const x = i % width;
                const y = Math.floor( i / width );
                if( squaredDistance( nose.x, nose.y, x, y ) < faceRadiusSquared ) {
                    return true;
                }
            }
            return false;
        }

        /**
         * Runs BodyPix part segmentation on the #webcam element and writes "Touch" or
         * "Not Touch" into the #status element. When no pose is detected the status
         * is set to "Not Touch" rather than being left at its previous value.
         *
         * @returns {Promise<void>} Resolves once the status text has been updated.
         */
        async function predictImage() {
            const img = document.getElementById( "webcam" );
            const segmentation = await model.segmentPersonParts( img );
            document.getElementById( "status" ).innerText =
                isHandTouchingFace( segmentation ) ? "Touch" : "Not Touch";

            // --- Uncomment the following to view the BodyPix mask ---
            // const canvas = document.getElementById( "canvas" );
            // bodyPix.drawMask(
            //     canvas, img,
            //     bodyPix.toColoredPartMask( segmentation ),
            //     0.7,
            //     0,
            //     false
            // );
        }
        </script>
    </body>
</html>