15.3.6 Pose Recognition
(1) Write the file EvaluationUtils.kt to implement the evaluation functionality used when testing the recognition pipeline: it checks whether a person detected in an image matches the expected result. A detection is considered correct if it falls within an acceptable error margin of the expected result. The implementation of EvaluationUtils.kt is shown below.
import android.graphics.Bitmap
import android.graphics.BitmapFactory
import android.graphics.PointF
import androidx.test.platform.app.InstrumentationRegistry
import com.google.common.truth.Truth.assertThat
import com.google.common.truth.Truth.assertWithMessage
import kotlin.math.pow

// Person and BodyPart are data types defined elsewhere in this project.
object EvaluationUtils {

    private const val ACCEPTABLE_ERROR = 10f // max 10 pixels
    private const val BITMAP_FIXED_WIDTH_SIZE = 400

    fun assertPoseDetectionResult(
        person: Person,
        expectedResult: Map<BodyPart, PointF>
    ) {
        // Check that the model is confident enough that it detected a person.
        assertThat(person.score).isGreaterThan(0.5f)

        for ((bodyPart, expectedPointF) in expectedResult) {
            val keypoint = person.keyPoints.firstOrNull { it.bodyPart == bodyPart }
            assertWithMessage("$bodyPart must exist").that(keypoint).isNotNull()

            val detectedPointF = keypoint!!.coordinate
            val distanceFromExpectedPointF = distance(detectedPointF, expectedPointF)
            assertWithMessage("Detected $bodyPart must be close to expected result")
                .that(distanceFromExpectedPointF).isAtMost(ACCEPTABLE_ERROR)
        }
    }

    /**
     * Loads an image from the assets folder by its resource name.
     * Note: the image is implicitly resized to a fixed 400px width while keeping
     * its aspect ratio. This is necessary to keep the test images consistent,
     * because different bitmap resolutions are loaded depending on the device's
     * screen size.
     */
    fun loadBitmapResourceByName(name: String): Bitmap {
        val resources = InstrumentationRegistry.getInstrumentation().context.resources
        val resourceId = resources.getIdentifier(
            name, "drawable",
            InstrumentationRegistry.getInstrumentation().context.packageName
        )
        val options = BitmapFactory.Options()
        options.inMutable = true
        return scaleBitmapToFixedSize(BitmapFactory.decodeResource(resources, resourceId, options))
    }

    private fun scaleBitmapToFixedSize(bitmap: Bitmap): Bitmap {
        val ratio = bitmap.width.toFloat() / bitmap.height
        return Bitmap.createScaledBitmap(
            bitmap,
            BITMAP_FIXED_WIDTH_SIZE,
            (BITMAP_FIXED_WIDTH_SIZE / ratio).toInt(),
            false
        )
    }

    // Euclidean distance between two points, in pixels.
    private fun distance(point1: PointF, point2: PointF): Float {
        return ((point1.x - point2.x).pow(2) + (point1.y - point2.y).pow(2)).pow(0.5f)
    }
}
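The scaling in scaleBitmapToFixedSize() is easy to verify by hand. The following standalone sketch mirrors its arithmetic for a hypothetical 800×1200 source image; the numbers are illustrative and not taken from the project's test assets:

fun main() {
    // Hypothetical source size; the project's real test images may differ.
    val srcWidth = 800
    val srcHeight = 1200
    val ratio = srcWidth.toFloat() / srcHeight       // 0.6667
    val dstWidth = 400                               // BITMAP_FIXED_WIDTH_SIZE
    val dstHeight = (dstWidth / ratio).toInt()       // (400 / 0.6667).toInt() = 600
    println("scaled size = ${dstWidth}x${dstHeight}")  // scaled size = 400x600
}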
(2) Write the file MovenetLightningTest.kt, which recognizes poses with the MoveNet model; EXPECTED_DETECTION_RESULT1 stores the expected detection result for the first test image. The implementation is shown below.
import android.content.Context
import android.graphics.PointF
import androidx.test.ext.junit.runners.AndroidJUnit4
import androidx.test.platform.app.InstrumentationRegistry
import org.junit.Before
import org.junit.Test
import org.junit.runner.RunWith

@RunWith(AndroidJUnit4::class)
class MovenetLightningTest {

    companion object {
        private const val TEST_INPUT_IMAGE1 = "image1"
        private val EXPECTED_DETECTION_RESULT1 = mapOf(
            BodyPart.NOSE to PointF(193.0462f, 87.497574f),
            BodyPart.LEFT_EYE to PointF(209.29642f, 75.67456f),
            BodyPart.RIGHT_EYE to PointF(182.6607f, 78.23213f),
            BodyPart.LEFT_EAR to PointF(239.74228f, 88.43133f),
            BodyPart.RIGHT_EAR to PointF(176.84341f, 89.485374f),
            BodyPart.LEFT_SHOULDER to PointF(253.89224f, 162.15315f),
            BodyPart.RIGHT_SHOULDER to PointF(152.12976f, 155.90091f),
            BodyPart.LEFT_ELBOW to PointF(270.097f, 260.88635f),
            BodyPart.RIGHT_ELBOW to PointF(148.23059f, 239.923f),
            BodyPart.LEFT_WRIST to PointF(275.47607f, 335.0756f),
            BodyPart.RIGHT_WRIST to PointF(142.26117f, 311.81918f),
            BodyPart.LEFT_HIP to PointF(238.68332f, 329.58127f),
            BodyPart.RIGHT_HIP to PointF(178.08572f, 331.83063f),
            BodyPart.LEFT_KNEE to PointF(260.20868f, 468.5389f),
            BodyPart.RIGHT_KNEE to PointF(141.22626f, 467.30423f),
            BodyPart.LEFT_ANKLE to PointF(273.98502f, 588.24274f),
            BodyPart.RIGHT_ANKLE to PointF(95.03668f, 597.6913f),
        )

        private const val TEST_INPUT_IMAGE2 = "image2"
        private val EXPECTED_DETECTION_RESULT2 = mapOf(
            BodyPart.NOSE to PointF(185.01096f, 86.7739f),
            BodyPart.LEFT_EYE to PointF(193.2121f, 75.5961f),
            BodyPart.RIGHT_EYE to PointF(172.3854f, 76.547386f),
            BodyPart.LEFT_EAR to PointF(204.05804f, 77.61157f),
            BodyPart.RIGHT_EAR to PointF(156.31363f, 78.961266f),
            BodyPart.LEFT_SHOULDER to PointF(219.9895f, 125.02336f),
            BodyPart.RIGHT_SHOULDER to PointF(144.1854f, 131.37856f),
            BodyPart.LEFT_ELBOW to PointF(259.59085f, 197.88562f),
            BodyPart.RIGHT_ELBOW to PointF(180.91986f, 214.5548f),
            BodyPart.LEFT_WRIST to PointF(247.00491f, 214.88852f),
            BodyPart.RIGHT_WRIST to PointF(233.76907f, 212.72563f),
            BodyPart.LEFT_HIP to PointF(219.44794f, 289.7696f),
            BodyPart.RIGHT_HIP to PointF(176.40805f, 293.85168f),
            BodyPart.LEFT_KNEE to PointF(206.05576f, 421.18146f),
            BodyPart.RIGHT_KNEE to PointF(173.7746f, 426.6271f),
            BodyPart.LEFT_ANKLE to PointF(188.79883f, 534.07745f),
            BodyPart.RIGHT_ANKLE to PointF(157.41333f, 566.5951f),
        )
    }

    private lateinit var poseDetector: PoseDetector
    private lateinit var appContext: Context

    @Before
    fun setup() {
        appContext = InstrumentationRegistry.getInstrumentation().targetContext
        poseDetector = MoveNet.create(appContext, Device.CPU, ModelType.Lightning)
    }

    @Test
    fun testPoseEstimationResultWithImage1() {
        val input = EvaluationUtils.loadBitmapResourceByName(TEST_INPUT_IMAGE1)
        // MoveNet uses the previous frame to optimize its detection result, so run
        // inference on the same image several times to improve the result.
        poseDetector.estimateSinglePose(input)
        poseDetector.estimateSinglePose(input)
        poseDetector.estimateSinglePose(input)
        val person = poseDetector.estimateSinglePose(input)
        EvaluationUtils.assertPoseDetectionResult(person, EXPECTED_DETECTION_RESULT1)
    }

    @Test
    fun testPoseEstimationResultWithImage2() {
        val input = EvaluationUtils.loadBitmapResourceByName(TEST_INPUT_IMAGE2)
        // MoveNet uses the previous frame to optimize its detection result, so run
        // inference on the same image several times to improve the result.
        poseDetector.estimateSinglePose(input)
        poseDetector.estimateSinglePose(input)
        poseDetector.estimateSinglePose(input)
        val person = poseDetector.estimateSinglePose(input)
        EvaluationUtils.assertPoseDetectionResult(person, EXPECTED_DETECTION_RESULT2)
    }
}
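These instrumented tests must run on a connected device or emulator. In Android Studio they can be launched directly from the editor gutter; from the command line, assuming the standard Android Gradle setup, ./gradlew connectedAndroidTest runs all connected-device tests. Note that the images referenced by TEST_INPUT_IMAGE1 and TEST_INPUT_IMAGE2 must exist as drawable resources in the androidTest source set, because loadBitmapResourceByName() resolves them through the instrumentation (test APK) context rather than the app under test.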
The recognition performance of this project depends largely on device capability and on the output stride, which governs the model's outputs (the heatmaps and offset vectors). The project's predictions are invariant to image scale: it predicts pose positions at the same places in the original image and in a scaled-down image. This also means we can tune the performance cost precisely. The output stride determines how much the output is scaled down relative to the input image size, and it also affects the sizes of the layers and of the model outputs: a higher output stride means a smaller network, lower-resolution output layers, and lower accuracy.
In this project the output stride can be 8, 16, or 32. In other words, an output stride of 32 gives the highest performance but the lowest accuracy, while an output stride of 8 gives the highest accuracy but the lowest performance. The value recommended in this project is 16: a higher output stride runs faster but yields lower accuracy.
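For intuition, PoseNet-style models are usually described with the relation output resolution = (input size − 1) / output stride + 1. Treat the formula and the sample numbers below as an illustrative assumption, not something taken from this project's code; the sketch just makes the stride/resolution trade-off concrete:

// Illustrative only: commonly cited PoseNet-style relation between
// input size and output resolution. Not taken from this project's code.
fun outputResolution(inputSize: Int, outputStride: Int): Int =
    (inputSize - 1) / outputStride + 1

fun main() {
    // For a 225px input: stride 8 -> 29x29, stride 16 -> 15x15, stride 32 -> 8x8.
    for (stride in listOf(8, 16, 32)) {
        val res = outputResolution(225, stride)
        println("output stride $stride -> ${res}x$res heatmaps")
    }
}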
At this point the whole project has been developed. Click the Run button at the top of Android Studio to run it, and the result will be displayed on an Android device: the camera preview appears in the upper part of the screen, and the recognition result for the camera feed is shown below it. The execution effect is shown in Figure 15-4.
Figure 15-4 Execution effect