项目的github地址:AliceShum/HololensScreenPos2WorldPos (github.com)
Hololens拍照之后,有时需要获取照片某个像素点在真实世界的坐标,比如要获取照片上的人所在的像素点对应真实世界的位置,下面是我的方法。
Hololens拍照部分,参考官方文档
把拍照时的PhotoCaptureFrame保存下来,拍照方法提供了获取Projection Matrix、Camera to World Matrix两个变换矩阵,可以通过它们将照片上的像素点转成对应unity空间的三维坐标点。
相机的两个矩阵可参考:《相机标定(二)——图像坐标与世界坐标转换》(white_Learner 的 CSDN 博客),其中有相机坐标系与世界坐标系的转换公式。
//Capture resolution of the HoloLens photo in pixels; must match the frame saved below.
public HoloLensCameraStream.Resolution resolution = new HoloLensCameraStream.Resolution(3904, 2196);
//The PhotoCaptureFrame kept from capture time; it provides the camera-to-world and projection matrices.
public PhotoCaptureFrame savedPhotoCaptureFrame;
//Size the quad from the photo dimensions; the quad carries the captured photo as its material texture,
//so with the setup below it reproduces in space exactly the view the lens saw at capture time.
//Position the quad at the point corresponding to the image's center pixel at capture time.
//Sizes, positions and orients the quad so that it reproduces in world space what the
//camera saw at capture time: scale from the corner-direction distances, position at
//camera position + center-pixel direction, rotation facing back toward the camera.
public void SetQuadSizeAndPos()
{
//todo: set the photo taken by the camera as the quad's material texture
Matrix4x4 cameraToWorldMatrix;
bool isSuccess1 = savedPhotoCaptureFrame.TryGetCameraToWorldMatrix(out cameraToWorldMatrix);
Matrix4x4 projectionMatrix;
bool isSuccess2 = savedPhotoCaptureFrame.TryGetProjectionMatrix(out projectionMatrix);
//Bail out if either matrix is unavailable (e.g. the frame could not be spatially located).
if (!isSuccess1)
{
return;
}
if (!isSuccess2)
{
return;
}
//World-space directions from the optical center through the image center and the four corners.
//NOTE(review): resolution.width / 2 is integer division — exact for even dimensions like 3904x2196.
Vector3 imageCenterDirection = LocatableCameraUtils.PixelCoordToWorldCoord(cameraToWorldMatrix, projectionMatrix, resolution, new Vector2(resolution.width / 2, resolution.height / 2));
Vector3 imageTopLeftDirection = LocatableCameraUtils.PixelCoordToWorldCoord(cameraToWorldMatrix, projectionMatrix, resolution, new Vector2(0, 0));
Vector3 imageTopRightDirection = LocatableCameraUtils.PixelCoordToWorldCoord(cameraToWorldMatrix, projectionMatrix, resolution, new Vector2(resolution.width, 0));
Vector3 imageBotLeftDirection = LocatableCameraUtils.PixelCoordToWorldCoord(cameraToWorldMatrix, projectionMatrix, resolution, new Vector2(0, resolution.height));
Vector3 imageBotRightDirection = LocatableCameraUtils.PixelCoordToWorldCoord(cameraToWorldMatrix, projectionMatrix, resolution, new Vector2(resolution.width, resolution.height));
//Size the quad to match the captured image (distances between the corner directions).
float x = Vector3.Distance(imageTopLeftDirection, imageTopRightDirection);
float y = Vector3.Distance(imageTopLeftDirection, imageBotLeftDirection);
quad.transform.localScale = new Vector3(x, y, 1);
//Place the quad at camera position + center direction; since PixelCoordToWorldCoord returns a
//direction (see note at end of file), this puts the quad roughly one unit in front of the lens.
Vector3 holoCamPos = GetHololensCameraPosByMatrix(cameraToWorldMatrix);
quad.gameObject.transform.position = imageCenterDirection + holoCamPos;
quad.transform.localEulerAngles = GetFaceHololensCameraEulerAngles(cameraToWorldMatrix);
}
//Position of the HoloLens lens at capture time: the translation part of the
//camera-to-world matrix (column 3; equals indexer elements [12],[13],[14] in
//Unity's column-major Matrix4x4 indexing).
Vector3 GetHololensCameraPosByMatrix(Matrix4x4 cameraToWorldMatrix)
{
Vector4 translation = cameraToWorldMatrix.GetColumn(3);
return new Vector3(translation.x, translation.y, translation.z);
}
//Rotation (Euler angles) of the HoloLens lens at capture time, taken from the
//rotation component of the camera-to-world matrix.
Vector3 GetHololensCameraAngleByMatrix(Matrix4x4 cameraToWorldMatrix)
{
Quaternion rotation = cameraToWorldMatrix.rotation;
return rotation.eulerAngles;
}
//Euler angles that make an object face the HoloLens lens: look along the negated
//camera forward axis (column 2), with the camera's up axis (column 1) as up.
Vector3 GetFaceHololensCameraEulerAngles(Matrix4x4 cameraToWorldMatrix)
{
Vector3 cameraForward = cameraToWorldMatrix.GetColumn(2);
Vector3 cameraUp = cameraToWorldMatrix.GetColumn(1);
Quaternion facingRotation = Quaternion.LookRotation(-cameraForward, cameraUp);
return facingRotation.eulerAngles;
}
using System.Collections;
using System.Collections.Generic;
using UnityEngine;
public static class LocatableCameraUtils
{
    /// <summary>
    /// Helper method for pixel projection into Unity3D world space.
    /// Returns a Vector3 direction from the optical center of the camera toward the given pixel.
    /// The method is based on: https://developer.microsoft.com/en-us/windows/mixed-reality/locatable_camera#pixel_to_application-specified_coordinate_system
    /// </summary>
    /// <param name="cameraToWorldMatrix">The camera to Unity world matrix.</param>
    /// <param name="projectionMatrix">Projection Matrix.</param>
    /// <param name="cameraResolution">The resolution of the image that the pixel came from.</param>
    /// <param name="pixelCoordinates">The coordinate of the pixel that should be converted to world-space.</param>
    /// <returns>Vector3 with direction: optical center toward the pixel, expressed in world space.
    /// NOTE: this is a direction with a fixed-depth z, not a depth-correct world position.</returns>
    public static Vector3 PixelCoordToWorldCoord(Matrix4x4 cameraToWorldMatrix, Matrix4x4 projectionMatrix, HoloLensCameraStream.Resolution cameraResolution, Vector2 pixelCoordinates)
    {
        pixelCoordinates = ConvertPixelCoordsToScaledCoords(pixelCoordinates, cameraResolution); // -1 to 1 coords
        //Focal lengths and principal point read straight off the projection matrix.
        float focalLengthX = projectionMatrix.GetColumn(0).x;
        float focalLengthY = projectionMatrix.GetColumn(1).y;
        float centerX = projectionMatrix.GetColumn(2).x;
        float centerY = projectionMatrix.GetColumn(2).y;
        // On Microsoft Webpage the centers are normalized; divide by m22 to match.
        float normFactor = projectionMatrix.GetColumn(2).z;
        centerX = centerX / normFactor;
        centerY = centerY / normFactor;
        //Ray direction in camera space; z is the fixed value 1/normFactor.
        Vector3 dirRay = new Vector3((pixelCoordinates.x - centerX) / focalLengthX, (pixelCoordinates.y - centerY) / focalLengthY, 1.0f / normFactor);
        //Rotate into world space by applying the upper-left 3x3 of cameraToWorldMatrix (row-vector dot products).
        Vector3 direction = new Vector3(Vector3.Dot(cameraToWorldMatrix.GetRow(0), dirRay), Vector3.Dot(cameraToWorldMatrix.GetRow(1), dirRay), Vector3.Dot(cameraToWorldMatrix.GetRow(2), dirRay));
        return direction;
    }

    /// <summary>
    /// Rotates the unit forward vector by the upper-left 3x3 of the given pose matrix,
    /// i.e. returns the pose's forward direction.
    /// </summary>
    /// <param name="pose">A pose matrix.</param>
    /// <returns>The rotated forward vector.</returns>
    public static Vector3 GetNormalOfPose(Matrix4x4 pose)
    {
        return new Vector3(Vector3.Dot(Vector3.forward, pose.GetRow(0)), Vector3.Dot(Vector3.forward, pose.GetRow(1)), Vector3.Dot(Vector3.forward, pose.GetRow(2)));
    }

    /// <summary>
    /// Builds a rotation that faces back toward the view described by the transform:
    /// looks along the negated forward axis (column 2) with the transform's up axis (column 1).
    /// </summary>
    /// <param name="viewTransform">A view (camera-to-world style) transform matrix.</param>
    /// <returns>The facing rotation.</returns>
    public static Quaternion GetRotationFacingView(Matrix4x4 viewTransform)
    {
        return Quaternion.LookRotation(-viewTransform.GetColumn(2), viewTransform.GetColumn(1));
    }

    /// <summary>
    /// Reinterprets a buffer of 16 serialized floats (64 bytes, platform byte order) as a Matrix4x4,
    /// filling elements in the same order as <see cref="ConvertFloatArrayToMatrix4x4"/> (m00, m01, ... m33).
    /// </summary>
    /// <param name="inMatrix">Raw bytes of 16 serialized floats; must be at least 64 bytes long.</param>
    /// <returns>The reconstructed matrix.</returns>
    public static Matrix4x4 BytesToMatrix(byte[] inMatrix)
    {
        //BUG FIX: the original assigned each raw byte value directly to a matrix element,
        //although per its own comment the buffer holds serialized floats ("Then convert the
        //floats to a matrix"). Reinterpret every 4 bytes as one float instead, then reuse the
        //float-array converter so the element order stays consistent.
        float[] floats = new float[16];
        System.Buffer.BlockCopy(inMatrix, 0, floats, 0, 16 * sizeof(float));
        return ConvertFloatArrayToMatrix4x4(floats);
    }

    /// <summary>
    /// Helper method for converting a flat 16-float array into UnityEngine.Matrix4x4,
    /// assigning elements in order m00, m01, m02, m03, m10, ... m33.
    /// </summary>
    /// <param name="matrixAsArray">Exactly 16 floats.</param>
    /// <returns>The populated matrix.</returns>
    public static Matrix4x4 ConvertFloatArrayToMatrix4x4(float[] matrixAsArray)
    {
        //There is probably a better way to be doing this but System.Numerics.Matrix4x4 is not available
        //in Unity and we do not include UnityEngine in the plugin.
        Matrix4x4 m = new Matrix4x4();
        m.m00 = matrixAsArray[0];
        m.m01 = matrixAsArray[1];
        m.m02 = matrixAsArray[2];
        m.m03 = matrixAsArray[3];
        m.m10 = matrixAsArray[4];
        m.m11 = matrixAsArray[5];
        m.m12 = matrixAsArray[6];
        m.m13 = matrixAsArray[7];
        m.m20 = matrixAsArray[8];
        m.m21 = matrixAsArray[9];
        m.m22 = matrixAsArray[10];
        m.m23 = matrixAsArray[11];
        m.m30 = matrixAsArray[12];
        m.m31 = matrixAsArray[13];
        m.m32 = matrixAsArray[14];
        m.m33 = matrixAsArray[15];
        return m;
    }

    /// <summary>
    /// Converts pixel coordinates to screen-space coordinates that span from -1 to 1 on both axes.
    /// This is the format that is required to determine the z-depth of a given pixel taken by the HoloLens camera.
    /// </summary>
    /// <param name="pixelCoords">The coordinate of the pixel that should be converted to screen-space.</param>
    /// <param name="resolution">The resolution of the image that the pixel came from.</param>
    /// <returns>A 2D vector with values between -1 and 1; note the y axis is flipped
    /// (pixel y grows downward, screen-space y grows upward).</returns>
    static Vector2 ConvertPixelCoordsToScaledCoords(Vector2 pixelCoords, HoloLensCameraStream.Resolution resolution)
    {
        float halfWidth = (float)resolution.width / 2f;
        float halfHeight = (float)resolution.height / 2f;
        //Translate registration to image center;
        pixelCoords.x -= halfWidth;
        pixelCoords.y -= halfHeight;
        //Scale pixel coords to percentage coords (-1 to 1); flip y so +1 is the top of the image.
        pixelCoords = new Vector2(pixelCoords.x / halfWidth, pixelCoords.y / halfHeight * -1f);
        return pixelCoords;
    }
}
namespace HoloLensCameraStream
{
    /// <summary>
    /// An immutable width/height pair describing a camera image resolution, in pixels.
    /// </summary>
    public struct Resolution
    {
        /// <summary>
        /// Image width in pixels.
        /// </summary>
        public readonly int width;

        /// <summary>
        /// Image height in pixels.
        /// </summary>
        public readonly int height;

        /// <summary>
        /// Creates a resolution with the given pixel dimensions.
        /// </summary>
        public Resolution(int width, int height)
        {
            this.width = width;
            this.height = height;
        }
    }
}
参考了github上的案例。因为它的项目有报错,我只能这样写。
注意:上面的PixelCoordToWorldCoord方法计算的Z是固定值(约等于1),因此位置不对的。最后我还是以另一种方式获得正确的深度值:打开Spatial Awareness,通过射线检测到环境网格的碰撞点。
//Depth recovery: raycast from the capture-time camera position along the pixel's
//world-space direction against the Spatial Awareness mesh to get the real hit point.
RaycastHit hit;
int layerAsLayerMask = (1 << 31); //31 --> Spatial Awareness environment-mesh layer
Vector3 ori = GetHololensCameraPosByMatrix(cameraToWorldMatrix); //HoloLens lens position at capture time
Vector3 dir = pos; //pos is the pixel you want to resolve, converted via PixelCoordToWorldCoord (a direction)
if (Physics.Raycast(ori , dir , out hit, Mathf.Infinity, layerAsLayerMask))
{
Debug.Log("最终结果:" + hit.point);
}
最后的效果:黑底的是拍到的照片,白底的是要拍照的物体。相机位置是在拍照时Hololens镜头的位置,白底上的红点是要获取的最终空间位置,黑底上的蓝色点是照片上等比分开的像素点。可以看到,蓝色点和绿色点大致对应。白色线条是模拟从镜头出发朝向某个二维像素点发射的射线,在两个人身上的部位一样。