Reading the PTZ-SLAM Source Code: ptz_camera.py

compute_displacement

    def compute_dispalcement(self):
        """
        Displacement between the projection center and the rotation center.
        :return:
        """
        fl = self.focal_length
        wt = self.displacement  # six parameters wt[0] ... wt[5]
        return np.array([wt[0] + wt[3] * fl,
                         wt[1] + wt[4] * fl,
                         wt[2] + wt[5] * fl])

 I can't really explain this one; if anyone knows, please leave a comment.
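One tentative reading, judging only from the code: self.displacement holds six parameters w_0 ... w_5 (wt[0] ... wt[5] above), and the offset between the projection center and the rotation (tripod) center is modeled as a linear function of the focal length,

\mathbf{d}(f)=\left[\begin{array}{c} w_{0}+w_{3} f \\ w_{1}+w_{4} f \\ w_{2}+w_{5} f \end{array}\right]

presumably because the optical center physically shifts along the lens axis as the camera zooms.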

recompute_matrix 

    def recompute_matrix(self):
        """
        compute 3 x 4 projection matrix
        :return:
        """

        K = np.array([[self.focal_length, 0, self.principal_point[0]],
                      [0, self.focal_length, self.principal_point[1]],
                      [0, 0, 1]])
        rotation = self.compute_rotation_matrix()
        cc = np.identity(4)  # 4 x 4 translation by -camera_center (moves the world origin to the camera center)
        cc[0][3] = -self.camera_center[0]
        cc[1][3] = -self.camera_center[1]
        cc[2][3] = -self.camera_center[2]

        R = np.identity(4)  # rotation embedded in a 4 x 4 homogeneous matrix
        R[0:3, 0:3] = rotation

        disp = self.compute_dispalcement()
        disp_mat = np.eye(3, 4)  # becomes [I | d], d = displacement from compute_dispalcement()
        disp_mat[0][3] = disp[0]
        disp_mat[1][3] = disp[1]
        disp_mat[2][3] = disp[2]

        self.projection_matrix = np.dot(np.dot(K, disp_mat), np.dot(R, cc))

In this function,

cc = np.identity(4)

returns an n x n array with ones on the main diagonal and zeros elsewhere, while

disp_mat = np.eye(3, 4)

calls numpy.eye, whose signature is numpy.eye(N, M=None, k=0, dtype=<class 'float'>, order='C').

It returns a 2-D array of shape (N, M) with ones on the k-th diagonal and zeros elsewhere (a small example follows the parameter list).

Parameters:

(1) N: int, the number of rows of the output.

(2) M: int, optional, the number of columns of the output; defaults to N if not given.

(3) k: int, optional, index of the diagonal: 0 (the default) refers to the main diagonal, a negative value to a lower diagonal, and a positive value to an upper diagonal.

(4) dtype: data type, optional, the data type of the returned array.

(5) order: {'C', 'F'}, optional, whether the output is stored in row-major (C-style, 'C') or column-major (Fortran-style, 'F') order in memory.
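For example:

    >>> import numpy as np
    >>> np.eye(3, 4)
    array([[1., 0., 0., 0.],
           [0., 1., 0., 0.],
           [0., 0., 1., 0.]])

So disp_mat starts out as [I | 0], and recompute_matrix then fills its last column with the displacement vector.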

self.projection_matrix = np.dot(np.dot(K, disp_mat), np.dot(R, cc))

If the displacement is taken to be zero, this is exactly the familiar projection matrix K[R | -RC].
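As a quick sanity check, here is a minimal numpy sketch (with made-up intrinsics, rotation and camera center, purely for illustration) showing that the factored form used in recompute_matrix collapses to K[R | -RC] when the displacement is zero:

    import numpy as np

    # Made-up intrinsics, rotation and camera center, for illustration only.
    f, cx, cy = 3000.0, 640.0, 360.0
    K = np.array([[f, 0, cx],
                  [0, f, cy],
                  [0, 0, 1]])
    a = 0.1  # any rotation works; here a small rotation about the y-axis
    rotation = np.array([[np.cos(a), 0, -np.sin(a)],
                         [0, 1, 0],
                         [np.sin(a), 0, np.cos(a)]])
    C = np.array([10.0, 2.0, -5.0])

    # Rebuild the factors exactly as recompute_matrix does, with zero displacement.
    cc = np.identity(4)
    cc[0:3, 3] = -C
    R = np.identity(4)
    R[0:3, 0:3] = rotation
    disp_mat = np.eye(3, 4)  # displacement left at zero

    P_factored = np.dot(np.dot(K, disp_mat), np.dot(R, cc))
    P_classic = np.dot(K, np.hstack([rotation, (-np.dot(rotation, C)).reshape(3, 1)]))

    print(np.allclose(P_factored, P_classic))  # True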

project_ray

    def project_ray(self, ray):
        """
        Project a ray in tripod coordinates to an image point.
        :param ray: an array, tuple or list of length 2, (theta, phi) in degrees
        :return: projected image point, tuple (2)
        """
        theta = math.radians(ray[0])
        phi = math.radians(ray[1])

        K = self.compute_camera_matrix()

        pan_tilt_rotation = np.dot(self.compute_tilt_matrix(), self.compute_pan_matrix())
        disp = self.compute_dispalcement()

        # ray direction in the tripod coordinate system (see the formula from the paper below)
        ray_p = np.array([math.tan(theta), -math.tan(phi) * math.sqrt(math.tan(theta) * math.tan(theta) + 1), 1])

        img_p = np.dot(K, np.dot(pan_tilt_rotation, ray_p) + disp)
        assert img_p[2] != 0.0

        return img_p[0] / img_p[2], img_p[1] / img_p[2]

To understand the tripod coordinate system, refer to the corresponding figure in the paper. The construction of ray_p below implements the paper's projection formula:

 ray_p = np.array([math.tan(theta), -math.tan(phi) * math.sqrt(math.tan(theta) * math.tan(theta) + 1), 1])

\mathrm{P}_{p t z}\left(\mathbf{r}_{i}\right)=\mathrm{KQ}_{\phi} \mathrm{Q}_{\theta}\left[\begin{array}{c} \tan \left(\theta_{i}\right) \\ -\tan \left(\phi_{i}\right) \sqrt{\tan ^{2}\left(\theta_{i}\right)+1} \\ 1 \end{array}\right]
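As a small self-contained check that the line and the formula agree (rotation conventions copied from back_project_to_3d_point below, displacement assumed zero, intrinsics made up): a ray equal to the camera's own (pan, tilt) should project exactly onto the principal point.

    import math
    import numpy as np

    def project_ray_sketch(pan_deg, tilt_deg, f, cx, cy, ray):
        """Minimal re-implementation of project_ray with the displacement set to zero."""
        pan, tilt = math.radians(pan_deg), math.radians(tilt_deg)
        theta, phi = math.radians(ray[0]), math.radians(ray[1])

        K = np.array([[f, 0, cx], [0, f, cy], [0, 0, 1]])
        Q_theta = np.array([[math.cos(pan), 0, -math.sin(pan)],
                            [0, 1, 0],
                            [math.sin(pan), 0, math.cos(pan)]])
        Q_phi = np.array([[1, 0, 0],
                          [0, math.cos(tilt), math.sin(tilt)],
                          [0, -math.sin(tilt), math.cos(tilt)]])

        ray_p = np.array([math.tan(theta),
                          -math.tan(phi) * math.sqrt(math.tan(theta) ** 2 + 1),
                          1.0])
        img_p = np.dot(K, np.dot(Q_phi, np.dot(Q_theta, ray_p)))
        return img_p[0] / img_p[2], img_p[1] / img_p[2]

    # A ray that coincides with the camera's own pan/tilt lands on the principal point.
    print(project_ray_sketch(20.0, -8.0, 3000.0, 640.0, 360.0, (20.0, -8.0)))
    # -> approximately (640.0, 360.0)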

back_project_to_3d_point

    def back_project_to_3d_point(self, x, y):
        """
        Used for a general camera (not for ray-based PTZ cameras).
        Back project an image point to a 3d point.
        All 3d points on the same viewing ray correspond to the image point,
        so one coordinate (z) has to be fixed to pick out a single 3d point.
        :param x: image point location x
        :param y: image point location y
        :return: array [3], the corresponding 3d point
        """

        # choose the z-coordinate of the 3d point here (0 = the playing-field plane)
        z = 0

        pan = math.radians(self.pan)
        tilt = math.radians(self.tilt)

        k = np.array([[self.focal_length, 0, self.principal_point[0]],
                      [0, self.focal_length, self.principal_point[1]],
                      [0, 0, 1]])

        rotation = np.dot(np.array([[1, 0, 0],
                                    [0, math.cos(tilt), math.sin(tilt)],
                                    [0, -math.sin(tilt), math.cos(tilt)]]),

                          np.array([[math.cos(pan), 0, -math.sin(pan)],
                                    [0, 1, 0],
                                    [math.sin(pan), 0, math.cos(pan)]]))

        rotation = np.dot(rotation, self.base_rotation)

        inv_mat = np.linalg.inv(np.dot(k, rotation))  # np.linalg.inv computes the matrix inverse

        coe = (z - self.camera_center[2]) / (inv_mat[2, 0] * x + inv_mat[2, 1] * y + inv_mat[2, 2])

        p = np.dot(inv_mat, coe * np.array([x, y, 1])) + self.camera_center

        return p
coe = (z - self.camera_center[2]) / (inv_mat[2, 0] * x + inv_mat[2, 1] * y + inv_mat[2, 2])

p = np.dot(inv_mat, coe * np.array([x, y, 1])) + self.camera_center

I could not fully explain these two lines; if any readers know better, please leave a comment. A tentative reading:
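These two lines use the plain pinhole model \lambda[x, y, 1]^{T}=\mathrm{KR}(\mathbf{X}-\mathbf{C}) (no displacement term), so

\mathbf{X}=\mathbf{C}+\lambda(\mathrm{KR})^{-1}\left[\begin{array}{l} x \\ y \\ 1 \end{array}\right]

Requiring the third coordinate of \mathbf{X} to equal the chosen z gives

\lambda=\frac{z-\mathbf{C}_{z}}{\left((\mathrm{KR})^{-1}[x, y, 1]^{T}\right)_{z}}

which is exactly coe; the next line then evaluates \mathbf{X}. In other words, the function intersects the viewing ray through the pixel (x, y) with the plane z = 0 (presumably the playing field, given how estimate_camera_from_homography builds its 3d points with z = 0).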

    def back_project_to_ray(self, x, y):
        """
        Back project image point to ray.
        :param x: image point x.
        :param y: image point y.
        :return: tuple (2) of ray: pan, tilt in degrees.
        """
        pan = math.radians(self.pan)
        tilt = math.radians(self.tilt)

        im_pos = np.array([x, y, 1])  # homogeneous coordinate
        disp = self.compute_dispalcement()

        K = np.array([[self.focal_length, 0, self.principal_point[0]],
                      [0, self.focal_length, self.principal_point[1]],
                      [0, 0, 1]])
        invK = np.linalg.inv(K)

        pan_tilt_R = np.dot(self.compute_tilt_matrix(), self.compute_pan_matrix())
        pan_tilt_R_inv = np.linalg.inv(pan_tilt_R)
        x3d, y3d, z3d = np.dot(pan_tilt_R_inv, np.dot(invK, im_pos) - disp)

        theta = math.atan(x3d / z3d)
        phi = math.atan(-y3d / math.sqrt(x3d * x3d + z3d * z3d))

        return math.degrees(theta), math.degrees(phi)
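With the displacement at zero, back_project_to_ray should invert project_ray exactly. A self-contained sketch of the dual check (same made-up numbers and rotation conventions as above): the principal point should back-project to the camera's own (pan, tilt).

    import math
    import numpy as np

    def back_project_to_ray_sketch(pan_deg, tilt_deg, f, cx, cy, x, y):
        """Minimal re-implementation of back_project_to_ray with the displacement set to zero."""
        pan, tilt = math.radians(pan_deg), math.radians(tilt_deg)
        K = np.array([[f, 0, cx], [0, f, cy], [0, 0, 1]])
        Q_theta = np.array([[math.cos(pan), 0, -math.sin(pan)],
                            [0, 1, 0],
                            [math.sin(pan), 0, math.cos(pan)]])
        Q_phi = np.array([[1, 0, 0],
                          [0, math.cos(tilt), math.sin(tilt)],
                          [0, -math.sin(tilt), math.cos(tilt)]])

        pan_tilt_R_inv = np.linalg.inv(np.dot(Q_phi, Q_theta))
        x3d, y3d, z3d = np.dot(pan_tilt_R_inv, np.dot(np.linalg.inv(K), np.array([x, y, 1.0])))

        theta = math.atan(x3d / z3d)
        phi = math.atan(-y3d / math.sqrt(x3d * x3d + z3d * z3d))
        return math.degrees(theta), math.degrees(phi)

    # The principal point back-projects to the camera's own pan and tilt.
    print(back_project_to_ray_sketch(20.0, -8.0, 3000.0, 640.0, 360.0, 640.0, 360.0))
    # -> approximately (20.0, -8.0)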

def estimate_camera_from_homography(homography, camera, points3d_on_field):
    """
    :param homography: [3, 4] projection matrix from model to image
    :param camera: PTZ camera object
    :param points3d_on_field: sample points on play field. [N, 3]
    :return: corresponding pan, tilt, zoom
    """

    N = len(points3d_on_field)

    width = camera.principal_point[0] * 2
    height = camera.principal_point[1] * 2

    points2d = np.zeros([N, 2])

    in_image_index = []
    for i in range(N):
        p = np.array([points3d_on_field[i][0], points3d_on_field[i][1], 0, 1])
        p = np.dot(homography, p)
        if p[2] != 0.0:
            points2d[i][0] = p[0] / p[2]
            points2d[i][1] = p[1] / p[2]

            if 0 < points2d[i][0] < width and 0 < points2d[i][1] < height:
                in_image_index.append(i)

    points2d = points2d[in_image_index]

    pose = camera.get_ptz()

    optimized_pose = least_squares(compute_residual, pose, verbose=0, x_scale='jac', ftol=1e-5, method='trf', args=(points3d_on_field[in_image_index], points2d, camera))

    return optimized_pose.x

optimized_pose = least_squares(compute_residual, pose, verbose=0, x_scale='jac', ftol=1e-5, method='trf', args=(points3d_on_field[in_image_index], points2d, camera))

The function signature is:

scipy.optimize.least_squares(fun, x0, jac='2-point', bounds=(-inf, inf), method='trf', ftol=1e-08, xtol=1e-08, gtol=1e-08, x_scale=1.0, loss='linear', f_scale=1.0, diff_step=None, tr_solver=None, tr_options={}, jac_sparsity=None, max_nfev=None, verbose=0, args=(), kwargs={})

Given the residuals f(x) (an m-dimensional real-valued function of n real variables) and a loss function rho(s) (a scalar function), least_squares finds a local minimum of the cost function F(x) = 0.5 * sum(rho(f_i(x)**2), i = 0, ..., m - 1).

For details, see the usage notes and examples for scipy.optimize.least_squares.

This call implements Equation 7 of the paper:

\{\theta, \phi, f\}=\arg \min _{\mathrm{P}} \sum_{i}\left\|\mathbf{p}_{i}-\mathrm{P}\left(\hat{\mathbf{r}}_{i}\right)\right\|^{2}
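compute_residual is not defined in ptz_camera.py, so here is only a hypothetical sketch of what such a residual function could look like: apply the candidate (pan, tilt, focal_length) to the camera, reproject every sampled 3D field point, and return the stacked differences to the observed 2D points (least_squares then squares and sums them, matching the formula above). The direct assignment to camera.pan / camera.tilt / camera.focal_length and the (pan, tilt, focal_length) ordering of pose are assumptions; the real compute_residual in the repository may use different helpers.

    import numpy as np

    def compute_residual_sketch(pose, points3d, points2d, camera):
        """Hypothetical reprojection residual for least_squares (not the repo's own)."""
        # Assumed pose ordering: (pan, tilt, focal_length), matching get_ptz().
        camera.pan, camera.tilt, camera.focal_length = pose
        camera.recompute_matrix()  # rebuild the 3 x 4 projection matrix for this pose

        residual = np.zeros(len(points3d) * 2)  # two residuals (u, v) per point
        for i, (px, py, pz) in enumerate(points3d):
            q = np.dot(camera.projection_matrix, np.array([px, py, pz, 1.0]))
            residual[2 * i] = points2d[i][0] - q[0] / q[2]
            residual[2 * i + 1] = points2d[i][1] - q[1] / q[2]
        return residual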

 
