compute_displacement
def compute_dispalcement(self):
"""
displacement between the projetion center and the rotation center
:return:
"""
fl = self.focal_length
wt = self.displacement
return np.array([wt[0] + wt[3] * fl,
wt[1] + wt[4] * fl,
wt[2] + wt[5] * fl])
无法解释,如果有知道的哥们姐们可以在评论区留言。
recompute_matrix
def recompute_matrix(self):
"""
compute 3 x 4 projection matrix
:return:
"""
K = np.array([[self.focal_length, 0, self.principal_point[0]],
[0, self.focal_length, self.principal_point[1]],
[0, 0, 1]])
rotation = self.compute_rotation_matrix()
cc = np.identity(4)
cc[0][3] = -self.camera_center[0]
cc[1][3] = -self.camera_center[1]
cc[2][3] = -self.camera_center[2]
R = np.identity(4)
R[0:3, 0:3] = rotation
disp = self.compute_dispalcement()
disp_mat = np.eye(3, 4)
disp_mat[0][3] = disp[0]
disp_mat[1][3] = disp[1]
disp_mat[2][3] = disp[2]
self.projection_matrix = np.dot(np.dot(K, disp_mat), np.dot(R, cc))
其中
cc = np.identity(4)
返回的是nxn的主对角线为1,其余地方为0的数组
disp_mat = np.eye(3, 4)
函数的原型:numpy.eye(N,M=None,k=0,dtype=<class 'float'>,order='C)
返回的是一个二维2的数组(N,M),对角线的地方为1,其余的地方为0.
参数介绍:
(1)N:int型,表示的是输出的行数
(2)M:int型,可选项,输出的列数,如果没有就默认为N
(3)k:int型,可选项,对角线的下标,默认为0表示的是主对角线,负数表示的是低对角,正数表示的是高对角。
(4)dtype:数据的类型,可选项,返回的数据的数据类型
(5)order:{‘C’,‘F'},可选项,也就是输出的数组的形式是按照C语言的行优先’C',还是按照Fortran形式的列优先‘F'存储在内存中
self.projection_matrix = np.dot(np.dot(K, disp_mat), np.dot(R, cc))
如果将displacement视为0的话,得出的就是投影矩阵。
project_ray
def project_ray(self, ray):
"""
Project a ray in tripod coordinate to image.
:param ray: ray is a array, tuple or list of [2]
:return: projected image point tuple(2)
"""
theta = math.radians(ray[0])
phi = math.radians(ray[1])
K = self.compute_camera_matrix()
pan_tilt_rotation = np.dot(self.compute_tilt_matrix(), self.compute_pan_matrix())
disp = self.compute_dispalcement()
ray_p = np.array([math.tan(theta), -math.tan(phi) * math.sqrt(math.tan(theta) * math.tan(theta) + 1), 1])
img_p = np.dot(K, np.dot(pan_tilt_rotation, ray_p) + disp)
assert img_p[2] != 0.0
return img_p[0] / img_p[2], img_p[1] / img_p[2]
为了理解 tripod coordinate,我们这里使用论文中的一张图
ray_p = np.array([math.tan(theta), -math.tan(phi) * math.sqrt(math.tan(theta) * math.tan(theta) + 1), 1])
back_project_to_3d_point
def back_project_to_3d_point(self, x, y):
"""
Used for general camera (not for ray-based PTZ cameras)
Back project image point to 3d point.
The 3d points on the same ray are all corresponding to the image point.
So you should set a dimension (z) to determine that 3d point.
:param x: image point location x
:param y: image point location y
:return: array [3] of image point
"""
# set z(3d point) here.
z = 0
pan = math.radians(self.pan)
tilt = math.radians(self.tilt)
k = np.array([[self.focal_length, 0, self.principal_point[0]],
[0, self.focal_length, self.principal_point[1]],
[0, 0, 1]])
rotation = np.dot(np.array([[1, 0, 0],
[0, math.cos(tilt), math.sin(tilt)],
[0, -math.sin(tilt), math.cos(tilt)]]),
np.array([[math.cos(pan), 0, -math.sin(pan)],
[0, 1, 0],
[math.sin(pan), 0, math.cos(pan)]]))
rotation = np.dot(rotation, self.base_rotation)
inv_mat = np.linalg.inv(np.dot(k, rotation))#np.linalg.inv 表示矩阵求逆
coe = (z - self.camera_center[2]) / (inv_mat[2, 0] * x + inv_mat[2, 1] * y + inv_mat[2, 2])
p = np.dot(inv_mat, coe * np.array([x, y, 1])) + self.camera_center
return p
coe = (z - self.camera_center[2]) / (inv_mat[2, 0] * x + inv_mat[2, 1] * y + inv_mat[2, 2])
p = np.dot(inv_mat, coe * np.array([x, y, 1])) + self.camera_center
无法解释,如果有知道的哥们姐们可以在评论区留言。
def back_project_to_ray(self, x, y):
"""
Back project image point to ray.
:param x: image point x.
:param y: image point y.
:return: tuple (2) of ray: pan, tilt in degree.
"""
pan = math.radians(self.pan)
tilt = math.radians(self.tilt)
im_pos = np.array([x, y, 1]) # homogenerous coordinate
disp = self.compute_dispalcement()
K = np.array([[self.focal_length, 0, self.principal_point[0]],
[0, self.focal_length, self.principal_point[1]],
[0, 0, 1]])
invK = np.linalg.inv(K)
pan_tilt_R = np.dot(self.compute_tilt_matrix(), self.compute_pan_matrix())
pan_tilt_R_inv = np.linalg.inv(pan_tilt_R)
x3d, y3d, z3d = np.dot(pan_tilt_R_inv, np.dot(invK, im_pos) - disp)
theta = math.atan(x3d / z3d)
phi = math.atan(-y3d / math.sqrt(x3d * x3d + z3d * z3d))
return math.degrees(theta), math.degrees(phi)
def estimate_camera_from_homography(homography, camera, points3d_on_field):
"""
:param homography: [3, 4] projection matrix from model to image
:param camera: PTZ camera object
:param points3d_on_field: sample points on play field. [N, 3]
:return: corresponding pan, tilt, zoom
"""
N = len(points3d_on_field)
width = camera.principal_point[0] * 2
height = camera.principal_point[1] * 2
points2d = np.zeros([N, 2])
in_image_index = []
for i in range(N):
p = np.array([points3d_on_field[i][0], points3d_on_field[i][1], 0, 1])
p = np.dot(homography, p)
if p[2] != 0.0:
points2d[i][0] = p[0] / p[2]
points2d[i][1] = p[1] / p[2]
if 0 < points2d[i][0] < width and 0 < points2d[i][1] < height:
in_image_index.append(i)
points2d = points2d[in_image_index]
pose = camera.get_ptz()
optimized_pose = least_squares(compute_residual, pose, verbose=0, x_scale='jac', ftol=1e-5, method='trf', args=(points3d_on_field[in_image_index], points2d, camera))
return optimized_pose.x
optimized_pose = least_squares(compute_residual, pose, verbose=0, x_scale='jac', ftol=1e-5, method='trf', args=(points3d_on_field[in_image_index], points2d, camera))
函数原型为
scipy.optimize.least_squares(fun, x0, jac='2-point', bounds=(-inf, inf), method='trf', ftol=1e-08, xtol=1e-08, gtol=1e-08, x_scale=1.0, loss='linear', f_scale=1.0, diff_step=None, tr_solver=None, tr_options={}, jac_sparsity=None, max_nfev=None, verbose=0, args=(), kwargs={})
给定残差f(x)(n个实变量的m-dimensional实函数)和损失函数rho(s)(标量函数),least_squares找到成本函数F(x)的局部最小值.
详细参见python scipy optimize.least_squares用法及代码示例
这个函数实现的是论文的公式7