COLMAP中将旋转矩阵转为四元数的实现

fengbingchun

于 2023-05-27 13:38:36 发布

阅读量1.7k

点赞数

分类专栏： 3D 文章标签： colmap

本文链接：https://blog.csdn.net/fengbingchun/article/details/130900119

版权

3D 专栏收录该内容

17 篇文章 4 订阅

订阅专栏

instant-ngp中执行scripts/colmap2nerf.py时，在colmap_text目录下会生成cameras.txt、images.txt、points3D.txt三个文件:

1.cameras.txt：

(1).该文件包含数据集中所有重构相机(all reconstructed cameras)的内在参数(intrinsic parameters)，每个相机占用一行；

(2).参数的长度是可变的，依赖于相机型号(camera model)，如OPENCV、PINHOLE；

(3).每行内容依次为：CAMERA_ID, MODEL, WIDTH, HEIGHT, PARAMS[];

2.images.txt:

(1).该文件包含数据集中所有重建图像(reconstructed images)的位姿和关键点(pose and keypoints)，每幅图像占用两行;

(2).每两行定义一幅图像的信息：

IMAGE_ID, QW, QX, QY, QZ, TX, TY, TZ, CAMERA_ID, NAME
POINTS2D[] as (X, Y, POINT3D_ID)

(3).利用四元数(QW, QX, QY, QZ)和平移向量(TX, TY, TZ)，将图像的重构位姿(reconstructed pose)指定为图像从世界到相机坐标系的投影。四元数(quaternion)是使用Hamilton约定来定义的；

(4).关键点的位置以像素坐标指定:若3D点标识符(3D point identifier)为-1，则表明此关键点在重建中没有观察(observe)到3D点;

3.points3D.txt:

(1).该文件包含数据集中所有重建的3D点的信息，每个点占用一行，每行内容依次为：

POINT3D_ID, X, Y, Z, R, G, B, ERROR, TRACK[] as (IMAGE_ID, POINT2D_IDX)

在https://blog.csdn.net/fengbingchun/article/details/130667646 中介绍过已知cameras.txt和images.txt进行3D模型重建的过程，这里介绍下已知相机内外参如何生成cameras.txt和images.txt。

相机内外参格式如下所示：每个相机的内外参存放在一个txt文件中，共6行、每行4个数值；前三行为内参，后三行为外参(3*4，前三列为旋转矩阵R，第四列为平移向量T)

8242.163473484103 0 2576.928611903816 0
0 8242.163473484103 1733.503691370957 0
0 0 1 0
2.044289726145588e-004 -0.2587487517264626 -0.9659446369688031 27.59432346095996
-0.9993063898830017 -3.602307923217642e-002 9.438056030485108e-003 -0.6400803719560595
-3.723838540803551e-002 0.9652727185840433 -0.2585766451355823 35.62807466319453

以下为测试代码：

import os
from inspect import currentframe, getframeinfo
import numpy as np
from pyquaternion import Quaternion

def get_dir_list(path):
    """Collect the per-camera parameter files found directly inside *path*.

    A file qualifies when its name starts with "N" and ends with ".txt".

    Args:
        path: Directory to scan (not recursive).

    Returns:
        A tuple ``(dir_list, txt_list)`` of two parallel lists: the file paths
        (``path + "/" + name``) and the bare file names, both sorted by name.
    """
    # os.listdir() returns entries in arbitrary order; sort so the generated
    # files list the images in a reproducible order.
    txt_list = sorted(
        name for name in os.listdir(path)
        if name.startswith("N") and name.endswith(".txt")
    )
    # Keep the literal "/" separator: get_image_id_and_name() splits on it.
    dir_list = [path + "/" + name for name in txt_list]

    return dir_list, txt_list

def parse_txt(txt_name):
    """Read one camera-parameter file into a 6x4 list of floats.

    Blank lines and lines starting with "#" are skipped. Every remaining line
    must hold exactly four whitespace-separated numbers, and the file must
    contain exactly six such lines.

    Args:
        txt_name: Path of the parameter file.

    Returns:
        A list of six rows, each a list of four floats.

    Raises:
        ValueError: If a token is not a number, a row does not have four
            columns, or the file does not have six data rows.
    """
    elements = []  # 6 rows x 4 columns

    with open(txt_name, "r") as f:
        for line in f:
            stripped = line.strip()
            if not stripped or stripped.startswith("#"):
                continue

            # split() without arguments tolerates repeated spaces, tabs and
            # trailing whitespace; split(" ") turned those into empty tokens
            # that float() rejects.
            row = [float(token) for token in stripped.split()]
            if len(row) != 4:
                raise ValueError(
                    f"{txt_name}: expected 4 columns per row, got {len(row)}")

            elements.append(row)

    if len(elements) != 6:
        raise ValueError(
            f"{txt_name}: expected 6 data rows, got {len(elements)}")

    return elements

def get_number(name):
    """Return the integer formed by *name* from its first numeric character on.

    Everything from the first numeric character to the end of the string is
    handed to ``int()``; e.g. "N001" gives 1.
    """
    # Index of the first numeric character, or len(name) when there is none
    # (in which case int("") raises ValueError).
    start = next(
        (idx for idx, ch in enumerate(name) if ch.isnumeric()),
        len(name),
    )
    return int(name[start:])

def get_image_id_and_name(txt_name, image_suffix):
    """Derive the numeric image id and image file name from a parameter-file path.

    Args:
        txt_name: Path of the parameter file using "/" as separator,
            e.g. "test_data/txt/N001.txt". The name must end in ".txt".
        image_suffix: Image extension without the dot, e.g. "PNG".

    Returns:
        A tuple ``(image_id, image_name)``, e.g. ``(1, "N001.PNG")``.
    """
    name = txt_name.rsplit("/", 1)[-1]
    # Drop the trailing ".txt". NOTE: files named like "N001_KRT.txt" would
    # need the last 8 characters removed instead.
    stem = name[:-4]

    # Swap only the extension; str.replace("txt", ...) would also rewrite any
    # "txt" that happens to occur inside the stem.
    image_name = stem + "." + image_suffix
    image_id = get_number(stem)

    return image_id, image_name

def generate_cameras_txt(dir_list, cameras_txt_name, images_number, image_size, image_suffix, camera_model):
    """Write a COLMAP-style cameras.txt with one camera line per parameter file.

    Each line is ``CAMERA_ID MODEL WIDTH HEIGHT fx fy cx cy`` where the camera
    id equals the image id derived from the file name and the four parameters
    are read from the first two rows of the parsed file.

    Args:
        dir_list: Paths of the per-camera parameter files.
        cameras_txt_name: Output file path.
        images_number: Count written into the header comment.
        image_size: ``[width, height]`` in pixels.
        image_suffix: Image extension, forwarded to get_image_id_and_name().
        camera_model: Model name written on every line, e.g. "PINHOLE".
            NOTE(review): only four parameters are emitted, which matches
            4-parameter models such as PINHOLE; confirm before using others.
    """
    lines = [
        "# Camera list with one line of data per camera:\n",
        "#   CAMERA_ID, MODEL, WIDTH, HEIGHT, PARAMS[]\n",
        "# Number of cameras: %d\n" % images_number,
    ]

    for x in dir_list:
        elements = parse_txt(x)
        image_id, _ = get_image_id_and_name(x, image_suffix)

        fields = [
            image_id, camera_model, image_size[0], image_size[1],
            elements[0][0], elements[1][1],  # fx, fy
            elements[0][2], elements[1][2],  # cx, cy
        ]
        lines.append(" ".join(str(v) for v in fields) + "\n")

    # Lines end with "\n" rather than a bare "\r", so line-based readers see
    # one record per line; newline="\n" keeps the output identical on every
    # platform. The with-block closes the file even if writing fails.
    with open(cameras_txt_name, "w", newline="\n") as f:
        f.writelines(lines)

def get_rotate_matrix(elements):
    """Extract the 3x3 rotation block from the parsed parameter rows.

    Takes the first three columns of rows 3, 4 and 5 of *elements* and returns
    them as a NumPy array.
    """
    rotation_rows = [
        [elements[row][col] for col in range(3)]
        for row in (3, 4, 5)
    ]
    return np.array(rotation_rows)

def calculate_quaternion(elements):
    """Build a quaternion from the transposed rotation block of *elements*.

    Returns:
        A tuple ``(quaternion, m)`` where ``m`` is the transpose of the matrix
        returned by get_rotate_matrix() and ``quaternion`` is the pyquaternion
        ``Quaternion`` constructed from ``m``.
    """
    # The rotation is transposed before conversion, see
    # https://github.com/colmap/colmap/issues/434
    transposed = get_rotate_matrix(elements).transpose()
    quat = Quaternion(matrix=transposed)

    return quat, transposed

def generate_images_txt(dir_list, images_txt_name, images_number, image_suffix):
    """Write a COLMAP-style images.txt with two lines per parameter file.

    The first line of each image is
    ``IMAGE_ID QW QX QY QZ TX TY TZ CAMERA_ID NAME``; the second line
    (POINTS2D) is left empty. The quaternion comes from
    calculate_quaternion(); the translation is ``-m @ T`` where ``m`` is the
    transposed rotation and ``T`` the fourth column of rows 3..5. The camera
    id equals the image id.

    Args:
        dir_list: Paths of the per-camera parameter files.
        images_txt_name: Output file path.
        images_number: Count written into the header comment.
        image_suffix: Image extension, forwarded to get_image_id_and_name().
    """
    lines = [
        "# Image list with two lines of data per image:\n",
        "#   IMAGE_ID, QW, QX, QY, QZ, TX, TY, TZ, CAMERA_ID, NAME\n",
        "#   POINTS2D[] as (X, Y, POINT3D_ID)\n",
        "# Number of images: %d, mean observations per image:\n" % images_number,
    ]

    for x in dir_list:
        elements = parse_txt(x)
        quaternion, m = calculate_quaternion(elements)

        T = np.array([[elements[3][3]], [elements[4][3]], [elements[5][3]]])
        # reference: https://github.com/colmap/colmap/issues/434
        T = np.matmul((-m), T)  # 3*1

        image_id, image_name = get_image_id_and_name(x, image_suffix)

        fields = [
            image_id,
            quaternion[0], quaternion[1], quaternion[2], quaternion[3],
            T[0][0], T[1][0], T[2][0],
            image_id, image_name,
        ]
        lines.append(" ".join(str(v) for v in fields) + "\n")
        # The format uses two lines per image; the POINTS2D line stays empty
        # because no keypoints are known.
        lines.append("\n")

    # Lines end with "\n" rather than a bare "\r", so the first image record
    # is not glued to the comment header; newline="\n" keeps the output
    # identical on every platform. The with-block closes the file on error.
    with open(images_txt_name, "w", newline="\n") as f:
        f.writelines(lines)

if __name__ == "__main__":
    # Script entry point: read every N*.txt parameter file under
    # test_data/txt and write cameras.txt and images.txt next to them.
    dir_list, txt_list = get_dir_list("test_data/txt")
    #print(f"dir_list:\n\t{dir_list}\ntxt_list:\n\t{txt_list}")

    cameras_txt_name = "test_data/txt/cameras.txt"
    # Count the parameter files actually found instead of hard-coding the
    # dataset size, so the header comments stay correct for any dataset.
    images_number = len(dir_list)
    image_size = [5184, 3456] # width, height
    image_suffix = "PNG"
    camera_model = "PINHOLE"
    generate_cameras_txt(dir_list, cameras_txt_name, images_number, image_size, image_suffix, camera_model)

    images_txt_name = "test_data/txt/images.txt"
    generate_images_txt(dir_list, images_txt_name, images_number, image_suffix)

    print("test finish")

注意：

(1).旋转矩阵转四元数调用的是pyquaternion模块的接口；

(2).参考https://github.com/colmap/colmap/issues/434 中的说明，不能直接进行转换：COLMAP的images.txt中保存的是从世界坐标系到相机坐标系的变换，因此需要先对外参[R|T]求逆，即旋转矩阵使用transpose(R)，平移向量使用-transpose(R) * T。

生成的cameras.txt内容如下：

# Camera list with one line of data per camera:
#   CAMERA_ID, MODEL, WIDTH, HEIGHT, PARAMS[]
# Number of cameras: 118
1 PINHOLE 5184 3456 8242.163473484103 8242.163473484103 2576.928611903816 1733.503691370957
2 PINHOLE 5184 3456 8131.912069111961 8131.912069111961 2556.374127401603 1752.782750899889

生成的images.txt内容如下：

# Image list with two lines of data per image:
#   IMAGE_ID, QW, QX, QY, QZ, TX, TY, TZ, CAMERA_ID, NAME
#   POINTS2D[] as (X, Y, POINT3D_ID)
# Number of images: 118, mean observations per image:
1 -0.42000140017768267 0.5689473071692082 -0.5527994973158874 -0.44080664840834116 0.6814544907248062 -27.273869403751362 35.87321789136011 1 N001.PNG

2 0.49895054847423237 -0.5018861154245287 0.48563874859151474 0.5131409973757768 0.6946838348090978 -27.127815150960185 29.108370323558272 2 N002.PNG

GitHub：https://github.com/fengbingchun/Python_Test