SFM实现代码及详解（python）

Jenkinwey

已于 2022-05-08 13:09:26 修改

阅读量9.4k

点赞数 14

文章标签： python opencv 计算机视觉

于 2021-10-26 16:27:25 首次发布

本文链接：https://blog.csdn.net/qq_37591788/article/details/120974282

版权

SFM即运动恢复结构（Structure from Motion），这里给出实现的python代码即解释，关于SFM流程的介绍详见我的这篇博客：

特征提取

这里采用opencv库的SIFT角点：

def extract_features(image_names):
    
    sift = cv2.SIFT_create(0, 3, 0.04, 10) # 这些是默认值创建
    key_points_for_all = []
    descriptor_for_all = []
    colors_for_all = []
    for image_name in image_names:
        image = cv2.imread(image_name)
        
        if image is None:
            continue
        key_points, descriptor = sift.detectAndCompute(cv2.cvtColor(image, cv2.COLOR_BGR2GRAY), None)
        
        if len(key_points) <= 10:
            continue
        
        key_points_for_all.append(key_points)
        descriptor_for_all.append(descriptor)
        colors = np.zeros((len(key_points), 3))
        for i, key_point in enumerate(key_points):
            p = key_point.pt  # p是关键点的位置
            colors[i] = image[int(p[1])][int(p[0])]         
        colors_for_all.append(colors)
    return np.array(key_points_for_all), np.array(descriptor_for_all), np.array(colors_for_all),key_points_for_all

特征匹配

def match_features(query, train):
    # Brute Force匹配是opencv二维特征点匹配常见的办法，BFMatcher总是尝试所有可能的匹配，从而使得它总能够找到最佳匹配
    bf = cv2.BFMatcher(cv2.NORM_L2)
    knn_matches = bf.knnMatch(query, train, k=2)
    matches = []
    #Apply Lowe's SIFT matching ratio test(MRT)，值得一提的是，这里的匹配没有
    #标准形式，可以根据需求进行改动。
    # 令最近邻的距离为d1，再找到第二近的匹配对点之间距离为d2，如果两个距离d1和d2之比小于一个阈值如0.6，就可以判定为可接受的匹配对。
    for m, n in knn_matches:
        if m.distance < sfm_config.MRT * n.distance:
            matches.append(m)
    return np.array(matches),matches

可视化匹配结果：

def vis_match(image_names,key_points_for_all,matches_for_all):
    for i in range(len(key_points_for_all) - 1):
        image1 = cv2.imread(image_names[i])
        image2 = cv2.imread(image_names[i+1])

        # flags = 2表示只可视化要匹配的关键点，不是所有的都显示
        img_match = cv2.drawMatches(image1,key_points_for_all[i],image2,key_points_for_all[i+1],matches_for_all[i][640:680],None,flags=2)
        cv2.imshow('result.jpg', img_match)
        cv2.waitKey(0)

求解旋转与平移

这里主要通过opencv的findEssentialMat和recoverPose函数实现。

def find_transform(K, p1, p2):  
    # p1,p2是匹配好的关键点
    focal_length = 0.5 * (K[0, 0] + K[1, 1])
    principle_point = (K[0, 2], K[1, 2])
    # 参数mask输出N个元素的数组，其中每个元素对于异常值设置为0，对其他点设置为1。
    E,mask = cv2.findEssentialMat(p1, p2, focal_length, principle_point, cv2.RANSAC, 0.999, 1.0)
    cameraMatrix = np.array([[focal_length, 0, principle_point[0]], [0, focal_length, principle_point[1]], [0, 0, 1]])
    # 该函数求解出来的 R,t已经是最合适的R,t；已经通过内部的代码去掉了另外三种错误的解
    # 在输出掩码中，只有通过手性检查的内点
    pass_count, R, T, mask = cv2.recoverPose(E, p1, p2, cameraMatrix, mask)
    
    return R, T, mask

对前两帧图片实现三维恢复

def init_structure(K, key_points_for_all, colors_for_all, matches_for_all):  
    # 只有前两张图片是通过求本征矩阵来求解R，t
    p1, p2 = get_matched_points(key_points_for_all[0], key_points_for_all[1], matches_for_all[0])
    c1, c2 = get_matched_colors(colors_for_all[0], colors_for_all[1], matches_for_all[0])
    
    if find_transform(K, p1, p2):
        R,T,mask = find_transform(K, p1, p2)
    else:
        R,T,mask = np.array([]), np.array([]), np.array([])
                
    p1 = maskout_points(p1, mask)  # 去除不符合要求的点（即mask=0的点）
    p2 = maskout_points(p2, mask)
    colors = maskout_points(c1, mask)
    #设置第一个相机的变换矩阵，即作为剩下摄像机矩阵变换的基准。
    R0 = np.eye(3, 3)
    T0 = np.zeros((3, 1))
    structure = reconstruct(K, R0, T0, R, T, p1, p2) # 关键点在空间中的三维点
    rotations = [R0, R]
    motions = [T0, T]
    correspond_struct_idx = []
    for key_p in key_points_for_all:
        correspond_struct_idx.append(np.ones(len(key_p)) *- 1)
    correspond_struct_idx = np.array(correspond_struct_idx)
    idx = 0
    matches = matches_for_all[0]  # 第一帧到第二帧的匹配
    for i, match in enumerate(matches):
        if mask[i] == 0:
            continue
        correspond_struct_idx[0][int(match.queryIdx)] = idx
        correspond_struct_idx[1][int(match.trainIdx)] = idx
        idx += 1
    # 至此，对前两帧图片实现了三维恢复，将保留的匹配点的索引写在correspond_struct_idx的前两行中，其余为-1,保留的匹配点是从0-n（保留的个数）
    return structure, correspond_struct_idx, colors, rotations, motions

制作图像点以及空间点

def get_objpoints_and_imgpoints(matches, struct_indices, structure, key_points):
    # 在上一帧保留点的基础上，筛选下一帧匹配好的点
    object_points = []
    image_points = []
    for match in matches:
        query_idx = match.queryIdx
        train_idx = match.trainIdx
        struct_idx = struct_indices[query_idx]  
        if struct_idx < 0: 
            continue
        object_points.append(structure[int(struct_idx)])  
        image_points.append(key_points[train_idx].pt)
    
    return np.array(object_points), np.array(image_points)

三维重建

def reconstruct(K, R1, T1, R2, T2, p1, p2):
    # proj1/2是3×4的变换矩阵
    proj1 = np.zeros((3, 4))
    proj2 = np.zeros((3, 4))
    proj1[0:3, 0:3] = np.float32(R1)
    proj1[:, 3] = np.float32(T1.T)
    proj2[0:3, 0:3] = np.float32(R2)
    proj2[:, 3] = np.float32(T2.T)
    fk = np.float32(K)
    proj1 = np.dot(fk, proj1)
    proj2 = np.dot(fk, proj2)
    s = cv2.triangulatePoints(proj1, proj2, p1.T, p2.T) # s是空间点的齐次坐标
    structure = []  # 3维点
    
    for i in range(len(s[0])):
        col = s[:, i]
        col /= col[3]
        structure.append([col[0], col[1], col[2]])
    
    return np.array(structure)

将已作出的点云进行融合

def fusion_structure(matches, struct_indices, next_struct_indices, structure, next_structure, colors, next_colors):
    # 将上一帧恢复的三维点重新恢复
    for i,match in enumerate(matches):  
        query_idx = match.queryIdx
        train_idx = match.trainIdx
        struct_idx = struct_indices[query_idx]  
        if struct_idx >= 0:
            next_struct_indices[train_idx] = struct_idx
            continue
        structure = np.append(structure, [next_structure[i]], axis = 0)
        colors = np.append(colors, [next_colors[i]], axis = 0)
        struct_indices[query_idx] = next_struct_indices[train_idx] = len(structure) - 1
    return struct_indices, next_struct_indices, structure, colors

以上就是实现sfm的主要代码，完整的代码见sfm.py，config文件。以上代码需要注意的是，structure是通过不断的三角化恢复新的三维点，而三角化需要的两帧的变换矩阵，这个变换矩阵是相对于同一个世界坐标系的（也就是第一帧图像的相机坐标系）。因为求解PnP时的object_points虽然为上一帧的点，但是它的坐标系是初始帧的坐标系（object_points来源于不断累加的structure）得到的motions和rotations是每一帧相对于固定的初始帧坐标系的平移和旋转。