SFM即运动恢复结构(Structure from Motion),这里给出实现的python代码即解释,关于SFM流程的介绍详见我的这篇博客:
特征提取
这里采用opencv库的SIFT角点:
def extract_features(image_names):
sift = cv2.SIFT_create(0, 3, 0.04, 10) # 这些是默认值创建
key_points_for_all = []
descriptor_for_all = []
colors_for_all = []
for image_name in image_names:
image = cv2.imread(image_name)
if image is None:
continue
key_points, descriptor = sift.detectAndCompute(cv2.cvtColor(image, cv2.COLOR_BGR2GRAY), None)
if len(key_points) <= 10:
continue
key_points_for_all.append(key_points)
descriptor_for_all.append(descriptor)
colors = np.zeros((len(key_points), 3))
for i, key_point in enumerate(key_points):
p = key_point.pt # p是关键点的位置
colors[i] = image[int(p[1])][int(p[0])]
colors_for_all.append(colors)
return np.array(key_points_for_all), np.array(descriptor_for_all), np.array(colors_for_all),key_points_for_all
特征匹配
def match_features(query, train):
# Brute Force匹配是opencv二维特征点匹配常见的办法,BFMatcher总是尝试所有可能的匹配,从而使得它总能够找到最佳匹配
bf = cv2.BFMatcher(cv2.NORM_L2)
knn_matches = bf.knnMatch(query, train, k=2)
matches = []
#Apply Lowe's SIFT matching ratio test(MRT),值得一提的是,这里的匹配没有
#标准形式,可以根据需求进行改动。
# 令最近邻的距离为d1,再找到第二近的匹配对点之间距离为d2,如果两个距离d1和d2之比小于一个阈值如0.6,就可以判定为可接受的匹配对。
for m, n in knn_matches:
if m.distance < sfm_config.MRT * n.distance:
matches.append(m)
return np.array(matches),matches
可视化匹配结果:
def vis_match(image_names,key_points_for_all,matches_for_all):
for i in range(len(key_points_for_all) - 1):
image1 = cv2.imread(image_names[i])
image2 = cv2.imread(image_names[i+1])
# flags = 2表示只可视化要匹配的关键点,不是所有的都显示
img_match = cv2.drawMatches(image1,key_points_for_all[i],image2,key_points_for_all[i+1],matches_for_all[i][640:680],None,flags=2)
cv2.imshow('result.jpg', img_match)
cv2.waitKey(0)
求解旋转与平移
这里主要通过opencv的findEssentialMat和recoverPose函数实现。
def find_transform(K, p1, p2):
# p1,p2是匹配好的关键点
focal_length = 0.5 * (K[0, 0] + K[1, 1])
principle_point = (K[0, 2], K[1, 2])
# 参数mask输出N个元素的数组,其中每个元素对于异常值设置为0,对其他点设置为1。
E,mask = cv2.findEssentialMat(p1, p2, focal_length, principle_point, cv2.RANSAC, 0.999, 1.0)
cameraMatrix = np.array([[focal_length, 0, principle_point[0]], [0, focal_length, principle_point[1]], [0, 0, 1]])
# 该函数求解出来的 R,t已经是最合适的R,t;已经通过内部的代码去掉了另外三种错误的解
# 在输出掩码中,只有通过手性检查的内点
pass_count, R, T, mask = cv2.recoverPose(E, p1, p2, cameraMatrix, mask)
return R, T, mask
对前两帧图片实现三维恢复
def init_structure(K, key_points_for_all, colors_for_all, matches_for_all):
# 只有前两张图片是通过求本征矩阵来求解R,t
p1, p2 = get_matched_points(key_points_for_all[0], key_points_for_all[1], matches_for_all[0])
c1, c2 = get_matched_colors(colors_for_all[0], colors_for_all[1], matches_for_all[0])
if find_transform(K, p1, p2):
R,T,mask = find_transform(K, p1, p2)
else:
R,T,mask = np.array([]), np.array([]), np.array([])
p1 = maskout_points(p1, mask) # 去除不符合要求的点(即mask=0的点)
p2 = maskout_points(p2, mask)
colors = maskout_points(c1, mask)
#设置第一个相机的变换矩阵,即作为剩下摄像机矩阵变换的基准。
R0 = np.eye(3, 3)
T0 = np.zeros((3, 1))
structure = reconstruct(K, R0, T0, R, T, p1, p2) # 关键点在空间中的三维点
rotations = [R0, R]
motions = [T0, T]
correspond_struct_idx = []
for key_p in key_points_for_all:
correspond_struct_idx.append(np.ones(len(key_p)) *- 1)
correspond_struct_idx = np.array(correspond_struct_idx)
idx = 0
matches = matches_for_all[0] # 第一帧到第二帧的匹配
for i, match in enumerate(matches):
if mask[i] == 0:
continue
correspond_struct_idx[0][int(match.queryIdx)] = idx
correspond_struct_idx[1][int(match.trainIdx)] = idx
idx += 1
# 至此,对前两帧图片实现了三维恢复,将保留的匹配点的索引写在correspond_struct_idx的前两行中,其余为-1,保留的匹配点是从0-n(保留的个数)
return structure, correspond_struct_idx, colors, rotations, motions
制作图像点以及空间点
def get_objpoints_and_imgpoints(matches, struct_indices, structure, key_points):
# 在上一帧保留点的基础上,筛选下一帧匹配好的点
object_points = []
image_points = []
for match in matches:
query_idx = match.queryIdx
train_idx = match.trainIdx
struct_idx = struct_indices[query_idx]
if struct_idx < 0:
continue
object_points.append(structure[int(struct_idx)])
image_points.append(key_points[train_idx].pt)
return np.array(object_points), np.array(image_points)
三维重建
def reconstruct(K, R1, T1, R2, T2, p1, p2):
# proj1/2是3×4的变换矩阵
proj1 = np.zeros((3, 4))
proj2 = np.zeros((3, 4))
proj1[0:3, 0:3] = np.float32(R1)
proj1[:, 3] = np.float32(T1.T)
proj2[0:3, 0:3] = np.float32(R2)
proj2[:, 3] = np.float32(T2.T)
fk = np.float32(K)
proj1 = np.dot(fk, proj1)
proj2 = np.dot(fk, proj2)
s = cv2.triangulatePoints(proj1, proj2, p1.T, p2.T) # s是空间点的齐次坐标
structure = [] # 3维点
for i in range(len(s[0])):
col = s[:, i]
col /= col[3]
structure.append([col[0], col[1], col[2]])
return np.array(structure)
将已作出的点云进行融合
def fusion_structure(matches, struct_indices, next_struct_indices, structure, next_structure, colors, next_colors):
# 将上一帧恢复的三维点重新恢复
for i,match in enumerate(matches):
query_idx = match.queryIdx
train_idx = match.trainIdx
struct_idx = struct_indices[query_idx]
if struct_idx >= 0:
next_struct_indices[train_idx] = struct_idx
continue
structure = np.append(structure, [next_structure[i]], axis = 0)
colors = np.append(colors, [next_colors[i]], axis = 0)
struct_indices[query_idx] = next_struct_indices[train_idx] = len(structure) - 1
return struct_indices, next_struct_indices, structure, colors
以上就是实现sfm的主要代码,完整的代码见sfm.py,config文件。以上代码需要注意的是,structure是通过不断的三角化恢复新的三维点,而三角化需要的两帧的变换矩阵,这个变换矩阵是相对于同一个世界坐标系的(也就是第一帧图像的相机坐标系)。因为求解PnP时的object_points虽然为上一帧的点,但是它的坐标系是初始帧的坐标系(object_points来源于不断累加的structure)得到的motions和rotations是每一帧相对于固定的初始帧坐标系的平移和旋转
。