可视化软件:
view3dscene is a viewer for many 3D model formats: glTF, X3D, VRML, Collada, 3DS, MD3, Wavefront OBJ, STL and (2D) Spine JSON.
Explore the virtual world with collisions, gravity, animations, sensors, shadows, mirrors, shaders and more. You can also convert all models to X3D or VRML.
https://castle-engine.io/view3dscene.php
该方法需要手动画出图像中的平行线
具体代码中的数学原理可参考PDF:
https://github.com/bliuag/Single-View-Metrology/blob/master/SingleViewMetrologyFinalReport.pdf
https://www.robots.ox.ac.uk/~vgg/projects/SingleView/
视频:
https://www.robots.ox.ac.uk/~vgg/projects/SingleView/models/merton/movies/merton.mpg
https://www.robots.ox.ac.uk/~vgg/projects/SingleView/models/hut/movies/hutme_camera.mpg
网上实现一般都是matlab
比如:
博客讲解:
http://sklin93.github.io/svm.html
具体代码:
https://github.com/sklin93/Single-View-Metrology
还有另外一个人的可视化展示,但是没公布GUI代码:
https://www.cs.cmu.edu/~ph/869/results/sim/asst3/index.html
其他matlab代码:
https://github.com/wenshuoguo/single-view-metrology
https://github.com/bliuag/Single-View-Metrology *
https://github.com/wenshuoguo/single-view-metrology
https://github.com/haoqianzhang/Single-view-metrology
https://github.com/jyt0532/Single_View_Metrology
一些基于该方法的新工作: (SFMEDU2的作者)
https://vision.princeton.edu/projects/2012/SUNprimitive/
c++实现
代码:
https://github.com/ZhengRui/Single-View-Metrology
博客理论:
http://zhengrui.github.io/singleviewmetrology.html
博客中的操作步骤:
To get the patches ("Edit"-"Polygon")
需要按住ctrl,然后鼠标点击之前选择好的兴趣点(逆时针或顺时针)
取消当前选中的兴趣点,通过alt+D实现
这一步保存后,会将图片和该图片对应的wrl保存,wrl里面存放了形成该图像所选择的兴趣点数目(在该图像上的2D坐标和对应我们建立三维坐标系的3D坐标)
1(0,0,0)
2(1,0,0)
3(1,1,0)
4(0,1,0)
1(0,0,-1)
2(0,0,0)
理论参考:
http://www.cs.cmu.edu/~ph/869/src/asst3/asst3.html
http://www.cs.cornell.edu/courses/cs4670/2012fa/projects/p4/
https://comp5421.github.io/Project_3_Single_View_Modeling/
python版本:
jupyter 步骤实现求H:
https://codeload.github.com/hkchengrex/Single-View-Metrology-Step-By-Step/zip/master
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
# Load the working image; every pixel coordinate below was measured on it.
im = np.array(Image.open('sony.jpg'))
plt.imshow(im)
# Hand-labeled image segments that are parallel (in 3D) to each world axis.
# Obtained from sony.jpg. Each entry is two image points -> one line,
# given in (x, y) pixel order.
x_para = [
    [(393, 539), (607, 425)],  # Two points -> One line (X, Y) order
    [(396, 398), (618, 295)],
    [(168, 230), (383, 142)],
]
y_para = [
    [(385, 396), (168, 235)],
    [(388, 538), (173, 365)],
    [(609, 283), (387, 144)],
]
# Z segments are close to vertical in the image (small x differences).
z_para = [
    [(390, 538), (390, 400)],
    [(606, 428), (617, 291)],
    [(174, 366), (168, 235)],
]
# Visualize the labeled parallel lines, color-coded XYZ -> RGB.
plt.imshow(im)
extend = 1000

# BUG FIX: the original extended each segment via its slope, guarding the
# division with `dx + 1e-6` for X/Y lines but `dx + 1` for Z lines. Adding 1
# to a near-zero dx visibly distorts the drawn slope (a perfectly vertical
# segment would be drawn with slope dy/1). Extending along the segment's
# normalized direction vector needs no division guard and handles every
# orientation, including exactly vertical lines, uniformly.
def _plot_extended(segments, fmt):
    # Draw each two-point segment extended by `extend` pixels on both sides.
    for (x0, y0), (x1, y1) in segments:
        dx, dy = x1 - x0, y1 - y0
        length = (dx * dx + dy * dy) ** 0.5  # labeled points are distinct, so > 0
        ux, uy = dx / length, dy / length    # unit direction of the segment
        plt.plot((x0 - extend * ux, x0 + extend * ux),
                 (y0 - extend * uy, y0 + extend * uy), fmt)

_plot_extended(x_para, 'r-')
_plot_extended(y_para, 'g-')
_plot_extended(z_para, 'b-')
plt.show()
# Use your brain power to convince yourself where the vanishing points should be.
# Compute the vanishing points using Bob Collins' second-moment method.
def get_vanishing_points(lines):
    """Estimate the vanishing point of a family of parallel image lines.

    Parameters
    ----------
    lines : sequence of [(x0, y0), (x1, y1)]
        Each entry is one image line given by two pixel points.

    Returns
    -------
    np.ndarray
        Homogeneous vanishing point [x, y, 1] (last component normalized).
    """
    moment = np.zeros((3, 3))
    for p0, p1 in lines:
        # float64 instead of float32: with ~600-px coordinates the moment
        # entries reach ~1e11, beyond float32's ~7 significant digits.
        e0 = np.array([*p0, 1], dtype=np.float64)
        e1 = np.array([*p1, 1], dtype=np.float64)
        # Homogeneous line through the two points.
        line = np.cross(e0, e1)[:, None]
        moment += line @ line.T
    # The best-fit common intersection is the eigenvector of the smallest
    # eigenvalue. `moment` is symmetric, so use eigh: unlike eig it
    # guarantees real, ascending-ordered results (eig may return complex
    # values with tiny imaginary noise and arbitrary ordering).
    w, v = np.linalg.eigh(moment)
    vp = v[:, np.argmin(w)]
    return vp / vp[2]
# Estimate one vanishing point per world axis from the labeled lines.
vx = get_vanishing_points(x_para)
vy = get_vanishing_points(y_para)
vz = get_vanishing_points(z_para)
# Sanity-check: draw a line from each vanishing point to one endpoint of
# every labeled segment (XYZ axes shown as RGB).
plt.imshow(im)
for vp, segments, fmt in ((vx, x_para, 'r-'),
                          (vy, y_para, 'g-'),
                          (vz, z_para, 'b-')):
    for seg in segments:
        plt.plot((vp[0], seg[1][0]), (vp[1], seg[1][1]), fmt)
plt.show()
# These vanishing points should match your intuition.
# Each pair of vanishing points spans a vanishing line (homogeneous coords).
lxy = np.cross(vx, vy)
lxz = np.cross(vx, vz)
lyz = np.cross(vy, vz)
# Image of the chosen world origin: the bottom center of the box.
origin = np.array([393, 539, 1])
## Projection matrix
# Reference lengths along each axis. These can come from real measurement
# or be chosen arbitrarily -- they only fix the world scale of each axis.
x_ref_length = 200
y_ref_length = 400
z_ref_length = 100
# Homogeneous image positions of the reference points: each lies
# *_ref_length away from the origin along its axis.
ref_x = [607, 425, 1]
ref_y = [173, 365, 1]
ref_z = [396, 398, 1]
# Compute the per-axis scales of the projection matrix from the reference
# lengths (cross-ratio construction from the single-view-metrology slides).
# NOTE: ref_* are plain lists -- the subtractions work because the ndarray
# operand (origin / vx / vy / vz) broadcasts them to arrays. The X and Y
# scales average the estimate over both image components ([0:2]).
ax = ((ref_x-origin)[0:2] / (vx-ref_x)[0:2] / x_ref_length).mean()
ay = ((ref_y-origin)[0:2] / (vy-ref_y)[0:2] / y_ref_length).mean()
# Z uses only the y-component ([1:2]): near-vertical segments make the
# x-component hard to label precisely.
az = ((ref_z-origin)[1:2] / (vz-ref_z)[1:2] / z_ref_length).mean() # Not so stable in x -- hard to label
# Projection matrix P = [ax*vx | ay*vy | az*vz | origin], shape 3x4.
proj = np.stack([vx*ax, vy*ay, vz*az, origin], 1)
# Helper to recover one world coordinate from two image points, following
# the notation in Desmond & Stanley's single-view-metrology writeup.
def get_coord(o, I, b, t, v, a):
    """Measure the coordinate of image point `t` relative to base point `b`.

    o : homogeneous image of the world origin (ndarray)
    I : vanishing line of the reference plane (homogeneous ndarray)
    b : image of the base point, (x, y) pixel pair
    t : image of the target point, (x, y) pixel pair
    v : vanishing point of the measured axis (homogeneous ndarray)
    a : scale of that axis in the projection matrix
    """
    base = np.array([*b, 1])
    target = np.array([*t, 1])
    numerator = o.dot(I) * np.linalg.norm(np.cross(base, target))
    denominator = base.dot(I) * np.linalg.norm(np.cross(v, target))
    return -numerator / denominator / a
# Co-z method: measure a point's height (Z) above the ground plane.
# b - image position of a point on the ground plane. This point must share
#     the same world (X, Y) as the point being measured, i.e. lie directly
#     below it.
# t - image position of the point whose height is measured.
# Relies on the module-level origin, ground vanishing line lxy, Z vanishing
# point vz and Z scale az computed above.
def get_z_co_z(b, t):
    return get_coord(origin, lxy, b, t, vz, az)
# Co-plane method: build the image -> plane homography for the horizontal
# plane at height z, so world (X, Y) can be read off for points on it.
# proj - the 3x4 projection matrix
# z    - height of the reference plane above the ground
def get_h_co_plane(proj, z):
    # Drop the Z column of P and fold the plane height into the translation
    # column: [p1 | p2 | p4 + z*az*vz] maps the plane into the image.
    lifted_origin = proj[:, 3] + (az * vz * z)
    plane_to_image = np.stack([proj[:, 0], proj[:, 1], lifted_origin], axis=1)
    # Invert so the returned homography maps image points onto the plane.
    return np.linalg.inv(plane_to_image)
## Now assume that we didn't know the 3D coordinates of the Sony box and we want to compute that
# Apply the plane homography to an image point, then attach the known
# height of the plane as the Z coordinate.
# h - image-to-plane homography (from get_h_co_plane)
# x - image point as an (x, y) pair
# z - height of the plane this point lies on
def get_3d(h, x, z):
    homogeneous = h @ [*x, 1]
    world_xy = homogeneous[:2] / homogeneous[2]  # dehomogenize
    return np.array([world_xy[0], world_xy[1], z])
# Step 1 (co-plane): map image points on the ground plane (z = 0) to world
# (X, Y) coordinates through the ground-plane homography.
h = get_h_co_plane(proj, 0)
box_bottom_center, box_bottom_left, box_bottom_right = (
    get_3d(h, pt, 0) for pt in ((393, 539), (173, 365), (607, 425)))
print('Coordinates of points on the bottom plane')
print(box_bottom_center, box_bottom_left, box_bottom_right)
# Step 2 (co-z): box height, measured from the bottom-center point straight
# up to the corresponding top-center point.
z_pi = get_z_co_z((393, 539), (390, 394))
print('Height of box:', z_pi)
# Step 3: repeat the co-plane step on the plane at the recovered height to
# get world coordinates of the top-side points.
h = get_h_co_plane(proj, z_pi)
box_top_center, box_top_left, box_top_right = (
    get_3d(h, pt, z_pi) for pt in ((390, 394), (162, 226), (617, 291)))
print('Coordinates of points on the top plane')
print(box_top_center, box_top_left, box_top_right)
# That was not very interesting since we already knew a lot about the box.
# Apply the same recipe (co-plane + co-z) to the alarm clock sitting on the
# same ground plane. The principle is identical.
h = get_h_co_plane(proj, 0)
clock_bottom_center, clock_bottom_left, clock_bottom_right = (
    get_3d(h, pt, 0) for pt in ((627, 217), (515, 188), (652, 189)))
print('Coordinates of points on the bottom side')
print(clock_bottom_center, clock_bottom_left, clock_bottom_right)
# Height of the clock via the co-z method.
z_pi = get_z_co_z((627, 217), (635, 124))
print('Height of alarm clock:', z_pi)
# Homography for the plane at the clock's top, then read off the top points.
h = get_h_co_plane(proj, z_pi)
clock_top_center, clock_top_left, clock_top_right = (
    get_3d(h, pt, z_pi) for pt in ((635, 124), (534, 95), (658, 106)))
print('Coordinates of points on the top side')
print(clock_top_center, clock_top_left, clock_top_right)
#Now we can compute 3D coordaintes all we want. Next up we can perform homography #to obtain textures. Textures can then be mapped onto the 3D model for better #visualization.
## Texture mapping using homographies
# Should be easy. Demo here for reference.
import cv2
def padded_warp_and_crop(im, src_pts, dst_pts):
    """Warp `im` by the src->dst homography and crop to the warped quad.

    im      - source image (H x W [x C] array)
    src_pts - >= 4 image points (x, y) outlining the face to rectify
    dst_pts - corresponding target-plane coordinates

    Returns the warped image, translated so the quad's bounding box starts
    at (0, 0) and sized just large enough to contain it.
    """
    h, _ = cv2.findHomography(np.array(src_pts), np.array(dst_pts))
    # Project the source corners to find the bounding box of the warped
    # quad. Bounds are taken from the projected points themselves -- the
    # original seeded them with the source-image dimensions, which is wrong
    # whenever the warped coordinates fall outside that range.
    xs, ys = [], []
    for c in src_pts:
        p = h @ np.array([*c, 1])
        p = p / p[2]  # dehomogenize
        xs.append(p[0])
        ys.append(p[1])
    min_x, max_x = min(xs), max(xs)
    min_y, max_y = min(ys), max(ys)
    # BUG FIX: translate by -min, not +min, so the bounding box's top-left
    # corner lands at the output origin; with +min the crop cuts off the
    # quad whenever min_x/min_y != 0.
    translation = np.array([[1, 0, -min_x], [0, 1, -min_y], [0, 0, 1]])
    ht = translation @ h
    # Round the output size up so truncation never clips the last pixel row/col.
    size = (int(np.ceil(max_x - min_x)), int(np.ceil(max_y - min_y)))
    return cv2.warpPerspective(im, ht, size)
# Extract the X-facing texture: the box face spanned by the X and Z axes.
# src = image corners of that face; dst = the face's world (X, Z)
# coordinates (components [0, 2] of the recovered 3D points), so the warp
# rectifies the face into its own plane.
text_x = padded_warp_and_crop(im, [(393,539),(390,394),(607,425),(617,291)],
                              [box_bottom_center[[0,2]],
                               box_top_center[[0,2]],
                               box_bottom_right[[0,2]],
                               box_top_right[[0,2]]])
plt.imshow(text_x)
plt.show()
# Y-facing texture: the face spanned by the Y and Z axes (components [1, 2]).
text_y = padded_warp_and_crop(im, [(393,539),(390,394),(173,365),(162,226)],
                              [box_bottom_center[[1,2]],
                               box_top_center[[1,2]],
                               box_bottom_left[[1,2]],
                               box_top_left[[1,2]]])
plt.imshow(text_y)
plt.show()
# The top face needs a fourth point: recover the far top corner's 3D
# coordinates first, using the same co-z + co-plane steps as before.
top_far_im = (377, 137)
z_pi = get_z_co_z((393,539), (390,394))
h = get_h_co_plane(proj, z_pi)
box_top_far = get_3d(h, top_far_im, z_pi)
# Top (Z-facing) texture: the face spanned by X and Y (components [0, 1]).
text_z = padded_warp_and_crop(im, [(390,394),(162,226),(617,291),top_far_im],
                              [box_top_center[[0,1]],
                               box_top_left[[0,1]],
                               box_top_right[[0,1]],
                               box_top_far[[0,1]]])
plt.imshow(text_z)
plt.show()
通过labelme先预标注实现的:
https://codeload.github.com/kalyanghosh/3D-Reconstruction-using-Single-View-Metrology/zip/master
python实现完成流程的代码:
https://codeload.github.com/mcnuggets-lab/cv-project3/zip/master