我正在尝试使用 OpenCV 在 Python 中实现一个基本的从运动结构到运动的管道,以便在给定两个图像中的对应点的情况下从两个图像生成点云。我设法从中获得 3D 点,但这些位置并没有真正意义。
import sys, cv2, numpy as np
from numpy import *
def valid_cameras(inliers1, inliers2, rot, trans):
# check if the point correspondences are in front of both images
rot_inv = rot
for first, second in zip(inliers1, inliers2):
first_z = np.dot(rot[0, :] - second[0]*rot[2, :], trans) / np.dot(rot[0, :] - second[0]*rot[2, :], second)
first_3d_point = np.array([first[0] * first_z, second[0] * first_z, first_z])
second_3d_point = np.dot(rot.T, first_3d_point) - np.dot(rot.T, trans)
if first_3d_point[2] < 0 or second_3d_point[2] < 0:
return False
return True
# points1 point2 are corresponding pixel in the two images that match computer with SIFT and RANSAC
def point_cloud(points1, points2):
F, inliers = cv2.findFundamentalMat(points2, points1, cv2.RANSAC)
mask = np.where( inliers.flatten() )
for x1, x2 in zip(np.int32(points1[mask]), np.int32(points2[mask])):
x1 = x1.tolist()
x2 = x2.tolist()
x1.append(1)
x2.append(1)
x1 = np.array(x1)
x2 = np.array(x2)
#print x1.T.dot(F.dot(x2)) should be approxiatemly 0.0
# FROM HERE
# Is this correct for iPhone 6?
focal = 4.89 # mm EFL
x, y = 2448, 3264
sx, sy = 24, 36
fx, fy = focal * x / sx, focal * y / sy
K = np.array([
[fx, 0.0, 0.0],
[0.0, fy, 0.0],
[0.0, 0.0, 1.0,],
])
K_inv = np.linalg.inv(K)
# Decompose into the essential matrix
E = K.T.dot(F).dot(K)
# Decompose essential matrix into R, t (See Hartley and Zisserman 9.13)
U, S, Vt = np.linalg.svd(E)
W = np.array([0.0, -1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0]).reshape(3, 3)
# print .flatten()
mask = mask[0].tolist()
inliers1, inliers2 = [], []
for i in range( len(mask) ):
if mask[i]:
# normalize and homogenize the image coordinates
inliers1.append(K_inv.dot([points1[i][0], points1[i][1], 1.0]))
inliers2.append(K_inv.dot([points2[i][0], points2[i][1], 1.0]))
# Determine the correct choice of second camera matrix
# only in one of the four configurations will all the points be in front of both cameras
# First choice: R = U * Wt * Vt, T = +u_3 (See Hartley Zisserman 9.19)
R = U.dot(W).dot(Vt)
T = U[:, 2]
if not valid_cameras(inliers1, inliers2, R, T):
# Second choice: R = U * W * Vt, T = -u_3
T = - U[:, 2]
if not valid_cameras(inliers1, inliers2, R, T):
# Third choice: R = U * Wt * Vt, T = u_3
R = U.dot(W.T).dot(Vt)
T = U[:, 2]
if not valid_cameras(inliers1, inliers2, R, T):
# Fourth choice: R = U * Wt * Vt, T = -u_3
T = - U[:, 2]
T = T.reshape(1, 3)
P1 = np.mat('1 0 0 0 ; 0 1 0 0 ; 0 0 1 0')
P2 = np.bmat([[R, T.T]])
points1 = np.hstack((points1, np.ones((points1.shape[0], 1))))
points2 = np.hstack((points2, np.ones((points2.shape[0], 1))))
X = cv2.triangulatePoints(P1[:3], P2[:3], points1.T[:2], points2.T[:2])
# Remember to divide out the 4th row. Make it homogeneous
X /= X[3]
# Recover the origin arrays from PX
x1 = dot(P1[:3],X)
x2 = dot(P2[:3],X)
# Again, put in homogeneous form before using them
x1 /= x1[2]
x2 /= x2[2]
print X # 3d points
我担心评论# FROM HERE 中的逻辑是否正确?特别是 iPhone 6 的相机矩阵是否正确,从 2d 对应计算 3d 点的逻辑是否正确?