Contents
[2.1 Plotting 3D Data with Matplotlib, Drawing Epipoles and Epipolar Lines, and Triangulation](#2.1 Plotting 3D Data with Matplotlib, Drawing Epipoles and Epipolar Lines, and Triangulation)
[2.2 Multi-View 3D Reconstruction](#2.2 Multi-View 3D Reconstruction)
[2.3 Multi-View 3D Reconstruction of a Photographed Object (a Mouse)](#2.3 Multi-View 3D Reconstruction of a Photographed Object (a Mouse))
1. Experiment Content
- Study epipolar geometry, the fundamental matrix, the essential matrix, and related concepts; photograph an object from several viewpoints, build a 3D model with SFM (structure from motion), and visualize it (the key relations are summarized below).
- Output the fundamental matrix, the essential matrix, the projection matrices (camera poses), and the coordinates of some of the reconstructed 3D points.
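For reference, these quantities are connected as follows, stated in the point-ordering convention used by the sfm.py code below: for a correspondence $\mathbf{x}_1 \leftrightarrow \mathbf{x}_2$ between two views with intrinsic matrices $K_1$ and $K_2$,

$$\mathbf{x}_1^{\top} F\, \mathbf{x}_2 = 0, \qquad \hat{\mathbf{x}}_1^{\top} E\, \hat{\mathbf{x}}_2 = 0 \ \text{ with } \ \hat{\mathbf{x}}_i = K_i^{-1}\mathbf{x}_i, \qquad E = K_1^{\top} F\, K_2.$$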
2. Experiment Process
2.1 Plotting 3D Data with Matplotlib, Drawing Epipoles and Epipolar Lines, and Triangulation
(1) sfm.py: this file implements a set of algorithms commonly used in stereo vision and 3D reconstruction: estimation of the fundamental matrix and the essential matrix, triangulation, and RANSAC (random sample consensus) fitting.
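As a reading aid for compute_fundamental below: writing $\mathbf{x}_1=(u_1,v_1,w_1)^{\top}$, $\mathbf{x}_2=(u_2,v_2,w_2)^{\top}$ and flattening $F$ row-wise into $f \in \mathbb{R}^9$, each correspondence contributes one linear constraint,

$$\mathbf{x}_1^{\top} F\, \mathbf{x}_2 = 0 \iff (u_1u_2,\; u_1v_2,\; u_1w_2,\; v_1u_2,\; v_1v_2,\; v_1w_2,\; w_1u_2,\; w_1v_2,\; w_1w_2)\, f = 0,$$

so $n \ge 8$ points give a homogeneous system $Af = 0$, solved by taking the right singular vector for the smallest singular value; $F$ is then forced to rank 2 by zeroing its smallest singular value.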
```python
import numpy as np
from matplotlib.pyplot import plot
from numpy import diag, dot, linspace, mean, sqrt, std, vstack, zeros
from scipy import linalg


# Compute the fundamental matrix with the (unnormalized) eight-point algorithm.
def compute_fundamental(x1, x2):
    n = x1.shape[1]
    if x2.shape[1] != n:
        raise ValueError("Number of points don't match.")
    # Build the constraint matrix: one row per correspondence.
    A = zeros((n, 9))
    for i in range(n):
        A[i] = [x1[0, i] * x2[0, i], x1[0, i] * x2[1, i], x1[0, i] * x2[2, i],
                x1[1, i] * x2[0, i], x1[1, i] * x2[1, i], x1[1, i] * x2[2, i],
                x1[2, i] * x2[0, i], x1[2, i] * x2[1, i], x1[2, i] * x2[2, i]]
    # Linear least-squares solution: last right singular vector of A.
    U, S, V = linalg.svd(A)
    F = V[-1].reshape(3, 3)
    # Enforce the rank-2 constraint by zeroing the smallest singular value.
    U, S, V = linalg.svd(F)
    S[2] = 0
    F = dot(U, dot(diag(S), V))
    return F


# Compute the epipole (the null vector of F).
def compute_epipole(F):
    U, S, V = linalg.svd(F)
    e = V[-1]
    return e / e[2]


# Plot the epipolar line of x in the image im, given F.
def plot_epipolar_line(im, F, x, epipole=None, show_epipole=True):
    m, n = im.shape[:2]
    line = dot(F, x)
    # Sample the line and keep only the points that fall inside the image.
    t = linspace(0, n, 100)
    lt = np.array([(line[2] + line[0] * tt) / (-line[1]) for tt in t])
    ndx = (lt >= 0) & (lt < m)
    plot(t[ndx], lt[ndx], linewidth=2)
    if show_epipole:
        if epipole is None:
            epipole = compute_epipole(F)
        plot(epipole[0] / epipole[2], epipole[1] / epipole[2], 'r*')


# Triangulate a single point pair with the DLT method.
def triangle_point(x1, x2, P1, P2):
    M = zeros((6, 6))
    M[:3, :4] = P1
    M[3:, :4] = P2
    M[:3, 4] = -x1
    M[3:, 5] = -x2
    U, S, V = linalg.svd(M)
    X = V[-1, :4]
    return X / X[3]


# Triangulate all point pairs.
def triangulate(x1, x2, P1, P2):
    n = x1.shape[1]
    if x2.shape[1] != n:
        raise ValueError("Number of points don't match.")
    X = [triangle_point(x1[:, i], x2[:, i], P1, P2) for i in range(n)]
    return np.array(X).T


# Compute the camera (projection) matrix from 2D-3D point correspondences.
def compute_P(x, X):
    n = x.shape[1]
    if X.shape[1] != n:
        raise ValueError("Number of points don't match.")
    M = zeros((3 * n, 12 + n))
    for i in range(n):
        M[3 * i, 0:4] = X[:, i]
        M[3 * i + 1, 4:8] = X[:, i]
        M[3 * i + 2, 8:12] = X[:, i]
        M[3 * i:3 * i + 3, i + 12] = -x[:, i]
    U, S, V = linalg.svd(M)
    return V[-1, :12].reshape((3, 4))


# Compute the second camera matrix from the fundamental matrix.
def compute_P_from_fundamental(F):
    e = compute_epipole(F.T)  # left epipole
    Te = skew(e)
    return vstack((dot(Te, F.T).T, e)).T


# Skew-symmetric matrix such that skew(a) @ v == cross(a, v).
def skew(a):
    return np.array([[0, -a[2], a[1]], [a[2], 0, -a[0]], [-a[1], a[0], 0]])


# Compute the four possible second camera matrices from the essential matrix.
def compute_P_from_essential(E):
    U, S, V = linalg.svd(E)
    if np.linalg.det(dot(U, V)) < 0:
        V = -V
    # Force the two nonzero singular values to be equal (a proper essential matrix).
    E = dot(U, dot(diag([1, 1, 0]), V))
    Z = skew([0, 0, -1])
    W = np.array([[0, -1, 0], [1, 0, 0], [0, 0, 1]])
    # The four solutions (Hartley & Zisserman, p. 258).
    P2 = [vstack((dot(U, dot(W, V)).T, U[:, 2])).T,
          vstack((dot(U, dot(W, V)).T, -U[:, 2])).T,
          vstack((dot(U, dot(W.T, V)).T, U[:, 2])).T,
          vstack((dot(U, dot(W.T, V)).T, -U[:, 2])).T]
    return P2


# Normalized eight-point algorithm for the fundamental matrix.
def compute_fundamental_normalized(x1, x2):
    n = x1.shape[1]
    if x2.shape[1] != n:
        raise ValueError("Number of points don't match.")
    # Normalize image coordinates (zero mean, fixed scale).
    x1 = x1 / x1[2]
    mean_1 = mean(x1[:2], axis=1)
    S1 = sqrt(2) / std(x1[:2])
    T1 = np.array([[S1, 0, -S1 * mean_1[0]], [0, S1, -S1 * mean_1[1]], [0, 0, 1]])
    x1 = dot(T1, x1)
    x2 = x2 / x2[2]
    mean_2 = mean(x2[:2], axis=1)
    S2 = sqrt(2) / std(x2[:2])
    T2 = np.array([[S2, 0, -S2 * mean_2[0]], [0, S2, -S2 * mean_2[1]], [0, 0, 1]])
    x2 = dot(T2, x2)
    # Compute F in normalized coordinates, then undo the normalization.
    F = compute_fundamental(x1, x2)
    F = dot(T1.T, dot(F, T2))
    return F / F[2, 2]


# RANSAC model for fitting the fundamental matrix.
class RansacModel(object):
    def __init__(self, debug=False):
        self.debug = debug

    def fit(self, data):
        # Estimate F from eight point pairs.
        data = data.T
        x1 = data[:3, :8]
        x2 = data[3:, :8]
        F = compute_fundamental_normalized(x1, x2)
        return F

    def get_error(self, data, F):
        # Sampson distance as the error measure.
        data = data.T
        x1 = data[:3]
        x2 = data[3:]
        Fx1 = dot(F, x1)
        Fx2 = dot(F, x2)
        denom = Fx1[0]**2 + Fx1[1]**2 + Fx2[0]**2 + Fx2[1]**2
        err = (diag(dot(x1.T, dot(F, x2))))**2 / denom
        return err


# Robustly estimate F with RANSAC; returns F and the inlier indices.
def F_from_ransac(x1, x2, model, maxiter=5000, match_threshold=1e-6):
    import ransac
    data = vstack((x1, x2))
    F, ransac_data = ransac.ransac(data.T, model, 8, maxiter, match_threshold, 10,
                                   return_all=True)
    return F, ransac_data['inliers']
```
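The scripts below also import camera and homography modules that are not listed in this report; they are assumed to follow the PCV (Programming Computer Vision with Python) conventions, and the ransac module imported inside F_from_ransac is assumed to be the standalone ransac.py that PCV uses. A minimal sketch of the interface the scripts rely on (an assumption, not the full modules):

```python
import numpy as np

class Camera(object):
    """Pinhole camera wrapping a 3x4 projection matrix P (PCV-style sketch)."""
    def __init__(self, P):
        self.P = P

    def project(self, X):
        # Project homogeneous 3D points X (4xN) and normalize the result.
        x = np.dot(self.P, X)
        for i in range(3):
            x[i] /= x[2]
        return x

def make_homog(points):
    """Convert a 2xN (or 3xN) point array to homogeneous coordinates."""
    return np.vstack((points, np.ones((1, points.shape[1]))))
```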
(2) load_vggdata.py: this script loads the VGG multi-view dataset (images, 2D corner points, 3D points, view correspondences, and camera matrices) and drives the five steps demonstrated below.
```python
from PIL import Image
from matplotlib import pyplot
from matplotlib.pyplot import imshow, plot, axis, show
from numpy import genfromtxt, loadtxt, ones, vstack, where
import numpy as np
from mpl_toolkits.mplot3d import axes3d  # noqa: F401 (registers the 3d projection)
import camera
import sfm

# Load the two example images.
image1 = Image.open("D:/Computer vision/3d/images/001.jpg")
im1 = np.array(image1)
image2 = Image.open("D:/Computer vision/3d/images/002.jpg")
im2 = np.array(image2)

# 2D image points, 3D points, view correspondences, and camera matrices.
points2D = [loadtxt('D:/Computer vision/3d/2D/00' + str(i + 1) + '.corners').T for i in range(3)]
points3D = loadtxt('D:/Computer vision/3d/3D/p3d').T
# Missing correspondences ('*' in the file) are filled with -1 so the >= 0 checks below work.
corr = genfromtxt('D:/Computer vision/3d/2D/nview-corners', dtype='int',
                  missing_values='*', filling_values=-1)
P = [camera.Camera(loadtxt(f'D:/Computer vision/3d/2D/00{i+1}.P')) for i in range(3)]

def process1():
    # Show the measured image points and the projected 3D points on view 1.
    X = vstack((points3D, ones(points3D.shape[1])))
    x = P[0].project(X)
    pyplot.figure()
    imshow(im1)
    plot(points2D[0][0], points2D[0][1], '*')
    axis('off')
    pyplot.figure()
    imshow(im1)
    plot(x[0], x[1], 'r.')
    axis('off')
    show()

def process2():
    # Plot the dataset's 3D points with Matplotlib.
    fig = pyplot.figure()
    ax = fig.add_subplot(projection='3d')
    ax.plot(points3D[0], points3D[1], points3D[2], 'k.')
    show()

def process3():
    # Estimate F from the correspondences and draw epipolar lines and the epipole.
    ndx = (corr[:, 0] >= 0) & (corr[:, 1] >= 0)
    x1 = points2D[0][:, corr[ndx, 0]]
    x1 = vstack((x1, ones(x1.shape[1])))
    x2 = points2D[1][:, corr[ndx, 1]]
    x2 = vstack((x2, ones(x2.shape[1])))
    F = sfm.compute_fundamental(x1, x2)
    e = sfm.compute_epipole(F)
    pyplot.figure()
    imshow(im1)
    for i in range(5):
        sfm.plot_epipolar_line(im1, F, x2[:, i], e, False)
    axis('off')
    pyplot.figure()
    imshow(im2)
    for i in range(5):
        plot(x2[0, i], x2[1, i], 'o')
    axis('off')
    show()

def process4():
    # Triangulate the matched points and compare with the ground-truth 3D points.
    ndx = (corr[:, 0] >= 0) & (corr[:, 1] >= 0)
    x1 = points2D[0][:, corr[ndx, 0]]
    x1 = vstack((x1, ones(x1.shape[1])))
    x2 = points2D[1][:, corr[ndx, 1]]
    x2 = vstack((x2, ones(x2.shape[1])))
    Xtrue = points3D[:, ndx]
    Xtrue = vstack((Xtrue, ones(Xtrue.shape[1])))
    Xest = sfm.triangulate(x1, x2, P[0].P, P[1].P)
    print(Xest[:, :3])
    print(Xtrue[:, :3])
    fig1 = pyplot.figure()
    ax1 = fig1.add_subplot(projection='3d')
    ax1.plot(Xest[0], Xest[1], Xest[2], 'ko')
    ax1.plot(Xtrue[0], Xtrue[1], Xtrue[2], 'r.')
    show()

def process5():
    # Estimate the camera matrix of view 1 from 2D-3D correspondences.
    corr1 = corr[:, 0]
    ndx3D = where(corr1 >= 0)[0]
    ndx2D = corr1[ndx3D]
    x = points2D[0][:, ndx2D]
    x = vstack((x, ones(x.shape[1])))
    X = points3D[:, ndx3D]
    X = vstack((X, ones(X.shape[1])))
    Pest = camera.Camera(sfm.compute_P(x, X))
    print(Pest.P / Pest.P[2, 3])
    print(P[0].P / P[0].P[2, 3])
    xest = Pest.project(X)
    pyplot.figure()
    imshow(im1)
    plot(x[0], x[1], 'bo')
    plot(xest[0], xest[1], 'r.')
    axis('off')
    show()

if __name__ == '__main__':
    process1()
    process2()
    process3()
    process4()
    process5()
```
(3) Results
a. Figure 1 shows a view with its measured image points and the projected 3D points.
Figure 1
b. Figure 2 shows the dataset's 3D points plotted with Matplotlib.
Figure 2
c. Figure 3 shows the epipolar lines and the epipole drawn on the images.
Figure 3
d. The 3D positions of the image points are recovered by triangulation: the points are triangulated from the first two views, the homogeneous coordinates of the first three points are printed (Figure 4), and the estimated 3D points are plotted next to the ground-truth points (Figure 5); the per-point linear system is sketched just after Figure 5.
Figure 4
Figure 5
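For reference, triangle_point solves, for each correspondence, the homogeneous linear system

$$\begin{bmatrix} P_1 & -\mathbf{x}_1 & \mathbf{0} \\ P_2 & \mathbf{0} & -\mathbf{x}_2 \end{bmatrix} \begin{bmatrix} \mathbf{X} \\ \lambda_1 \\ \lambda_2 \end{bmatrix} = \mathbf{0},$$

where $\lambda_1, \lambda_2$ are the unknown projective depths; the SVD yields the singular vector for the smallest singular value, and $\mathbf{X}$ is dehomogenized by its fourth component.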
e. The camera matrix is computed from the 3D points: the estimated camera matrix is printed in normalized form (Figure 6), the 3D points are then projected with the estimated matrix, and the result is shown in Figure 7.
Figure 6
Figure 7
2.2 Multi-View 3D Reconstruction
(1) module_test.py: this script implements a complete 3D reconstruction pipeline: feature extraction and matching, essential-matrix estimation, projection-matrix (pose) computation, triangulation, and visualization of the 3D points.
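For reference, the pose-disambiguation step in the script relies on the standard decomposition implemented by compute_P_from_essential: with $E = U \operatorname{diag}(1,1,0) V^{\top}$, $W = \begin{bmatrix} 0 & -1 & 0 \\ 1 & 0 & 0 \\ 0 & 0 & 1 \end{bmatrix}$, and $u_3$ the third column of $U$, the second camera is one of

$$P_2 \in \left\{ [\,U W V^{\top} \mid u_3\,],\; [\,U W V^{\top} \mid -u_3\,],\; [\,U W^{\top} V^{\top} \mid u_3\,],\; [\,U W^{\top} V^{\top} \mid -u_3\,] \right\},$$

and only one of the four places the triangulated points in front of both cameras, which is what the positive-depth counting loop in the code selects.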
```python
from PIL import Image
import cv2
from pylab import *
import numpy as np
import camera
import homography
import sfm
import os

# Environment setting and camera intrinsic matrices (one per image).
os.environ['KMP_DUPLICATE_LIB_OK'] = 'True'
K = array([[2142.86273, 146.047950, 1409.66408],
           [-313.655714, 2372.61671, 646.528794],
           [-0.206318753, -0.0289928473, 0.978055206]])
K1 = array([[1887.51493, 152.070907, 1736.23309],
            [-438.905450, 2363.03101, 607.565547],
            [-0.360084369, -0.0406467998, 0.932033843]])
K2 = array([[2260.20652, 42.1635948, 1220.66876],
            [-120.310166, 2397.29053, 619.849171],
            [-0.123407237, -0.000706258860, 0.992355861]])

# Load the images and extract ORB features.
im1 = array(Image.open('D:\\Computer vision\\3d\\Alcatraz_courtyard\\San_Francisco_2313.jpg'))
im2 = array(Image.open('D:\\Computer vision\\3d\\Alcatraz_courtyard\\San_Francisco_2314.jpg'))
im3 = array(Image.open('D:\\Computer vision\\3d\\Alcatraz_courtyard\\San_Francisco_2364.jpg'))
orb = cv2.ORB_create()
kp1, des1 = orb.detectAndCompute(cv2.cvtColor(im1, cv2.COLOR_RGB2GRAY), None)
kp2, des2 = orb.detectAndCompute(cv2.cvtColor(im2, cv2.COLOR_RGB2GRAY), None)
kp3, des3 = orb.detectAndCompute(cv2.cvtColor(im3, cv2.COLOR_RGB2GRAY), None)
l1 = np.array([kp.pt for kp in kp1]).T
l2 = np.array([kp.pt for kp in kp2]).T
l3 = np.array([kp.pt for kp in kp3]).T
print("l1 shape:", l1.shape)
print("l2 shape:", l2.shape)
print("l3 shape:", l3.shape)

# Brute-force (BFMatcher) Hamming matching of the binary ORB descriptors.
bf = cv2.BFMatcher(cv2.NORM_HAMMING, crossCheck=True)
matches = bf.match(des1, des2)
matches = sorted(matches, key=lambda x: x.distance)
matches3 = bf.match(des1, des3)
matches3 = sorted(matches3, key=lambda x: x.distance)
ndx = np.array([m.queryIdx for m in matches])
ndx2 = np.array([m.trainIdx for m in matches])
ndx3 = np.array([m.queryIdx for m in matches3])
ndx3_train = np.array([m.trainIdx for m in matches3])
a = l1[:, ndx]
b = l2[:, ndx2]
a3 = l1[:, ndx3]
b3 = l3[:, ndx3_train]
print("a:", a.shape)
print("b:", b.shape)
print("a3:", a3.shape)
print("b3:", b3.shape)

# Convert the matched pixel coordinates to homogeneous coordinates.
x1 = homography.make_homog(l1[:, ndx])
x2 = homography.make_homog(l2[:, ndx2])
print("Shape of x1 before normalization:", x1.shape)
print("Shape of x2 before normalization:", x2.shape)
# The inverse intrinsics map pixel coordinates to normalized image coordinates.
inv_K = np.linalg.inv(K)
inv_K1 = np.linalg.inv(K1)
inv_K2 = np.linalg.inv(K2)
x1_new = homography.make_homog(l1[:, ndx3])
x3_new = homography.make_homog(l3[:, ndx3_train])
print("Shape of x1_new before normalization:", x1_new.shape)
print("Shape of x3_new before normalization:", x3_new.shape)
x1n = dot(inv_K, x1)
x2n = dot(inv_K1, x2)
x1n_new = dot(inv_K, x1_new)
x3n_new = dot(inv_K2, x3_new)

# In normalized coordinates, the RANSAC fundamental-matrix fit yields the
# essential matrix directly.
model = sfm.RansacModel()
E, inliers = sfm.F_from_ransac(x1n, x2n, model)
E_new, inliers_new = sfm.F_from_ransac(x1n_new, x3n_new, model)
print("Essential matrix E:", E)

# The first camera is the reference; E gives four candidates for the others.
P1 = array([[1, 0, 0, 0], [0, 1, 0, 0], [0, 0, 1, 0]])
P2 = sfm.compute_P_from_essential(E)
P3 = sfm.compute_P_from_essential(E_new)

# Pick, for each pair, the candidate pose that puts the most triangulated
# points in front of both cameras (positive depth).
ind = 0
maxres = 0
ind_new = 0
maxres_new = 0
for i in range(4):
    # Triangulate the inlier matches under candidate pose i.
    X = sfm.triangulate(x1n[:, inliers], x2n[:, inliers], P1, P2[i])
    d1 = dot(P1, X)[2]
    d2 = dot(P2[i], X)[2]
    X_new = sfm.triangulate(x1n_new[:, inliers_new], x3n_new[:, inliers_new], P1, P3[i])
    d1_new = dot(P1, X_new)[2]
    d3_new = dot(P3[i], X_new)[2]
    if sum(d1 > 0) + sum(d2 > 0) > maxres:
        maxres = sum(d1 > 0) + sum(d2 > 0)
        ind = i
        infront = (d1 > 0) & (d2 > 0)
    if sum(d1_new > 0) + sum(d3_new > 0) > maxres_new:
        maxres_new = sum(d1_new > 0) + sum(d3_new > 0)
        ind_new = i
        infront_new = (d1_new > 0) & (d3_new > 0)

# Re-triangulate with the chosen poses and keep only the points in front.
X = sfm.triangulate(x1n[:, inliers], x2n[:, inliers], P1, P2[ind])
X = X[:, infront]
X_new = sfm.triangulate(x1n_new[:, inliers_new], x3n_new[:, inliers_new], P1, P3[ind_new])
X_new = X_new[:, infront_new]

# 3D scatter plot of the reconstructed points.
fig = figure()
ax = fig.add_subplot(projection='3d')
ax.plot(-X[0], X[1], X[2], 'k.')
ax.plot(-X_new[0], X_new[1], X_new[2], 'r.')
axis('off')

# Project the 3D points back into the images.
cam1 = camera.Camera(P1)
cam2 = camera.Camera(P2[ind])
cam3 = camera.Camera(P3[ind_new])
x1p = cam1.project(X)
x2p = cam2.project(X)
x3p = cam3.project(X_new)
print("Camera projection matrix P1:\n", P1)
print("Camera projection matrix P2:\n", P2[ind])
print("Camera projection matrix P3:\n", P3[ind_new])
print("Some 3D point coordinates X:\n", X[:, :4])
print("Some 3D point coordinates X_new:\n", X_new[:, :4])
# Map the projections back to pixel coordinates and dehomogenize.
x1p = dot(K, x1p)
x1p /= x1p[2]
x2p = dot(K1, x2p)
x2p /= x2p[2]
x3p = dot(K2, x3p)
x3p /= x3p[2]

figure()
imshow(im1)
gray()
plot(x1p[0], x1p[1], 'o')
plot(l1[0], l1[1], 'r.')
axis('off')
figure()
imshow(im2)
gray()
plot(x2p[0], x2p[1], 'o')
plot(l2[0], l2[1], 'r.')
axis('off')
figure()
imshow(im3)
gray()
plot(x3p[0], x3p[1], 'o')
plot(l3[0], l3[1], 'r.')
axis('off')
show()
```
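A quick sanity check one can add after the estimation above (a sketch; it assumes E, x1n, x2n, and inliers as defined in the script): the inliers should nearly satisfy the epipolar constraint in the point-ordering convention of sfm.py.

```python
# Sketch: inlier residuals of the epipolar constraint x1^T E x2 = 0
# (the convention used by sfm.compute_fundamental); should be near zero.
res = np.abs(np.sum(x1n[:, inliers] * (E @ x2n[:, inliers]), axis=0))
print("median |x1^T E x2| over inliers:", np.median(res))
```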
(2) Results
a. Figure 8 shows the printed essential matrix E.
Figure 8
b. Figure 9 shows the printed camera projection matrices; three images from different views were used in the experiment.
Figure 9
c. Figure 10 shows some of the printed 3D coordinates.
Figure 10
d. Figures 11 and 12 show the visualization: the projected 2D points are drawn on the original images and the 3D points are plotted with Matplotlib, although the quality of the 3D reconstruction is poor.
Figure 11
Figure 12
2.3 Multi-View 3D Reconstruction of a Photographed Object (a Mouse)
(1) SFM.py: this script runs an OpenCV two-view pipeline on photos of a mouse: ORB feature matching, fundamental- and essential-matrix estimation, pose recovery, and triangulation of the matched points.
```python
import cv2
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D  # noqa: F401 (registers the 3d projection)

# The photos of the mouse taken from different viewpoints.
images = ["D:\\Computer vision\\1118\\picture1.jpg",
          "D:\\Computer vision\\1118\\picture2.jpg",
          "D:\\Computer vision\\1118\\picture6.jpg",
          "D:\\Computer vision\\1118\\picture5.jpg",
          "D:\\Computer vision\\1118\\picture10.jpg",
          "D:\\Computer vision\\1118\\picture8.jpg",
          "D:\\Computer vision\\1118\\picture7.jpg",
          "D:\\Computer vision\\1118\\picture9.jpg"]
imgs = [cv2.imread(img) for img in images]
gray_imgs = [cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) for img in imgs]

# ORB features and brute-force Hamming matching between the first two views.
orb = cv2.ORB_create()
keypoints_and_descriptors = [orb.detectAndCompute(img, None) for img in gray_imgs]
keypoints = [kp_desc[0] for kp_desc in keypoints_and_descriptors]
descriptors = [kp_desc[1] for kp_desc in keypoints_and_descriptors]
bf = cv2.BFMatcher(cv2.NORM_HAMMING, crossCheck=True)
matches = bf.match(descriptors[0], descriptors[1])
good_matches = sorted(matches, key=lambda x: x.distance)[:50]
pts1 = np.float32([keypoints[0][m.queryIdx].pt for m in good_matches])
pts2 = np.float32([keypoints[1][m.trainIdx].pt for m in good_matches])
img_matches = cv2.drawMatches(imgs[0], keypoints[0], imgs[1], keypoints[1], good_matches, None)
cv2.imshow('Matches', img_matches)
cv2.waitKey(0)
cv2.destroyAllWindows()

# Intrinsic matrix (float, as required by the OpenCV calls below).
K = np.array([[2394, 0, 932], [0, 2398, 628], [0, 0, 1]], dtype=np.float64)
# Normalized image coordinates: K^-1 applied, no distortion model.
pts1_normalized = cv2.undistortPoints(np.expand_dims(pts1, axis=1), K, None)
pts2_normalized = cv2.undistortPoints(np.expand_dims(pts2, axis=1), K, None)

# Estimate F from the pixel coordinates; E = K^T F K only holds for an F
# computed in pixel coordinates.
F, mask = cv2.findFundamentalMat(pts1, pts2, cv2.FM_8POINT)
print("Fundamental matrix F:", F)
E = K.T @ F @ K
print("Essential matrix E:", E)

# Recover (R, t); the normalized points match recoverPose's default
# (identity) camera matrix.
_, R, t, mask = cv2.recoverPose(E, pts1_normalized, pts2_normalized)

# Triangulate with P1 = [I|0] and P2 = [R|t]; triangulatePoints expects 2xN arrays.
pts1_2xN = pts1_normalized.reshape(-1, 2).T
pts2_2xN = pts2_normalized.reshape(-1, 2).T
points4D = cv2.triangulatePoints(np.hstack((np.eye(3), np.zeros((3, 1)))),
                                 np.hstack((R, t)), pts1_2xN, pts2_2xN)
points3D = (points4D[:3] / points4D[3]).T  # Nx3, dehomogenized
print("3D Points:\n", points3D)

# Drop non-finite points before plotting.
valid_points = points3D[np.all(np.isfinite(points3D), axis=1)]
print("Number of valid points:", valid_points.shape[0])
print("Minimum:", valid_points.min(axis=0))
print("Maximum:", valid_points.max(axis=0))

# Scatter plot with 10% margins around the data range.
ax_lim = (valid_points.max(axis=0) - valid_points.min(axis=0)) * 0.1
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
ax.scatter(valid_points[:, 0], valid_points[:, 1], valid_points[:, 2])
ax.set_xlabel('X')
ax.set_ylabel('Y')
ax.set_zlabel('Z')
ax.set_xlim(valid_points[:, 0].min() - ax_lim[0], valid_points[:, 0].max() + ax_lim[0])
ax.set_ylim(valid_points[:, 1].min() - ax_lim[1], valid_points[:, 1].max() + ax_lim[1])
ax.set_zlim(valid_points[:, 2].min() - ax_lim[2], valid_points[:, 2].max() + ax_lim[2])
plt.show()
```
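As a side note, a more robust alternative to keeping the 50 best matches and running a plain eight-point fit would be to estimate E directly with RANSAC; this is a sketch under the same pts1, pts2, and K as above, not what was run in this experiment:

```python
# Sketch: RANSAC essential-matrix estimation; outlier matches are rejected
# automatically, and recoverPose can then work in pixel coordinates with K.
E2, inlier_mask = cv2.findEssentialMat(pts1, pts2, K, method=cv2.RANSAC,
                                       prob=0.999, threshold=1.0)
_, R2, t2, pose_mask = cv2.recoverPose(E2, pts1, pts2, K, mask=inlier_mask)
print("Matches kept as inliers:", int(inlier_mask.sum()))
```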
(2) Results
a. Figure 13 shows the printed fundamental matrix.
Figure 13
b. Figure 14 shows the printed essential matrix.
Figure 14
c. Figure 15 shows the output 3D point coordinates.
Figure 15
d. Figures 16 and 17 show the visualization of the 3D coordinates.
Figure 16
Figure 17
3. Experiment Summary
This experiment performed 3D reconstruction with SFM, but the results show that the reconstructed visualizations are of poor quality. Possible causes: 1. too few input images, which leaves the reconstruction under-constrained; 2. an inaccurately set calibration matrix K, which degrades the estimated geometry; 3. in the mouse reconstruction, 47 points were reported valid, yet far fewer are visible in the 3D plot, probably because the coordinate values span very different numeric ranges, so most points collapse together at the plot's scale; one possible mitigation is sketched below.
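Regarding the third cause, one possible mitigation (a minimal sketch, assuming valid_points as computed in SFM.py; the 2/98 percentile bounds are an arbitrary choice) is to trim the extreme coordinates before plotting so the remaining points do not collapse at the axis scale:

```python
import numpy as np

# Sketch: percentile-based trimming of outlying 3D points before plotting.
lo, hi = np.percentile(valid_points, [2, 98], axis=0)
keep = np.all((valid_points >= lo) & (valid_points <= hi), axis=1)
trimmed = valid_points[keep]
print("Points kept after trimming:", trimmed.shape[0])
```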