Vins-Fusion之 SFM 滑窗内相机位姿及特征点3D估计(十三)

在上一篇中解决滑窗内参考帧和最新帧之间的旋转和平移问题,以参考帧作为基准帧,在本篇中求解滑窗内所有帧的位姿和3D点。

具体代码如下:

cpp 复制代码
// Core routine of the global SFM: incrementally recovers the world->camera /
// camera->world pose of every frame in the sliding window and triangulates
// the tracked feature points, then refines everything with a full BA.
//
// Parameters:
//   frame_num          - number of frames in the sliding window
//   q, T               - [out] per-frame camera->world rotation / translation
//   l                  - index of the reference frame (defines the world frame)
//   relative_R/T       - pose of the newest frame relative to frame l
//   sfm_f              - [in/out] feature tracks; triangulated 3D positions and
//                        the `state` flag are written back into each entry
//   sfm_tracked_points - [out] optimized 3D points, keyed by feature id
// Returns false if any PnP step fails or the final BA does not converge.
bool GlobalSFM::construct(int frame_num, Quaterniond* q, Vector3d* T, int l,
			  const Matrix3d relative_R, const Vector3d relative_T,
			  vector<SFMFeature> &sfm_f, map<int, Vector3d> &sfm_tracked_points)
{
	feature_num = sfm_f.size();
	// Frame l anchors the world frame: identity rotation, zero translation.
	q[l].w() = 1;
	q[l].x() = 0;
	q[l].y() = 0;
	q[l].z() = 0;
	T[l].setZero();
	// Newest frame pose (camera->world) from the relative pose w.r.t. frame l.
	q[frame_num - 1] = q[l] * Quaterniond(relative_R);
	T[frame_num - 1] = relative_T;

	// Per-frame world->camera quantities used for PnP / triangulation / BA.
	// NOTE(review): variable-length arrays are a GCC/Clang extension, not
	// standard C++; kept here to match the upstream VINS code.
	Matrix3d c_Rotation[frame_num];              // world->camera rotation
	Vector3d c_Translation[frame_num];           // world->camera translation
	Quaterniond c_Quat[frame_num];
	double c_rotation[frame_num][4];             // quaternion (w,x,y,z) blocks for Ceres
	double c_translation[frame_num][3];          // translation blocks for Ceres
	Eigen::Matrix<double, 3, 4> Pose[frame_num]; // projection [R|t], world->camera
	// q[l] is the camera->world rotation of frame l, so invert to get world->camera.
	c_Quat[l] = q[l].inverse();
	c_Rotation[l] = c_Quat[l].toRotationMatrix();
	c_Translation[l] = -1 * (c_Rotation[l] * T[l]);
	Pose[l].block<3, 3>(0, 0) = c_Rotation[l];
	Pose[l].block<3, 1>(0, 3) = c_Translation[l];

	c_Quat[frame_num - 1] = q[frame_num - 1].inverse();
	c_Rotation[frame_num - 1] = c_Quat[frame_num - 1].toRotationMatrix();
	c_Translation[frame_num - 1] = -1 * (c_Rotation[frame_num - 1] * T[frame_num - 1]);
	Pose[frame_num - 1].block<3, 3>(0, 0) = c_Rotation[frame_num - 1]; // Pose maps world -> camera
	Pose[frame_num - 1].block<3, 1>(0, 3) = c_Translation[frame_num - 1];

	// Forward pass: from the reference frame towards the newest frame.
	// Each frame's pose is solved by PnP (seeded with the previous frame),
	// then new points are triangulated against the newest frame.
	for (int i = l; i < frame_num - 1 ; i++)
	{
		if (i > l)
		{
			Matrix3d R_initial = c_Rotation[i - 1];
			Vector3d P_initial = c_Translation[i - 1];
			if(!solveFrameByPnP(R_initial, P_initial, i, sfm_f)) // forward solve
				return false;
			c_Rotation[i] = R_initial;
			c_Translation[i] = P_initial;
			c_Quat[i] = c_Rotation[i];
			Pose[i].block<3, 3>(0, 0) = c_Rotation[i];
			Pose[i].block<3, 1>(0, 3) = c_Translation[i];
		}
		triangulateTwoFrames(i, Pose[i], frame_num - 1, Pose[frame_num - 1], sfm_f);
	}
	// Extra triangulation of each intermediate frame against the reference frame.
	for (int i = l + 1; i < frame_num - 1; i++)
		triangulateTwoFrames(l, Pose[l], i, Pose[i], sfm_f);
	// Backward pass: frames before the reference frame, seeded with the next frame.
	for (int i = l - 1; i >= 0; i--)
	{
		//solve pnp
		Matrix3d R_initial = c_Rotation[i + 1];
		Vector3d P_initial = c_Translation[i + 1];
		if(!solveFrameByPnP(R_initial, P_initial, i, sfm_f)) // backward solve
			return false;
		c_Rotation[i] = R_initial;
		c_Translation[i] = P_initial;
		c_Quat[i] = c_Rotation[i];
		Pose[i].block<3, 3>(0, 0) = c_Rotation[i];
		Pose[i].block<3, 1>(0, 3) = c_Translation[i];
		//triangulate
		triangulateTwoFrames(i, Pose[i], l, Pose[l], sfm_f);
	}
	// Triangulate any remaining feature with at least two observations,
	// using its first and last observing frames for the widest baseline.
	for (int j = 0; j < feature_num; j++)
	{
		if (sfm_f[j].state == true)
			continue;
		if ((int)sfm_f[j].observation.size() >= 2)
		{
			Vector2d point0, point1;
			int frame_0 = sfm_f[j].observation[0].first;
			point0 = sfm_f[j].observation[0].second;
			int frame_1 = sfm_f[j].observation.back().first;
			point1 = sfm_f[j].observation.back().second;
			Vector3d point_3d;
			triangulatePoint(Pose[frame_0], Pose[frame_1], point0, point1, point_3d);
			sfm_f[j].state = true;
			sfm_f[j].position[0] = point_3d(0);
			sfm_f[j].position[1] = point_3d(1);
			sfm_f[j].position[2] = point_3d(2);
			//cout << "trangulated : " << frame_0 << " " << frame_1 << "  3d point : "  << j << "  " << point_3d.transpose() << endl;
		}		
	}
	// Poses and 3D points from PnP + triangulation accumulate error, so refine
	// everything jointly with a bundle adjustment.
	// Set up the optimization parameters.
	ceres::Problem problem;
	ceres::LocalParameterization* local_parameterization = new ceres::QuaternionParameterization();
	for (int i = 0; i < frame_num; i++)
	{
		c_translation[i][0] = c_Translation[i].x();
		c_translation[i][1] = c_Translation[i].y();
		c_translation[i][2] = c_Translation[i].z();
		c_rotation[i][0] = c_Quat[i].w();
		c_rotation[i][1] = c_Quat[i].x();
		c_rotation[i][2] = c_Quat[i].y();
		c_rotation[i][3] = c_Quat[i].z();
		problem.AddParameterBlock(c_rotation[i], 4, local_parameterization);
		problem.AddParameterBlock(c_translation[i], 3);
		// Fix the rotation of the reference frame l (world-frame gauge);
		// fixing the reference pose keeps the solution from drifting in space.
		if (i == l)
		{
			problem.SetParameterBlockConstant(c_rotation[i]);
		}
		// Fix the translations of frame l and the last frame (scale gauge).
		if (i == l || i == frame_num - 1)
		{
			problem.SetParameterBlockConstant(c_translation[i]);
		}
	}
	// Add one reprojection residual per (feature, observation) pair.
	for (int i = 0; i < feature_num; i++)
	{
		if (sfm_f[i].state != true) // skip points that were never triangulated
			continue;
		for (int j = 0; j < int(sfm_f[i].observation.size()); j++)
		{
			int l = sfm_f[i].observation[j].first;
			// 2D observation of this point in frame l (normalized coordinates).
			ceres::CostFunction* cost_function = ReprojectionError3D::Create(
												sfm_f[i].observation[j].second.x(),
												sfm_f[i].observation[j].second.y());
		// The residual block couples frame l's pose with 3D point i;
		// c_rotation, c_translation and position are all optimized.
    		problem.AddResidualBlock(cost_function, NULL, c_rotation[l], c_translation[l], 
    								sfm_f[i].position);	 
		}
	}
	// Configure the solver.
	ceres::Solver::Options options;
	options.linear_solver_type = ceres::DENSE_SCHUR; // dense Schur complement, suited to visual BA
	//options.minimizer_progress_to_stdout = true;
	options.max_solver_time_in_seconds = 0.2; // cap the optimization time
	ceres::Solver::Summary summary;
	ceres::Solve(options, &problem, &summary);
	// Accept the result only if the solver converged or the cost is tiny.
	if (summary.termination_type == ceres::CONVERGENCE || summary.final_cost < 5e-03)
	{
		//cout << "vision only BA converge" << endl;
	}
	else
	{
		//cout << "vision only BA not converge " << endl;
		return false;
	}
	// Convert rotations back: optimized blocks hold world->camera, invert to camera->world.
	for (int i = 0; i < frame_num; i++)
	{
		q[i].w() = c_rotation[i][0]; 
		q[i].x() = c_rotation[i][1]; 
		q[i].y() = c_rotation[i][2]; 
		q[i].z() = c_rotation[i][3]; 
		q[i] = q[i].inverse(); // camera frame -> world frame
		//cout << "final  q" << " i " << i <<"  " <<q[i].w() << "  " << q[i].vec().transpose() << endl;
	}
	// Convert translations: t_world = -R_cw^T * t_wc.
	for (int i = 0; i < frame_num; i++)
	{

		T[i] = -1 * (q[i] * Vector3d(c_translation[i][0], c_translation[i][1], c_translation[i][2]));
		//cout << "final  t" << " i " << i <<"  " << T[i](0) <<"  "<< T[i](1) <<"  "<< T[i](2) << endl;
	}
	// Store the optimized 3D points keyed by feature id.
	for (int i = 0; i < (int)sfm_f.size(); i++)
	{
		if(sfm_f[i].state)
			// Fixed typo: original read `poitsion[2]`, which is not a member of SFMFeature.
			sfm_tracked_points[sfm_f[i].id] = Vector3d(sfm_f[i].position[0], sfm_f[i].position[1], sfm_f[i].position[2]);
	}
	return true;

}

具体分析:

cpp 复制代码
for (int i = l; i < frame_num - 1 ; i++)// 从参考帧到最新帧
	{
		if (i > l)
		{
			Matrix3d R_initial = c_Rotation[i - 1];
			Vector3d P_initial = c_Translation[i - 1];
			if(!solveFrameByPnP(R_initial, P_initial, i, sfm_f))//正向求解
				return false;
			c_Rotation[i] = R_initial;
			c_Translation[i] = P_initial;
			c_Quat[i] = c_Rotation[i];
			Pose[i].block<3, 3>(0, 0) = c_Rotation[i];
			Pose[i].block<3, 1>(0, 3) = c_Translation[i];
		}
		triangulateTwoFrames(i, Pose[i], frame_num - 1, Pose[frame_num - 1], sfm_f);
	}

for (int i = l + 1; i < frame_num - 1; i++)
		triangulateTwoFrames(l, Pose[l], i, Pose[i], sfm_f);

(正向求解)这个模块要解决的是:对从参考帧到最新帧之间的所有帧,用 PnP 估计每帧的 c_Rotation[i] 和 c_Translation[i](即 world→camera 的变换),并三角化得到世界坐标系下的 3D 点坐标 X/Y/Z(无真实尺度信息)。例如:参考帧ID=5,最新帧ID=11,则求解ID=6/ID=7/ID=8/ID=9/ID=10各帧的Pose和世界坐标系下的X/Y/Z(无尺度信息)。

cpp 复制代码
for (int i = l - 1; i >= 0; i--)
	{
		//solve pnp
		Matrix3d R_initial = c_Rotation[i + 1];
		Vector3d P_initial = c_Translation[i + 1];
		if(!solveFrameByPnP(R_initial, P_initial, i, sfm_f))//反向求解
			return false;
		c_Rotation[i] = R_initial;
		c_Translation[i] = P_initial;
		c_Quat[i] = c_Rotation[i];
		Pose[i].block<3, 3>(0, 0) = c_Rotation[i];
		Pose[i].block<3, 1>(0, 3) = c_Translation[i];
		//triangulate
		triangulateTwoFrames(i, Pose[i], l, Pose[l], sfm_f);
	}
	for (int j = 0; j < feature_num; j++)
	{
		if (sfm_f[j].state == true)
			continue;
		if ((int)sfm_f[j].observation.size() >= 2)
		{
			Vector2d point0, point1;
			int frame_0 = sfm_f[j].observation[0].first;
			point0 = sfm_f[j].observation[0].second;
			int frame_1 = sfm_f[j].observation.back().first;
			point1 = sfm_f[j].observation.back().second;
			Vector3d point_3d;
			triangulatePoint(Pose[frame_0], Pose[frame_1], point0, point1, point_3d);
			sfm_f[j].state = true;
			sfm_f[j].position[0] = point_3d(0);
			sfm_f[j].position[1] = point_3d(1);
			sfm_f[j].position[2] = point_3d(2);
			//cout << "trangulated : " << frame_0 << " " << frame_1 << "  3d point : "  << j << "  " << point_3d.transpose() << endl;
		}		
	}

(反向求解)本模块是求解从开始ID=0到ID=4,共计5帧的Pose和point_3d坐标点。至此整个滑窗内所有帧的位姿和3D点求解完成。

到此时还有个问题就是:用PnP和三角化得到的位姿和3D点不够精确,存在累积误差,所以需要进行BA优化。

所以针对累积误差的问题进行BA优化,找出最优的相机参数及三维空间点的坐标,使得将这些差值的和 最小化!

cpp 复制代码
ceres::Problem problem;
	ceres::LocalParameterization* local_parameterization = new ceres::QuaternionParameterization();
	for (int i = 0; i < frame_num; i++)
	{
		c_translation[i][0] = c_Translation[i].x();
		c_translation[i][1] = c_Translation[i].y();
		c_translation[i][2] = c_Translation[i].z();
		c_rotation[i][0] = c_Quat[i].w();
		c_rotation[i][1] = c_Quat[i].x();
		c_rotation[i][2] = c_Quat[i].y();
		c_rotation[i][3] = c_Quat[i].z();
		problem.AddParameterBlock(c_rotation[i], 4, local_parameterization);
		problem.AddParameterBlock(c_translation[i], 3);
		// 固定参考帧 l 的旋转(作为世界坐标系基准)
		//参考帧位姿固定,避免整个解在空间/尺度上漂移
		if (i == l)
		{
			problem.SetParameterBlockConstant(c_rotation[i]);
		}
		// 固定参考帧 l 和最后一帧的平移(建立尺度基准)
		if (i == l || i == frame_num - 1)
		{
			problem.SetParameterBlockConstant(c_translation[i]);
		}
	}
	//添加残差块
	for (int i = 0; i < feature_num; i++)
	{
		if (sfm_f[i].state != true)// 跳过未三角化的点
			continue;
		for (int j = 0; j < int(sfm_f[i].observation.size()); j++)
		{
			int l = sfm_f[i].observation[j].first;
			// 获取这个点在帧 l 中的 2D 观测(归一化坐标)
			ceres::CostFunction* cost_function = ReprojectionError3D::Create(
												sfm_f[i].observation[j].second.x(),
												sfm_f[i].observation[j].second.y());
		// 【添加残差块到优化问题】
        // 这个残差块连接了:帧 l 的位姿 + 3D 点 i 的位置
		//c_rotation 、c_translation、position都会被优化
    		problem.AddResidualBlock(cost_function, NULL, c_rotation[l], c_translation[l], 
    								sfm_f[i].position);	 
		}
	}
	//配置优化器
	ceres::Solver::Options options;
	options.linear_solver_type = ceres::DENSE_SCHUR;//使用稠密Schur补结构,适合视觉-SLAM问题
	//options.minimizer_progress_to_stdout = true;
	options.max_solver_time_in_seconds = 0.2;//最大优化时间
	ceres::Solver::Summary summary;
	ceres::Solve(options, &problem, &summary);//执行优化
	//检查是否收敛
	if (summary.termination_type == ceres::CONVERGENCE || summary.final_cost < 5e-03)
	{
		//cout << "vision only BA converge" << endl;
	}
	else
	{
		//cout << "vision only BA not converge " << endl;
		return false;
	}
	//转换旋转
	for (int i = 0; i < frame_num; i++)
	{
		q[i].w() = c_rotation[i][0]; 
		q[i].x() = c_rotation[i][1]; 
		q[i].y() = c_rotation[i][2]; 
		q[i].z() = c_rotation[i][3]; 
		q[i] = q[i].inverse();//相机坐标系 → 世界坐标系
		//cout << "final  q" << " i " << i <<"  " <<q[i].w() << "  " << q[i].vec().transpose() << endl;
	}
	//转换平移
	for (int i = 0; i < frame_num; i++)
	{

		T[i] = -1 * (q[i] * Vector3d(c_translation[i][0], c_translation[i][1], c_translation[i][2]));
		//cout << "final  t" << " i " << i <<"  " << T[i](0) <<"  "<< T[i](1) <<"  "<< T[i](2) << endl;
	}
	// 将优化后的 3D 点存入 sfm_tracked_points(以特征点 ID 为键)
	for (int i = 0; i < (int)sfm_f.size(); i++)
	{
		if(sfm_f[i].state)
			sfm_tracked_points[sfm_f[i].id] = Vector3d(sfm_f[i].position[0], sfm_f[i].position[1], sfm_f[i].position[2]);
	}
	return true;

最终:对滑窗内所有帧的c_rotation 、c_translation、position都会被优化,得到最优的Pose和3D点。

相关推荐
持续学习的程序员+12 小时前
部分离线强化学习相关的算法总结(td3+bc/conrft)
算法
范桂飓2 小时前
LLaMA-Factory 大模型微调平台
人工智能·llama
李泽辉_2 小时前
深度学习算法学习(六):深度学习-处理文本:神经网络处理文本、Embedding层
深度学习·学习·算法
高洁012 小时前
AI智能体搭建(1)
人工智能·深度学习·机器学习·transformer·知识图谱
lixzest2 小时前
Transformer 零基础学习指南
人工智能·深度学习·transformer
jackywine62 小时前
系统提示词(System Prompt),AI 的“人设剧本“
人工智能·chatgpt
UnderTurrets2 小时前
From_Diffusion_to_GSFix3D
人工智能·计算机视觉·3d
laplace01232 小时前
agent模型基础
人工智能·语言模型·自然语言处理·agent·rag
ldccorpora2 小时前
Chinese Treebank 5.0数据集介绍,官网编号LDC2005T01
人工智能·深度学习·自然语言处理·动态规划·语音识别