一、DBSCAN算法原理:
(1)DBSCAN通过检查数据集中每点的Eps邻域来搜索簇,如果点p的Eps邻域包含的点多于MinPts个,则创建一个以p为核心对象的簇;
(2)然后,DBSCAN迭代地聚集从这些核心对象直接密度可达的对象,这个过程可能涉及一些密度可达簇的合并;
(3)当没有新的点添加到任何簇时,该过程结束。
简单理解:在每个点的Eps邻域内拥有点的个数多于MinPts,那么认为p为核心对象簇的一员。即dbscan认为密度可达的点即为一个簇,这也是dbscan聚类的核心思想。根据密度可达的定义,在聚类过程中,除了第一个和最后一个点之外(可以为核心对象或者边界对象),其余点必须是核心对象才可以。
二、优缺点
优点:
(1)聚类速度快且能够有效处理噪声点和发现任意形状的空间聚类;
(2)与K-MEANS比较起来,不需要输入要划分的聚类个数;
(3)聚类簇的形状没有偏倚;
(4)可以在需要时输入过滤噪声的参数。
缺点:
(1)当数据量增大时,要求较大的内存支持I/O消耗也很大;
(2)当空间聚类的密度不均匀、聚类间距差相差很大时,聚类质量较差,因为这种情况下参数MinPts和Eps选取困难。
(3)算法聚类效果依赖与距离公式选取,实际应用中常用欧式距离,对于高维数据,存在"维数灾难"。
三、代码实现
cpp
// 依赖项:pcl1.9.1
//时间:2021.06.25
//功能:实现点云dbscan聚类
//
#pragma once
#include <iostream>
#include<string>
#include<vector>
#include<pcl/io/pcd_io.h>
#include<pcl/point_cloud.h>
#include<pcl/point_types.h>
#include<pcl/kdtree/kdtree_flann.h>
#include<pcl/visualization/cloud_viewer.h>
typedef pcl::PointXYZ pointT;
typedef pcl::PointCloud<pointT> cloud;
//const cloud::Ptr& cloud_in,输入点云
//std::vector<std::vector<int>> &clusters_index, 索引
//const double& r, Eps邻域半径,单位:m
//const int& size,Eps邻域包含的点的最小个数MinPts
bool dbscan(const cloud::Ptr& cloud_in, std::vector<std::vector<int>> &clusters_index, const double& r, const int& size)
{
if (!cloud_in->size())
return false;
//LOG()
pcl::KdTreeFLANN<pointT> tree;
tree.setInputCloud(cloud_in);
std::vector<bool> cloud_processed(cloud_in->size(), false);
for (size_t i = 0; i < cloud_in->points.size(); ++i)
{
if (cloud_processed[i])
{
continue;
}
std::vector<int>seed_queue;
//检查近邻数是否大于给定的size(判断是否是核心对象)
std::vector<int> indices_cloud;
std::vector<float> dists_cloud;
if (tree.radiusSearch(cloud_in->points[i], r, indices_cloud, dists_cloud) >= size)
{
seed_queue.push_back(i);
cloud_processed[i] = true;
}
else
{
//cloud_processed[i] = true;
continue;
}
int seed_index = 0;
while (seed_index < seed_queue.size())
{
std::vector<int> indices;
std::vector<float> dists;
if (tree.radiusSearch(cloud_in->points[seed_queue[seed_index]], r, indices, dists) < size)//函数返回值为近邻数量
{
//cloud_processed[i] = true;//不满足<size可能是边界点,也可能是簇的一部分,不能标记为已处理
++seed_index;
continue;
}
for (size_t j = 0; j < indices.size(); ++j)
{
if (cloud_processed[indices[j]])
{
continue;
}
seed_queue.push_back(indices[j]);
cloud_processed[indices[j]] = true;
}
++seed_index;
}
clusters_index.push_back(seed_queue);
}
// std::cout << clusters_index.size() << std::endl;
if (clusters_index.size())
return true;
else
return false;
}
int main()
{
cloud::Ptr cloud_in(new cloud);
std::vector<std::vector<int>> clusters_index;
pcl::io::loadPCDFile<pcl::PointXYZ>("2.pcd", *cloud_in);
dbscan(cloud_in, clusters_index, 0.1, 10);
std::cout << clusters_index.size() << std::endl;
pcl::PointCloud<pcl::PointXYZI> visual_cloud;
visual_cloud.width = cloud_in->size();
visual_cloud.height = 1;
visual_cloud.resize(cloud_in->size());
for (size_t i = 0; i < clusters_index.size(); ++i)
{
for (size_t j = 0; j < clusters_index[i].size(); ++j)
{
visual_cloud.points[clusters_index[i][j]].x = cloud_in->points[clusters_index[i][j]].x;
visual_cloud.points[clusters_index[i][j]].y = cloud_in->points[clusters_index[i][j]].y;
visual_cloud.points[clusters_index[i][j]].z = cloud_in->points[clusters_index[i][j]].z;
visual_cloud.points[clusters_index[i][j]].intensity = 20*(i+100);
//std::cout << clusters_index[i][j] << std::endl;
}
// std::cout << clusters_index[i].size() << std::endl;
}
pcl::visualization::CloudViewer viewer("DBSCAN cloud viewer.");
viewer.showCloud(visual_cloud.makeShared());
while (!viewer.wasStopped())
{
}
pcl::io::savePCDFile("dbscan.pcd", visual_cloud,true);
std::cout << "Hello World!\n";
}
参考:
1.https://blog.csdn.net/baidu_38172402/article/details/81915112
2.https://blog.csdn.net/xinxiangwangzhi_/article/details/118212814