以下是简单的C++实现DBSCAN聚类算法的示例代码,供参考:
#include <iostream>
#include <vector>
#include <cmath>
/**
 * @brief Computes the Euclidean distance between two points.
 *
 * The points are taken by const reference so that const vectors and
 * temporaries can be passed. Only the dimensions present in both points
 * contribute to the result: if the vectors differ in length, the extra
 * trailing coordinates of the longer one are ignored rather than read out
 * of bounds.
 *
 * @param p First point, one coordinate per dimension.
 * @param q Second point, one coordinate per dimension.
 * @return Square root of the sum of squared coordinate differences;
 *         0.0 when there is no common dimension.
 */
double getDistance(const std::vector<double> &p, const std::vector<double> &q) {
    using Index = std::vector<double>::size_type;

    // Clamp to the shorter vector so q[i] is never read past its end.
    const Index dims = p.size() < q.size() ? p.size() : q.size();

    double sumOfSquares = 0.0;
    for (Index i = 0; i < dims; ++i) {
        const double diff = p[i] - q[i];
        sumOfSquares += diff * diff;  // plain multiply instead of pow(diff, 2)
    }
    return std::sqrt(sumOfSquares);
}
/**
 * @brief Finds every point of the dataset that lies within eps of a given point.
 *
 * Each point of the dataset is compared against dataset[point] using
 * getDistance; a point qualifies when that distance is less than or equal
 * to eps.
 *
 * @param dataset Points to search; each inner vector is one point.
 * @param point   Index into dataset of the point whose neighborhood is queried.
 * @param eps     Neighborhood radius (inclusive).
 * @return Indexes of the qualifying points, in ascending dataset order.
 */
std::vector<int> rangeQuery(std::vector<std::vector<double>> &dataset, int point, double eps) {
    std::vector<int> withinRadius;
    int candidateIndex = 0;
    for (std::vector<double> &candidate : dataset) {
        const double gap = getDistance(dataset[point], candidate);
        if (gap <= eps) {
            withinRadius.push_back(candidateIndex);
        }
        ++candidateIndex;
    }
    return withinRadius;
}
/**
 * @brief Groups the points of a dataset into density-based clusters (DBSCAN).
 *
 * A point is treated as a core point when rangeQuery returns at least
 * minPoints indexes for it. Starting from each unvisited core point, the
 * cluster is grown by repeatedly absorbing the neighborhoods of the core
 * points it reaches. Points that are reached but are not core themselves
 * join the cluster without extending it.
 *
 * Every point belongs to at most one cluster: a point that was already
 * claimed by an earlier cluster is never added to a later one. Points that
 * are never reached from any core point are noise and appear in no cluster.
 *
 * @param dataset   Points to cluster; each inner vector is one point.
 * @param eps       Neighborhood radius forwarded to rangeQuery.
 * @param minPoints Minimum neighborhood size for a core point. Values <= 0
 *                  are treated as 0, which makes every point a core point.
 * @return One vector of dataset indexes per cluster, in discovery order.
 */
std::vector<std::vector<int>> DBSCAN(std::vector<std::vector<double>> &dataset, double eps, int minPoints) {
    using Index = std::vector<int>::size_type;

    // Compare sizes in the unsigned domain without letting a negative
    // minPoints wrap around to a huge threshold.
    const Index required = minPoints > 0 ? static_cast<Index>(minPoints) : 0;

    std::vector<std::vector<int>> clusters;
    std::vector<bool> visited(dataset.size(), false);   // neighborhood already queried
    std::vector<bool> assigned(dataset.size(), false);  // already a member of some cluster

    for (Index i = 0; i < dataset.size(); ++i) {
        if (visited[i]) {
            continue;
        }
        visited[i] = true;

        const int seed = static_cast<int>(i);
        std::vector<int> neighbors = rangeQuery(dataset, seed, eps);
        if (neighbors.size() < required) {
            // Noise for now; it may still be claimed later as a border
            // point of a cluster grown from another core point.
            continue;
        }

        // Build the cluster in place. `clusters` is not modified again
        // until the expansion loop below has finished, so the reference
        // stays valid.
        clusters.emplace_back();
        std::vector<int> &cluster = clusters.back();
        cluster.push_back(seed);
        assigned[i] = true;

        // `neighbors` doubles as the work list and grows while we iterate,
        // so it must be walked by index, never by iterator or reference.
        for (Index j = 0; j < neighbors.size(); ++j) {
            const int current = neighbors[j];

            if (!assigned[current]) {
                assigned[current] = true;
                cluster.push_back(current);
            }

            if (visited[current]) {
                continue;
            }
            visited[current] = true;

            const std::vector<int> reachable = rangeQuery(dataset, current, eps);
            if (reachable.size() < required) {
                continue;  // border point: member of the cluster, but does not extend it
            }
            for (const int candidate : reachable) {
                // Points already in a cluster have been visited and would
                // be skipped anyway; leaving them out keeps the work list small.
                if (!assigned[candidate]) {
                    neighbors.push_back(candidate);
                }
            }
        }
    }
    return clusters;
}
/**
 * @brief Demo driver: clusters six hard-coded 2-D points and prints the result.
 *
 * Runs DBSCAN with a radius of 1.0 and a minimum neighborhood size of 2, then
 * writes one line per returned cluster to standard output in the form
 * "Cluster <k>: <index>, <index>, ...".
 *
 * @return Always 0.
 */
int main() {
    std::vector<std::vector<double>> points = {{0.0, 0.0}, {1.0, 0.0}, {2.0, 0.0}, {0.0, 1.0}, {1.0, 1.0}, {5.0, 5.0}};
    const double radius = 1.0;
    const int densityThreshold = 2;

    const std::vector<std::vector<int>> found = DBSCAN(points, radius, densityThreshold);

    int label = 0;
    for (const std::vector<int> &members : found) {
        std::cout << "Cluster " << label << ": ";
        for (const int member : members) {
            std::cout << member << ", ";
        }
        std::cout << "\n";
        ++label;
    }
    return 0;
}
这个示例代码使用输入数据dataset、epsilon(eps)和minPoints作为参数,并返回聚类。其中,输入数据dataset是一个向量集合,每个向量表示一个n维数据点。
运行示例后,输出如下:
Cluster 0: 0, 1, 3, 2, 4,
这些结果表明,在eps=1.0、minPoints=2的条件下,点0至点4可以通过一连串间距不超过1.0的相邻点互相到达(密度相连),因此DBSCAN算法将它们全部划分到同一个簇(簇0)中,输出顺序即为各点被发现的顺序;点5(5.0, 5.0)在eps范围内除自身外没有任何邻居,被判定为噪声点,因此不属于任何簇,也不会出现在输出中。