DBSCAN算法c++实现

 首先计算出距离每个点e以内的点,如果个数>=minPts,则找出它的直接密度可达和间接可达的点,用visited标记点是否已经在簇中,循环直到最后一个点。

#include <fstream>
#include <vector>
#include <iostream>
#include <cmath>
#include <set>
#include <sstream>
using namespace std;

struct Point
{
    double x;
    double y;
};

double distance(const Point& a, const Point& b)
{
    return sqrt(pow(a.x - b.x,2) + pow(a.y - b.y,2));
}

vector <set<int>> getPointSet(const vector<Point> data, double e)
{
    vector<set<int>> PointSet(data.size());         //用到了PointSet[i],要先进行初始化
    //vector<set<int>> PointSet;
    for(int i=0;i<data.size();i++)
    {
        for (int j = 0; j < data.size(); j++)
        {
            if (distance(data[i], data[j]) <= e)
            {
                PointSet[i].insert(j);         //j还是j+1
            }
        }
    }
    return PointSet;
}

vector<set<int>> DAIANA(vector<set<int>> point,int MinPts)
{
    vector<bool> visted(point.size(),false);
    vector<set<int>> cluster;
    for (int i = 0; i < point.size(); i++)
    {
        if (!visted[i])
        {
            if (point[i].size() >= MinPts)              //{几个}  
            {
                for (set<int>::const_iterator j = point[i].begin(); j != point[i].end(); j++)
                {
                    visted[*j] = true;
                    if (point[*j].size() >= MinPts)         //间接相连
                    {
                        for (auto k = point[*j].begin(); k != point[*j].end(); k++)
                        {
                            visted[*k] = true;
                            point[i].insert(*k);
                        }
                        //point[i].insert(point[*j].begin(), point[*j].end());
                    }
                }
                cluster.push_back(point[i]);
            }
        }
       
    }
    return cluster;
}


vector<Point> ReadData(string filename)
{
    vector<Point> data;
    ifstream file(filename);
    if (file.is_open())
    {
        string line;
        while (getline(file, line))
        {
            istringstream iss(line);
            double x, y;
            string token;
            Point point;
            if (getline(iss, token, ',') && istringstream(token) >> point.x &&
                getline(iss, token, ',') && istringstream(token) >> point.y) {
                data.push_back(point);
            }
        }
    }
    else
    {
        cout << "open fail";
    }
    file.close();
    return data;
}

int main()
{ 
    vector<Point> dataset = ReadData("data.txt");
    double e;
    int MinPts;
    cin >> e >> MinPts;

    vector <set<int>> point;
    vector<set<int>> cluster;
    point=getPointSet(dataset, e);
    cluster=DAIANA(point, MinPts);
    for (int i = 0; i < cluster.size(); i++)
    {
        cout << "{";
        int count = 0;
        for (auto j = cluster[i].begin(); j != cluster[i].end(); j++)
        {
            
                cout << *j+1 ;
                if (++count < cluster[i].size())
                {
                    cout << ",";
                }
        }
        cout << "}" << endl;
    }
}

或 

判断是否是核心点,如果是,将它的直接密度可达点放进set<int> neighbors;再判断这些直接密度可达点里如果有核心点则又将该直接密度可达点放进set<int> subNeighbors,最后neighbors.insert(subNeighbors.begin(), subNeighbors.end());

std::vector<std::vector<Point>> dbscan(const std::vector<Point>& dataset, double epsilon, int minPts) {
    std::vector<std::vector<Point>> clusters;
    std::vector<bool> visited(dataset.size(), false);
    
    for (int i = 0; i < dataset.size(); ++i) {
        if (visited[i]) continue;
        
        visited[i] = true;
        std::vector<Point> cluster;
        
        std::set<int> neighbors;
        for (int j = 0; j < dataset.size(); ++j) {
            if (calculateDistance(dataset[i], dataset[j]) <= epsilon) {
                neighbors.insert(j);
            }
        }
        
        if (neighbors.size() >= minPts) {
            for (auto it = neighbors.begin(); it != neighbors.end(); ++it) {
                int index = *it;
                if (!visited[index]) {
                    visited[index] = true;
                    std::set<int> subNeighbors;
                    
                    for (int j = 0; j < dataset.size(); ++j) {
                        if (calculateDistance(dataset[index], dataset[j]) <= epsilon) {
                            subNeighbors.insert(j);
                        }
                    }
                    
                    if (subNeighbors.size() >= minPts) {
                        neighbors.insert(subNeighbors.begin(), subNeighbors.end());
                    }
                }
            }
        }
        
        for (auto it = neighbors.begin(); it != neighbors.end(); ++it) {
            cluster.push_back(dataset[*it]);
        }
        
        if (!cluster.empty()) {
            clusters.push_back(cluster);
        }
    }
    
    return clusters;
}

数据集:参考数据挖掘原理与算法第四版DBSCAN例子

1.0, 0.0
4.0, 0.0
0.0, 1.0
1.0, 1.0
2.0, 1.0
3.0,1.0    
4.0,1.0    
5.0,1.0    
0.0,2.0    
1.0,2.0    
4.0,2.0    
1.0,3.0

结果

  • 1
    点赞
  • 6
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值