模拟效果:
代码:
#include<iostream>
#include<vector>
#include<windows.h>
#include<time.h>
#include<algorithm>
using namespace std;
// Pseudo-random integer in [0, x) drawn from rand(). The argument is
// parenthesised so that expressions such as random(a - b) expand correctly
// ('%' binds tighter than '+'/'-').
#define random(x) (rand() % (x))
// Large sentinel value; not referenced anywhere in the visible source.
#define oo 9999999
// Distance threshold: two neighbouring sorted values belong to the same group
// when they differ by at most R, and a seed window counts as dense when it
// spans at most 2*R.
#define R 0.5
// Number of generated samples; N/10 is also the "few points left" stop limit.
#define N 100
// Width (in points) of the window centred on a seed when testing for density.
const int minpoints = 10;
// One sample: an integer identifier paired with its numeric value.
struct A {
    int no;      // identifier supplied at construction
    double val;  // numeric value; used as the sort and clustering key

    // Builds a sample with identifier `i` and value `num`.
    A(int i, double num)
    {
        no = i;
        val = num;
    }
};
// Working set of samples. main() fills it and sorts it by value; dbscan()
// removes every point it assigns to a group, so whatever is left afterwards
// is the unassigned remainder.
vector<A> v;
// Groups produced by dbscan(), in the order they were extracted. dbscan()
// increments its counter once per pushed group.
vector<vector<A> > cluster;
// Strict-weak ordering for sorting samples by ascending value:
// true when q's value is smaller than w's.
bool cmp(A q, A w)
{
    const bool qComesFirst = (w.val > q.val);
    return qComesFirst;
}
// Magnitude of `a`: non-negative inputs are returned unchanged, anything else
// is multiplied by -1.
double abs(double a)
{
    return (a >= 0) ? a : (-1.0 * a);
}
void printV(vector<A> c){
for(int i = 0; i < c.size(); i++)
cout<<c[i].val<<',';
cout<<endl;
}
void dbscan(int &k){
//超过最大迭代次数k或者当剩下的点少于原始数据的十分之一的时候,结束
if(k >= 5||v.size()<=N/10)
return;
vector<A> temp;
int n = v.size();
int start = random(n);
int r = min(start + minpoints/2, n-1), l = max(start - minpoints/2, 0);
//找到一个非独立点作为起始点:
while(v[r].val - v[l].val > 2*R){
temp.push_back(v[start]);
v.erase(v.begin()+start);
cluster.push_back(temp);
temp.clear();
k++;
start = random(n);
r = min(start + minpoints/2, n-1), l = max(start - minpoints/2, 0);
}
n = v.size();
//从起始点开始扩展这个类
while(r < n && v[r].val - v[r-1].val <= R )
r++;
r--;
while(l >= 0 && v[l+1].val - v[l].val <= R)
l--;
l++;
//扩展完毕,放入一个分类中
while(l <= r--){
temp.push_back(v[l]);
v.erase(v.begin() + l);
}
cluster.push_back(temp);
k++;
dbscan(k);
}
int main()
{
for(int i = 0; i < N; i++){
srand((unsigned)clock());
Sleep(1);
if(i % 10 == 0){//模拟偏差数据
A a(i, 10 + (random(200) - 99) / 10.0);
v.push_back(a);
}
else{//模拟真实数据
A a(i, 10 + (random(20) - 10) / 10.0);
v.push_back(a);
}
}
sort(v.begin(), v.end(), cmp);
int i = 0, k = 0;
srand((unsigned)clock());
dbscan(k);
//聚类结果
for(i = 0; i < k; i++){
printf("第%d类\n", i+1);
printV(cluster[i]);
}
//剩下的点自成一类
printf("第%d类\n", i);
for(int j = 0; j < v.size(); j++){
cout<<v[j].val<<',';
}
return 0;
}