DBSCAN算法的Java实现

最新推荐文章于 2022-12-18 22:03:01 发布

西二旗小码农

最新推荐文章于 2022-12-18 22:03:01 发布

阅读量8.2k

点赞数

分类专栏：数据挖掘相关搜索引擎相关编程语言相关文章标签：算法 java import distance string merge

搜索引擎相关同时被 3 个专栏收录

34 篇文章 0 订阅

订阅专栏

编程语言相关

30 篇文章 0 订阅

订阅专栏

数据挖掘相关

14 篇文章 0 订阅

订阅专栏

DBSCAN是一种基于密度的聚类算法，它的基本原理就是给定两个参数，ξ和minp，其中 ξ可以理解为半径，算法将在这个半径内查找样本，minp是一个以ξ为半径查找到的样本个数n的限制条件，只要n>=minp，查找到的样本点就是核心样本点，算法的具体描述见参考文件1，下边是这个算法的java实现：

　　首先定义一个Point类，代表样本点

<!--[endif]-->
 
　　package com.sunzhenxing;
 
　　public class Point {
 
　　private int x;
 
　　private int y;
 
　　private boolean isKey;
 
　　private boolean isClassed;
 
　　public boolean isKey() {
 
　　return isKey;
 
　　}
 
　　public void setKey(boolean isKey) {
 
　　this.isKey = isKey;
 
　　this.isClassed=true;
 
　　}
 
　　public boolean isClassed() {
 
　　return isClassed;
 
　　}
 
　　public void setClassed(boolean isClassed) {
 
　　this.isClassed = isClassed;
 
　　}
 
　　public int getX() {
 
　　return x;
 
　　}
 
　　public void setX(int x) {
 
　　this.x = x;
 
　　}
 
　　public int getY() {
 
　　return y;
 
　　}
 
　　public void setY(int y) {
 
　　this.y = y;
 
　　}
 
　　public Point(){
 
　　x=0;
 
　　y=0;
 
　　}
 
　　public Point(int x,int y){
 
　　this.x=x;
 
　　this.y=y;
 
　　}
 
　　public Point(String str){
 
　　String[] p=str.split(",");
 
　　this.x=Integer.parseInt(p[0]);
 
　　this.y=Integer.parseInt(p[1]);
 
　　}
 
　　public String print(){
 
　　return "<"+this.x+","+this.y+">";
 
　　}
 
　　}

然后定义一个工具类，为算法的实现服务：

package com.sunzhenxing;
 
　　import java.io.BufferedReader;
 
　　import java.io.FileReader;
 
　　import java.io.IOException;
 
　　import java.util.*;
 
　　public class Utility {
 
　　/**
 
　　* 测试两个点之间的距离
 
　　* @param p 点
 
　　* @param q 点
 
　　* @return 返回两个点之间的距离
 
　　*/
 
　　public static double getDistance(Point p,Point q){
 
　　int dx=p.getX()-q.getX();
 
　　int dy=p.getY()-q.getY();
 
　　double distance=Math.sqrt(dx*dx+dy*dy);
 
　　return distance;
 
　　}

/**
 
　　* 检查给定点是不是核心点
 
　　* @param lst 存放点的链表
 
　　* @param p 待测试的点
 
　　* @param e e半径
 
　　* @param minp 密度阈值
 
　　* @return 暂时存放访问过的点
 
　　*/
 
　　public static List<Point> isKeyPoint(List<Point> lst,Point p,int e,int minp){
 
　　int count=0;
 
　　List<Point> tmpLst=new ArrayList<Point>();
 
　　for(Iterator<Point> it=lst.iterator();it.hasNext();){
 
　　Point q=it.next();
 
　　if(getDistance(p,q)<=e){
 
　　++count;
 
　　if(!tmpLst.contains(q)){
 
　　tmpLst.add(q);
 
　　}
 
　　}
 
　　}
 
　　if(count>=minp){
 
　　p.setKey(true);
 
　　return tmpLst;
 
　　}
 
　　return null;
 
　　}
 
　　public static void setListClassed(List<Point> lst){
 
　　for(Iterator<Point> it=lst.iterator();it.hasNext();){
 
　　Point p=it.next();
 
　　if(!p.isClassed()){
 
　　p.setClassed(true);
 
　　}
 
　　}
 
　　}
 
　　/**
 
　　* 如果b中含有a中包含的元素，则把两个集合合并
 
　　* @param a
 
　　* @param b
 
　　* @return a
 
　　*/
 
　　public static boolean mergeList(List<Point> a,List<Point> b){
 
　　boolean merge=false;
 
　　for(int index=0;index<b.size();++index){
 
　　if(a.contains(b.get(index))){
 
　　merge=true;
 
　　break;
 
　　}
 
　　}
 
　　if(merge){
 
　　for(int index=0;index<b.size();++index){
 
　　if(!a.contains(b.get(index))){
 
　　a.add(b.get(index));
 
　　}
 
　　}
 
　　}
 
　　return merge;
 
　　}
 
　　/**
 
　　* 返回文本中的点集合
 
　　* @return 返回文本中点的集合
 
　　* @throws IOException
 
　　*/
 
　　public static List<Point> getPointsList() throws IOException{
 
　　List<Point> lst=new ArrayList<Point>();
 
　　String txtPath="src\\com\\sunzhenxing\\points.txt";
 
　　BufferedReader br=new BufferedReader(new FileReader(txtPath));
 
　　String str="";
 
　　while((str=br.readLine())!=null && str!=""){
 
　　lst.add(new Point(str));
 
　　}
 
　　br.close();
 
　　return lst;
 
　　}
 
　　}



 

最后在主程序中实现算法，如下所示：
package com.sunzhenxing;
 
　　import java.io.*;
 
　　import java.util.*;
 
　　public class Dbscan {
 
　　private static List<Point> pointsList=new ArrayList<Point>();//存储所有点的集合
 
　　private static List<List<Point>> resultList=new ArrayList<List<Point>>();//存储DBSCAN算法返回的结果集
 
　　private static int e=2;//e半径
 
　　private static int minp=3;//密度阈值
 
　　/**
 
　　* 提取文本中的的所有点并存储在pointsList中
 
　　* @throws IOException
 
　　*/
 
　　private static void display(){
 
　　int index=1;
 
　　for(Iterator<List<Point>> it=resultList.iterator();it.hasNext();){
 
　　List<Point> lst=it.next();
 
　　if(lst.isEmpty()){
 
　　continue;
 
　　}
System.out.println("-----第"+index+"个聚类-----");
 
　　for(Iterator<Point> it1=lst.iterator();it1.hasNext();){
 
　　Point p=it1.next();
 
　　System.out.println(p.print());
 
　　}
 
　　index++;
 
　　}
 
　　}
 
　　//找出所有可以直达的聚类
 
　　private static void applyDbscan(){
 
　　try {
 
　　pointsList=Utility.getPointsList();
 
　　for(Iterator<Point> it=pointsList.iterator();it.hasNext();){
 
　　Point p=it.next();
 
　　if(!p.isClassed()){
 
　　List<Point> tmpLst=new ArrayList<Point>();
 
　　if((tmpLst=Utility.isKeyPoint(pointsList, p, e, minp)) != null){
 
　　//为所有聚类完毕的点做标示
 
　　Utility.setListClassed(tmpLst);
 
　　resultList.add(tmpLst);
 
　　}
 
　　}
 
　　}
 
　　} catch (IOException e) {
 
　　// TODO Auto-generated catch block
 
　　e.printStackTrace();
 
　　}
 
　　}
 
　　//对所有可以直达的聚类进行合并，即找出间接可达的点并进行合并
 
　　private static List<List<Point>> getResult(){
 
　　applyDbscan();//找到所有直达的聚类
 
　　int length=resultList.size();
 
　　for(int i=0;i<length;++i){
 
　　for(int j=i+1;j<length;++j){
 
　　if(Utility.mergeList(resultList.get(i), resultList.get(j))){
 
　　resultList.get(j).clear();
 
　　}
 
　　}
 
　　}
 
　　return resultList;
 
　　}
 
　　/**
 
　　* 程序主函数
 
　　* @param args
 
　　*/
 
　　public static void main(String[] args) {
 
　　getResult();
 
　　display();
 
　　//System.out.println(Utility.getDistance(new Point(0,0), new Point(0,2)));
 
　　}
 
　　}



下边是一个小测试， 即使用src\\com\\sunzhenxing\\points.txt文件的内容进行测试，points.txt的文件内容是：
　　0,0
　　0,1
　　0,2
　　0,3
　　0,4
　　0,5
　　12,1
　　12,2
　　12,3
　　12,4
　　12,5
　　12,6
　　0,6
　　0,7
　　12,7
　　0,8
　　0,9
　　1,1
　　最后算法的结果是：
　　-----第1个聚类-----
　　<0,0>
　　<0,1>
　　<0,2>
　　<1,1>
　　<0,3>
　　<0,4>
　　<0,5>
　　<0,6>
　　<0,7>
　　<0,8>
　　<0,9>
　　-----第2个聚类-----
　　<12,1>
　　<12,2>
　　<12,3>
　　<12,4>
　　<12,5>
　　<12,6>
　　<12,7>
　　大家画一下坐标就可以理解实验的结论了。