DBSCAN算法的Java实现

最新推荐文章于 2022-12-18 22:03:01 发布

liqiuguo

最新推荐文章于 2022-12-18 22:03:01 发布

阅读量715

点赞数

分类专栏：数据挖掘文章标签：机器学习数据挖掘算法

数据挖掘专栏收录该内容

41 篇文章 0 订阅

订阅专栏

DBSCAN是一种基于密度的聚类算法，它的基本原理就是给定两个参数，ξ和minp，其中 ξ可以理解为半径，算法将在这个半径内查找样本，minp是一个以ξ为半径查找到的样本个数n的限制条件，只要n>=minp，查找到的样本点就是核心样本点，算法的具体描述见参考文件1，下边是这个算法的java实现：

　　首先定义一个Point类，代表样本点

[java]view plain copy 
   
 <!--[endif]-->  
    
 　　package com.sunzhenxing;  
    
 　　public class Point {  
    
 　　private int x;  
    
 　　private int y;  
    
 　　private boolean isKey;  
    
 　　private boolean isClassed;  
    
 　　public boolean isKey() {  
    
 　　return isKey;  
    
 　　}  
    
 　　public void setKey(boolean isKey) {  
    
 　　this.isKey = isKey;  
    
 　　this.isClassed=true;  
    
 　　}  
    
 　　public boolean isClassed() {  
    
 　　return isClassed;  
    
 　　}  
    
 　　public void setClassed(boolean isClassed) {  
    
 　　this.isClassed = isClassed;  
    
 　　}  
    
 　　public int getX() {  
    
 　　return x;  
    
 　　}  
    
 　　public void setX(int x) {  
    
 　　this.x = x;  
    
 　　}  
    
 　　public int getY() {  
    
 　　return y;  
    
 　　}  
    
 　　public void setY(int y) {  
    
 　　this.y = y;  
    
 　　}  
    
 　　public Point(){  
    
 　　x=0;  
    
 　　y=0;  
    
 　　}  
    
 　　public Point(int x,int y){  
    
 　　this.x=x;  
    
 　　this.y=y;  
    
 　　}  
    
 　　public Point(String str){  
    
 　　String[] p=str.split(",");  
    
 　　this.x=Integer.parseInt(p[0]);  
    
 　　this.y=Integer.parseInt(p[1]);  
    
 　　}  
    
 　　public String print(){  
    
 　　return "<"+this.x+","+this.y+">";  
    
 　　}  
    
 　　}

然后定义一个工具类，为算法的实现服务：

[java]view plain copy 
   
 package com.sunzhenxing;  
    
 　　import java.io.BufferedReader;  
    
 　　import java.io.FileReader;  
    
 　　import java.io.IOException;  
    
 　　import java.util.*;  
    
 　　public class Utility {  
    
 　　/** 
   
 　　* 测试两个点之间的距离 
   
 　　* @param p 点 
   
 　　* @param q 点 
   
 　　* @return 返回两个点之间的距离 
   
 　　*/  
    
 　　public static double getDistance(Point p,Point q){  
    
 　　int dx=p.getX()-q.getX();  
    
 　　int dy=p.getY()-q.getY();  
    
 　　double distance=Math.sqrt(dx*dx+dy*dy);  
    
 　　return distance;  
    
 　　}

[java]view plain copy 
   
 <pre class="java" name="code">/** 
   
 　　* 检查给定点是不是核心点 
   
 　　* @param lst 存放点的链表 
   
 　　* @param p 待测试的点 
   
 　　* @param e e半径 
   
 　　* @param minp 密度阈值 
   
 　　* @return 暂时存放访问过的点 
   
 　　*/  
    
 　　public static List<Point> isKeyPoint(List<Point> lst,Point p,int e,int minp){  
    
 　　int count=0;  
    
 　　List<Point> tmpLst=new ArrayList<Point>();  
    
 　　for(Iterator<Point> it=lst.iterator();it.hasNext();){  
    
 　　Point q=it.next();  
    
 　　if(getDistance(p,q)<=e){  
    
 　　++count;  
    
 　　if(!tmpLst.contains(q)){  
    
 　　tmpLst.add(q);  
    
 　　}  
    
 　　}  
    
 　　}  
    
 　　if(count>=minp){  
    
 　　p.setKey(true);  
    
 　　return tmpLst;  
    
 　　}  
    
 　　return null;  
    
 　　}  
    
 　　public static void setListClassed(List<Point> lst){  
    
 　　for(Iterator<Point> it=lst.iterator();it.hasNext();){  
    
 　　Point p=it.next();  
    
 　　if(!p.isClassed()){  
    
 　　p.setClassed(true);  
    
 　　}  
    
 　　}  
    
 　　}  
    
 　　/** 
   
 　　* 如果b中含有a中包含的元素，则把两个集合合并 
   
 　　* @param a 
   
 　　* @param b 
   
 　　* @return a 
   
 　　*/  
    
 　　public static boolean mergeList(List<Point> a,List<Point> b){  
    
 　　boolean merge=false;  
    
 　　for(int index=0;index<b.size();++index){  
    
 　　if(a.contains(b.get(index))){  
    
 　　merge=true;  
    
 　　break;  
    
 　　}  
    
 　　}  
    
 　　if(merge){  
    
 　　for(int index=0;index<b.size();++index){  
    
 　　if(!a.contains(b.get(index))){  
    
 　　a.add(b.get(index));  
    
 　　}  
    
 　　}  
    
 　　}  
    
 　　return merge;  
    
 　　}  
    
 　　/** 
   
 　　* 返回文本中的点集合 
   
 　　* @return 返回文本中点的集合 
   
 　　* @throws IOException 
   
 　　*/  
    
 　　public static List<Point> getPointsList() throws IOException{  
    
 　　List<Point> lst=new ArrayList<Point>();  
    
 　　String txtPath="src\\com\\sunzhenxing\\points.txt";  
    
 　　BufferedReader br=new BufferedReader(new FileReader(txtPath));  
    
 　　String str="";  
    
 　　while((str=br.readLine())!=null && str!=""){  
    
 　　lst.add(new Point(str));  
    
 　　}  
    
 　　br.close();  
    
 　　return lst;  
    
 　　}  
    
 　　}  
 </pre>  
 <p><br>  
  </p>  
 <pre></pre>  
 <p>最后在主程序中实现算法，如下所示：</p>  
 <pre class="java" name="code">package com.sunzhenxing;  
    
 　　import java.io.*;  
    
 　　import java.util.*;  
    
 　　public class Dbscan {  
    
 　　private static List<Point> pointsList=new ArrayList<Point>();//存储所有点的集合  
    
 　　private static List<List<Point>> resultList=new ArrayList<List<Point>>();//存储DBSCAN算法返回的结果集  
    
 　　private static int e=2;//e半径  
    
 　　private static int minp=3;//密度阈值  
    
 　　/** 
   
 　　* 提取文本中的的所有点并存储在pointsList中 
   
 　　* @throws IOException 
   
 　　*/  
    
 　　private static void display(){  
    
 　　int index=1;  
    
 　　for(Iterator<List<Point>> it=resultList.iterator();it.hasNext();){  
    
 　　List<Point> lst=it.next();  
    
 　　if(lst.isEmpty()){  
    
 　　continue;  
    
 　　}  
 System.out.println("-----第"+index+"个聚类-----");  
    
 　　for(Iterator<Point> it1=lst.iterator();it1.hasNext();){  
    
 　　Point p=it1.next();  
    
 　　System.out.println(p.print());  
    
 　　}  
    
 　　index++;  
    
 　　}  
    
 　　}  
    
 　　//找出所有可以直达的聚类  
    
 　　private static void applyDbscan(){  
    
 　　try {  
    
 　　pointsList=Utility.getPointsList();  
    
 　　for(Iterator<Point> it=pointsList.iterator();it.hasNext();){  
    
 　　Point p=it.next();  
    
 　　if(!p.isClassed()){  
    
 　　List<Point> tmpLst=new ArrayList<Point>();  
    
 　　if((tmpLst=Utility.isKeyPoint(pointsList, p, e, minp)) != null){  
    
 　　//为所有聚类完毕的点做标示  
    
 　　Utility.setListClassed(tmpLst);  
    
 　　resultList.add(tmpLst);  
    
 　　}  
    
 　　}  
    
 　　}  
    
 　　} catch (IOException e) {  
    
 　　// TODO Auto-generated catch block  
    
 　　e.printStackTrace();  
    
 　　}  
    
 　　}  
    
 　　//对所有可以直达的聚类进行合并，即找出间接可达的点并进行合并  
    
 　　private static List<List<Point>> getResult(){  
    
 　　applyDbscan();//找到所有直达的聚类  
    
 　　int length=resultList.size();  
    
 　　for(int i=0;i<length;++i){  
    
 　　for(int j=i+1;j<length;++j){  
    
 　　if(Utility.mergeList(resultList.get(i), resultList.get(j))){  
    
 　　resultList.get(j).clear();  
    
 　　}  
    
 　　}  
    
 　　}  
    
 　　return resultList;  
    
 　　}  
    
 　　/** 
   
 　　* 程序主函数 
   
 　　* @param args 
   
 　　*/  
    
 　　public static void main(String[] args) {  
    
 　　getResult();  
    
 　　display();  
    
 　　//System.out.println(Utility.getDistance(new Point(0,0), new Point(0,2)));  
    
 　　}  
    
 　　}  
 </pre>  
 <p><br>  
 下边是一个小测试， 即使用src\\com\\sunzhenxing\\points.txt文件的内容进行测试，points.txt的文件内容是：</p>  
 <p>　　0,0</p>  
 <p>　　0,1</p>  
 <p>　　0,2</p>  
 <p>　　0,3</p>  
 <p>　　0,4</p>  
 <p>　　0,5</p>  
 <p>　　12,1</p>  
 <p>　　12,2</p>  
 <p>　　12,3</p>  
 <p>　　12,4</p>  
 <p>　　12,5</p>  
 <p>　　12,6</p>  
 <p>　　0,6</p>  
 <p>　　0,7</p>  
 <p>　　12,7</p>  
 <p>　　0,8</p>  
 <p>　　0,9</p>  
 <p>　　1,1</p>  
 <p>　　最后算法的结果是：</p>  
 <p>　　-----第1个聚类-----</p>  
 <p>　　<0,0></p>  
 <p>　　<0,1></p>  
 <p>　　<0,2></p>  
 <p>　　<1,1></p>  
 <p>　　<0,3></p>  
 <p>　　<0,4></p>  
 <p>　　<0,5></p>  
 <p>　　<0,6></p>  
 <p>　　<0,7></p>  
 <p>　　<0,8></p>  
 <p>　　<0,9></p>  
 <p>　　-----第2个聚类-----</p>  
 <p>　　<12,1></p>  
 <p>　　<12,2></p>  
 <p>　　<12,3></p>  
 <p>　　<12,4></p>  
 <p>　　<12,5></p>  
 <p>　　<12,6></p>  
 <p>　　<12,7></p>  
 <p>　　大家画一下坐标就可以理解实验的结论了。</p>  
 <p><br>  
  </p>