[学习] 数据挖掘-聚类算法(K-means,层次聚类)

最新推荐文章于 2024-08-28 15:25:23 发布

wty19

最新推荐文章于 2024-08-28 15:25:23 发布

阅读量1.2k

点赞数

分类专栏：数据挖掘 JAVA 文章标签：数据挖掘学习算法

本文链接：https://blog.csdn.net/wty19/article/details/21161959

版权

数据挖掘同时被 2 个专栏收录

5 篇文章 0 订阅

订阅专栏

JAVA

5 篇文章 0 订阅

订阅专栏

1.参考文章：

http://blog.csdn.net/jwh_bupt/article/details/7685809
http://coolshell.cn/articles/7779.html

2.自己的小实现留念：

位置数据结构：

/**
 * A named point on the integer grid used by the clustering demos.
 *
 * <p>Fields are package-private and mutable; the code that generates and merges points
 * assigns them directly.
 */
public class Location {
    /** First grid coordinate. */
    int x;
    /** Second grid coordinate. */
    int y;
    /** Display label of the point. */
    String name;

    /**
     * Hashes the coordinates only, as {@code x * Matrix.MAX_CONTAINER_Y + y}.
     * The name does not take part in the hash.
     */
    @Override
    public int hashCode() {
        return x * Matrix.MAX_CONTAINER_Y + y;
    }

    /**
     * Two locations are equal when their coordinates and names all match.
     *
     * <p>Defined alongside {@link #hashCode()} so the two stay consistent: equal
     * locations share both coordinates and therefore the same hash code.
     *
     * @param obj the object to compare with
     * @return {@code true} if {@code obj} is a {@code Location} with the same x, y and name
     */
    @Override
    public boolean equals(Object obj) {
        if (this == obj) {
            return true;
        }
        if (!(obj instanceof Location)) {
            return false;
        }
        Location other = (Location) obj;
        if (x != other.x || y != other.y) {
            return false;
        }
        return name == null ? other.name == null : name.equals(other.name);
    }

}

矩阵生成类：

/**
 * Helpers shared by the clustering demos: random point generation, distance and
 * center computation, and console output.
 */
public class Matrix {
    /** Exclusive upper bound of generated x coordinates; also the number of rows printed by showPic. */
    public final static int MAX_CONTAINER_X = 50;

    /** Exclusive upper bound of generated y coordinates; also the number of columns printed by showPic. */
    public final static int MAX_CONTAINER_Y = 50;

    /**
     * Generates random points inside the grid.
     *
     * @param size number of points to create; values below 1 yield an empty list
     * @return a new mutable list of points named "1" through "size"
     */
    public static List<Location> genRandom(int size) {
        List<Location> list = new ArrayList<Location>();
        for (int n = 0; n < size; n++) {
            Location l = new Location();
            l.x = (int) (Math.random() * MAX_CONTAINER_X);
            l.y = (int) (Math.random() * MAX_CONTAINER_Y);
            l.name = String.valueOf(n + 1);
            list.add(l);
        }
        return list;
    }

    /**
     * Prints the grid to standard output, one row per x value, showing each point's name
     * at its (x, y) cell. When several points share a cell, the last one in the list wins.
     *
     * <p>Every cell is padded to the length of the longest name so that multi-character
     * names (such as "10") do not shift the columns to their right. With single-character
     * names the output is one character per cell.
     *
     * @param list points to draw; coordinates must lie inside the grid
     */
    public static void showPic(List<Location> list) {
        String[][] pic = new String[MAX_CONTAINER_X][MAX_CONTAINER_Y];
        int cellWidth = 1;
        for (Location l : list) {
            pic[l.x][l.y] = l.name;
            if (!isBlank(l.name)) {
                cellWidth = Math.max(cellWidth, l.name.length());
            }
        }
        for (int i = 0; i < MAX_CONTAINER_X; i++) {
            StringBuilder row = new StringBuilder();
            for (int j = 0; j < MAX_CONTAINER_Y; j++) {
                String cell = isBlank(pic[i][j]) ? "" : pic[i][j];
                row.append(cell);
                for (int pad = cell.length(); pad < cellWidth; pad++) {
                    row.append(' ');
                }
            }
            System.out.println(row);
        }
    }

    /**
     * Computes the Euclidean distance between two points.
     *
     * @param l1 first point
     * @param l2 second point
     * @return the straight-line distance between {@code l1} and {@code l2}
     */
    public static double getDistance(Location l1, Location l2) {
        return Math.sqrt(Math.pow(l1.x - l2.x, 2) + Math.pow(l1.y - l2.y, 2));
    }

    /**
     * Builds the center of a group of points.
     *
     * <p>The coordinates are the integer (truncating) averages of the members'
     * coordinates, and the name is every member name followed by a comma, concatenated
     * in list order.
     *
     * @param list the points to average; must not be empty
     * @return a new point at the center of {@code list}
     * @throws IllegalArgumentException if {@code list} is empty
     */
    public static Location centerLocation(List<Location> list) {
        if (list.isEmpty()) {
            throw new IllegalArgumentException("cannot compute the center of an empty list");
        }
        // Sum in long so that many large coordinates cannot overflow before the division.
        long sumX = 0;
        long sumY = 0;
        StringBuilder names = new StringBuilder();
        for (Location l : list) {
            sumX += l.x;
            sumY += l.y;
            names.append(l.name).append(",");
        }
        Location ls = new Location();
        ls.x = (int) (sumX / list.size());
        ls.y = (int) (sumY / list.size());
        ls.name = names.toString();
        return ls;
    }

    /**
     * Prints one line per cluster: the cluster key followed by the names of its members.
     *
     * @param lsmax clusters keyed by cluster index
     */
    public static void printGroup(Map<Integer, List<Location>> lsmax) {
        for (Map.Entry<Integer, List<Location>> group : lsmax.entrySet()) {
            System.out.print("==>" + group.getKey() + ":");
            for (Location member : group.getValue()) {
                System.out.print(" " + member.name + ",");
            }
            System.out.println(" #");
        }
    }

    /**
     * Prints one line per point: its position in the list followed by its name.
     *
     * @param lsmax points to print
     */
    public static void printGroup(List<Location> lsmax) {
        for (int k = 0; k < lsmax.size(); k++) {
            System.out.print("==>" + k + ":");
            System.out.print(" " + lsmax.get(k).name);
            System.out.println(" #");
        }
    }

    /** Returns true when the string is null, empty, or made up only of whitespace. */
    private static boolean isBlank(String s) {
        if (s == null) {
            return true;
        }
        for (int i = 0; i < s.length(); i++) {
            if (!Character.isWhitespace(s.charAt(i))) {
                return false;
            }
        }
        return true;
    }
}

层次聚类：

/**
 * Bottom-up clustering demo: repeatedly replaces the two closest points with their
 * center until the requested number of clusters remains, then prints the result.
 */
public class Hierarchical {

    /** Target cluster count, assigned by startH. Static, so it is shared by all instances. */
    private static int h;

    /**
     * Merges the closest pair of points until at most {@code h} remain (or only one is
     * left), then prints the surviving clusters.
     *
     * <p>The list is modified in place: merged points are removed and their center is
     * appended. A list that already holds {@code h} or fewer points is printed unchanged,
     * and an empty list prints nothing.
     *
     * @param ls1 the points to cluster; must be a modifiable list
     */
    public void Recursion(List<Location> ls1) {
        // Stop at a single cluster even if the target is below 1: one point cannot be merged
        // any further.
        while (ls1.size() > h && ls1.size() > 1) {
            mergeClosestPair(ls1);
        }
        Matrix.printGroup(ls1);
    }

    /**
     * Clusters the points down to {@code k} groups and prints them.
     *
     * @param ls1 the points to cluster; modified in place
     * @param k   the number of clusters to stop at
     */
    public void startH(List<Location> ls1, int k) {
        h = k;
        Recursion(ls1);
    }

    /** Replaces the two closest points of the list (size must be at least 2) with their center. */
    private static void mergeClosestPair(List<Location> clusters) {
        int first = 0;
        int second = 1;
        double best = Double.POSITIVE_INFINITY;
        // Distance is symmetric, so each unordered pair is examined once (j > i).
        for (int i = 0; i < clusters.size() - 1; i++) {
            for (int j = i + 1; j < clusters.size(); j++) {
                double d = Matrix.getDistance(clusters.get(i), clusters.get(j));
                if (d < best) {
                    best = d;
                    first = i;
                    second = j;
                }
            }
        }
        List<Location> pair = new ArrayList<Location>();
        pair.add(clusters.get(first));
        pair.add(clusters.get(second));
        Location merged = Matrix.centerLocation(pair);
        // Remove the higher index first so that the lower index is still valid afterwards.
        clusters.remove(second);
        clusters.remove(first);
        clusters.add(merged);
    }

}

K-means：


/**
 * K-means clustering demo: assigns every point to its nearest centroid, moves each
 * centroid to the center of its members, and repeats until no centroid moves.
 */
public class Kmeans {

    /** Safety net: refinement stops after this many rounds even if centroids are still moving. */
    private static final int MAX_ITERATIONS = 1000;

    /**
     * Refines the given centroids against the points and prints the final grouping.
     *
     * <p>All centroids are carried into the next round: a centroid whose cluster is
     * non-empty moves to the center of its members, and a centroid with no members stays
     * where it is. The number of centroids therefore stays constant across rounds.
     * Neither input list is modified.
     *
     * @param ls1 the points to cluster
     * @param ls2 the initial centroids
     * @throws IndexOutOfBoundsException if {@code ls2} is empty while {@code ls1} is not
     */
    public void Recursion(List<Location> ls1, List<Location> ls2) {
        List<Location> centroids = ls2;
        Map<Integer, List<Location>> groups = assign(ls1, centroids);
        for (int round = 0; round < MAX_ITERATIONS; round++) {
            List<Location> next = new ArrayList<Location>(centroids.size());
            boolean moved = false;
            for (int c = 0; c < centroids.size(); c++) {
                Location old = centroids.get(c);
                List<Location> members = groups.get(c);
                if (members == null) {
                    // No point chose this centroid; keep it so the cluster count does not shrink.
                    next.add(old);
                    continue;
                }
                Location center = Matrix.centerLocation(members);
                if (center.x != old.x || center.y != old.y) {
                    moved = true;
                }
                next.add(center);
            }
            if (!moved) {
                break;
            }
            centroids = next;
            groups = assign(ls1, centroids);
        }
        Matrix.printGroup(groups);
    }

    /**
     * Clusters the points around {@code k} randomly placed initial centroids and prints
     * the result.
     *
     * @param ls1 the points to cluster
     * @param k   the number of centroids to generate
     */
    public void startK(List<Location> ls1, int k) {
        List<Location> point = Matrix.genRandom(k);
        Recursion(ls1, point);
    }

    /** Groups the points by the index of their nearest centroid; ties go to the lowest index. */
    private static Map<Integer, List<Location>> assign(List<Location> points, List<Location> centroids) {
        Map<Integer, List<Location>> groups = new HashMap<Integer, List<Location>>();
        for (Location p : points) {
            int nearest = 0;
            double best = Matrix.getDistance(p, centroids.get(0));
            for (int c = 1; c < centroids.size(); c++) {
                double d = Matrix.getDistance(p, centroids.get(c));
                if (d < best) {
                    best = d;
                    nearest = c;
                }
            }
            List<Location> members = groups.get(nearest);
            if (members == null) {
                members = new ArrayList<Location>();
                groups.put(nearest, members);
            }
            members.add(p);
        }
        return groups;
    }

}

测试：

/** Runs both clustering demos on the same set of random points. */
public class Test {
    /**
     * Generates 15 random points, draws them, and clusters them into 3 groups with each
     * algorithm in turn.
     *
     * @param args unused
     */
    public static void main(String args[]){
        List<Location> randomMap = Matrix.genRandom(15);
        Matrix.showPic(randomMap);
        System.out.println("Hierarchical:");
        Hierarchical h = new Hierarchical();
        // startH merges points inside the list it receives, so give it a copy;
        // otherwise K-means below would only see the 3 merged points.
        h.startH(new java.util.ArrayList<Location>(randomMap),3);
        System.out.println("Kmeans:");
        Kmeans k = new Kmeans();
        k.startK(randomMap,3);
    }
}

结果（注意：Hierarchical 会就地合并传入的列表，因此下面的 Kmeans 输出实际上是对层次聚类合并后剩下的 3 个点再做聚类的结果）：

Hierarchical:
==>0: 2,9, #
==>1: 3,5,6,, #
==>2: 4,10,13,,15,11,8,12,,,7,1,14,,,,,, #
Kmeans:
==>0: 2,9,, 3,5,6,,, #
==>1: 4,10,13,,15,11,8,12,,,7,1,14,,,,,,, #

Hierarchical:
==>0: 13,2,7,, #
==>1: 12,6,8,, #
==>2: 5,9,,14,10,11,,,15,1,3,4,,,,, #
Kmeans:
==>0: 13,2,7,,, #
==>1: 12,6,8,,, #
==>2: 5,9,,14,10,11,,,15,1,3,4,,,,,, #