使用 Apache Commons Math 中的聚类方法 DBSCAN 与 K-means++

使用 Apache Commons Math 中的聚类方法

1)DBSCAN的使用

/**
 * Example that clusters nodes with {@code DBSCANClusterer}, using a pairwise weight table
 * read from a text file as the distance between nodes.
 *
 * <p>Each non-blank input line must hold three tab-separated fields:
 * {@code node1 <TAB> node2 <TAB> weight}. Node ids are also parsed as doubles to build
 * {@code Location}s, so they must be numeric.
 */
public class DBSCAN {

    /** Path of the tab-separated pair/weight input file (jaccardCoding.txt). */
    public static final String inputPath = "D:\\jaccardCoding.txt";

    /**
     * Distinct nodes collected from the input file. Filled as a side effect of
     * {@link #getCodingFileMap(String)}; every call appends to this shared list.
     */
    static List<Location> locations = new ArrayList<>();

    /**
     * Loads the pair table from {@link #inputPath} and prints the DBSCAN clusters
     * obtained with eps = 0.5 and minPts = 10.
     *
     * @param args unused
     * @throws IOException if the input file cannot be read
     */
    public static void main(String[] args) throws IOException {
        DBSCAN dbscan = new DBSCAN();
        Map<NodePair, Double> nodesPairMap = dbscan.getCodingFileMap(inputPath);
        dbscan.getDBSCANResult(locations, nodesPairMap, 0.5, 10);
    }

    /**
     * Runs DBSCAN over the given nodes and prints every resulting cluster to standard output.
     *
     * @author YYH
     * @param locations    nodes to cluster
     * @param nodesPairMap weight recorded for each node pair; used as the distance between nodes
     * @param eps          the distance that defines the eps-neighborhood of a point
     * @param minPts       the minimum number of density-connected points required to form a cluster
     */
    public void getDBSCANResult(
            List<Location> locations,
            Map<NodePair, Double> nodesPairMap,
            double eps,
            int minPts) {
        List<LocationWrapper> clusterInput = new ArrayList<LocationWrapper>(locations.size());
        for (Location location : locations) {
            clusterInput.add(new LocationWrapper(location));
        }

        // DBSCAN with a custom distance measure: distances are looked up in nodesPairMap
        // instead of being computed geometrically from the point coordinates.
        JaccardDistance jaccardDistance = new JaccardDistance(nodesPairMap);
        DBSCANClusterer<LocationWrapper> clusterer =
                new DBSCANClusterer<LocationWrapper>(eps, minPts, jaccardDistance);
        List<Cluster<LocationWrapper>> clusterResults = clusterer.cluster(clusterInput);

        // output the clusters
        for (int i = 0; i < clusterResults.size(); i++) {
            System.out.println("Cluster " + i);
            for (LocationWrapper locationWrapper : clusterResults.get(i).getPoints()) {
                System.out.println(locationWrapper.getLocation());
            }
            System.out.println();
        }
    }

    /**
     * Reads the pair/weight file and returns the weight of every node pair.
     *
     * <p>As a side effect, every distinct node found in the file is appended to the static
     * {@link #locations} list. Blank lines are skipped.
     *
     * @param inputPath path of the tab-separated input file
     * @return map from node pair to the weight given on that pair's line
     * @throws IOException if the file cannot be read
     */
    public Map<NodePair, Double> getCodingFileMap(String inputPath) throws IOException {
        Map<NodePair, Double> nodesPairMap = new HashMap<>();
        Set<Location> locationSet = new HashSet<>();
        // try-with-resources closes the reader even when a line fails to parse.
        try (BufferedReader bReader = FileUtil.getReader(inputPath)) {
            String txtLine;
            while ((txtLine = bReader.readLine()) != null) {
                if (txtLine.trim().isEmpty()) {
                    continue; // tolerate blank lines, e.g. a trailing newline at end of file
                }
                String[] values = txtLine.split("\t");
                NodePair nodePair = new NodePair();
                nodePair.setNode1(values[0]);
                nodePair.setNode2(values[1]);
                nodesPairMap.put(nodePair, Double.valueOf(values[2]));
                // The node id doubles as the single coordinate of its Location.
                locationSet.add(new Location(Double.valueOf(values[0])));
                locationSet.add(new Location(Double.valueOf(values[1])));
            }
        }
        locations.addAll(locationSet);
        return nodesPairMap;
    }

    /**
     * {@code DistanceMeasure} that ignores geometry and returns the weight stored for the
     * pair of nodes identified by the first coordinate of each point.
     *
     * <p>NOTE(review): a pair that is not in the map yields 0, and the lookup uses the
     * pair in the order (a, b); whether (b, a) matches depends on {@code NodePair.equals},
     * which is not visible here - confirm both are intended.
     *
     * @author YYH
     */
    public static class JaccardDistance implements DistanceMeasure {
        private static final long serialVersionUID = 1L;

        /** File that receives a debug line for each distance evaluation. */
        public static final String outPath = "D:\\Test.txt";

        public Map<NodePair, Double> nodesPairMap;

        public JaccardDistance(Map<NodePair, Double> nodesPairMap) {
            this.nodesPairMap = nodesPairMap;
        }

        /** Creates a measure with an empty pair table, so every distance is 0 until one is set. */
        public JaccardDistance() {
            this.nodesPairMap = new HashMap<>();
        }

        /**
         * Returns the stored weight for the node pair ({@code a[0]}, {@code b[0]}), or 0 when
         * the pair is unknown. Each evaluation is also echoed to standard output and written
         * to {@link #outPath} for debugging.
         *
         * @param a point whose first coordinate is the first node id
         * @param b point whose first coordinate is the second node id
         * @return the weight recorded for the pair, or 0 if none is recorded
         */
        @Override
        public double compute(double[] a, double[] b) throws DimensionMismatchException {
            String node1 = toNodeId(a[0]);
            String node2 = toNodeId(b[0]);
            Double weight = nodesPairMap.get(new NodePair(node1, node2));
            double value = (weight != null) ? weight : 0.;

            System.out.println(node1 + " : " + node2 + " : " + value);
            // Debug log is best-effort: a failure to write must not abort clustering.
            // NOTE(review): the writer is opened on every call; whether earlier lines
            // survive depends on FileUtil.getWriter's append mode - confirm.
            try (BufferedWriter bwriter = FileUtil.getWriter(outPath)) {
                bwriter.write(node1 + " : " + node2 + " : " + value);
                bwriter.newLine();
                bwriter.flush();
            } catch (IOException e) {
                e.printStackTrace();
            }
            return value;
        }

        /**
         * Converts a coordinate back to the integer node id it was parsed from.
         *
         * <p>Casting to {@code long} instead of cutting the decimal string at '.' keeps ids
         * of 10^7 and above intact; their decimal string is in scientific notation
         * (e.g. "1.2345678E7"), which the string cut reduced to "1".
         */
        private static String toNodeId(double coordinate) {
            return String.valueOf((long) coordinate);
        }
    }

    /**
     * Adapts a {@code Location} to {@code Clusterable}: the point handed to the clusterer
     * is a one-element array holding the location's x value.
     *
     * @author YYH
     */
    public static class LocationWrapper implements Clusterable {
        private double[] points;
        private Location location;

        public LocationWrapper(Location location) {
            this.location = location;
            this.points = new double[] { location.getX() };
        }

        /** Returns the wrapped location. */
        public Location getLocation() {
            return location;
        }

        /** Returns the one-dimensional point used for distance computation. */
        public double[] getPoint() {
            return points;
        }
    }

}

2)Kmeans++的使用

/**
 * Example that clusters nodes with {@code KMeansPlusPlusClusterer}, using a pairwise weight
 * table read from a text file as the distance between nodes.
 *
 * <p>Each non-blank input line must hold three tab-separated fields:
 * {@code node1 <TAB> node2 <TAB> weight}. Node ids are also parsed as doubles to build
 * {@code Location}s, so they must be numeric.
 *
 * @author YYH
 */
public class KmeansPlusPlus {

    /** Path of the tab-separated pair/weight input file (jaccardCoding.txt). */
    public static final String inputPath = "D:\\jaccardCoding.txt";

    /**
     * Distinct nodes collected from the input file. Filled as a side effect of
     * {@link #getCodingFileMap(String)}; every call appends to this shared list.
     */
    static List<Location> locations = new ArrayList<>();

    /**
     * Loads the pair table from {@link #inputPath} and prints the clusters obtained with
     * 2 clusters and at most 10000 iterations.
     *
     * @param args unused
     * @throws IOException if the input file cannot be read
     */
    public static void main(String[] args) throws IOException {
        KmeansPlusPlus kmeans = new KmeansPlusPlus();
        Map<NodePair, Double> nodesPairMap = kmeans.getCodingFileMap(inputPath);
        kmeans.getKMeansResult(locations, nodesPairMap, 2, 10000);
    }

    /**
     * Runs K-means++ over the given nodes and prints every resulting cluster to standard output.
     *
     * @author Administrator
     * @param locations     nodes to cluster
     * @param nodesPairMap  weight recorded for each node pair; used as the distance between nodes
     * @param classNumber   number of clusters to split the nodes into
     * @param maxIterations maximum number of iterations the algorithm may run
     */
    public void getKMeansResult(
            List<Location> locations,
            Map<NodePair, Double> nodesPairMap,
            int classNumber,
            int maxIterations) {
        List<LocationWrapper> clusterInput = new ArrayList<LocationWrapper>(locations.size());
        for (Location location : locations) {
            clusterInput.add(new LocationWrapper(location));
        }

        // KMeans++ with classNumber clusters, at most maxIterations iterations, and a custom
        // distance measure that looks distances up in nodesPairMap.
        // NOTE(review): k-means measures distances to computed centroids, whose coordinate
        // need not be an actual node id - confirm a lookup-table distance is meaningful here.
        JaccardDistance jaccardDistance = new JaccardDistance(nodesPairMap);
        KMeansPlusPlusClusterer<LocationWrapper> clusterer =
                new KMeansPlusPlusClusterer<LocationWrapper>(classNumber, maxIterations, jaccardDistance);
        List<CentroidCluster<LocationWrapper>> clusterResults = clusterer.cluster(clusterInput);

        // output the clusters
        for (int i = 0; i < clusterResults.size(); i++) {
            System.out.println("Cluster " + i);
            for (LocationWrapper locationWrapper : clusterResults.get(i).getPoints()) {
                System.out.println(locationWrapper.getLocation());
            }
            System.out.println();
        }
    }

    /**
     * Reads the pair/weight file and returns the weight of every node pair.
     *
     * <p>As a side effect, every distinct node found in the file is appended to the static
     * {@link #locations} list. Blank lines are skipped.
     *
     * @param inputPath path of the tab-separated input file
     * @return map from node pair to the weight given on that pair's line
     * @throws IOException if the file cannot be read
     */
    public Map<NodePair, Double> getCodingFileMap(String inputPath) throws IOException {
        Map<NodePair, Double> nodesPairMap = new HashMap<>();
        Set<Location> locationSet = new HashSet<>();
        // try-with-resources closes the reader even when a line fails to parse.
        try (BufferedReader bReader = FileUtil.getReader(inputPath)) {
            String txtLine;
            while ((txtLine = bReader.readLine()) != null) {
                if (txtLine.trim().isEmpty()) {
                    continue; // tolerate blank lines, e.g. a trailing newline at end of file
                }
                String[] values = txtLine.split("\t");
                NodePair nodePair = new NodePair();
                nodePair.setNode1(values[0]);
                nodePair.setNode2(values[1]);
                nodesPairMap.put(nodePair, Double.valueOf(values[2]));
                // The node id doubles as the single coordinate of its Location.
                locationSet.add(new Location(Double.valueOf(values[0])));
                locationSet.add(new Location(Double.valueOf(values[1])));
            }
        }
        locations.addAll(locationSet);
        return nodesPairMap;
    }

    /**
     * {@code DistanceMeasure} that ignores geometry and returns the weight stored for the
     * pair of nodes identified by the first coordinate of each point.
     *
     * <p>NOTE(review): a pair that is not in the map yields 0, and the lookup uses the
     * pair in the order (a, b); whether (b, a) matches depends on {@code NodePair.equals},
     * which is not visible here - confirm both are intended.
     *
     * @author YYH
     */
    public static class JaccardDistance implements DistanceMeasure {
        private static final long serialVersionUID = 1L;

        /** File that receives a debug line for each distance evaluation. */
        public static final String outPath = "D:\\Test.txt";

        public Map<NodePair, Double> nodesPairMap;

        public JaccardDistance(Map<NodePair, Double> nodesPairMap) {
            this.nodesPairMap = nodesPairMap;
        }

        /** Creates a measure with an empty pair table, so every distance is 0 until one is set. */
        public JaccardDistance() {
            this.nodesPairMap = new HashMap<>();
        }

        /**
         * Returns the stored weight for the node pair ({@code a[0]}, {@code b[0]}), or 0 when
         * the pair is unknown. Each evaluation is also echoed to standard output and written
         * to {@link #outPath} for debugging.
         *
         * @param a point whose first coordinate is the first node id
         * @param b point whose first coordinate is the second node id
         * @return the weight recorded for the pair, or 0 if none is recorded
         */
        @Override
        public double compute(double[] a, double[] b) throws DimensionMismatchException {
            String node1 = toNodeId(a[0]);
            String node2 = toNodeId(b[0]);
            Double weight = nodesPairMap.get(new NodePair(node1, node2));
            double value = (weight != null) ? weight : 0.;

            System.out.println(node1 + " : " + node2 + " : " + value);
            // Debug log is best-effort: a failure to write must not abort clustering.
            // NOTE(review): the writer is opened on every call; whether earlier lines
            // survive depends on FileUtil.getWriter's append mode - confirm.
            try (BufferedWriter bwriter = FileUtil.getWriter(outPath)) {
                bwriter.write(node1 + " : " + node2 + " : " + value);
                bwriter.newLine();
                bwriter.flush();
            } catch (IOException e) {
                e.printStackTrace();
            }
            return value;
        }

        /**
         * Converts a coordinate to a node id by truncating it to its integer part.
         *
         * <p>Casting to {@code long} instead of cutting the decimal string at '.' keeps ids
         * of 10^7 and above intact; their decimal string is in scientific notation
         * (e.g. "1.2345678E7"), which the string cut reduced to "1".
         */
        private static String toNodeId(double coordinate) {
            return String.valueOf((long) coordinate);
        }
    }

    /**
     * Adapts a {@code Location} to {@code Clusterable}: the point handed to the clusterer
     * is a one-element array holding the location's x value.
     *
     * @author YYH
     */
    public static class LocationWrapper implements Clusterable {
        private double[] points;
        private Location location;

        public LocationWrapper(Location location) {
            this.location = location;
            this.points = new double[] { location.getX() };
        }

        /** Returns the wrapped location. */
        public Location getLocation() {
            return location;
        }

        /** Returns the one-dimensional point used for distance computation. */
        public double[] getPoint() {
            return points;
        }
    }

}

  • 0
    点赞
  • 3
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值