聚类算法中基于链接的算法大致有三种:单链接算法(single link)、平均链接算法(average link)、最小生成树算法(minimum spanning tree)。现在先实现单链接算法,其他算法以后再续。
单链接算法的过程是:首先生成各个元素之间的距离矩阵,然后根据距离和阈值的比对来控制生成的聚类个数;阈值越大,生成的聚类越少,直到所有元素同属一类。
下面的例子根据经纬度对城市进行聚类。
单链接算法的过程是:首先生成各个元素之间的距离矩阵,然后根据距离和阈值的比对来控制生成的聚类个数;阈值越大,生成的聚类越少,直到所有元素同属一类。
下面的例子根据经纬度对城市进行聚类。
- package singlelink;
- import java.util.ArrayList;
- import java.util.HashSet;
- import java.util.List;
- import java.util.Set;
- public class SingleLinkTest {
- public static void main(String[] args) {
- List<City> citys = new ArrayList<City>();
- City city0 = new City();
- city0.setName("北 京");
- city0.setX(116.28);
- city0.setY(39.54);
- citys.add(city0);
- City city1 = new City();
- city1.setName("上 海");
- city1.setX(121.29);
- city1.setY(31.14);
- citys.add(city1);
- City city2 = new City();
- city2.setName("天 津");
- city2.setX(117.11);
- city2.setY(39.09);
- citys.add(city2);
- City city3 = new City();
- city3.setName("重 庆");
- city3.setX(106.32);
- city3.setY(29.32);
- citys.add(city3);
- City city4 = new City();
- city4.setName("哈尔滨");
- city4.setX(126.41);
- city4.setY(45.45);
- citys.add(city4);
- City city5 = new City();
- city5.setName("长 春");
- city5.setX(125.19);
- city5.setY(43.52);
- citys.add(city5);
- City city6 = new City();
- city6.setName("南 京");
- city6.setX(118.50);
- city6.setY(32.02);
- citys.add(city6);
- City city7 = new City();
- city7.setName("武 汉");
- city7.setX(114.21);
- city7.setY(30.37);
- citys.add(city7);
- City city8 = new City();
- city8.setName("台 北");
- city8.setX(121.31);
- city8.setY(25.03);
- citys.add(city8);
- City city9 = new City();
- city9.setName("香 港");
- city9.setX(114.10);
- city9.setY(22.18);
- citys.add(city9);
- SingleLink sing = new SingleLink(citys);
- List<Set<City>> list = sing.compute();
- for (Set<City> list0 : list) {
- System.out.println("=============");
- for (City city : list0) {
- System.out.println(city.getName() + " : (" + city.getX()+","+city.getY()+")");
- }
- }
- }
- }
- /**
- * 聚类之 单链接算法
- *
- * @author duyf
- *
- */
- class SingleLink {
- private List<City> data;
- // 默认阀值
- private double distanceX = 8;
- public SingleLink(List<City> list) {
- data = list;
- }
- public List<Set<City>> compute() {
- List<Set<City>> list = new ArrayList<Set<City>>();
- // 距离矩阵
- double[][] ds = new double[data.size()][data.size()];
- for (int i = 0; i < data.size(); i++) {
- City city1 = data.get(i);
- for (int j = i + 1; j < data.size(); j++) {
- City city2 = data.get(j);
- ds[i][j] = getDistance(city1, city2);
- // 矩阵 对称性
- ds[j][i] = ds[i][j];
- }
- ds[i][i] = 0.0;
- }
- for (int i = 0; i < ds.length; i++) {
- for (int j = 0; j < ds.length; j++) {
- System.out.print((int) ds[i][j] + ",");
- }
- System.out.println();
- }
- boolean[] hasUsed = new boolean[ds.length];
- for (int i = 0; i < ds.length; i++) {
- Set<City> setDs = new HashSet<City>();
- if (hasUsed[i]) {
- continue;
- }
- for (int j = i; j < ds.length; j++) {
- if (ds[i][j] <= distanceX && hasUsed[j]==false) {
- setDs.add(data.get(j));
- hasUsed[j] = true;
- }
- }
- if (setDs.size() > 0) {
- list.add(setDs);
- }
- }
- return list;
- }
- // 计算空间距离
- private double getDistance(City city1, City city2) {
- double distance=Math.pow(city1.getX()-city2.getX(),2)+Math.pow(city1.getY()-city2.getY(),2);
- return Math.sqrt(distance);
- }
- }
- /**
- * 城市
- *
- * @author duyf
- *
- */
- class City {
- private String name;
- // 经度
- private double x;
- // 纬度
- private double y;
- public double getX() {
- return x;
- }
- public void setX(double x) {
- this.x = x;
- }
- public double getY() {
- return y;
- }
- public void setY(double y) {
- this.y = y;
- }
- public String getName() {
- return name;
- }
- public void setName(String name) {
- this.name = name;
- }
- public boolean equals(Object obj) {
- if (obj == null) {
- return false;
- }
- if (this == obj) {
- return true;
- }
- City other = (City) obj;
- if (this.getX() == other.getX() && this.getY() == other.getY()) {
- return true;
- }
- return false;
- }
- }