1.参考文章:
http://blog.csdn.net/jwh_bupt/article/details/7685809
http://coolshell.cn/articles/7779.html
2.自己的小实现留念:
位置数据结构:
/**
 * A named point with integer coordinates.
 *
 * <p>All fields are package-private and mutable; they are meant to be read and
 * assigned directly by the other classes of this example.
 */
public class Location {
    /** First coordinate. */
    int x;
    /** Second coordinate. */
    int y;
    /** Display label of the point. */
    String name;

    /**
     * Combines the two coordinates into a single value,
     * {@code x * Matrix.MAX_CONTAINER_Y + y}. The name does not contribute.
     *
     * <p>{@code equals} is not overridden in this class, so equality is still
     * identity-based.
     */
    @Override
    public int hashCode() {
        return y + x * Matrix.MAX_CONTAINER_Y;
    }
}
矩阵生成类:
/**
 * Static helpers shared by the clustering examples: random point generation,
 * a text rendering of the grid, distance and centroid math, and printing of
 * clustering results.
 */
public class Matrix {
    /** Exclusive upper bound of generated x coordinates; first dimension of the printed grid. */
    public final static int MAX_CONTAINER_X = 50;
    /** Exclusive upper bound of generated y coordinates; second dimension of the printed grid. */
    public final static int MAX_CONTAINER_Y = 50;

    /**
     * Generates a list of random points.
     *
     * @param size number of points to create; a value of zero or less yields an empty list
     * @return a new mutable list of {@code size} points whose coordinates lie in
     *         {@code [0, MAX_CONTAINER_X) x [0, MAX_CONTAINER_Y)} and whose names
     *         are "1", "2", ... in generation order
     */
    public static List<Location> genRandom(int size) {
        List<Location> list = new ArrayList<Location>();
        for (int i = 0; i < size; i++) {
            Location l = new Location();
            l.x = (int) (Math.random() * MAX_CONTAINER_X);
            l.y = (int) (Math.random() * MAX_CONTAINER_Y);
            l.name = String.valueOf(i + 1);
            list.add(l);
        }
        return list;
    }

    /**
     * Prints the grid to standard output, one line per x value, showing the name
     * of the point stored in each cell or a single space for an empty cell.
     *
     * <p>When several points share a cell, the last one in {@code list} wins.
     * Names longer than one character make the printed line wider than the grid.
     * Coordinates outside the grid cause an {@link ArrayIndexOutOfBoundsException}.
     *
     * @param list points to draw
     */
    public static void showPic(List<Location> list) {
        String[][] pic = new String[MAX_CONTAINER_X][MAX_CONTAINER_Y];
        for (Location l : list) {
            pic[l.x][l.y] = l.name;
        }
        StringBuilder row = new StringBuilder();
        for (int i = 0; i < MAX_CONTAINER_X; i++) {
            row.setLength(0);
            for (int j = 0; j < MAX_CONTAINER_Y; j++) {
                String cell = pic[i][j];
                row.append(isNotBlank(cell) ? cell : " ");
            }
            System.out.println(row.toString());
        }
    }

    /**
     * Tells whether {@code s} contains at least one non-whitespace character.
     * A local replacement for the external {@code StringUtils.isNotBlank} helper,
     * so this class only needs the standard library.
     */
    private static boolean isNotBlank(String s) {
        if (s == null) {
            return false;
        }
        for (int i = 0; i < s.length(); i++) {
            if (!Character.isWhitespace(s.charAt(i))) {
                return true;
            }
        }
        return false;
    }

    /**
     * Computes the Euclidean distance between two points.
     *
     * @param l1 first point
     * @param l2 second point
     * @return the straight-line distance between {@code l1} and {@code l2}
     */
    public static double getDistance(Location l1, Location l2) {
        // Widen before subtracting so extreme coordinates cannot overflow int.
        double dx = (double) l1.x - l2.x;
        double dy = (double) l1.y - l2.y;
        return Math.sqrt(dx * dx + dy * dy);
    }

    /**
     * Computes the centroid of a group of points.
     *
     * <p>The coordinates are the integer means of the members' coordinates
     * (the fractional part is discarded because {@code Location} stores ints).
     * The name is the members' names, each followed by a comma, in list order.
     *
     * @param list the members of the group; must not be empty
     * @return a new {@code Location} describing the centroid
     * @throws IllegalArgumentException if {@code list} is empty
     */
    public static Location centerLocation(List<Location> list) {
        if (list.isEmpty()) {
            throw new IllegalArgumentException("cannot compute the center of an empty list");
        }
        // long accumulators: summing many int coordinates must not overflow.
        long sumX = 0;
        long sumY = 0;
        StringBuilder names = new StringBuilder();
        for (Location l : list) {
            sumX += l.x;
            sumY += l.y;
            names.append(l.name).append(',');
        }
        Location center = new Location();
        center.x = (int) (sumX / list.size());
        center.y = (int) (sumY / list.size());
        center.name = names.toString();
        return center;
    }

    /**
     * Prints one line per map entry in the form {@code ==>key: name, name, #}.
     *
     * @param lsmax groups of points keyed by group number
     */
    public static void printGroup(Map<Integer, List<Location>> lsmax) {
        for (Map.Entry<Integer, List<Location>> group : lsmax.entrySet()) {
            System.out.print("==>" + group.getKey() + ":");
            for (Location member : group.getValue()) {
                System.out.print(" " + member.name + ",");
            }
            System.out.println(" #");
        }
    }

    /**
     * Prints one line per list element in the form {@code ==>index: name #}.
     *
     * @param lsmax the points to print
     */
    public static void printGroup(List<Location> lsmax) {
        for (int k = 0; k < lsmax.size(); k++) {
            System.out.print("==>" + k + ":");
            System.out.print(" " + lsmax.get(k).name);
            System.out.println(" #");
        }
    }
}
层次聚类:
/**
 * Agglomerative (bottom-up) clustering: the two closest entries are repeatedly
 * replaced by their centroid until the requested number of entries remains.
 */
public class Hierarchical {
    // Target number of clusters, set by startH and read by Recursion.
    // NOTE(review): static, so the value is shared by every instance.
    private static int h;

    /**
     * Merges the closest pair of entries over and over until at most {@code h}
     * entries (the target passed to {@link #startH}) are left, then prints them
     * with {@code Matrix.printGroup}.
     *
     * <p>The list is modified in place: merged entries are removed and their
     * centroid is appended. Callers that still need the original points must
     * pass a copy.
     *
     * <p>Nothing is merged when the list already holds no more than the target
     * number of entries, and merging always stops once a single entry remains,
     * so a target below 1 or a list with fewer than two entries terminates
     * normally.
     *
     * @param ls1 the entries to cluster; modified in place
     */
    public void Recursion(List<Location> ls1) {
        // Iterative on purpose: the work is one merge per step, and a loop keeps
        // the stack depth constant regardless of the input size.
        while (ls1.size() > h && ls1.size() > 1) {
            mergeClosestPair(ls1);
        }
        Matrix.printGroup(ls1);
    }

    /** Replaces the two closest entries of {@code clusters} (size >= 2) by their centroid. */
    private static void mergeClosestPair(List<Location> clusters) {
        int first = 0;
        int second = 1;
        double best = Double.POSITIVE_INFINITY;
        // Distance is symmetric, so only pairs with i < j are examined. Ties keep
        // the pair found first.
        for (int i = 0; i < clusters.size() - 1; i++) {
            for (int j = i + 1; j < clusters.size(); j++) {
                double d = Matrix.getDistance(clusters.get(i), clusters.get(j));
                if (d < best) {
                    best = d;
                    first = i;
                    second = j;
                }
            }
        }
        List<Location> pair = new ArrayList<Location>(2);
        pair.add(clusters.get(first));
        pair.add(clusters.get(second));
        // The centroid is taken over the two merged entries only; it is not
        // weighted by how many original points each entry already represents.
        Location merged = Matrix.centerLocation(pair);
        // Remove by index, higher index first so the lower one stays valid.
        clusters.remove(second);
        clusters.remove(first);
        clusters.add(merged);
    }

    /**
     * Clusters {@code ls1} down to {@code k} entries and prints the result.
     *
     * @param ls1 the entries to cluster; modified in place (see {@link #Recursion})
     * @param k   the number of clusters to stop at
     */
    public void startH(List<Location> ls1, int k) {
        h = k;
        Recursion(ls1);
    }
}
K-means:
/**
 * K-means clustering: points are assigned to their nearest center, every center
 * is moved to the centroid of its points, and the two steps repeat until no
 * center moves.
 */
public class Kmeans {
    // Safety bound on the number of passes. Matrix.centerLocation stores integer
    // coordinates, so exact convergence may not be guaranteed; stop after this
    // many passes instead of looping forever.
    private static final int MAX_PASSES = 1000;

    /**
     * Runs the assign/update loop starting from the given centers and prints the
     * final groups with {@code Matrix.printGroup}, keyed by center index.
     *
     * <p>Every center is carried over to the next pass: a center that did not
     * move is kept as is, and a center that attracted no points keeps its
     * previous position. The number of centers therefore stays constant and a
     * group's key always refers to the same center. Groups without points are
     * not printed.
     *
     * <p>Neither input list is modified.
     *
     * @param ls1 the points to cluster
     * @param ls2 the initial centers
     * @throws IllegalArgumentException if there are points but no centers
     */
    public void Recursion(List<Location> ls1, List<Location> ls2) {
        if (!ls1.isEmpty() && ls2.isEmpty()) {
            throw new IllegalArgumentException("at least one initial center is required");
        }
        List<Location> centers = ls2;
        Map<Integer, List<Location>> groups = assign(ls1, centers);
        for (int pass = 0; pass < MAX_PASSES; pass++) {
            List<Location> next = new ArrayList<Location>(centers.size());
            boolean moved = false;
            for (int c = 0; c < centers.size(); c++) {
                Location old = centers.get(c);
                List<Location> members = groups.get(c);
                if (members == null) {
                    // No point chose this center: keep it where it is.
                    next.add(old);
                    continue;
                }
                Location centroid = Matrix.centerLocation(members);
                if (centroid.x != old.x || centroid.y != old.y) {
                    moved = true;
                }
                next.add(centroid);
            }
            if (!moved) {
                break;
            }
            centers = next;
            groups = assign(ls1, centers);
        }
        Matrix.printGroup(groups);
    }

    /**
     * Groups the points by the index of their nearest center. On equal distance
     * the center with the lowest index wins. Centers without points get no entry.
     */
    private static Map<Integer, List<Location>> assign(List<Location> points, List<Location> centers) {
        Map<Integer, List<Location>> groups = new HashMap<Integer, List<Location>>();
        for (Location p : points) {
            int nearest = 0;
            double best = Matrix.getDistance(p, centers.get(0));
            for (int c = 1; c < centers.size(); c++) {
                double d = Matrix.getDistance(p, centers.get(c));
                if (d < best) {
                    best = d;
                    nearest = c;
                }
            }
            List<Location> members = groups.get(nearest);
            if (members == null) {
                members = new ArrayList<Location>();
                groups.put(nearest, members);
            }
            members.add(p);
        }
        return groups;
    }

    /**
     * Clusters {@code ls1} around {@code k} initial centers produced by
     * {@code Matrix.genRandom(k)} and prints the result.
     *
     * @param ls1 the points to cluster
     * @param k   the number of initial centers
     */
    public void startK(List<Location> ls1, int k) {
        List<Location> point = Matrix.genRandom(k);
        Recursion(ls1, point);
    }
}
测试:
/**
 * Demo driver: generates 15 random points, prints the grid, then clusters the
 * same points into 3 groups with both algorithms.
 */
public class Test {
    /**
     * Runs the demo and writes everything to standard output.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        List<Location> randomMap = Matrix.genRandom(15);
        Matrix.showPic(randomMap);

        System.out.println("Hierarchical:");
        Hierarchical h = new Hierarchical();
        // Hierarchical removes and appends entries in the list it is given, so
        // hand it a copy; otherwise K-means below would receive the merged
        // centroids instead of the original 15 points.
        h.startH(new java.util.ArrayList<Location>(randomMap), 3);

        System.out.println("Kmeans:");
        Kmeans k = new Kmeans();
        k.startK(randomMap, 3);
    }
}
结果:
Hierarchical:
==>0: 2,9, #
==>1: 3,5,6,, #
==>2: 4,10,13,,15,11,8,12,,,7,1,14,,,,,, #
Kmeans:
==>0: 2,9,, 3,5,6,,, #
==>1: 4,10,13,,15,11,8,12,,,7,1,14,,,,,,, #
Hierarchical:
==>0: 13,2,7,, #
==>1: 12,6,8,, #
==>2: 5,9,,14,10,11,,,15,1,3,4,,,,, #
Kmeans:
==>0: 13,2,7,,, #
==>1: 12,6,8,,, #
==>2: 5,9,,14,10,11,,,15,1,3,4,,,,,, #