大三狗模式识别课后练手,仅供参考。若有不合理或是有待改进之处,希望能得到大神的指导与建议!
算法思路
main(){
chooseDefaultCenter();
while(!isSame){
findCluster();
calcNewCenter();
judgeIsSame();
}
}
程序结构
-public class KMeansAlgo (算法主体实现部分;注意 Java 类名不能包含连字符 "-")
-public static double calcDis(Sample samp1, Sample samp2)
-private static void chooseDefaultCenter(int centerCount)
-private static void findCluster()
-private static void calcNewCenter()
-private static void judgeIsSame()
-private static void printResult()
-public static void main(String[] args)
-class Sample (定义样本类)
代码段
Sample类
/**
 * A point in the 2-D plane used by the K-means exercise.
 *
 * <p>The same type represents both ordinary samples and cluster centers. A
 * center additionally collects the samples that were assigned to it, so a new
 * center position can be computed from them.
 */
class Sample {
    /** X coordinate of the sample. */
    private final double posx;
    /** Y coordinate of the sample. */
    private final double posy;
    /** Whether this sample has been flagged as a cluster center; false until {@link #setCenter()} is called. */
    private boolean isCenter = false;
    /** The cluster center most recently recorded as closest to this sample. */
    private Sample nearestSamp;
    /** Samples assigned to this object when it acts as a cluster center. */
    private final ArrayList<Sample> sampList = new ArrayList<Sample>();

    /**
     * Creates a sample that is not flagged as a center.
     *
     * @param d the x coordinate
     * @param e the y coordinate
     */
    public Sample(double d, double e) {
        posx = d;
        posy = e;
    }

    /** @return the x coordinate */
    public double getX() {
        return posx;
    }

    /** @return the y coordinate */
    public double getY() {
        return posy;
    }

    /** @return the center recorded as nearest, or {@code null} if none was set */
    public Sample getNearestSamp() {
        return nearestSamp;
    }

    /**
     * Records the center closest to this sample.
     *
     * @param temp_samp the nearest center
     */
    public void setNearestSamp(Sample temp_samp) {
        nearestSamp = temp_samp;
    }

    /** @return {@code true} once {@link #setCenter()} has been called */
    public boolean getCenter() {
        return isCenter;
    }

    /** Flags this sample as a cluster center. */
    public void setCenter() {
        isCenter = true;
    }

    /**
     * Appends a sample to the members of this center.
     *
     * @param samp the sample assigned to this center
     */
    public void addSamp(Sample samp) {
        sampList.add(samp);
    }

    /** @return how many samples have been added to this center */
    public int getunCenterNum() {
        return sampList.size();
    }

    /**
     * Returns one of the samples added to this center.
     *
     * @param temp_index zero-based position in insertion order
     * @return the sample stored at that position
     */
    public Sample getunCenterItem(int temp_index) {
        return sampList.get(temp_index);
    }
}
public static double calcDis(Sample samp1, Sample samp2)
/**
 * Computes the Euclidean distance between two samples.
 *
 * <p>The computation stays in {@code double} throughout. The earlier version
 * cast the squared terms to {@code int}, which discarded the fractional part
 * and produced wrong distances as soon as a coordinate (for example a
 * recomputed cluster center) was not an integer.
 *
 * @param samp1 the first point
 * @param samp2 the second point
 * @return the straight-line distance between the two points
 */
public static double calcDis(Sample samp1, Sample samp2) {
    double dx = samp1.getX() - samp2.getX();
    double dy = samp1.getY() - samp2.getY();
    return Math.sqrt(dx * dx + dy * dy);
}
private static void chooseDefaultCenter(int centerCount)
/**
 * Seeds the initial cluster centers.
 *
 * <p>For each of the first {@code centerCount} entries of
 * {@code unCenterSample}, a new {@code Sample} with the same coordinates is
 * appended to {@code preCenterSample}. The entries are copied, not removed, so
 * {@code unCenterSample} is left unchanged.
 *
 * @param centerCount number of initial centers to create
 */
private static void chooseDefaultCenter(int centerCount) {
    int idx = 0;
    while (idx < centerCount) {
        // A fresh object is created so the center is independent of the sample it was seeded from.
        double seedX = unCenterSample.get(idx).getX();
        double seedY = unCenterSample.get(idx).getY();
        preCenterSample.add(new Sample(seedX, seedY));
        idx++;
    }
}
private static void findCluster()
/**
 * Assigns every sample in {@code unCenterSample} to its closest center in
 * {@code preCenterSample}.
 *
 * <p>For each sample the nearest center is recorded on the sample via
 * {@code setNearestSamp} and the sample is appended to that center's member
 * list via {@code addSamp}. When two centers are equally close, the one that
 * comes first in {@code preCenterSample} wins.
 *
 * <p>The search starts from {@code Double.POSITIVE_INFINITY} rather than a
 * fixed magic number, so samples that are far away from every center are still
 * assigned in the current pass instead of keeping a stale (or null) nearest
 * center from an earlier pass.
 */
private static void findCluster() {
    for (Sample samp : unCenterSample) {
        Sample nearest = null;
        double minDis = Double.POSITIVE_INFINITY;
        for (Sample center : preCenterSample) {
            double dis = calcDis(center, samp);
            // Strict comparison keeps the first center on ties.
            if (dis < minDis) {
                minDis = dis;
                nearest = center;
            }
        }
        // No usable center (no centers at all, or only NaN distances): leave the sample unassigned.
        if (nearest == null) {
            continue;
        }
        samp.setNearestSamp(nearest);
        nearest.addSamp(samp);
    }
}
private static void calcNewCenter()
/**
 * Computes a new center for every cluster and appends it to
 * {@code newCenterSample}, in the same order as {@code preCenterSample}.
 *
 * <p>The new center is the arithmetic mean of the coordinates of the samples
 * currently attached to the old center. Sums are accumulated in
 * {@code double}: with {@code int} accumulators the compound assignment
 * {@code +=} silently narrows each coordinate and drops its fractional part.
 *
 * <p>A center that has no members keeps its previous position. Dividing by a
 * member count of zero would otherwise yield a NaN center, and a NaN never
 * compares equal to itself, so the convergence check could never succeed.
 */
private static void calcNewCenter() {
    for (int i = 0; i < preCenterSample.size(); i++) {
        Sample center = preCenterSample.get(i);
        int memberCount = center.getunCenterNum();
        if (memberCount == 0) {
            // Empty cluster: carry the old position forward as a fresh object.
            newCenterSample.add(new Sample(center.getX(), center.getY()));
            continue;
        }
        double sumX = 0.0;
        double sumY = 0.0;
        for (int m = 0; m < memberCount; m++) {
            Sample member = center.getunCenterItem(m);
            sumX += member.getX();
            sumY += member.getY();
        }
        newCenterSample.add(new Sample(sumX / memberCount, sumY / memberCount));
    }
}
private static void judgeIsSame()
/**
 * Checks whether the centers computed in this iteration are identical to the
 * centers of the previous iteration and updates {@code isSame} accordingly.
 *
 * <p>If every center has exactly the same coordinates as before, the
 * clustering is finished: a message is printed, {@code isSame} is set to
 * {@code true} and the result is printed. Otherwise the contents of
 * {@code preCenterSample} are replaced by those of {@code newCenterSample},
 * {@code newCenterSample} is emptied and {@code isSame} is set to
 * {@code false}.
 */
private static void judgeIsSame() {
    boolean unchanged = true;
    int idx = 0;
    // Stop at the first center whose coordinates moved.
    while (unchanged && idx < preCenterSample.size()) {
        Sample before = preCenterSample.get(idx);
        Sample after = newCenterSample.get(idx);
        unchanged = before.getX() == after.getX() && before.getY() == after.getY();
        idx++;
    }

    if (unchanged) {
        System.out.println("聚类结束,下列为聚类结果");
        isSame = true;
        printResult();
        return;
    }

    // Centers moved: the new centers become the reference for the next iteration.
    preCenterSample.clear();
    for (int k = 0; k < newCenterSample.size(); k++) {
        preCenterSample.add(newCenterSample.get(k));
    }
    newCenterSample.clear();
    isSame = false;
}
private static void printResult()
/**
 * Prints every center in {@code preCenterSample} followed by the samples
 * attached to it, one cluster per group of lines on standard output.
 */
private static void printResult() {
    int clusterTotal = preCenterSample.size();
    for (int c = 0; c < clusterTotal; c++) {
        Sample center = preCenterSample.get(c);
        System.out.println("聚类中心: ");
        System.out.println("(" + center.getX() + "," + center.getY() + ") ");
        System.out.println("非聚类中心为:");
        // All members of one cluster share a single output line.
        int k = 0;
        while (k < center.getunCenterNum()) {
            Sample member = center.getunCenterItem(k);
            System.out.print("(" + member.getX() + "," + member.getY() + ") ");
            k++;
        }
        System.out.println();
    }
}
采用教材例子检验算法正确性
/**
 * Runs the clustering on a fixed set of twenty 2-D points.
 *
 * <p>The points are loaded into {@code unCenterSample}, the initial centers
 * are chosen, and the assign / recompute / compare cycle is repeated until
 * {@code isSame} becomes {@code true}.
 *
 * @param args command-line arguments (unused)
 */
public static void main(String[] args) {
    // NOTE(review): centerNum is not assigned here (a "centerNum = 2" line was
    // commented out); presumably it is initialised where it is declared - confirm.
    final double[][] points = {
        {0, 0}, {1, 0}, {0, 1}, {1, 1}, {2, 1},
        {1, 2}, {2, 2}, {3, 2}, {6, 6}, {7, 6},
        {8, 6}, {6, 7}, {7, 7}, {8, 7}, {9, 7},
        {7, 8}, {8, 8}, {9, 8}, {8, 9}, {9, 9}
    };
    // Insertion order matters: the first centerNum samples seed the centers.
    for (int p = 0; p < points.length; p++) {
        unCenterSample.add(new Sample(points[p][0], points[p][1]));
    }

    chooseDefaultCenter(centerNum);
    for (; !isSame; ) {
        findCluster();
        calcNewCenter();
        judgeIsSame();
    }
}
}