实验任务三
K-means算法
1、 系统设计要求
已知近年来亚洲各球队的积分如下表所示:
ps:上图是采集的亚洲15支球队在2006年-2010年间大型比赛的战绩(澳大利亚未收录)。数据做了如下预处理:对于世界杯,进入决赛圈则取其最终排名,没有进入决赛圈的,打入预选赛十强赛赋予40,预选赛小组未出线的赋予50。对于亚洲杯,前四名取其排名,八强赋予5,十六强赋予9,预选赛没出线的赋予17。
1、程序设计任务:
实现k-means算法,将上述球队划分成三类球队,并输出每类球队中具体包含哪些球队。
亚洲一流:*,*,………… (*:球队名)
亚洲二流:*,*,………… (*:球队名)
亚洲三流:*,*,………… (*:球队名)
2、 设计思路与方案
1) 设计思路
- 将图片中的数据初始化后再将数据进行规格化,数据部分存储在一个二维数组中,国家名字存储在一个字符串数组中。
- 初始化三个聚类中心。
- 利用距离公式计算规格化后的数据与三个聚类中心之间的距离,进行第一次聚类;计算出的距离存储在一个二维数组中,并通过比较距离将各个国家划分到距离最近的聚类中心。
- 更新聚类中心,重复步骤3,如果前一次的聚类结果与本次聚类结果相同,则可输出结果。
2) 程序总体框图
3) 程序算法模块
1.数据规格化:
/**
 * Rescales each of the three columns of {@code data} to the range [0, 1]
 * with min-max normalization: {@code (value - min) / (max - min)}.
 *
 * <p>The column minimum and maximum are taken from the data itself rather
 * than from fixed sentinels, so columns that lie entirely above 100 or
 * entirely below 0 are scaled correctly. A column whose values are all
 * equal has no spread; its entries are set to 0 instead of dividing by
 * zero and storing NaN.
 */
public void normalization() {
    // Every row holds exactly three features.
    for (int col = 0; col < 3; col++) {
        float lo = Float.POSITIVE_INFINITY;
        float hi = Float.NEGATIVE_INFINITY;
        for (int row = 0; row < country.length; row++) {
            lo = Math.min(lo, data[row][col]);
            hi = Math.max(hi, data[row][col]);
        }
        float range = hi - lo;
        for (int row = 0; row < country.length; row++) {
            // range == 0 means the column is constant: map it to 0, not 0/0.
            data[row][col] = (range == 0f) ? 0f : (data[row][col] - lo) / range;
        }
    }
}
2.初始化聚类中心:
/**
 * Creates the three centroids by copying the first three components of each
 * argument into this object's own arrays, so later updates to the centroids
 * do not write through to the caller's arrays.
 *
 * @param center1_copy initial coordinates of the first centroid
 * @param center2_copy initial coordinates of the second centroid
 * @param center3_copy initial coordinates of the third centroid
 */
public Center(float[] center1_copy, float[] center2_copy, float[] center3_copy) {
    System.arraycopy(center1_copy, 0, center1, 0, 3);
    System.arraycopy(center2_copy, 0, center2, 0, 3);
    System.arraycopy(center3_copy, 0, center3, 0, 3);
}
3.(1)聚类计算并循环判断:
// Repeat assignment + centroid update until renewCenter reports that no
// row changed cluster.
while (right) {
    float[][] centroids = {center.center1, center.center2, center.center3};
    for (int row = 0; row < initCountry.length; row++) {
        float[] point = dataobject.data[row];
        // Sentinel larger than any distance between normalized points.
        double nearest = 1000;
        for (int c = 0; c < centroids.length; c++) {
            // Euclidean distance; the squares are summed in float, then
            // widened to double for the square root.
            float dx = point[0] - centroids[c][0];
            float dy = point[1] - centroids[c][1];
            float dz = point[2] - centroids[c][2];
            calculate[row][c] = Math.sqrt(dx * dx + dy * dy + dz * dz);
            // Strict '<' keeps the lowest-numbered centroid on ties.
            if (calculate[row][c] < nearest) {
                nearest = calculate[row][c];
                order[row] = c;
            }
        }
    }
    right = center.renewCenter(order, dataobject.data);
}
3.(2)聚类中心更新函数:
/**
 * Records the latest cluster assignment and, if it differs from the one
 * stored in {@code Order_new}, moves each centroid to the mean of the rows
 * assigned to it.
 *
 * <p>A cluster that received no rows keeps its current centroid; dividing
 * the empty sum by zero would otherwise turn the centroid into NaN, after
 * which no row could ever be assigned to it again.
 *
 * <p>NOTE(review): {@code Order_new} starts out as all zeros, so a first
 * call in which every row is assigned to cluster 0 is reported as
 * "unchanged" and the centroids are not updated — confirm this is intended.
 *
 * @param order cluster index (0, 1 or 2) for each row of {@code data}
 * @param data  feature rows; the first three components of each row are used
 * @return {@code true} if any assignment changed (centroids were updated),
 *         {@code false} if the assignment is identical to the previous call
 */
public boolean renewCenter(int[] order, float[][] data) {
    boolean changed = false;
    for (int i = 0; i < order.length; i++) {
        if (order[i] != Order_new[i]) {
            Order_new[i] = order[i];
            changed = true;
        }
    }
    if (!changed) {
        return false;
    }

    float[][] centroids = {center1, center2, center3};
    for (int cluster = 0; cluster < centroids.length; cluster++) {
        float[] centroid = centroids[cluster];
        float[] sums = new float[centroid.length];
        int members = 0;
        for (int row = 0; row < order.length; row++) {
            if (order[row] != cluster) {
                continue;
            }
            members++;
            for (int d = 0; d < sums.length; d++) {
                sums[d] += data[row][d];
            }
        }
        if (members == 0) {
            // Empty cluster: leave the centroid where it is.
            continue;
        }
        for (int d = 0; d < sums.length; d++) {
            centroid[d] = sums[d] / members;
        }
    }
    return true;
}
4)运行结果示例
3、 完整代码(Java)
/**
 * Entry point: clusters the 15 teams into three groups with k-means and
 * prints the members of each group.
 */
class Kmeans {
    /**
     * Normalizes the score table, seeds the three centroids from rows 1, 13
     * and 9, iterates assignment and centroid update until the assignment
     * stops changing, then prints one line of team names per cluster.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        int[][] initData = {{50,50,9},{28,9,4},{17,15,3},{25,40,5},{28,40,2},{50,50,1},{50,40,9},{50,40,9},{40,40,5},{50,50,9},{50,50,5},{50,50,9},{40,40,9},{40,32,17},{50,50,9}};
        String[] initCountry = {"中国","日本","韩国","伊朗","沙特","伊拉克","卡塔尔","阿联酋","乌兹别克斯坦","泰国","越南","阿曼","巴林","朝鲜","印尼"};
        final int teamCount = initCountry.length;
        int[] order = new int[teamCount];

        Data dataobject = new Data(initData, initCountry);
        dataobject.normalization();

        // Seed the centroids from three rows of the normalized table.
        Center center = new Center(
                dataobject.data[1].clone(),
                dataobject.data[13].clone(),
                dataobject.data[9].clone());

        boolean changed;
        do {
            float[][] centroids = {center.center1, center.center2, center.center3};
            for (int team = 0; team < teamCount; team++) {
                // Sentinel larger than any distance between normalized points.
                double nearest = 1000;
                for (int c = 0; c < centroids.length; c++) {
                    double d = distance(dataobject.data[team], centroids[c]);
                    // Strict '<' keeps the lowest-numbered centroid on ties.
                    if (d < nearest) {
                        nearest = d;
                        order[team] = c;
                    }
                }
            }
            changed = center.renewCenter(order, dataobject.data);
        } while (changed);

        for (int tier = 0; tier < 3; tier++) {
            System.out.println((tier + 1) + "流:");
            for (int team = 0; team < teamCount; team++) {
                if (order[team] == tier) {
                    System.out.print(initCountry[team] + " ");
                }
            }
            System.out.printf("\n");
        }
    }

    /**
     * Euclidean distance between two three-component points. The squared
     * differences are summed in float and widened to double for the root.
     */
    private static double distance(float[] point, float[] centroid) {
        float dx = point[0] - centroid[0];
        float dy = point[1] - centroid[1];
        float dz = point[2] - centroid[2];
        return Math.sqrt(dx * dx + dy * dy + dz * dz);
    }
}
/**
 * Score table for the teams: one row of three numeric features per team,
 * plus the team names in the same row order.
 */
class Data {
    /** Number of features stored per row. */
    private static final int FEATURES = 3;

    float[][] data;
    String[] country;

    private Data() {}

    /**
     * Copies the raw scores and names so that later normalization does not
     * modify the caller's arrays.
     *
     * @param data_copy    raw scores, one row of at least three values per name
     * @param country_copy team names; its length determines the number of rows
     */
    public Data(int[][] data_copy, String[] country_copy) {
        data = new float[country_copy.length][FEATURES];
        country = new String[country_copy.length];
        for (int i = 0; i < country_copy.length; i++) {
            for (int j = 0; j < FEATURES; j++) {
                data[i][j] = data_copy[i][j];
            }
            country[i] = country_copy[i];
        }
    }

    /**
     * Rescales each column of {@code data} to the range [0, 1] with min-max
     * normalization: {@code (value - min) / (max - min)}.
     *
     * <p>The column minimum and maximum are taken from the data itself
     * rather than from fixed sentinels, so columns that lie entirely above
     * 100 or entirely below 0 are scaled correctly. A column whose values
     * are all equal has no spread; its entries are set to 0 instead of
     * dividing by zero and storing NaN.
     */
    public void normalization() {
        for (int col = 0; col < FEATURES; col++) {
            float lo = Float.POSITIVE_INFINITY;
            float hi = Float.NEGATIVE_INFINITY;
            for (int row = 0; row < country.length; row++) {
                lo = Math.min(lo, data[row][col]);
                hi = Math.max(hi, data[row][col]);
            }
            float range = hi - lo;
            for (int row = 0; row < country.length; row++) {
                // range == 0 means the column is constant: map it to 0, not 0/0.
                data[row][col] = (range == 0f) ? 0f : (data[row][col] - lo) / range;
            }
        }
    }
}
/**
 * The three k-means centroids together with the cluster assignment seen on
 * the previous update, which is used to detect convergence.
 */
class Center {
    float[] center1 = new float[3];
    float[] center2 = new float[3];
    float[] center3 = new float[3];
    // Assignment recorded by the previous renewCenter call; starts as all zeros.
    int[] Order_new = new int[15];

    public Center() {}

    /**
     * Creates the three centroids by copying the first three components of
     * each argument, so later centroid updates do not write through to the
     * caller's arrays.
     *
     * @param center1_copy initial coordinates of the first centroid
     * @param center2_copy initial coordinates of the second centroid
     * @param center3_copy initial coordinates of the third centroid
     */
    public Center(float[] center1_copy, float[] center2_copy, float[] center3_copy) {
        for (int i = 0; i < 3; i++) {
            center1[i] = center1_copy[i];
            center2[i] = center2_copy[i];
            center3[i] = center3_copy[i];
        }
    }

    /**
     * Records the latest cluster assignment and, if it differs from the one
     * stored in {@code Order_new}, moves each centroid to the mean of the
     * rows assigned to it.
     *
     * <p>A cluster that received no rows keeps its current centroid;
     * dividing the empty sum by zero would otherwise turn the centroid into
     * NaN, after which no row could ever be assigned to it again.
     *
     * <p>NOTE(review): {@code Order_new} starts out as all zeros, so a first
     * call in which every row is assigned to cluster 0 is reported as
     * "unchanged" and the centroids are not updated — confirm this is
     * intended.
     *
     * @param order cluster index (0, 1 or 2) for each row of {@code data};
     *              must not be longer than {@code Order_new}
     * @param data  feature rows; the first three components of each are used
     * @return {@code true} if any assignment changed (centroids were
     *         updated), {@code false} if it matches the previous call
     */
    public boolean renewCenter(int[] order, float[][] data) {
        boolean changed = false;
        for (int i = 0; i < order.length; i++) {
            if (order[i] != Order_new[i]) {
                Order_new[i] = order[i];
                changed = true;
            }
        }
        if (!changed) {
            return false;
        }

        float[][] centroids = {center1, center2, center3};
        for (int cluster = 0; cluster < centroids.length; cluster++) {
            float[] centroid = centroids[cluster];
            float[] sums = new float[centroid.length];
            int members = 0;
            for (int row = 0; row < order.length; row++) {
                if (order[row] != cluster) {
                    continue;
                }
                members++;
                for (int d = 0; d < sums.length; d++) {
                    sums[d] += data[row][d];
                }
            }
            if (members == 0) {
                // Empty cluster: leave the centroid where it is.
                continue;
            }
            for (int d = 0; d < sums.length; d++) {
                centroid[d] = sums[d] / members;
            }
        }
        return true;
    }
}