基本的K-Means算法的Java实现

一、基本K均值算法

1:选择K个点作为初始质心 

2:repeat  

    2.1:将每个点指派到最近的质心,形成K个簇  

    2.2:重新计算每个簇的质心  

3:until 簇不发生变化或达到最大迭代次数  


二、数据集介绍

Iris也称鸢尾花卉数据集,是一个常用于多变量分析的数据集,共150个样本。每个样本包含花萼长度、花萼宽度、花瓣长度、花瓣宽度4个属性,用于预测鸢尾花卉属于(Setosa,Versicolour,Virginica)三个种类中的哪一类。
原数据集下载地址: http://archive.ics.uci.edu/ml/
本文使用的数据集txt文件,可在附件中下载。

三、实现

1. Data类
//package javatruple;
package kmeans;
public class Data {

	// Sample identifier followed by its four feature values.
	int index;
	double first;
	double second;
	double third;
	double forth;

	/**
	 * Creates a sample from an identifier and four feature values.
	 * The boxed arguments are unboxed on assignment, so a null argument
	 * fails with a NullPointerException.
	 *
	 * @param index0  identifier stored in {@code index}
	 * @param first0  value stored in {@code first}
	 * @param second0 value stored in {@code second}
	 * @param third0  value stored in {@code third}
	 * @param forth0  value stored in {@code forth}
	 */
	public Data(int index0,Double first0,Double second0,Double third0,Double forth0){
		index = index0;
		first = first0;
		second = second0;
		third = third0;
		forth = forth0;
	}

	/** @return the sample identifier */
	public int getindex(){
		return this.index;
	}

	/** @return the first feature value */
	public double getfirst(){
		return this.first;
	}

	/** @return the second feature value */
	public double getsecond(){
		return this.second;
	}

	/** @return the third feature value */
	public double getthird(){
		return this.third;
	}

	/** @return the fourth feature value */
	public double getforth(){
		return this.forth;
	}

	/**
	 * Renders the identifier and the four feature values on one line.
	 *
	 * @return text of the form {@code Data [index=..., first=..., ...]}
	 */
	@Override
	public String toString() {
		StringBuilder text = new StringBuilder("Data [index=");
		text.append(index);
		text.append(", first=").append(first);
		text.append(", second=").append(second);
		text.append(", third=").append(third);
		text.append(", forth=").append(forth);
		text.append("]");
		return text.toString();
	}
}

2. KM类
//package javatruple;
package kmeans;

import java.io.BufferedInputStream;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.Reader;
import java.lang.Math;
import java.util.Iterator;
import java.util.Random;
import java.util.Vector;

public class KM {
	/** Samples loaded so far; {@link #readTxtFile(String)} appends to this list. */
	public static Vector<Data> Iris = new Vector<>();
	/** Number of clusters to build. */
	public static int k = 3;
	/** Current centroid of each cluster; entry {@code i} belongs to cluster {@code i}. */
	public static Data[] means = new Data[k];
	/** Sum of squared distances (SSE) measured during the previous assignment pass. */
	public static double oldSSE = (double) 10;
	/** SSE accumulated by {@link #mark(Data)} during the current assignment pass. */
	public static double newSSE = (double) 0;

	/** Number of leading numeric columns read from every data line. */
	private static final int FEATURE_COUNT = 4;
	/** Upper bound on assignment passes so the loop always terminates. */
	private static final int MAX_ITERATIONS = 100;

	/**
	 * Reads a comma-separated text file (decoded as GBK) and appends one
	 * {@link Data} per non-blank line to {@link #Iris}. The first four fields
	 * of a line are parsed as doubles; further fields are ignored. The index
	 * of each sample is the 1-based number of the line it came from.
	 *
	 * A missing file is reported on standard output. A read error, a line with
	 * fewer than four fields, or a non-numeric field stops the import, prints
	 * a message and the stack trace; samples read before the error are kept.
	 *
	 * @param filePath path of the text file to load
	 */
	public static void readTxtFile(String filePath) {
		File file = new File(filePath);
		if (!file.isFile()) {
			System.out.println("找不到指定的文件");
			return;
		}
		String encoding = "GBK";
		// try-with-resources closes the stream even when a line fails to parse.
		try (FileInputStream in = new FileInputStream(file);
				BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(in, encoding))) {
			String lineTxt;
			int index = 0;
			while ((lineTxt = bufferedReader.readLine()) != null) {
				index++;
				if (lineTxt.trim().isEmpty()) {
					// Data files commonly end with blank lines; they carry no sample.
					continue;
				}
				String[] tmp = lineTxt.split(",");
				if (tmp.length < FEATURE_COUNT) {
					throw new IOException("line " + index + ": expected at least " + FEATURE_COUNT
							+ " comma-separated values but found " + tmp.length);
				}
				double[] dt = new double[FEATURE_COUNT];
				for (int i = 0; i < FEATURE_COUNT; i++) {
					dt[i] = Double.parseDouble(tmp[i]);
				}
				Iris.addElement(new Data(index, dt[0], dt[1], dt[2], dt[3]));
			}
		} catch (IOException | NumberFormatException e) {
			System.out.println("读取文件内容出错");
			e.printStackTrace();
		}
	}

	/**
	 * Clusters the samples in {@link #Iris} into {@link #k} groups and prints
	 * every cluster followed by the SSE of the final assignment.
	 *
	 * Initial centroids are {@code k} distinct, randomly chosen samples. Each
	 * pass assigns every sample to its nearest centroid and then recomputes
	 * the centroids. Iteration stops when no sample changes cluster or after
	 * {@code MAX_ITERATIONS} passes.
	 *
	 * @throws IllegalStateException if {@code k} is not positive or fewer than
	 *         {@code k} samples are loaded
	 */
	public static void KMeans(){
		int sampleCount = Iris.size();
		if (k <= 0 || sampleCount < k) {
			throw new IllegalStateException("K-Means needs k > 0 and at least k samples, but k=" + k
					+ " and " + sampleCount + " samples are loaded");
		}
		if (means == null || means.length != k) {
			// k may have been changed after the class was initialised.
			means = new Data[k];
		}

		// Generic arrays cannot be created directly; only Vector<Data> instances are stored below.
		@SuppressWarnings("unchecked")
		Vector<Data>[] cluster = new Vector[k];
		for (int i = 0; i < k; i++) {
			cluster[i] = new Vector<>();
		}

		// Partial Fisher-Yates shuffle: the first k slots end up holding k distinct sample positions.
		int[] order = new int[sampleCount];
		for (int i = 0; i < sampleCount; i++) {
			order[i] = i;
		}
		Random random = new Random();
		for (int i = 0; i < k; i++) {
			int swapWith = i + random.nextInt(sampleCount - i);
			int picked = order[swapWith];
			order[swapWith] = order[i];
			order[i] = picked;
			means[i] = Iris.get(picked);
		}

		// -1 marks "not assigned yet", so the first pass always counts as a change.
		int[] labels = new int[sampleCount];
		for (int i = 0; i < sampleCount; i++) {
			labels[i] = -1;
		}

		// Reset the running totals so repeated calls do not inherit values from an earlier run.
		oldSSE = 0;
		newSSE = 0;
		for (int iteration = 0; iteration < MAX_ITERATIONS; iteration++) {
			for (int i = 0; i < k; i++) {
				cluster[i].clear();
			}
			oldSSE = newSSE;
			newSSE = 0; // mark() accumulates into newSSE during the pass below
			boolean changed = false;
			for (int n = 0; n < sampleCount; n++) {
				Data particle = Iris.get(n);
				int label = mark(particle);
				if (label != labels[n]) {
					labels[n] = label;
					changed = true;
				}
				cluster[label].addElement(particle);
			}
			computeCentroid(cluster);
			if (!changed) {
				break;
			}
		}

		for (int i = 0; i < k; i++) {
			System.out.println("第"+(i+1)+"簇:");
			for (Data member : cluster[i]) {
				System.out.println(member);
			}
		}
		// newSSE belongs to the assignment that was just printed.
		System.out.println(newSSE);
	}

	/**
	 * Finds the centroid in {@link #means} nearest to the given sample, by
	 * squared Euclidean distance over the four features. Ties go to the lowest
	 * cluster number.
	 *
	 * Side effect: the winning squared distance is added to {@link #newSSE}.
	 *
	 * @param particle0 sample to classify
	 * @return number of the nearest cluster, in {@code [0, k)}
	 */
	public static int mark(Data particle0){
		int label = 0;
		double distance = Double.POSITIVE_INFINITY;
		for (int i = 0; i < k; i++) {
			double temp = squaredDistance(particle0, means[i]);
			if (temp < distance) {
				distance = temp;
				label = i;
			}
		}
		newSSE = newSSE + distance;
		return label;
	}

	/** Returns the squared Euclidean distance between two samples over their four features. */
	private static double squaredDistance(Data a, Data b) {
		double sub1 = a.getfirst() - b.getfirst();
		double sub2 = a.getsecond() - b.getsecond();
		double sub3 = a.getthird() - b.getthird();
		double sub4 = a.getforth() - b.getforth();
		return sub1 * sub1 + sub2 * sub2 + sub3 * sub3 + sub4 * sub4;
	}

	/**
	 * Replaces each entry of {@link #means} with the per-feature average of
	 * the matching cluster's members. New centroids get index 0. An empty
	 * cluster keeps its previous centroid.
	 *
	 * @param cluster0 members of each cluster; must hold at least {@code k} entries
	 */
	public static void computeCentroid(Vector<Data>[] cluster0){
		for (int i = 0; i < k; i++) {
			int size = cluster0[i].size();
			if (size == 0) {
				// Averaging nothing would move the centroid to the origin.
				continue;
			}
			double sumFirst = 0;
			double sumSecond = 0;
			double sumThird = 0;
			double sumForth = 0;
			for (Data member : cluster0[i]) {
				sumFirst += member.getfirst();
				sumSecond += member.getsecond();
				sumThird += member.getthird();
				sumForth += member.getforth();
			}
			means[i] = new Data(0, sumFirst / size, sumSecond / size, sumThird / size, sumForth / size);
		}
	}

	/**
	 * Loads the data file and runs the clustering.
	 *
	 * @param args optional; {@code args[0]} is the path of the data file.
	 *        Without it the original hard-coded path is used.
	 */
	public static void main(String[] args) {
		String filePath = args.length > 0 ? args[0] : "C:\\Users\\Xing\\Desktop\\123.txt";
		readTxtFile(filePath);
		KMeans();
	}
}

四、实验结果展示






  • 0
    点赞
  • 3
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值