- import java.io.BufferedReader;
- import java.io.FileNotFoundException;
- import java.io.FileReader;
- import java.io.IOException;
- public class KAverage {
- private int sampleCount = 0; // number of samples; initData parses it from the 1st field of the "s;d;c" header line
- private int dimensionCount = 0; // feature dimensions per sample; 2nd field of the header line
- private int centerCount = 0; // number of cluster centers; 3rd field of the header line
- private double[][] sampleValues; // allocated in initData as [sampleCount][dimensionCount + 1]; NOTE(review): purpose of the extra column is not visible here - confirm
- private double[][] centers; // allocated in initData as [centerCount][dimensionCount]
- private double[][] tmpCenters; // allocated with the same shape as centers; presumably scratch space for recomputed centers - confirm
- private String dataFile = ""; // data file name handed to the constructor
- /**
- * Creates a KAverage instance for the given data file.
- * <p>
- * The constructor only stores the file name in the {@code dataFile} field; it does not
- * open or read the file itself.
- *
- * @param dataFile name of the input data file to remember for later use
- * @throws NumberInvalieException declared in the signature, although nothing in this
- *         constructor body throws it
- */
- public KAverage(String dataFile) throws NumberInvalieException {
- this.dataFile = dataFile;
- }
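- /**
- * Input file format.
- * <p>
- * The first line is {@code s;d;c}: the number of samples, the number of feature
- * dimensions per sample, and the number of cluster centers, in that order.
- * <p>
- * The remaining content has the form {@code d[,d]...;d[,d]...}, for example
- * {@code 1,2;2,3;1,5}. Dimensions within a sample are separated by ',' and samples are
- * separated by ';'. There is no trailing ';'. The data may span multiple lines.
- */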
- private int initData(String fileName) {
- String line;
- String samplesValue[];
- String dimensionsValue[] = new String[dimensionCount];
- BufferedReader in;
- try {
- in = new BufferedReader(new FileReader(fileName));
- } catch (FileNotFoundException e) {
- e.printStackTrace();
- return -1;
- }
- /*
- * 预处理样本,允许后面几维为0时,不写入文件
- */
- for (int i = 0; i < sampleCount; i++) {
- for (int j = 0; j < dimensionCount; j++) {
- sampleValues[i][j] = 0;
- }
- }
- int i = 0;
- double tmpValue = 0.0;
- try {
- line = in.readLine();
- String params[] = line.split(";");
- if (params.length != 3) { // 必须为3个参数,否则错误
- return -1;
- }
- /**
- * 获取参数
- */
- this.sampleCount = Integer.parseInt(params[0]);
- this.dimensionCount = Integer.parseInt(params[1]);
- this.centerCount = Integer.parseInt(params[2]);
- if (sampleCount <= 0 || dimensionCount <= 0 || centerCount <= 0) {
- throw new NumberInvalieException("input number <= 0.");
- }
- if (sampleCount < centerCount) {
- throw new NumberInvalieException(
- "sample number < center number");
- }
- sampleValues = new double[sampleCount][dimensionCount + 1];
- centers = new double[centerCount][dimensionCount];
- tmpCenters = new double[centerCount][dimensionCount];
- while ((line = in.readLine()) !=