单机环境下的K-Means聚类算法运行实例
参考书籍:《Mahout in Action》(中英文版都有,需要资源的可以找我~)
在Eclipse平台上实现K-Means实例
代码如下:
package kmeans;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;
import org.apache.mahout.clustering.Cluster;
import org.apache.mahout.clustering.classify.WeightedPropertyVectorWritable;
import org.apache.mahout.clustering.kmeans.KMeansDriver;
import org.apache.mahout.clustering.kmeans.Kluster;
import org.apache.mahout.common.distance.EuclideanDistanceMeasure;
import org.apache.mahout.math.RandomAccessSparseVector;
import org.apache.mahout.math.Vector;
import org.apache.mahout.math.VectorWritable;
public class testkmeans {
/**
 * Hard-coded sample data set: 31 points, each given as a two-element
 * {@code {x, y}} array of doubles.
 *
 * <p>NOTE(review): presumably this is the input that gets converted to
 * Mahout vectors and clustered by K-Means; the consuming code is not visible
 * in this part of the file, so confirm against the caller.
 *
 * <p>The reference is {@code final}, but the array contents themselves are
 * still mutable and publicly reachable.
 */
public static final double[][] points = {
{2, 4}, {4, 2}, {6, 2}, {5, 3}, {5, 5}, {7, 5},
{5, 15}, {6, 17}, {4, 14}, {5, 13}, {9, 15}, {3, 14}, {7, 13},
{20, 16}, {19, 15}, {17, 15}, {16, 14}, {14, 18}, {22, 10}, {17, 17}, {16, 13}, {18, 14}, {17, 13},
{22, 26}, {24, 23}, {25, 25}, {26, 22}, {26, 26}, {26, 28}, {28, 18}, {28, 28}};
//将已经转换为Vector类型的点存到序列文件中
public static void writePointsToFile(List<Vector> points,
String fileName,
FileSystem fs,
Configuration conf) throws IOException {
Path path = new Path(fileName);
SequenceFile.Writer writer = new SequenceFile.Writer(fs, conf,
path, LongWritable.class, VectorWritable.class);
long recNum = 0;
VectorWritable vec = new VectorWritable();
for (Vector point : points) {
vec.set(point);
writer.append(new LongWritable(recNum&#