MapReduce 运行Kmeans代码

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.StringTokenizer;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.DefaultStringifier;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.RecordWriter;
import org.apache.hadoop.mapred.TextOutputFormat;
import org.apache.hadoop.mapred.lib.MultipleOutputFormat;
import org.apache.hadoop.mapred.lib.MultipleOutputs;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.Mapper.Context;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;
import org.apache.hadoop.util.LineReader;
import org.apache.hadoop.util.Progressable;

/**
 * Static HDFS helpers for the K-means job: reading and rewriting the centers
 * file, cleaning up previous output, and testing for convergence.
 *
 * All methods build a fresh default {@link Configuration} and obtain the
 * {@link FileSystem} from it.
 */
class Help {

	/** Compile-time switch for {@link #debug(Object, String)} output. */
	static final boolean DEBUG = false;

	/**
	 * Prints {@code s + ":" + o} to stdout when {@link #DEBUG} is true.
	 *
	 * @param o value to print (its {@code toString()} is used)
	 * @param s label printed before the value
	 */
	public static void debug(Object o, String s) {
		if (DEBUG) {
			System.out.println(s + ":" + o.toString());
		}
	}

	/**
	 * Parses one comma-separated record into a list of doubles.
	 *
	 * @param record text such as {@code "1,2.5,3"}
	 * @return the parsed values, in order
	 * @throws NumberFormatException if any field is not a valid double
	 */
	private static ArrayList<Double> parseCenter(String record) {
		String[] fields = record.split(",");
		ArrayList<Double> center = new ArrayList<Double>(fields.length);
		for (int i = 0; i < fields.length; i++) {
			center.add(Double.parseDouble(fields[i]));
		}
		return center;
	}

	/**
	 * Reads the centers file: one center per line, fields separated by commas.
	 *
	 * Best-effort: if an {@link IOException} occurs, the stack trace is printed
	 * and the centers read so far (possibly none) are returned.
	 *
	 * @param inputPath path of the centers file
	 * @return the centers in file order
	 */
	public static List<ArrayList<Double>> getOldCenters(String inputPath) {
		List<ArrayList<Double>> result = new ArrayList<ArrayList<Double>>();
		Configuration conf = new Configuration();
		try {
			FileSystem hdfs = FileSystem.get(conf);
			FSDataInputStream fsIn = hdfs.open(new Path(inputPath));
			try {
				LineReader lineIn = new LineReader(fsIn, conf);
				Text line = new Text();
				while (lineIn.readLine(line) > 0) {
					result.add(parseCenter(line.toString()));
				}
			} finally {
				// Close even when a line fails to parse, so the stream is not leaked.
				fsIn.close();
			}
		} catch (IOException e) {
			e.printStackTrace();
		}

		return result;
	}

	/**
	 * Recursively deletes {@code path}. Best-effort: a failure is reported on
	 * stderr and otherwise ignored.
	 *
	 * @param path file or directory to remove
	 */
	public static void deleteLastResult(String path) {
		Configuration conf = new Configuration();
		try {
			FileSystem hdfs = FileSystem.get(conf);
			// Explicit recursive flag; the one-argument delete(Path) is deprecated.
			hdfs.delete(new Path(path), true);
		} catch (IOException e) {
			// Still best-effort, but no longer silent.
			e.printStackTrace();
		}
	}

	/**
	 * Copies the local file {@code src} to {@code dst} on the configured file
	 * system. Best-effort: a failure is reported on stderr and otherwise ignored.
	 *
	 * @param src local source path
	 * @param dst destination path
	 */
	public static void copyOriginalCenters(String src, String dst) {
		Configuration conf = new Configuration();
		try {
			FileSystem hdfs = FileSystem.get(conf);
			hdfs.copyFromLocalFile(new Path(src), new Path(dst));
		} catch (IOException e) {
			// Still best-effort, but no longer silent.
			e.printStackTrace();
		}
	}

	/**
	 * Compares the previous centers with the newly computed ones and decides
	 * whether the iteration has converged.
	 *
	 * New centers are read from the files {@code newPath + 0}, {@code newPath + 1},
	 * ... up to {@code K - 1}; reading stops at the first file that does not exist.
	 * Each line is expected to be {@code key<TAB>v1,v2,...}. If the accumulated
	 * squared relative difference is at most {@code threshold} the method returns
	 * true. Otherwise the file at {@code oldPath} is replaced with the new centers
	 * and false is returned.
	 *
	 * @param oldPath      centers file from the previous iteration
	 * @param newPath      prefix of the per-cluster files holding the new centers
	 * @param KPath        the number of clusters, as a decimal string (parsed
	 *                     directly, not used as a path)
	 * @param dtBegIdxPath index of the first field that takes part in the distance,
	 *                     as a decimal string (parsed directly, not used as a path)
	 * @param threshold    convergence threshold
	 * @return true if converged, false if another iteration is needed
	 * @throws IOException on file system errors while reading the new centers or
	 *                     rewriting {@code oldPath}
	 */
	public static boolean isFinished(String oldPath, String newPath,
			String KPath, String dtBegIdxPath, double threshold)
			throws IOException {

		int dataBeginIndex = Integer.parseInt(dtBegIdxPath);
		int K = Integer.parseInt(KPath);
		List<ArrayList<Double>> oldCenters = Help.getOldCenters(oldPath);
		List<ArrayList<Double>> newCenters = new ArrayList<ArrayList<Double>>();
		Configuration conf = new Configuration();
		FileSystem hdfs = FileSystem.get(conf);

		for (int t = 0; t < K; t++) {
			Path inPath = new Path(newPath + t);
			if (!hdfs.exists(inPath))
				break;
			FSDataInputStream fsIn = hdfs.open(inPath);
			try {
				LineReader lineIn = new LineReader(fsIn, conf);
				Text line = new Text();
				while (lineIn.readLine(line) > 0) {
					String tmp = line.toString();
					Help.debug("tmp", tmp);

					// Handles the case seen on the cluster where the key and the
					// value are not on the same line: a very short line is treated
					// as key-only and the record is taken from the next line.
					if (tmp.length() < 5) {
						lineIn.readLine(line);
						newCenters.add(parseCenter(line.toString()));
						continue;
					}

					String[] tmpLine = tmp.split("\t");
					Help.debug(tmpLine[1], "record");
					newCenters.add(parseCenter(tmpLine[1]));
				}
			} finally {
				// Close even when a line fails to parse, so the stream is not leaked.
				fsIn.close();
			}
		}

		double distance = 0;
		for (int i = 0; i < K; i++) {
			for (int j = dataBeginIndex; j < oldCenters.get(0).size(); j++) {
				double t1 = Math.abs(oldCenters.get(i).get(j));
				double t2 = Math.abs(newCenters.get(i).get(j));
				double sum = t1 + t2;
				// Both coordinates are zero: they are identical, so they contribute
				// nothing. Dividing would give 0/0 = NaN, which poisons the total and
				// makes "distance <= threshold" false forever.
				if (sum == 0.0) {
					continue;
				}
				double relative = (t1 - t2) / sum;
				distance += relative * relative;
			}
		}
		if (distance <= threshold) {
			return true;
		}

		// Not converged: replace the old centers file with the new centers.
		Help.deleteLastResult(oldPath);
		FSDataOutputStream os = hdfs.create(new Path(oldPath));
		try {
			for (int i = 0; i < newCenters.size(); i++) {
				List<Double> center = newCenters.get(i);
				StringBuilder text = new StringBuilder();
				for (int j = 0; j < center.size(); j++) {
					if (j > 0) {
						text.append(',');
					}
					text.append(center.get(j));
				}
				text.append('\n');
				// Write the encoded byte count, not the character count.
				byte[] bytes = text.toString().getBytes("UTF-8");
				os.write(bytes, 0, bytes.length);
			}
		} finally {
			os.close();
		}
		return false;
	}
}

public class Kmeans {

	// static List<ArrayList<Double>> centers ;
	// static int K;
	// static int dataBeginIndex;

	public static class KmeansMapper extends
			Mapper<Object, Text, IntWritable, Text> {

		public void map(Object key, Text value, Context context)
				throws IOException, InterruptedException {

			String line = value.toString();
			String[] fields = line.split(",");

			List<ArrayList<Double>> centers = Help.getOldCenters(context
					.getConfiguration().get("centersPath"));
			int dataBeginIndex = Integer.parseInt(context.getConfiguration()
					.get("dtBegIdxPath"));
			int K = Integer.parseInt(context.getConfiguration().get("KPath"));

			double minDistance = 99999999;
			int centerIndex = K;
			for (int i = 0; i < K; i++) {
				double currentDistance = 0;
				for (int j = dataBeginIndex; j < fields.length; j++) {
					double t1 = Math.abs(centers.get(i).get(j));
					double t2 = Math.abs(Double.parseDouble(fields[j]));
					currentDistance += Math
  • 5
    点赞
  • 24
    收藏
    觉得还不错? 一键收藏
  • 19
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 19
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值