MapReduce Java 案例：WordCount（单词计数）

最新推荐文章于 2024-07-26 02:53:26 发布

呼啦圈521

最新推荐文章于 2024-07-26 02:53:26 发布

阅读量637

点赞数

分类专栏： mapreduce 文章标签： mapreduce

本文链接：https://blog.csdn.net/niuxinzan/article/details/24024925

版权

mapreduce 专栏收录该内容

1 篇文章 0 订阅

订阅专栏

package cn.com.cennavi.test;

import java.io.IOException;
import java.util.Iterator;
import java.util.StringTokenizer;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.TextInputFormat;
import org.apache.hadoop.mapred.TextOutputFormat;

/**
 * Word-count job written against the old {@code org.apache.hadoop.mapred} API.
 *
 * <p>The job reads text input line by line, emits {@code (token, 1)} for every
 * whitespace-separated token, and sums the counts per token. The same
 * {@link Reduce} class is registered both as combiner and as reducer.
 *
 * <p>Usage: {@code MapReduceUtil <input path> <output path>}
 */
public class MapReduceUtil {
	/**
	 * Mapper that splits each input line into tokens and emits
	 * {@code (token, 1)} for every token found.
	 */
	public static class Map extends MapReduceBase implements
			Mapper<LongWritable, Text, Text, IntWritable> {
		/** Constant count emitted for every token occurrence. */
		private static final IntWritable one = new IntWritable(1);
		/** Output key instance, overwritten for each token instead of allocating a new one. */
		private final Text word = new Text();

		/**
		 * Tokenizes one line of input and emits a count of one per token.
		 *
		 * @param key      input key supplied by the input format; not used here
		 * @param value    one line of input text
		 * @param output   collector receiving the {@code (token, 1)} pairs
		 * @param reporter progress reporter; not used here
		 * @throws IOException if the collector fails to accept a pair
		 */
		public void map(LongWritable key, Text value,
				OutputCollector<Text, IntWritable> output, Reporter reporter)
				throws IOException {
			String line = value.toString();
			// StringTokenizer with no explicit delimiters splits on whitespace.
			StringTokenizer tokenizer = new StringTokenizer(line);
			while (tokenizer.hasMoreTokens()) {
				word.set(tokenizer.nextToken());
				output.collect(word, one);
			}
		}
	}

	/**
	 * Reducer that adds up all counts received for a token. Because it only
	 * sums its inputs and emits the same key/value types it consumes, the job
	 * also registers it as the combiner.
	 */
	public static class Reduce extends MapReduceBase implements
			Reducer<Text, IntWritable, Text, IntWritable> {
		/**
		 * Sums the counts of a single token and emits the total.
		 *
		 * @param key      the token being counted
		 * @param values   counts collected for {@code key}
		 * @param output   collector receiving the {@code (token, total)} pair
		 * @param reporter progress reporter; not used here
		 * @throws IOException if the collector fails to accept the pair
		 */
		public void reduce(Text key, Iterator<IntWritable> values,
				OutputCollector<Text, IntWritable> output, Reporter reporter)
				throws IOException {
			int sum = 0;
			while (values.hasNext()) {
				sum += values.next().get();
			}
			output.collect(key, new IntWritable(sum));
		}
	}

	/**
	 * Configures the word-count job and runs it via {@code JobClient.runJob}.
	 *
	 * @param args {@code args[0]} is the input path, {@code args[1]} is the
	 *             output path; any further arguments are ignored
	 * @throws IllegalArgumentException if fewer than two arguments are given
	 * @throws Exception if configuring or running the job fails
	 */
	public static void main(String[] args) throws Exception {
		// Fail early with a usage hint instead of an ArrayIndexOutOfBoundsException.
		if (args == null || args.length < 2) {
			throw new IllegalArgumentException(
					"Usage: MapReduceUtil <input path> <output path>");
		}

		JobConf conf = new JobConf(MapReduceUtil.class);
		conf.setJobName("wordcount");

		conf.setOutputKeyClass(Text.class);
		conf.setOutputValueClass(IntWritable.class);

		conf.setMapperClass(Map.class);
		conf.setCombinerClass(Reduce.class);
		conf.setReducerClass(Reduce.class);

		conf.setInputFormat(TextInputFormat.class);
		conf.setOutputFormat(TextOutputFormat.class);

		FileInputFormat.setInputPaths(conf, new Path(args[0]));
		FileOutputFormat.setOutputPath(conf, new Path(args[1]));

		JobClient.runJob(conf);
	}
}

所用 jar 包（版本均为 2.0.0-cdh4.5.0）：

hadoop-mapreduce-client-app-2.0.0-cdh4.5.0.jar

hadoop-mapreduce-client-common-2.0.0-cdh4.5.0.jar

hadoop-mapreduce-client-core-2.0.0-cdh4.5.0.jar

hadoop-mapreduce-client-hs-2.0.0-cdh4.5.0.jar

hadoop-mapreduce-client-hs-plugins-2.0.0-cdh4.5.0.jar

hadoop-mapreduce-client-jobclient-2.0.0-cdh4.5.0.jar

hadoop-mapreduce-client-jobclient-2.0.0-cdh4.5.0-tests.jar

hadoop-mapreduce-client-shuffle-2.0.0-cdh4.5.0.jar

hadoop-mapreduce-examples-2.0.0-cdh4.5.0.jar

呼啦圈521

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
mapreduce-java 案例

package cn.com.cennavi.test;import java.io.IOException;import java.util.Iterator;import java.util.StringTokenizer;import org.apache.hadoop.fs.Path;import org.apache.hadoop.io.IntWrit
复制链接

扫一扫

专栏目录