使用旧版 API（org.apache.hadoop.mapred）编写的 WordCount 程序

最新推荐文章于 2023-05-01 09:38:59 发布

波哥的技术积累

最新推荐文章于 2023-05-01 09:38:59 发布

阅读量606

点赞数

分类专栏： hadoop 文章标签： hadoop

本文链接：https://blog.csdn.net/woshisap/article/details/42322583

版权

hadoop 专栏收录该内容

29 篇文章 0 订阅

订阅专栏

package com.ccse.hadoop.old;

import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.Iterator;
import java.util.StringTokenizer;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.TextInputFormat;
import org.apache.hadoop.mapred.TextOutputFormat;
import org.apache.hadoop.mapred.FileInputFormat;


/**
 * Word-count job written against the old ({@code org.apache.hadoop.mapred})
 * MapReduce API.
 *
 * <p>The job reads text input line by line, splits every line on whitespace,
 * and writes one {@code word <TAB> count} record per distinct word.
 *
 * @author woshiccna
 */
public class WordCountApp {

	/** Default input location, used when no paths are given on the command line. */
	public static final String INPUT_PATH = "hdfs://chaoren1:9000/mapinput";

	/** Default output location, used when no paths are given on the command line. */
	public static final String OUTPUT_PATH = "hdfs://chaoren1:9000/mapoutput";

	/**
	 * Configures and runs the word-count job, blocking until it finishes.
	 *
	 * <p>Any existing output directory is deleted recursively before the job
	 * is submitted, because the output format refuses to write into an
	 * existing directory.
	 *
	 * @param args optional; when at least two arguments are supplied,
	 *             {@code args[0]} is the input path and {@code args[1]} the
	 *             output path. Otherwise {@link #INPUT_PATH} and
	 *             {@link #OUTPUT_PATH} are used.
	 * @throws IOException if the file system cannot be reached or the job fails
	 * @throws URISyntaxException if the output path is not a valid URI
	 */
	public static void main(String[] args) throws IOException, URISyntaxException {
		// Both paths are overridden together so a single stray argument
		// never silently mixes a custom input with the default output.
		boolean pathsGiven = args != null && args.length >= 2;
		String inputDir = pathsGiven ? args[0] : INPUT_PATH;
		String outputDir = pathsGiven ? args[1] : OUTPUT_PATH;

		JobConf conf = new JobConf(WordCountApp.class);
		conf.setJobName("wordcount");

		// JobConf is itself a Configuration, so it can be used directly to
		// look up the file system that owns the output directory.
		Path outputPath = new Path(outputDir);
		FileSystem fileSystem = FileSystem.get(new URI(outputDir), conf);
		// delete() simply returns false when the directory does not exist.
		fileSystem.delete(outputPath, true);

		// Map side.
		conf.setInputFormat(TextInputFormat.class);
		FileInputFormat.setInputPaths(conf, new Path(inputDir));
		conf.setMapperClass(MyMapper.class);
		conf.setMapOutputKeyClass(Text.class);
		conf.setMapOutputValueClass(LongWritable.class);

		// Summation is associative and the reducer's input and output types
		// match, so the reducer doubles as a combiner to shrink shuffle data.
		conf.setCombinerClass(MyReducer.class);

		// Reduce side.
		conf.setReducerClass(MyReducer.class);
		conf.setOutputKeyClass(Text.class);
		conf.setOutputValueClass(LongWritable.class);
		conf.setOutputFormat(TextOutputFormat.class);
		FileOutputFormat.setOutputPath(conf, outputPath);

		JobClient.runJob(conf);
	}

	/**
	 * Emits {@code (token, 1)} for every whitespace-separated token of each
	 * input line.
	 */
	public static class MyMapper extends MapReduceBase
	         implements Mapper<LongWritable, Text, Text, LongWritable> {

		/** Output key, reused for every token to avoid per-token allocation. */
		private final Text word = new Text();

		/** Constant output value: each occurrence counts as one. */
		private final LongWritable one = new LongWritable(1);

		/**
		 * Tokenizes one line of input and emits a count of one per token.
		 *
		 * @param key      position of the line in the input (unused)
		 * @param value    the line of text; a {@code null} line is skipped
		 * @param output   collector receiving {@code (token, 1)} pairs
		 * @param reporter progress reporter (unused)
		 * @throws IOException if the collector fails to accept a pair
		 */
		@Override
		public void map(LongWritable key, Text value,
				OutputCollector<Text, LongWritable> output, Reporter reporter)
				throws IOException {
			if (value == null) {
				return;
			}
			StringTokenizer tokenizer = new StringTokenizer(value.toString());
			while (tokenizer.hasMoreTokens()) {
				word.set(tokenizer.nextToken());
				output.collect(word, one);
			}
		}
	}

	/**
	 * Sums the counts collected for a word. Also used as the job's combiner.
	 */
	public static class MyReducer extends MapReduceBase implements
	        Reducer<Text, LongWritable, Text, LongWritable> {

		/** Output value, reused for every key to avoid per-key allocation. */
		private final LongWritable total = new LongWritable();

		/**
		 * Adds up all counts for {@code key} and emits a single total.
		 *
		 * @param key      the word being counted
		 * @param values   the partial counts for that word
		 * @param output   collector receiving the {@code (word, total)} pair
		 * @param reporter progress reporter (unused)
		 * @throws IOException if the collector fails to accept the pair
		 */
		@Override
		public void reduce(Text key, Iterator<LongWritable> values,
				OutputCollector<Text, LongWritable> output, Reporter reporter)
				throws IOException {
			long sum = 0;
			while (values.hasNext()) {
				sum += values.next().get();
			}
			total.set(sum);
			output.collect(key, total);
		}
	}

}