mapreduce编程练习（一）简单的练习 WordCount

最新推荐文章于 2023-10-02 01:04:25 发布

junzhou134

最新推荐文章于 2023-10-02 01:04:25 发布

阅读量1.3k

点赞数

分类专栏： hadoop 文章标签： hadoop maprdeuce入门简单

本文链接：https://blog.csdn.net/m0_37138008/article/details/72519096

版权

hadoop 专栏收录该内容

14 篇文章 0 订阅

订阅专栏

入门训练：WordCount

问题描述：对一个或多个输入文件中的单词进行计数统计，比如一个文件的输入文件如下

输出格式：

运行代码实例：

package hadoopLearn;

import java.io.IOException;
import java.net.URI;
import java.util.StringTokenizer;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.apache.hadoop.yarn.webapp.hamlet.Hamlet.P;


public class WordCount extends Configured implements Tool {
	
	private static double count = 0; 
	
	public static class CountMapper extends Mapper<LongWritable, Text, Text, LongWritable>{
		private Text word = new Text();
		private LongWritable one = new LongWritable(1);
		
		@Override
		protected void map(LongWritable key,Text value,Mapper<LongWritable, Text, Text, LongWritable>.Context context)
				throws IOException,InterruptedException{
					System.out.println("line pos:" + key.toString());
					String line = value.toString();
					StringTokenizer tokenizer = new StringTokenizer(line);
					while (tokenizer.hasMoreElements()) {
						count ++;
						word.set(tokenizer.nextToken());
						context.write(word, one);
					}
				}
	}
	
	public static class CountReducer extends Reducer<Text, LongWritable, Text, DoubleWritable>{
		private DoubleWritable result = new DoubleWritable();

		@Override
		protected void reduce(Text key, Iterable<LongWritable> values,
				Reducer<Text, LongWritable, Text, DoubleWritable>.Context context) throws IOException, InterruptedException {
			int sum = 0;			
			for(LongWritable v : values){
				sum += v.get();
			}
			result.set(sum);
			context.write(key, result);
		}
	}
	
	static FileSystem fs = null;
	static Configuration conf=null;
	public static void init() throws Exception{
		//读取classpath下的xxx-site.xml 配置文件，并解析其内容，封装到conf对象中
		conf = new Configuration();
		//也可以在代码中对conf中的配置信息进行手动设置，会覆盖掉配置文件中的读取的值
		conf.set("fs.defaultFS", "hdfs://192.168.41.136:9000/");
		//根据配置信息，去获取一个具体文件系统的客户端操作实例对象
		 fs = FileSystem.get(new URI("hdfs://192.168.41.136:9000/"),conf,"hadoop");
	}
	
	public int run(String[] args) throws Exception {
		Job job = Job.getInstance(getConf(),"WordCount");
		job.setJarByClass(WordCount.class);
		job.setMapperClass(CountMapper.class);	
		job.setReducerClass(CountReducer.class);
		job.setOutputKeyClass(Text.class);
		job.setOutputValueClass(LongWritable.class);
		Path in = new Path("/WordCount/input");
		if(fs.exists(in)){
			FileInputFormat.addInputPath(job, in);
		}else{
			System.out.println("输入文件不存在！");
		}
		Path os = new Path("/WordCount/output");
		int flage = 0;
		if(fs.exists(os)){
			System.out.println("输出文件已经存在！重新新建路径！");
			 fs.delete(os, true);  
			 FileOutputFormat.setOutputPath(job, os);
			 flage = job.waitForCompletion(false) ? 0:1;
		}else{
			FileOutputFormat.setOutputPath(job, os);
			flage = job.waitForCompletion(false) ? 0:1;
		}
		return  flage;
	}

	public static void main(String[] args) throws Exception {
		init();
		int res = ToolRunner.run(new WordCount(), args);
		System.exit(res);
	}
}