1.WordCount的实现
Map 端:一行一行地读取文件,程序将每一行转换为中间的KEY/VALUE对
例如:两行
My name is liu gang ->My 1 name 1 is 1 liu 1 gang 1
What is your name -> What 1 is 1 your 1 name 1
Reduce端:
相同的KEY一定会被分到一起,经过Reduce方法处理后形成最终的key/value
My 1 name 2 is 2 liu 1 gang 1 What 1 your 1
下面是WordCount的实现
map类
package com.hadoop.mapreduce.wordcount;
import java.io.IOException;
import java.util.StringTokenizer;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
public class WordCountMap extends Mapper<Object, Text, Text, IntWritable>{
    /** Reusable count value: every token emitted carries a count of 1. */
    private final IntWritable one = new IntWritable(1);
    /** Reusable output key, mutated per token to avoid per-record allocation. */
    private final Text word = new Text();

    /**
     * Splits one line of input on single spaces and emits (token, 1) for
     * every token, producing the intermediate key/value pairs for the
     * word-count job.
     *
     * @param key     input key from the record reader (unused here —
     *                presumably the line's byte offset; not read by this code)
     * @param value   one line of input text
     * @param context Hadoop context used to emit intermediate pairs
     * @throws IOException          if the framework write fails
     * @throws InterruptedException if the task is interrupted
     */
    @Override
    protected void map(Object key, Text value, Context context)
            throws IOException, InterruptedException {
        // Tokenize on single spaces, matching the original delimiter choice.
        StringTokenizer tokens = new StringTokenizer(value.toString(), " ");
        // hasMoreTokens() is the idiomatic pair for nextToken()
        // (the original used the legacy Enumeration-style hasMoreElements()).
        while (tokens.hasMoreTokens()) {
            word.set(tokens.nextToken());
            context.write(word, one);
        }
    }
}
Reduce类
package com.hadoop.mapreduce.wordcount;
import java.io.IOException;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
public class WordCountReduce extends Reducer<Text, IntWritable, Text, IntWritable>{
    /** Reusable output value holding the summed count for the current key. */
    private final IntWritable result = new IntWritable(0);

    /**
     * Sums every count associated with one word and emits (word, total).
     *
     * Bug fix: the original called {@code arg1.iterator()} afresh both in the
     * loop condition and inside the loop body. For any standard
     * {@link Iterable} that restarts iteration on each call and loops forever;
     * it only appeared to work because Hadoop's value iterable happens to hand
     * back the same underlying iterator. A for-each loop obtains the iterator
     * exactly once and is correct for every Iterable.
     *
     * @param key     the word being reduced
     * @param values  all counts emitted by the mappers for this word
     * @param context Hadoop context used to emit the final pair
     * @throws IOException          if the framework write fails
     * @throws InterruptedException if the task is interrupted
     */
    @Override
    protected void reduce(Text key, Iterable<IntWritable> values, Context context)
            throws IOException, InterruptedException {
        int sum = 0;
        for (IntWritable count : values) {
            sum += count.get();
        }
        result.set(sum);
        context.write(key, result);
    }
}
main
package com.hadoop.mapreduce.wordcount;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
public class WordCount extends Configured implements Tool{
    /**
     * Entry point: runs the WordCount job through ToolRunner and exits with
     * the job's status code.
     *
     * Bug fix: the original always called {@code System.exit(0)}, discarding
     * the value returned by {@code ToolRunner.run}, so a failed job still
     * reported success to the calling shell.
     *
     * @param args command-line arguments; args[0] = input path,
     *             args[1] = output path (generic Hadoop options are consumed
     *             by ToolRunner first)
     * @throws Exception if job setup or submission fails
     */
    public static void main(String[] args) throws Exception {
        int exitCode = ToolRunner.run(new Configuration(), new WordCount(), args);
        System.exit(exitCode);
    }

    /**
     * Configures and submits the MapReduce job, blocking until it finishes.
     *
     * @param args args[0] = input path, args[1] = output path
     * @return 0 if the job succeeded, 1 otherwise
     * @throws Exception if job configuration or submission fails
     */
    @Override
    public int run(String[] args) throws Exception {
        // Guard against missing paths instead of throwing
        // ArrayIndexOutOfBoundsException.
        if (args.length < 2) {
            System.err.println("Usage: WordCount <input path> <output path>");
            return 1;
        }
        Configuration conf = getConf();
        // Job.getInstance replaces the deprecated new Job(conf, name) constructor.
        Job job = Job.getInstance(conf, "WordCount");
        job.setJarByClass(WordCount.class);
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        job.setOutputFormatClass(TextOutputFormat.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        job.setMapperClass(WordCountMap.class);
        // Summing is associative and commutative, so the reducer can also act
        // as a combiner to reduce shuffle traffic.
        job.setCombinerClass(WordCountReduce.class);
        job.setReducerClass(WordCountReduce.class);
        // waitForCompletion returns the job's success flag directly; no need
        // for a separate isSuccessful() call afterwards.
        return job.waitForCompletion(true) ? 0 : 1;
    }
}