package com.hadoop.mr;
import java.io.IOException;
import java.util.StringTokenizer;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
public class WordCountMapReduce {
//Mapper Class
public static class WordCountMapper extends Mapper
{
// hello world ==> input <0,'hello world'> ===>
private Text outputkey = new Text();
public static final IntWritable outputvalue = new IntWritable(1);f
@Override
protected void map(LongWritable key, Text value,Context context)
throws IOException, InterruptedException {
// key 0 value hello world
String lineValue = value.toString();
StringTokenizer st = new StringTokenizer(lineValue); // hello world hdfs
while(st.hasMoreTokens()){
String str = st.nextToken();
outputkey.set(str);
context.write(outputkey, outputvalue);
}
}
}
//Reducer Class key (hello,list(1,5,6))
public static class WordCountReducer extends Reducer
{
private IntWritable outputvalue = new IntWritable();
@Override
protected void reduce(Text key, Iterable
values,Context context) throws IOException, InterruptedException { int sum = 0; for(IntWritable value :values){ // int a = value.get(); // sum = sum+a; sum+=value.get(); } outputvalue.set(sum); context.write(key, outputvalue); } } public static void main(String[] args) throws Exception{ Configuration conf = new Configuration(); Job job = Job.getInstance(conf, WordCountMapReduce.class.getSimpleName()); job.setJarByClass(WordCountMapReduce.class); Path inpath = new Path(args[0]); FileInputFormat.addInputPath(job,inpath); Path outpath = new Path(args[1]); FileOutputFormat.setOutputPath(job, outpath); job.setMapperClass(WordCountMapper.class); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(IntWritable.class); //3.5 set reducer job.setReducerClass(WordCountReducer.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(IntWritable.class); //###选择true 会将mr运行的日志信息打印出来 boolean flag = job.waitForCompletion(true); System.exit(flag?0:1); } }
// MapReduce WordCount
// (Web page footer: latest recommended article published on 2022-09-03 11:34:35)