大纲
- 演示实例讲解
- 演示编写MapReduce实例
MapReduce代码
创建 linecount Java 项目
代码如下:
package com.trendwise.java;
import java.io.IOException;
import java.util.Iterator;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.TextInputFormat;
import org.apache.hadoop.mapred.TextOutputFormat;
public class Linecount {
//This is the Mapper Module, i.e. Map.java
public static class Map extends MapReduceBase implements Mapper<LongWritable, Text, Text, IntWritable> {
private final static IntWritable obj = new IntWritable(1);
private Text words = new Text("Total Lines are");
public void map(LongWritable key, Text value, OutputCollector<Text, IntWritable> output, Reporter reporter)
throws IOException {
output.collect(words, obj);
}
}
// This is the Reducer Module, i.e. Reduce.java
public static class Reduce extends MapReduceBase implements Reducer<Text, IntWritable, Text, IntWritable> {
public void reduce(Text key, Iterator<IntWritable> values, OutputCollector<Text, IntWritable> output,
Reporter reporter) throws IOException {
int sum1 = 0;
while (values.hasNext()) {
sum1 += values.next().get();
}
output.collect(key, new IntWritable(sum1));
}
}
//This is the Driver Module
public static void main(String[] args) throws Exception {
JobConf config = new JobConf(Linecount.class);
config.setJobName("Linecount");
config.setOutputKeyClass(Text.class);
config.setOutputValueClass(IntWritable.class);
config.setMapperClass(Map.class);
config.setCombinerClass(Reduce.class);
config.setReducerClass(Reduce.class);
config.setInputFormat(TextInputFormat.class);
config.setOutputFormat(TextOutputFormat.class);
FileInputFormat.setInputPaths(config, new Path(args[0]));
FileOutputFormat.setOutputPath(config, new Path(args[1]));
JobClient.runJob(config);
}
}
创建Java类时,把整个MapReduce程序(Mapper、Reducer和Driver)写在同一个类里,优点是代码直观、便于查看;缺点是不利于Java代码的管理,功能一多就容易混乱。因此这种写法只适合简单的测试环境。
在Eclipse中可以安装MapReduce插件,便于在本地进行开发和测试。
http://www.cnblogs.com/baixl/p/4154429.html (来自博客园"白大虾",Hadoop 2.x 版本)
http://www.cnblogs.com/edisonchou/p/4297521.html (来自博客园另一位大神,Hadoop 1.x 版本)
来自网易云课堂