1. The detailed principles of Map and Reduce are not described here; instead, a simple code example is used to introduce the MapReduce workflow.
import java.io.IOException;
import java.util.StringTokenizer;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
public class WordCount {
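// The four type parameters of Mapper below are, in order: the input key type,
// input value type, output key type, and output value type.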
public static class TokenizerMapper
        extends Mapper<Object, Text, Text, IntWritable> {

    private final static IntWritable one = new IntWritable(1);
    // IntWritable(1) is the Writable wrapper for the count value; just treat it as the integer 1
    private Text word = new Text();
    // Text is Hadoop's Writable wrapper for a UTF-8 string; in practice you can treat it
    // much like a String (note that it is not actually a subclass of String)
    public void map(Object key, Text value, Context context)
            throws IOException, InterruptedException {
        // Split the input line into tokens and emit a (word, 1) pair for each token
        StringTokenizer itr = new StringTokenizer(value.toString());
        while (itr.hasMoreTokens()) {
            word.set(itr.nextToken());
            context.write(word, one);
        }
    }