Mapper有setup(),map(),cleanup()和run()四个方法。
1.setup()一般是用来进行一些map()前的准备工作。
2.map()则一般承担主要的处理工作。
3.cleanup()则负责收尾工作,如关闭文件或者执行map()后的K-V分发等。
4.run()方法提供了setup()->map()->cleanup()的执行模板,其中cleanup()位于finally块中,即使map()抛出异常也会执行。
// // Source code recreated from a .class file by IntelliJ IDEA // (powered by FernFlower decompiler) // package org.apache.hadoop.mapreduce; import java.io.IOException; import org.apache.hadoop.classification.InterfaceAudience.Public; import org.apache.hadoop.classification.InterfaceStability.Stable;
/**
 * Base class for a map task. It exposes three overridable hooks
 * ({@link #setup(Context)}, {@link #map(Object, Object, Context)} and
 * {@link #cleanup(Context)}) plus {@link #run(Context)}, the template method
 * that calls them in order.
 *
 * @param <KEYIN>    type of the input keys
 * @param <VALUEIN>  type of the input values
 * @param <KEYOUT>   type of the output keys
 * @param <VALUEOUT> type of the output values
 */
@Public
@Stable
public class Mapper<KEYIN, VALUEIN, KEYOUT, VALUEOUT> {

    public Mapper() {
    }

    /**
     * Pre-processing hook. {@link #run(Context)} calls it exactly once, before
     * the first record is mapped. The default implementation does nothing.
     *
     * @param context the task context
     * @throws IOException          declared so overriding implementations may throw it
     * @throws InterruptedException declared so overriding implementations may throw it
     */
    protected void setup(Context context) throws IOException, InterruptedException {
    }

    /**
     * Called once for every key/value pair of the input split. The default
     * implementation is the identity mapping: it writes the pair to the
     * context unchanged.
     *
     * @param key     the current input key
     * @param value   the current input value
     * @param context the task context the output is written to
     * @throws IOException          if writing to the context fails
     * @throws InterruptedException if writing to the context is interrupted
     */
    @SuppressWarnings("unchecked")
    protected void map(KEYIN key, VALUEIN value, Context context)
            throws IOException, InterruptedException {
        // The context's write() is declared in terms of KEYOUT/VALUEOUT (see the
        // Hadoop TaskInputOutputContext API), so the input pair has to be cast.
        // The casts are unchecked and erased at runtime; the identity mapping is
        // therefore only type-safe when the input and output types are compatible.
        context.write((KEYOUT) key, (VALUEOUT) value);
    }

    /**
     * Wrap-up hook, e.g. for closing streams. {@link #run(Context)} calls it
     * once after the record loop ends. The default implementation does nothing.
     *
     * @param context the task context
     * @throws IOException          declared so overriding implementations may throw it
     * @throws InterruptedException declared so overriding implementations may throw it
     */
    protected void cleanup(Context context) throws IOException, InterruptedException {
    }

    /**
     * Drives the map task: runs {@link #setup(Context)}, feeds every key/value
     * pair supplied by the context to {@link #map(Object, Object, Context)},
     * and finally runs {@link #cleanup(Context)}.
     *
     * @param context the task context that supplies the records
     * @throws IOException          propagated from the hooks or the context
     * @throws InterruptedException propagated from the hooks or the context
     */
    public void run(Context context) throws IOException, InterruptedException {
        setup(context);
        try {
            while (context.nextKeyValue()) {
                map(context.getCurrentKey(), context.getCurrentValue(), context);
            }
        } finally {
            // Runs even when map() or the record iteration throws. It is not
            // reached when setup() itself throws, since setup() is outside the try.
            cleanup(context);
        }
    }

    /**
     * The context type passed to the hooks of this mapper. It declares no
     * members of its own beyond what {@code MapContext} provides.
     */
    public abstract class Context implements MapContext<KEYIN, VALUEIN, KEYOUT, VALUEOUT> {
        public Context() {
        }
    }
}