本文属于“Hadoop初读”系列,该系列记录平时 Hadoop 实践中遇到的问题,以及粗读 Hadoop 源码时的理解。每篇主要由“问题”和“解答”两部分组成:问题部分是实践中遇到的异常或疑惑;解答部分是对相关源码的粗读理解(以注释形式给出说明)。
问题代码(主要实现多输入,运行时报错 "No input paths specified in job"):
import java.io.IOException;
import java.net.URI;
import java.util.ArrayList;
import java.util.Map;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.MultipleInputs;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
public class ReduceSideJoin extends Configured implements Tool {
private static final Logger logger = LoggerFactory
.getLogger(ReduceSideJoin.class);
public static class LeftOutJoinReducer extends
Reducer<IntWritable, OrderGoodsInfoWritble, IntWritable, OrderGoodsInfoWritble> {
private ArrayList<OrderGoodsInfoWritble> leftTable = new ArrayList&l