多输入问题(多种格式数据的输入处理)

在 Hadoop 中,一个作业可以同时处理多种输入格式的文件:通过 MultipleInputs 为每个输入路径分别指定对应的 InputFormat 和 Mapper。下面给出的例子同时读取文本文件和二进制的序列文件(SequenceFile)。

Mapper
/**
 * Word-count mapper for records keyed by an {@code IntWritable} with a {@code Text} value.
 *
 * <p>Each value is split on single spaces and {@code (word, 1)} is emitted for every
 * non-empty token. The input key is ignored.
 */
public class WCSeqMapper extends Mapper<IntWritable, Text, Text, IntWritable> {

    /** Count emitted with every word; never mutated, so one shared instance suffices. */
    private static final IntWritable ONE = new IntWritable(1);

    /** Output key reused across calls, as in the canonical Hadoop WordCount mapper. */
    private final Text keyOut = new Text();

    /**
     * Emits {@code (word, 1)} for each non-empty space-separated token of {@code value}.
     *
     * @param key     record key; not used
     * @param value   text to tokenize
     * @param context sink for the {@code (word, 1)} pairs
     * @throws IOException          if writing to the context fails
     * @throws InterruptedException if writing to the context is interrupted
     */
    @Override
    protected void map(IntWritable key, Text value, Context context) throws IOException, InterruptedException {
        for (String word : value.toString().split(" ")) {
            // split(" ") yields "" for an empty value and for leading or consecutive
            // spaces; those are not words and must not be counted.
            if (word.isEmpty()) {
                continue;
            }
            keyOut.set(word);
            context.write(keyOut, ONE);
        }
    }
}

/**
 * Word-count mapper for records keyed by a {@code LongWritable} with a {@code Text} value.
 *
 * <p>Each value is split on single spaces and {@code (word, 1)} is emitted for every
 * non-empty token. The input key is ignored.
 */
public class WCTextMapper extends Mapper<LongWritable, Text, Text, IntWritable> {

    /** Count emitted with every word; never mutated, so one shared instance suffices. */
    private static final IntWritable ONE = new IntWritable(1);

    /** Output key reused across calls, as in the canonical Hadoop WordCount mapper. */
    private final Text keyOut = new Text();

    /**
     * Emits {@code (word, 1)} for each non-empty space-separated token of {@code value}.
     *
     * @param key     record key; not used
     * @param value   text to tokenize
     * @param context sink for the {@code (word, 1)} pairs
     * @throws IOException          if writing to the context fails
     * @throws InterruptedException if writing to the context is interrupted
     */
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        for (String word : value.toString().split(" ")) {
            // A blank line splits to [""], and leading or consecutive spaces also yield "";
            // skipping them keeps the empty string out of the word counts.
            if (word.isEmpty()) {
                continue;
            }
            keyOut.set(word);
            context.write(keyOut, ONE);
        }
    }
}
Reducer
/**
 * Word-count reducer: collapses all counts received for one word into a single total.
 */
public class WCReducer extends Reducer<Text, IntWritable, Text, IntWritable> {

    /**
     * Sums every value grouped under {@code key} and writes {@code (key, total)}.
     *
     * @param key     the word being reduced
     * @param values  the individual counts for that word
     * @param context sink for the {@code (word, total)} pair
     * @throws IOException          if writing to the context fails
     * @throws InterruptedException if writing to the context is interrupted
     */
    @Override
    protected void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
        int total = 0;
        for (IntWritable partial : values) {
            total += partial.get();
        }

        // Emit the aggregated count for this word.
        IntWritable result = new IntWritable(total);
        context.write(key, result);
    }
}
主函数
/**
 * Driver for a word-count job that reads two differently formatted inputs in one run:
 * <ul>
 *   <li>plain text files, read with {@code TextInputFormat} and mapped by {@code WCTextMapper};</li>
 *   <li>binary sequence files, read with {@code SequenceFileInputFormat} and mapped by
 *       {@code WCSeqMapper}.</li>
 * </ul>
 *
 * <p>Usage: {@code WCAppMulti <output-dir>}. Both input directories are hard-coded below.
 */
public class WCAppMulti {

    /**
     * Configures and runs the job, then exits with 0 on success, 1 if the job failed,
     * or 2 if the output directory argument is missing.
     *
     * @param args {@code args[0]} is the output directory
     * @throws Exception if job setup or submission fails
     */
    public static void main(String[] args) throws Exception {

        // Fail with a usage message instead of an ArrayIndexOutOfBoundsException.
        if (args.length < 1) {
            System.err.println("Usage: WCAppMulti <output-dir>");
            System.exit(2);
        }

        Configuration conf = new Configuration();

        // Use the local file system as the default file system.
        conf.set("fs.defaultFS", "file:///");

        Job job = Job.getInstance(conf);

        // Basic job properties.
        job.setJobName("WCAppMulti");           // job name
        job.setJarByClass(WCAppMulti.class);    // class used to locate the job jar

        // Multiple inputs: each path gets its own input format and mapper.
        // No job-wide setInputFormatClass(...) call is made here, because
        // MultipleInputs.addInputPath installs its own delegating input format
        // and mapper on the job and would replace it anyway.
        MultipleInputs.addInputPath(job, new Path("file:///F:/hadoop/mr/txt"), TextInputFormat.class, WCTextMapper.class);
        MultipleInputs.addInputPath(job, new Path("file:///F:/hadoop/mr/seq"), SequenceFileInputFormat.class, WCSeqMapper.class);

        // Output directory.
        FileOutputFormat.setOutputPath(job, new Path(args[0]));

        // Upper and lower bounds on the size of one input split, in bytes
        // (these are sizes, not a number of splits).
        // NOTE(review): a 10-byte maximum presumably exists only to force many
        // small splits for demonstration; confirm before using on real data.
        FileInputFormat.setMaxInputSplitSize(job, 10);
        FileInputFormat.setMinInputSplitSize(job, 1);

        job.setReducerClass(WCReducer.class); // reducer class

        job.setNumReduceTasks(3);             // number of reduce tasks

        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);

        // Propagate job success or failure through the process exit code.
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值