手写MR之自定义输出

前言

  • 为了实现控制最终文件的输出路径和输出格式,可以通过自定义outputFormat来进行实现。
  • 但由于自定义的OutputFormat继承自FileOutputFormat,提交job时仍会校验输出目录:驱动类里必须声明一个输出目录,而且该目录必须不存在,否则校验失败,job无法提交!

需求

读取一个txt文件,将包含zhengkw的行(代码中以 contains(".zhengkw.") 判断)合并到一个文件中,其余的行合并到另一个文件中!

Mapper

package com.zhengkw.outputformat;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

import java.io.IOException;

/**
 * Mapper that passes every input line through unchanged.
 *
 * <p>The byte offset supplied as the input key is discarded; each line is
 * emitted under the single {@link NullWritable} key.
 *
 * @author zhengkw
 * @version 1.0
 * @since jdk 1.8
 */
public class OutTxtMapper extends Mapper<LongWritable, Text, NullWritable, Text> {

    /**
     * Emits the incoming line as the map output value with a {@link NullWritable} key.
     *
     * @param key     input key supplied by the input format (not used)
     * @param value   the line of text to forward
     * @param context task context used to emit the output pair
     * @throws IOException          if writing the output pair fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        final NullWritable emptyKey = NullWritable.get();
        context.write(emptyKey, value);
    }
}

Reducer

package com.zhengkw.outputformat;

import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

import java.io.IOException;

/**
 * Reducer that turns every incoming line into an output key terminated by
 * {@code "\r\n"}, paired with a {@link NullWritable} value.
 *
 * @author zhengkw
 * @version 1.0
 * @since jdk 1.8
 */
public class OutTxtReducer extends Reducer<NullWritable, Text, Text, NullWritable> {

    // Reused output key; its content is overwritten for every emitted record.
    Text k = new Text();

    /**
     * Emits each value of the group with a trailing CRLF appended.
     *
     * @param key     the (empty) grouping key
     * @param values  the lines collected under {@code key}
     * @param context task context used to emit the output pairs
     * @throws IOException          if writing an output pair fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    protected void reduce(NullWritable key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
        final NullWritable emptyValue = NullWritable.get();
        for (Text line : values) {
            // The line terminator is added here, so the emitted key already carries it.
            String terminated = String.valueOf(line) + "\r\n";
            k.set(terminated);
            context.write(k, emptyValue);
        }
    }
}

OutTxtOutputFormat

package com.zhengkw.outputformat;

import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import java.io.IOException;

/**
 * Output format that hands every reducer output record to an
 * {@link OutTxtRecordWriter} instead of the default part-file writer.
 *
 * @author zhengkw
 * @version 1.0
 * @since jdk 1.8
 */
public class OutTxtOutputFormat extends FileOutputFormat<Text, NullWritable> {

    /**
     * Creates the record writer for one task attempt.
     *
     * @param context the task attempt context, passed on to the writer
     * @return a new {@link OutTxtRecordWriter}
     * @throws IOException          declared by the overridden method
     * @throws InterruptedException declared by the overridden method
     */
    @Override
    public RecordWriter<Text, NullWritable> getRecordWriter(TaskAttemptContext context) throws IOException, InterruptedException {
        // Return the parameterized type rather than the raw RecordWriter so callers stay type-checked.
        return new OutTxtRecordWriter(context);
    }
}

OutTxtRecordWriter

package com.zhengkw.outputformat;

import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;

import java.io.IOException;
import java.io.UncheckedIOException;

import static java.lang.System.out;


/**
 * Record writer that splits records between two fixed files: records whose
 * text contains {@code ".zhengkw."} go to {@code f:/outputFormat/zhengkw.log},
 * all other records go to {@code f:/outputFormat/other.log}.
 *
 * <p>The target paths are fixed, so every writer instance creates the same two files.
 *
 * @author zhengkw
 * @version 1.0
 * @since jdk 1.8
 */
public class OutTxtRecordWriter extends RecordWriter<Text, NullWritable> {
    FileSystem fs = null;
    Path zhengkw = new Path("f:/outputFormat/zhengkw.log");
    Path other = new Path("f:/outputFormat/other.log");
    // Stream for records containing ".zhengkw."
    FSDataOutputStream ofsz = null;
    // Stream for all remaining records
    FSDataOutputStream ofso = null;

    /**
     * Creates a writer without opening any stream; {@link #write} fails with an
     * {@link IOException} until the streams have been set.
     */
    public OutTxtRecordWriter() {
        super();
    }

    /**
     * Creates a writer and opens both output files on the file system described
     * by the task's configuration.
     *
     * @param context task attempt context whose configuration selects the file system
     * @throws UncheckedIOException if the file system or either output file cannot be opened
     */
    public OutTxtRecordWriter(TaskAttemptContext context) {
        this();
        try {
            // Build the file system from the configuration carried by the task context.
            fs = FileSystem.get(context.getConfiguration());

            // Open one stream per category.
            ofsz = fs.create(zhengkw);
            ofso = fs.create(other);
        } catch (IOException e) {
            // Do not leave a half-initialised writer behind: release whatever was opened
            // and fail now instead of hitting a NullPointerException on the first write.
            IOUtils.closeStream(ofsz);
            IOUtils.closeStream(ofso);
            ofsz = null;
            ofso = null;
            throw new UncheckedIOException(
                    "Failed to open output files " + zhengkw + " and " + other, e);
        }
    }

    /**
     * Appends the record's bytes to the file matching its content.
     *
     * <p>NOTE(review): the match is on the literal {@code ".zhengkw."} (with both dots),
     * while the accompanying requirement only says "contains zhengkw" — confirm which is intended.
     *
     * @param text         the record to write; its bytes are written as-is, no terminator is added
     * @param nullWritable unused placeholder value
     * @throws IOException          if the streams are not open or the write fails
     * @throws InterruptedException declared by the overridden method
     */
    @Override
    public void write(Text text, NullWritable nullWritable) throws IOException, InterruptedException {
        FSDataOutputStream target = text.toString().contains(".zhengkw.") ? ofsz : ofso;
        if (target == null) {
            throw new IOException("Output streams are not open; create the writer with a TaskAttemptContext");
        }
        // Write Text's own UTF-8 bytes; String.getBytes() would re-encode with the platform charset.
        // Only the first getLength() bytes of the backing array are valid.
        target.write(text.getBytes(), 0, text.getLength());
    }

    /**
     * Closes both output streams. Close failures are propagated, because a failed
     * close can mean buffered data never reached the file.
     *
     * @param context the task attempt context (not used)
     * @throws IOException          if closing either stream fails
     * @throws InterruptedException declared by the overridden method
     */
    @Override
    public void close(TaskAttemptContext context) throws IOException, InterruptedException {
        try {
            if (ofsz != null) {
                ofsz.close();
            }
        } finally {
            // Always attempt the second close, even when the first one throws.
            if (ofso != null) {
                ofso.close();
            }
        }
    }
}

驱动类

package com.zhengkw.outputformat;


import org.apache.hadoop.conf.Configuration;

import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import java.io.IOException;

/**
 * Driver that configures and submits the job wiring together {@code OutTxtMapper},
 * {@code OutTxtReducer} and {@code OutTxtOutputFormat}.
 *
 * @author zhengkw
 * @version 1.0
 * @since jdk 1.8
 */
public class OutTxtFormatDriver {
    /**
     * Builds the job, waits for it to finish and exits with status 0 on success, 1 on failure.
     *
     * @param args command-line arguments (not used)
     * @throws InterruptedException   if waiting for the job is interrupted
     * @throws IOException            if the file system or the job cannot be accessed
     * @throws ClassNotFoundException if a job class cannot be loaded
     */
    public static void main(String[] args) throws InterruptedException, IOException, ClassNotFoundException {
        // Input path
        Path inputPath = new Path("F:\\mrinput\\outputformat");
        // Output path handed to FileOutputFormat
        Path outputPath = new Path("f:/nothing");

        Configuration conf = new Configuration();

        // Remove the output path if it is left over from a previous run.
        FileSystem fs = FileSystem.get(conf);
        if (fs.exists(outputPath)) {
            fs.delete(outputPath, true);
        }

        // 1 Create the job from the configuration
        Job job = Job.getInstance(conf);

        // 2 Locate the jar through the driver class
        job.setJarByClass(OutTxtFormatDriver.class);

        // 3 Mapper and reducer classes
        job.setMapperClass(OutTxtMapper.class);
        job.setReducerClass(OutTxtReducer.class);

        // 4 Map output types
        job.setMapOutputKeyClass(NullWritable.class);
        job.setMapOutputValueClass(Text.class);

        // 5 Final output types: declare what OutTxtReducer actually emits (Text, NullWritable)
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(NullWritable.class);

        job.setOutputFormatClass(OutTxtOutputFormat.class);

        // OutTxtRecordWriter always writes to the same two fixed files, so more than one
        // reduce task would make the writers overwrite each other.
        job.setNumReduceTasks(1);

        // 6 Input and output paths
        FileInputFormat.setInputPaths(job, inputPath);
        // Required even with the custom writer: the output path must be set and must NOT
        // already exist, otherwise the output check at submission rejects the job.
        FileOutputFormat.setOutputPath(job, outputPath);

        // 7 Submit and wait
        boolean result = job.waitForCompletion(true);

        System.exit(result ? 0 : 1);
    }
}
  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值