MapReduce自定义文件输出名

最新推荐文章于 2023-05-09 20:44:07 发布

小强签名设计

最新推荐文章于 2023-05-09 20:44:07 发布

阅读量1.5k

点赞数 1

分类专栏： hadoop 文章标签： MapReduce 自定义输出文件名

本文链接：https://blog.csdn.net/m0_37739193/article/details/102616376

版权

hadoop 专栏收录该内容

20 篇文章 4 订阅

订阅专栏

文章目录

前言

MapReduce默认情况下，一个reducer产生一个文件，以name-r-nnnnn来命名，其中默认的name为part，nnnnn从00000开始递增，保证了每个reducer不会产生重复的文件。

一、仅替代文件名part，输出结果为score-r-00000

1.使用org.apache.hadoop.mapreduce.lib.output.MultipleOutputs类
2.MultipleOutputs类需要在Reducer的setup()方法中初始化，并且要在cleanup()中调用close()关闭（否则输出可能没有被完整写出）
3.这个时候还会生成part-r-00000这种文件，而且里面是空的；要避免生成这些空文件，需要设置LazyOutputFormat.setOutputFormatClass(job, TextOutputFormat.class);

代码样例：

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.LazyOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.MultipleOutputs;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;

import java.io.IOException;

/**
 * Word-count job whose reducer output files are named {@code score-r-nnnnn}
 * instead of the default {@code part-r-nnnnn}, using {@link MultipleOutputs}.
 *
 * <p>Usage: {@code WordCount <input path> [output path]}. When the output path
 * is omitted, {@code /huiqiang/output} is used.
 *
 * <p>Created by HuiQ on 2019-10-16.
 */
public class WordCount {

    /** Output directory used when none is given on the command line. */
    private static final String DEFAULT_OUTPUT_DIR = "/huiqiang/output";

    /** Emits {@code (token, 1)} for every non-empty, space-separated token of a line. */
    public static class WordCountMapper extends Mapper<Object,Text,Text,IntWritable>{
        private final static IntWritable one = new IntWritable(1);
        private Text word = new Text();

        @Override
        public void map(Object key,Text value,Context context) throws IOException, InterruptedException {
            String[] words = value.toString().split(" ");
            for (String str: words){
                // split(" ") yields "" for blank lines and for runs of spaces;
                // those are not words and must not be counted.
                if (str.isEmpty()) {
                    continue;
                }
                word.set(str);
                context.write(word,one);
            }
        }
    }

    /** Sums the counts of each word and writes the result to {@code score-r-nnnnn}. */
    public static class WordCountReducer extends Reducer<Text,IntWritable,Text,IntWritable> {

        private final IntWritable result = new IntWritable();
        private MultipleOutputs<Text, IntWritable> multipleOutputs;

        @Override
        protected void setup(Context context) throws IOException, InterruptedException {
            multipleOutputs = new MultipleOutputs<Text, IntWritable>(context);
        }

        @Override
        public void reduce(Text key,Iterable<IntWritable> values,Context context) throws IOException, InterruptedException {
            int total=0;
            for (IntWritable val : values){
                // Add the value rather than counting records, so the result
                // stays correct if a combiner pre-aggregates the counts.
                total += val.get();
            }
            result.set(total);
            // Third argument is the base output name that replaces "part".
            multipleOutputs.write(key, result, "score");
        }

        @Override
        protected void cleanup(Context context) throws IOException, InterruptedException {
            // Closing is what finalizes the named output files.
            multipleOutputs.close();
        }
    }

    /**
     * Configures and runs the job.
     *
     * @param args {@code args[0]} is the input path; optional {@code args[1]} is
     *             the output directory (defaults to {@code /huiqiang/output})
     * @throws Exception if the job cannot be set up or fails while running
     */
    public static void main (String[] args) throws Exception{
        if (args.length < 1) {
            System.err.println("Usage: WordCount <input path> [output path]");
            System.exit(2);
        }
        String outputDir = args.length > 1 ? args[1] : DEFAULT_OUTPUT_DIR;

        Configuration conf = new Configuration();

        // Job.getInstance replaces the deprecated Job constructor.
        Job job = Job.getInstance(conf, "word count");
        job.setJarByClass(WordCount.class);
        job.setMapperClass(WordCountMapper.class);
        job.setReducerClass(WordCountReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);

        // Without the lazy wrapper the job also creates empty part-r-00000 /
        // part-m-00000 files next to the score-r-nnnnn files.
        // NOTE: drop this line when switching to a fully custom OutputFormat;
        // it sets the job's output format, and the original author reports the
        // result is then written to part-r-00000 again.
        LazyOutputFormat.setOutputFormatClass(job, TextOutputFormat.class);
        FileInputFormat.setInputPaths(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(outputDir));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}

二、要想全部自定义文件名，需要重写RecordWriter

自定义reducer类输出是通过重写FileOutputFormat类和RecordWriter类实现的。具体操作是重写RecordWriter类中的write方法，然后通过FileOutputFormat类的getRecordWriter方法返回这个RecordWriter对象。

代码样例：

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.*;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import java.io.IOException;

/**
 * Word-count job that writes its result to a single, fully custom-named file
 * ({@code test.txt} inside the job output directory) through a custom
 * {@link FileOutputFormat} and {@link RecordWriter}.
 *
 * <p>Usage: {@code WordCount <input path> [output path]}. When the output path
 * is omitted, {@code /huiqiang/output} is used.
 *
 * <p>Created by HuiQ on 2019-10-16.
 */
public class WordCount {

    /** Output directory used when none is given on the command line. */
    private static final String DEFAULT_OUTPUT_DIR = "/huiqiang/output";

    /** Emits {@code (token, 1)} for every non-empty, space-separated token of a line. */
    public static class WordCountMapper extends Mapper<Object,Text,Text,IntWritable>{
        private final static IntWritable one = new IntWritable(1);
        private Text word = new Text();

        @Override
        public void map(Object key,Text value,Context context) throws IOException, InterruptedException {
            String[] words = value.toString().split(" ");
            for (String str: words){
                // split(" ") yields "" for blank lines and for runs of spaces;
                // those are not words and must not be counted.
                if (str.isEmpty()) {
                    continue;
                }
                word.set(str);
                context.write(word,one);
            }
        }
    }

    /** Sums the counts of each word and emits {@code (word, total)}. */
    public static class WordCountReducer extends Reducer<Text,IntWritable,Text,IntWritable> {

        private final IntWritable result = new IntWritable();

        @Override
        public void reduce(Text key,Iterable<IntWritable> values,Context context) throws IOException, InterruptedException {
            int total=0;
            for (IntWritable val : values){
                // Add the value rather than counting records, so the result
                // stays correct if a combiner pre-aggregates the counts.
                total += val.get();
            }
            result.set(total);
            context.write(key, result);
        }
    }

    /**
     * Output format that writes every record as {@code "<key> <value>\n"} (UTF-8)
     * into {@code test.txt} under the job's configured output directory.
     *
     * <p>Notes:
     * <ol>
     *   <li>The class must be {@code static} so the framework can instantiate it.</li>
     *   <li>The type parameters must match the reducer's output types
     *       ({@code Text}, {@code IntWritable}).</li>
     *   <li>Every task writes the same file, so the job must run with a single
     *       reducer; {@link #main} enforces that.</li>
     * </ol>
     */
    public static class MyFileOutputFormat extends FileOutputFormat<Text,IntWritable>{
        @Override
        public RecordWriter<Text, IntWritable> getRecordWriter(TaskAttemptContext job)throws IOException, InterruptedException {

            // Derive the target from the configured output directory instead of
            // repeating it as a literal that can drift from main().
            Path outputFile = new Path(getOutputPath(job), "test.txt");

            // newInstance returns a private (uncached) handle, so this writer
            // owns it and is responsible for closing it.
            final FileSystem fileSystem = FileSystem.newInstance(outputFile.toUri(), job.getConfiguration());
            final FSDataOutputStream out;
            boolean opened = false;
            try {
                out = fileSystem.create(outputFile);
                opened = true;
            } finally {
                if (!opened) {
                    // Do not leak the file system handle when create() fails.
                    fileSystem.close();
                }
            }

            return new RecordWriter<Text, IntWritable>() {

                @Override
                public void close(TaskAttemptContext arg0) throws IOException,
                        InterruptedException {
                    try {
                        // close() also flushes any buffered records.
                        out.close();
                    } finally {
                        fileSystem.close();
                    }
                }

                @Override
                public void write(Text key, IntWritable value) throws IOException,
                        InterruptedException {
                    // One line per record: key, a space separator, value.
                    String line = key.toString() + " " + value.toString() + "\n";
                    out.write(line.getBytes("UTF-8"));
                }
            };
        }
    }

    /**
     * Configures and runs the job.
     *
     * @param args {@code args[0]} is the input path; optional {@code args[1]} is
     *             the output directory (defaults to {@code /huiqiang/output})
     * @throws Exception if the job cannot be set up or fails while running
     */
    public static void main (String[] args) throws Exception{
        if (args.length < 1) {
            System.err.println("Usage: WordCount <input path> [output path]");
            System.exit(2);
        }
        String outputDir = args.length > 1 ? args[1] : DEFAULT_OUTPUT_DIR;

        Configuration conf = new Configuration();

        // Job.getInstance replaces the deprecated Job constructor.
        Job job = Job.getInstance(conf, "word count");
        job.setJarByClass(WordCount.class);
        job.setMapperClass(WordCountMapper.class);
        job.setReducerClass(WordCountReducer.class);
        job.setOutputFormatClass(MyFileOutputFormat.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        // MyFileOutputFormat writes one fixed file name; more than one reducer
        // would make the tasks overwrite each other's output.
        job.setNumReduceTasks(1);

        FileInputFormat.setInputPaths(job, new Path(args[0]));
        MyFileOutputFormat.setOutputPath(job, new Path(outputDir));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}

参考：MapReduce重写FileInputFormat和FileOutputFormat

三、补充：Hadoop之HDFS的FileSystem接口

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

/**
 * Small demo of common HDFS operations through the Hadoop {@link FileSystem}
 * API: creating directories, uploading, creating, listing and deleting files.
 *
 * <p>Created 2021/9/15 18:06.
 *
 * @author huiq
 */
public class classOperatingFiles {

    static Configuration conf = new Configuration();
    static FileSystem hdfs;
    static {
        // Alternative to the explicit setting below: load the cluster's
        // core-site.xml, hdfs-site.xml and mapred-site.xml from a local Hadoop
        // configuration directory with conf.addResource(new Path(...)).
        conf.set("fs.defaultFS", "hdfs://bigdatanode01:8020/");
        try {
            hdfs =FileSystem.get(conf);
        } catch (IOException e) {
            // Fail class initialization with the real cause instead of leaving
            // hdfs null and hitting a NullPointerException in every method.
            throw new ExceptionInInitializerError(e);
        }
    }

    /**
     * Creates a directory, including any missing parents.
     *
     * @param dir path of the directory to create
     * @throws IOException if the file system call fails
     */
    public void createDir(String dir)throws IOException {
        Path path = new Path(dir);
        hdfs.mkdirs(path);
        System.out.println("newdir \t" + conf.get("fs.defaultFS") + dir);
    }


    /**
     * Uploads a local file and then prints the listing of the destination.
     *
     * @param localSrc path of the file on the local file system
     * @param hdfsDst  destination path on the target file system
     * @throws IOException if the copy or the listing fails
     */
    public void copyFile(String localSrc,String hdfsDst) throws IOException{
        Path src = new Path(localSrc);
        Path dst = new Path(hdfsDst);
        hdfs.copyFromLocalFile(src,dst);

        // List everything at the destination to show the upload result.
        FileStatus[] files = hdfs.listStatus(dst);
        System.out.println("Uploadto \t" + conf.get("fs.defaultFS") + hdfsDst);
        for (FileStatus file : files){
            System.out.println(file.getPath());
        }
    }

    /**
     * Creates a file containing the given text, encoded as UTF-8.
     *
     * @param fileName    path of the file to create
     * @param fileContent text to write into the file
     * @throws IOException if the file cannot be created or written
     */
    public void createFile(String fileName,String fileContent) throws IOException {
        Path dst = new Path(fileName);
        // Explicit charset: the no-arg getBytes() depends on the platform default.
        byte[] bytes = fileContent.getBytes("UTF-8");
        FSDataOutputStream output = hdfs.create(dst);
        try {
            output.write(bytes);
        } finally {
            // The stream must be closed, otherwise the data may never be
            // flushed and the file can be left empty or incomplete.
            output.close();
        }
        System.out.println("newfile \t" + conf.get("fs.defaultFS") + fileName);
    }


    /**
     * Prints the path of every entry directly under a directory.
     *
     * @param dirName path of the directory to list
     * @throws IOException if the listing fails
     */
    public void listFiles(String dirName)throws IOException {
        Path f = new Path(dirName);
        FileStatus[] status = hdfs.listStatus(f);
        System.out.println(dirName +" has all files:");
        for (FileStatus entry : status) {
            System.out.println(entry.getPath().toString());
        }
    }


    /**
     * Deletes a file or, recursively, a directory if it exists, and prints
     * the outcome.
     *
     * @param fileName path to delete
     * @throws IOException if the existence check or the delete fails
     */
    public void deleteFile(String fileName)throws IOException {
        Path f = new Path(fileName);
        boolean isExists = hdfs.exists(f);
        if (isExists) {
            // true = recursive, so non-empty directories are removed as well.
            boolean isDel = hdfs.delete(f,true);
            System.out.println(fileName+ "  delete? \t" + isDel);
        } else {
            System.out.println(fileName+ "  exist? \t" + isExists);
        }
    }

    /**
     * Demo entry point: creates {@code /huiq} and writes {@code /huiq/word.txt}.
     *
     * @param args unused
     * @throws IOException if any file system operation fails
     */
    public static void main(String[] args)throws IOException {
        classOperatingFiles ofs = new classOperatingFiles();
        System.out.println("\n=======createdir=======");
        String dir ="/huiq";
        ofs.createDir(dir);
        // Optional upload demo:
        //     System.out.println("\n=======copyfile=======");
        //     String src ="/home/ictclas/Configure.xml";
        //     ofs.copyFile(src, dir);
        System.out.println("\n=======createa file=======");
        String fileContent ="Hello, world! Just a test.";
        ofs.createFile(dir+"/word.txt",fileContent);
    }
}