MapReduce 多输出（multiple outputs）

最新推荐文章于 2020-08-08 21:54:02 发布

tmac6438

最新推荐文章于 2020-08-08 21:54:02 发布

阅读量421

点赞数

本文链接：https://blog.csdn.net/tmac6438/article/details/16985993

版权

import java.io.DataOutputStream;
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;

/**
 * Record writer that emits one line of UTF-8 text per record to a wrapped stream.
 *
 * <p>Only the value is written. The key and the key/value separator are accepted
 * for interface compatibility but are not emitted by {@link #write(Object, Object)}.
 *
 * @param <K> key type (not written to the output)
 * @param <V> value type; {@code Text} values are written as raw bytes, any other
 *            type is written as the UTF-8 bytes of its {@code toString()}
 */
public class LineRecordWriter<K, V> extends RecordWriter<K, V> {
    private static final String utf8 = "UTF-8";
    private static final byte[] newline;
    static {
        try {
            newline = "\n".getBytes(utf8);
        } catch (UnsupportedEncodingException uee) {
            // Keep the original exception as the cause so the failure stays diagnosable.
            throw new IllegalArgumentException("can't find " + utf8 + " encoding", uee);
        }
    }

    /** Destination stream; closed by {@link #close(TaskAttemptContext)}. */
    protected DataOutputStream out;

    /** Encoded separator. Retained from the constructor but not written by this class. */
    private final byte[] keyValueSeparator;

    /**
     * Creates a writer over the given stream.
     *
     * @param out               stream that receives the records
     * @param keyValueSeparator separator text, encoded as UTF-8
     * @throws IllegalArgumentException if the UTF-8 encoding is unavailable
     */
    public LineRecordWriter(DataOutputStream out, String keyValueSeparator) {
        this.out = out;
        try {
            this.keyValueSeparator = keyValueSeparator.getBytes(utf8);
        } catch (UnsupportedEncodingException uee) {
            throw new IllegalArgumentException("can't find " + utf8 + " encoding", uee);
        }
    }

    /**
     * Creates a writer over the given stream using a tab as the separator.
     *
     * @param out stream that receives the records
     */
    public LineRecordWriter(DataOutputStream out) {
        this(out, "\t");
    }

    /**
     * Writes a single object to the stream.
     *
     * @param o object to write; must not be {@code null}
     * @throws IOException if the underlying stream fails
     */
    private void writeObject(Object o) throws IOException {
        if (o instanceof Text) {
            // Text exposes its backing array; only the first getLength() bytes are valid.
            Text to = (Text) o;
            out.write(to.getBytes(), 0, to.getLength());
        } else {
            out.write(o.toString().getBytes(utf8));
        }
    }

    /**
     * Writes the value followed by a newline.
     *
     * <p>Nothing is written when both key and value are {@code null} or
     * {@code NullWritable}. When only the value is absent, a bare newline is written.
     *
     * @param key   record key; only inspected for null-ness, never written
     * @param value record value
     * @throws IOException if the underlying stream fails
     */
    public synchronized void write(K key, V value) throws IOException {
        boolean nullKey = key == null || key instanceof NullWritable;
        boolean nullValue = value == null || value instanceof NullWritable;
        if (nullKey && nullValue) {
            return;
        }
        // The key and the separator are deliberately not emitted: each output line
        // carries the value only.
        if (!nullValue) {
            writeObject(value);
        }
        out.write(newline);
    }

    /**
     * Closes the underlying stream.
     *
     * @param context task context (unused)
     * @throws IOException if closing the stream fails
     */
    public synchronized void close(TaskAttemptContext context) throws IOException {
        out.close();
    }

}

import java.io.DataOutputStream;
import java.io.IOException;
import java.util.HashMap;
import java.util.Iterator;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.GzipCodec;
import org.apache.hadoop.mapred.FileAlreadyExistsException;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.OutputCommitter;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.ReflectionUtils;
/**
 * File output format that routes each record to a file chosen per key/value pair.
 *
 * <p>Subclasses supply the relative file name through
 * {@link #generateFileNameForKeyValue(WritableComparable, Writable, TaskAttemptContext)}.
 * One {@code LineRecordWriter} is opened per distinct file name and reused for all
 * later records that map to the same name.
 *
 * @param <K> key type
 * @param <V> value type
 */
public abstract class MultipleOutputFormat<K extends WritableComparable, V extends Writable>
        extends FileOutputFormat<K, V> {

    /** Lazily created writer, shared by every call to getRecordWriter on this instance. */
    private MultiRecordWriter writer = null;

    /**
     * Returns the multiplexing writer, creating it on first use.
     *
     * @param job task attempt context
     * @return the shared {@link MultiRecordWriter}
     * @throws IOException if the task output path cannot be determined
     */
    public RecordWriter<K, V> getRecordWriter(TaskAttemptContext job) throws IOException,
            InterruptedException {
        if (writer == null) {
            writer = new MultiRecordWriter(job, getTaskOutputPath(job));
        }
        return writer;
    }

    /**
     * Determines the directory for task output: the committer's work path when the
     * committer is a {@code FileOutputCommitter}, otherwise the job output path.
     *
     * @param conf task attempt context
     * @return the directory for task output
     * @throws IOException if no output path is defined
     */
    private Path getTaskOutputPath(TaskAttemptContext conf) throws IOException {
        Path workPath = null;
        OutputCommitter committer = super.getOutputCommitter(conf);
        if (committer instanceof FileOutputCommitter) {
            workPath = ((FileOutputCommitter) committer).getWorkPath();
        } else {
            Path outputPath = super.getOutputPath(conf);
            if (outputPath == null) {
                throw new IOException("Undefined job output-path");
            }
            workPath = outputPath;
        }
        return workPath;
    }

    /**
     * Computes the output file name, relative to the output directory, for a record.
     *
     * @param key   record key
     * @param value record value
     * @param job   task attempt context
     * @return relative file name; records with equal names share one writer
     */
    protected abstract String generateFileNameForKeyValue(K key, V value, TaskAttemptContext job);

    /** Writer that fans records out to one underlying writer per generated file name. */
    public class MultiRecordWriter extends RecordWriter<K, V> {

        /** Open writers keyed by the generated file name. */
        private HashMap<String, RecordWriter<K, V>> recordWriters = null;
        private TaskAttemptContext job = null;

        /** Fallback output directory, used only when "mapred.output.dir" is not set. */
        private Path workPath = null;

        /**
         * @param job      task attempt context used to open underlying writers
         * @param workPath fallback directory for output files
         */
        public MultiRecordWriter(TaskAttemptContext job, Path workPath) {
            super();
            this.job = job;
            this.workPath = workPath;
            recordWriters = new HashMap<String, RecordWriter<K, V>>();
        }

        /**
         * Closes every underlying writer.
         *
         * <p>All writers are attempted even if one fails, so a single bad stream does
         * not leave the others open. The first {@code IOException} is rethrown after
         * the remaining writers have been closed.
         *
         * @param context task attempt context passed to each underlying writer
         * @throws IOException          the first failure seen while closing
         * @throws InterruptedException if an underlying writer is interrupted
         */
        @Override
        public void close(TaskAttemptContext context) throws IOException, InterruptedException {
            IOException firstFailure = null;
            for (RecordWriter<K, V> open : this.recordWriters.values()) {
                try {
                    open.close(context);
                } catch (IOException e) {
                    if (firstFailure == null) {
                        firstFailure = e;
                    }
                }
            }
            this.recordWriters.clear();
            if (firstFailure != null) {
                throw firstFailure;
            }
        }

        /**
         * Writes the record to the file selected by the enclosing format, opening the
         * underlying writer the first time a file name is seen.
         *
         * @param key   record key
         * @param value record value
         * @throws IOException if the target file cannot be opened or written
         */
        @Override
        public void write(K key, V value) throws IOException, InterruptedException {
            String baseName = generateFileNameForKeyValue(key, value, job);
            RecordWriter<K, V> rw = this.recordWriters.get(baseName);
            if (rw == null) {
                rw = getBaseRecordWriter(job, baseName);
                this.recordWriters.put(baseName, rw);
            }
            rw.write(key, value);
        }

        /**
         * Resolves the directory that output files are created in.
         *
         * <p>The "mapred.output.dir" setting wins when present, so files are created
         * directly under that directory. When it is unset, the work path supplied to
         * the constructor is used instead.
         * NOTE(review): writing directly under "mapred.output.dir" does not go through
         * the committer work path computed by the enclosing class; confirm this is
         * intended for the jobs using this format.
         *
         * @param conf job configuration
         * @return directory for output files
         * @throws IOException if neither source provides a directory
         */
        private Path resolveOutputDir(Configuration conf) throws IOException {
            String configured = conf.get("mapred.output.dir");
            if (configured != null) {
                return new Path(configured);
            }
            if (workPath != null) {
                return workPath;
            }
            throw new IOException("Undefined job output-path");
        }

        /**
         * Opens a line writer for the given file name, compressed when the job asks
         * for compressed output (Gzip is the default codec).
         *
         * @param job      task attempt context
         * @param baseName file name relative to the output directory
         * @return a new writer over the created file; an existing file is overwritten
         * @throws IOException if the file or the compression stream cannot be created
         */
        private RecordWriter<K, V> getBaseRecordWriter(TaskAttemptContext job, String baseName)
                throws IOException, InterruptedException {
            Configuration conf = job.getConfiguration();
            String keyValueSeparator = "***";
            // e.g. ods_aos_mps/2013/09/01/part-r-000
            String fileName = resolveOutputDir(conf) + "/" + baseName;

            if (!getCompressOutput(job)) {
                Path file = new Path(fileName);
                FSDataOutputStream fileOut = file.getFileSystem(conf).create(file, true);
                return new LineRecordWriter<K, V>(fileOut, keyValueSeparator);
            }

            Class<? extends CompressionCodec> codecClass = getOutputCompressorClass(job,
                    GzipCodec.class);
            CompressionCodec codec = ReflectionUtils.newInstance(codecClass, conf);
            Path file = new Path(fileName + codec.getDefaultExtension());
            FSDataOutputStream fileOut = file.getFileSystem(conf).create(file, true);
            DataOutputStream compressedOut;
            try {
                compressedOut = new DataOutputStream(codec.createOutputStream(fileOut));
            } catch (IOException e) {
                // Do not leak the raw file stream when the codec wrapper cannot be built.
                try {
                    fileOut.close();
                } catch (IOException ignored) {
                    // The original failure is the one worth reporting.
                }
                throw e;
            }
            return new LineRecordWriter<K, V>(compressedOut, keyValueSeparator);
        }
    }

    /**
     * Performs no output validation.
     * NOTE(review): presumably left empty so jobs may write into an output directory
     * that already exists; confirm against the jobs using this format.
     */
    @Override
    public void checkOutputSpecs(JobContext job)
           throws FileAlreadyExistsException, IOException {
       //
    }
}

import java.text.NumberFormat;

import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.TaskID;

/**
 * Output format that names each file from the record key, the "customTime" job
 * setting, and a part suffix.
 *
 * <p>Names have the form {@code <key><customTime>part-<m|r>-<00000>}. Directory
 * separators, if any are wanted, must come from the key or from "customTime".
 */
public class MyMutiFile extends MultipleOutputFormat<Text, Text>{
   /** Formats the task id as at least five digits with no grouping separators. */
   private static final NumberFormat NUMBER_FORMAT = NumberFormat.getInstance();
      static {
        NUMBER_FORMAT.setMinimumIntegerDigits(5);
        NUMBER_FORMAT.setGroupingUsed(false);
      }

   /** Not used by this class; kept, with its accessors, for existing callers. */
   public static String FILENAME ="";
   public static String getFILENAME() {
       return FILENAME;
   }
   public static void setFILENAME(String fILENAME) {
       FILENAME = fILENAME;
   }

   /**
    * Builds the relative output file name for a record.
    *
    * @param key   record key; its text is the leading part of the file name
    * @param value record value (unused)
    * @param job   task attempt context providing the task id and the "customTime" setting
    * @return the relative file name for this record
    */
   @Override
   protected String generateFileNameForKeyValue(Text key, Text value,
           TaskAttemptContext job) {
       TaskID taskId = job.getTaskAttemptID().getTaskID();
       int partition = taskId.getId();
       // Default to "" so an unset "customTime" does not put the literal text
       // "null" into the file name.
       String customTime = job.getConfiguration().get("customTime", "");

       StringBuilder result = new StringBuilder();
       result.append(key.toString());
       result.append(customTime);
       result.append("part");
       result.append('-');
       result.append(taskId.isMap() ? 'm' : 'r');
       result.append('-');
       result.append(NUMBER_FORMAT.format(partition));
       return result.toString();
   }

}

tmac6438

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
MapReduce 多输出（multiple outputs）

package com.autonavi.mutifile.custom;import java.io.DataOutputStream; import java.io.IOException; import java.io.UnsupportedEncodingException; import org.apache.hadoop.io.NullWritable; imp
复制链接

扫一扫