// ---- RawKeyTextOutputFormat.java ----
// Package declaration is an assumption: it matches the companion
// TextExportMapper below, which undoes this format's delimiter rewrite.
package org.apache.sqoop.mapreduce;

import java.io.DataOutputStream;
import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.GzipCodec;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.ReflectionUtils;
/**
 * An {@link org.apache.hadoop.mapreduce.OutputFormat} that writes plain text
 * files. Only the key is written; no delimiter or newline is appended after
 * it. Text keys additionally have Hive's ^A (\001) field separator replaced
 * with "||" before being written; TextExportMapper collapses that back to a
 * single "|" on the export side.
 */
public class RawKeyTextOutputFormat<K, V> extends FileOutputFormat<K, V> {

  /**
   * RecordWriter to write to plain text files.
   */
  public static class RawKeyRecordWriter<K, V> extends RecordWriter<K, V> {

    private static final String UTF8 = "UTF-8";

    protected DataOutputStream out;

    public RawKeyRecordWriter(DataOutputStream out) {
      this.out = out;
    }

    /**
     * Write the object to the byte stream, handling Text as a special
     * case.
     * @param o the object to print
     * @throws IOException if the write throws, we pass it on
     */
    private void writeObject(Object o) throws IOException {
      if (o instanceof Text) {
        Text to = (Text) o;
        // Swap Hive's ^A (\001) field separator for "||" so downstream
        // consumers never see the control character.
        String s = to.toString();
        to.set(s.replaceAll("\001", "\\|\\|"));
        out.write(to.getBytes(), 0, to.getLength());
      } else {
        out.write(o.toString().getBytes(UTF8));
      }
    }

    @Override
    public synchronized void write(K key, V value) throws IOException {
      // Only the key is emitted; the value is intentionally ignored.
      writeObject(key);
    }

    @Override
    public synchronized void close(TaskAttemptContext context)
        throws IOException {
      out.close();
    }
  }
  @Override
  public RecordWriter<K, V> getRecordWriter(TaskAttemptContext context)
      throws IOException {
    boolean isCompressed = getCompressOutput(context);
    Configuration conf = context.getConfiguration();
    String ext = "";
    CompressionCodec codec = null;
    if (isCompressed) {
      // Create the named codec, defaulting to gzip.
      Class<? extends CompressionCodec> codecClass =
          getOutputCompressorClass(context, GzipCodec.class);
      codec = ReflectionUtils.newInstance(codecClass, conf);
      ext = codec.getDefaultExtension();
    }
    Path file = getDefaultWorkFile(context, ext);
    FileSystem fs = file.getFileSystem(conf);
    FSDataOutputStream fileOut = fs.create(file, false);
    DataOutputStream ostream = fileOut;
    if (isCompressed) {
      // Wrap the raw stream in the codec's compressing stream.
      ostream = new DataOutputStream(codec.createOutputStream(fileOut));
    }
    return new RawKeyRecordWriter<K, V>(ostream);
  }
}
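
// Usage sketch (an illustration, not part of the original source): how a job
// might select this output format and opt into gzip compression, which
// getRecordWriter() above honors. The class name and output path here are
// hypothetical.
class RawKeyTextOutputFormatUsage {
  static void configure(org.apache.hadoop.mapreduce.Job job) {
    job.setOutputFormatClass(RawKeyTextOutputFormat.class);
    FileOutputFormat.setOutputPath(job, new Path("/tmp/raw-keys")); // hypothetical path
    // Optional: compress output files; getRecordWriter() reads these settings.
    FileOutputFormat.setCompressOutput(job, true);
    FileOutputFormat.setOutputCompressorClass(job, GzipCodec.class);
  }
}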
// ---- TextExportMapper.java ----
package org.apache.sqoop.mapreduce;

import java.io.IOException;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.lib.input.CombineFileSplit;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;
import org.apache.hadoop.util.ReflectionUtils;

import com.cloudera.sqoop.lib.SqoopRecord;
import com.cloudera.sqoop.mapreduce.AutoProgressMapper;
/**
 * Converts an input record from its string representation to a parsed Sqoop
 * record and emits that DBWritable to the OutputFormat for writeback to the
 * database. Before parsing, any "||" produced by RawKeyTextOutputFormat is
 * collapsed back to a single "|" field delimiter.
 */
public class TextExportMapper
    extends AutoProgressMapper<LongWritable, Text, SqoopRecord, NullWritable> {

  public static final Log LOG =
      LogFactory.getLog(TextExportMapper.class.getName());

  private SqoopRecord recordImpl;

  public TextExportMapper() {
  }

  @Override
  protected void setup(Context context)
      throws IOException, InterruptedException {
    super.setup(context);

    Configuration conf = context.getConfiguration();

    // Instantiate a copy of the user's class to hold and parse the record.
    String recordClassName = conf.get(
        ExportJobBase.SQOOP_EXPORT_TABLE_CLASS_KEY);
    if (null == recordClassName) {
      throw new IOException("Export table class name ("
          + ExportJobBase.SQOOP_EXPORT_TABLE_CLASS_KEY
          + ") is not set!");
    }

    try {
      Class<?> cls = Class.forName(recordClassName, true,
          Thread.currentThread().getContextClassLoader());
      recordImpl = (SqoopRecord) ReflectionUtils.newInstance(cls, conf);
    } catch (ClassNotFoundException cnfe) {
      throw new IOException(cnfe);
    }

    if (null == recordImpl) {
      throw new IOException("Could not instantiate object of type "
          + recordClassName);
    }
  }
  @Override
  public void map(LongWritable key, Text val, Context context)
      throws IOException, InterruptedException {
    try {
      // Undo the writer-side substitution: collapse "||" to a single "|"
      // so the generated record class can split on its '|' delimiter.
      String s = val.toString().replaceAll("\\|\\|", "|");
      val.set(s);
      recordImpl.parse(val);
      context.write(recordImpl, NullWritable.get());
    } catch (Exception e) {
      // Something bad has happened: log as much context as possible.
      LOG.error("");
      LOG.error("Exception raised during data export");
      LOG.error("");
      LOG.error("Exception: ", e);
      LOG.error("On input: " + val);

      InputSplit is = context.getInputSplit();
      if (is instanceof FileSplit) {
        LOG.error("On input file: " + ((FileSplit) is).getPath());
      } else if (is instanceof CombineFileSplit) {
        LOG.error("On input file: "
            + context.getConfiguration().get("map.input.file"));
      }
      LOG.error("At position " + key);
      LOG.error("");
      LOG.error("Currently processing split:");
      LOG.error(is);
      LOG.error("");
      LOG.error("This issue might not necessarily be caused by current input");
      LOG.error("due to the batching nature of export.");
      LOG.error("");

      throw new IOException("Can't export data, please check task tracker logs",
          e);
    }
  }
}
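
// Round-trip sketch (an illustration, not part of the original source):
// RawKeyRecordWriter.writeObject() turns Hive's ^A separators into "||", and
// map() above collapses "||" to "|" before parsing, so the generated record
// class is expected to treat '|' as its field delimiter.
class DelimiterRoundTripSketch {
  public static void main(String[] args) {
    String hiveRow = "1\001alice\00130";                   // ^A-separated Hive row
    String written = hiveRow.replaceAll("\001", "\\|\\|"); // as emitted by the writer
    String parsed = written.replaceAll("\\|\\|", "|");     // as seen by recordImpl.parse()
    System.out.println(written); // 1||alice||30
    System.out.println(parsed);  // 1|alice|30
  }
}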