Sqoop export with a multi-character delimiter
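
On export, Sqoop's text parser only accepts a single-character field delimiter, so HDFS data that uses a multi-character delimiter such as || cannot be exported as-is. The workaround below patches two Sqoop classes: RawKeyTextOutputFormat substitutes the two-character sequence || for Hive's default \001 (Ctrl-A) separator when writing text files, and TextExportMapper folds || back into a single | on export so that the generated record class can parse each line with a one-character delimiter.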

package org.apache.sqoop.mapreduce;

import java.io.DataOutputStream;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.GzipCodec;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.util.ReflectionUtils;

/**
 * An {@link OutputFormat} that writes plain text files.
 * Only writes the key. Does not write any delimiter/newline after the key.
 */
public class RawKeyTextOutputFormat<K, V> extends FileOutputFormat<K, V> {

  /**
   * RecordWriter to write to plain text files.
   */
  public static class RawKeyRecordWriter<K, V> extends RecordWriter<K, V> {

    private static final String UTF8 = "UTF-8";

    protected DataOutputStream out;

    public RawKeyRecordWriter(DataOutputStream out) {
      this.out = out;
    }

    /**
     * Write the object to the byte stream, handling Text as a special
     * case.
     * @param o the object to print
     * @throws IOException if the write throws, we pass it on
     */
    private void writeObject(Object o) throws IOException {
      if (o instanceof Text) {
        Text to = (Text) o;
        // Replace Hive's default \001 (Ctrl-A) field separator with the
        // two-character delimiter "||". In the replacement string,
        // "\\|\\|" escapes each pipe, so the result is a literal ||.
        String s = to.toString();
        to.set(s.replaceAll("\001", "\\|\\|"));
        out.write(to.getBytes(), 0, to.getLength());
      } else {
        out.write(o.toString().getBytes(UTF8));
      }
    }

    public synchronized void write(K key, V value) throws IOException {
      writeObject(key);
    }

    public synchronized void close(TaskAttemptContext context)
        throws IOException {
      out.close();
    }

  }

  public RecordWriter<K, V> getRecordWriter(TaskAttemptContext context)
      throws IOException {
    boolean isCompressed = getCompressOutput(context);
    Configuration conf = context.getConfiguration();
    String ext = "";
    CompressionCodec codec = null;

    if (isCompressed) {
      // create the named codec
      Class<? extends CompressionCodec> codecClass =
        getOutputCompressorClass(context, GzipCodec.class);
      codec = ReflectionUtils.newInstance(codecClass, conf);

      ext = codec.getDefaultExtension();
    }

    Path file = getDefaultWorkFile(context, ext);
    FileSystem fs = file.getFileSystem(conf);
    FSDataOutputStream fileOut = fs.create(file, false);
    DataOutputStream ostream = fileOut;

    if (isCompressed) {
      ostream = new DataOutputStream(codec.createOutputStream(fileOut));
    }

    return new RawKeyRecordWriter<K, V>(ostream);
  }

}
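
A minimal standalone sketch of the substitution writeObject performs (the DelimiterDemo class is hypothetical; it needs only the plain JDK):

public class DelimiterDemo {
  public static void main(String[] args) {
    // A record using Hive's default \001 (Ctrl-A) field separator.
    String record = "id\00142\001name";
    // In the replacement string, "\\|\\|" is \|\| after Java escaping;
    // Matcher treats \| as a literal pipe, so every \001 becomes ||.
    System.out.println(record.replaceAll("\001", "\\|\\|")); // id||42||name
  }
}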



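The second patch is TextExportMapper, which runs on the export side and folds the two-character delimiter back into a single | before handing the line to the generated class's parse method:
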
package org.apache.sqoop.mapreduce;

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.lib.input.CombineFileSplit;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;
import org.apache.hadoop.util.ReflectionUtils;
import com.cloudera.sqoop.lib.SqoopRecord;
import com.cloudera.sqoop.mapreduce.AutoProgressMapper;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

/**
 * Converts an input record from a string representation to a parsed Sqoop
 * record and emits that DBWritable to the OutputFormat for writeback to the
 * database.
 */
public class TextExportMapper
    extends AutoProgressMapper<LongWritable, Text, SqoopRecord, NullWritable> {

  public static final Log LOG =
    LogFactory.getLog(TextExportMapper.class.getName());

  private SqoopRecord recordImpl;

  public TextExportMapper() {
  }

  protected void setup(Context context)
      throws IOException, InterruptedException {
    super.setup(context);

    Configuration conf = context.getConfiguration();

    // Instantiate a copy of the user's class to hold and parse the record.
    String recordClassName = conf.get(
        ExportJobBase.SQOOP_EXPORT_TABLE_CLASS_KEY);
    if (null == recordClassName) {
      throw new IOException("Export table class name ("
          + ExportJobBase.SQOOP_EXPORT_TABLE_CLASS_KEY
          + ") is not set!");
    }

    try {
      Class<?> cls = Class.forName(recordClassName, true,
          Thread.currentThread().getContextClassLoader());
      recordImpl = (SqoopRecord) ReflectionUtils.newInstance(cls, conf);
    } catch (ClassNotFoundException cnfe) {
      throw new IOException(cnfe);
    }

    if (null == recordImpl) {
      throw new IOException("Could not instantiate object of type "
          + recordClassName);
    }
  }


  public void map(LongWritable key, Text val, Context context)
      throws IOException, InterruptedException {
    try {
      // Fold the two-character delimiter "||" back into a single '|' so
      // the generated record class can parse with a one-character
      // field delimiter.
      String s = val.toString().replaceAll("\\|\\|", "|");
      val.set(s);
      recordImpl.parse(val);
      context.write(recordImpl, NullWritable.get());
    } catch (Exception e) {
      // Something bad has happened
      LOG.error("");
      LOG.error("Exception raised during data export");
      LOG.error("");

      LOG.error("Exception: ", e);
      LOG.error("On input: " + val);

      InputSplit is = context.getInputSplit();
      if (is instanceof FileSplit) {
        LOG.error("On input file: " + ((FileSplit)is).getPath());
      } else if (is instanceof CombineFileSplit) {
        LOG.error("On input file: "
          + context.getConfiguration().get("map.input.file"));
      }
      LOG.error("At position " + key);

      LOG.error("");
      LOG.error("Currently processing split:");
      LOG.error(is);

      LOG.error("");
      LOG.error("This issue might not necessarily be caused by current input");
      LOG.error("due to the batching nature of export.");
      LOG.error("");

      throw new IOException("Can't export data, please check task tracker logs",
        e);
    }
  }
}
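
With the patched classes rebuilt into the Sqoop jar, the export is then invoked with the single-character delimiter that the mapper produces. A hedged invocation sketch (connection string, credentials, table name, and export directory below are placeholders):

sqoop export \
  --connect jdbc:mysql://db.example.com/mydb \
  --username user -P \
  --table my_table \
  --export-dir /user/hive/warehouse/my_table \
  --input-fields-terminated-by '|'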


