// ---- RawKeyTextOutputFormat.java ----
// Package declaration is an assumption: it matches the companion
// TextExportMapper below, which undoes this format's delimiter rewrite.
package org.apache.sqoop.mapreduce;

import java.io.DataOutputStream;
import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.GzipCodec;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.ReflectionUtils;
/**
 * An {@link org.apache.hadoop.mapreduce.OutputFormat} that writes plain text
 * files. Only the key is written; no delimiter or newline is appended after
 * it. Text keys additionally have Hive's ^A (\001) field separator replaced
 * with "||" before being written; TextExportMapper collapses that back to a
 * single "|" on the export side.
 */
public class RawKeyTextOutputFormat<K, V> extends FileOutputFormat<K, V> {

  /**
   * RecordWriter to write to plain text files.
   */
  public static class RawKeyRecordWriter<K, V> extends RecordWriter<K, V> {

    private static final String UTF8 = "UTF-8";

    protected DataOutputStream out;

    public RawKeyRecordWriter(DataOutputStream out) {
      this.out = out;
    }

    /**
     * Write the object to the byte stream, handling Text as a special
     * case.
     * @param o the object to print
     * @throws IOException if the write throws, we pass it on
     */
    private void writeObject(Object o) throws IOException {
      if (o instanceof Text) {
        Text to = (Text) o;
        // Swap Hive's ^A (\001) field separator for "||" so downstream
        // consumers never see the control character.
        String s = to.toString();
        to.set(s.replaceAll("\001", "\\|\\|"));
        out.write(to.getBytes(), 0, to.getLength());
      } else {
        out.write(o.toString().getBytes(UTF8));
      }
    }

    @Override
    public synchronized void write(K key, V value) throws IOException {
      // Only the key is emitted; the value is intentionally ignored.
      writeObject(key);
    }

    @Override
    public synchronized void close(TaskAttemptContext context)
        throws IOException {
      out.close();
    }
  }
  @Override
  public RecordWriter<K, V> getRecordWriter(TaskAttemptContext context)
      throws IOException {
    boolean isCompressed = getCompressOutput(context);
    Configuration conf = context.getConfiguration();
    String ext = "";
    CompressionCodec codec = null;
    if (isCompressed) {
      // Create the named codec, defaulting to gzip.
      Class<? extends CompressionCodec> codecClass =
          getOutputCompressorClass(context, GzipCodec.class);
      codec = ReflectionUtils.newInstance(codecClass, conf);
      ext = codec.getDefaultExtension();
    }
    Path file = getDefaultWorkFile(context, ext);
    FileSystem fs = file.getFileSystem(conf);
    FSDataOutputStream fileOut = fs.create(file, false);
    DataOutputStream ostream = fileOut;
    if (isCompressed) {
      // Wrap the raw stream in the codec's compressing stream.
      ostream = new DataOutputStream(codec.createOutputStream(fileOut));
    }
    return new RawKeyRecordWriter<K, V>(ostream);
  }
}
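
// Usage sketch (an illustration, not part of the original source): how a job
// might select this output format and opt into gzip compression, which
// getRecordWriter() above honors. The class name and output path here are
// hypothetical.
class RawKeyTextOutputFormatUsage {
  static void configure(org.apache.hadoop.mapreduce.Job job) {
    job.setOutputFormatClass(RawKeyTextOutputFormat.class);
    FileOutputFormat.setOutputPath(job, new Path("/tmp/raw-keys")); // hypothetical path
    // Optional: compress output files; getRecordWriter() reads these settings.
    FileOutputFormat.setCompressOutput(job, true);
    FileOutputFormat.setOutputCompressorClass(job, GzipCodec.class);
  }
}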
// ---- TextExportMapper.java ----
package org.apache.sqoop.mapreduce;

import java.io.IOException;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.lib.input.CombineFileSplit;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;
import org.apache.hadoop.util.ReflectionUtils;

import com.cloudera.sqoop.lib.SqoopRecord;
import com.cloudera.sqoop.mapreduce.AutoProgressMapper;
/**
 * Converts an input record from its string representation to a parsed Sqoop
 * record and emits that DBWritable to the OutputFormat for writeback to the
 * database. Before parsing, any "||" produced by RawKeyTextOutputFormat is
 * collapsed back to a single "|" field delimiter.
 */
public class TextExportMapper
    extends AutoProgressMapper<LongWritable, Text, SqoopRecord, NullWritable> {

  public static final Log LOG =
      LogFactory.getLog(TextExportMapper.class.getName());

  private SqoopRecord recordImpl;

  public TextExportMapper() {
  }

  @Override
  protected void setup(Context context)
      throws IOException, InterruptedException {
    super.setup(context);

    Configuration conf = context.getConfiguration();

    // Instantiate a copy of the user's class to hold and parse the record.
    String recordClassName = conf.get(
        ExportJobBase.SQOOP_EXPORT_TABLE_CLASS_KEY);
    if (null == recordClassName) {
      throw new IOException("Export table class name ("
          + ExportJobBase.SQOOP_EXPORT_TABLE_CLASS_KEY
          + ") is not set!");
    }

    try {
      Class<?> cls = Class.forName(recordClassName, true,
          Thread.currentThread().getContextClassLoader());
      recordImpl = (SqoopRecord) ReflectionUtils.newInstance(cls, conf);
    } catch (ClassNotFoundException cnfe) {
      throw new IOException(cnfe);
    }

    if (null == recordImpl) {
      throw new IOException("Could not instantiate object of type "
          + recordClassName);
    }
  }
  @Override
  public void map(LongWritable key, Text val, Context context)
      throws IOException, InterruptedException {
    try {
      // Undo the writer-side substitution: collapse "||" to a single "|"
      // so the generated record class can split on its '|' delimiter.
      String s = val.toString().replaceAll("\\|\\|", "|");
      val.set(s);
      recordImpl.parse(val);
      context.write(recordImpl, NullWritable.get());
    } catch (Exception e) {
      // Something bad has happened: log as much context as possible.
      LOG.error("");
      LOG.error("Exception raised during data export");
      LOG.error("");
      LOG.error("Exception: ", e);
      LOG.error("On input: " + val);

      InputSplit is = context.getInputSplit();
      if (is instanceof FileSplit) {
        LOG.error("On input file: " + ((FileSplit) is).getPath());
      } else if (is instanceof CombineFileSplit) {
        LOG.error("On input file: "
            + context.getConfiguration().get("map.input.file"));
      }
      LOG.error("At position " + key);
      LOG.error("");
      LOG.error("Currently processing split:");
      LOG.error(is);
      LOG.error("");
      LOG.error("This issue might not necessarily be caused by current input");
      LOG.error("due to the batching nature of export.");
      LOG.error("");

      throw new IOException("Can't export data, please check task tracker logs",
          e);
    }
  }
}
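
// Round-trip sketch (an illustration, not part of the original source):
// RawKeyRecordWriter.writeObject() turns Hive's ^A separators into "||", and
// map() above collapses "||" to "|" before parsing, so the generated record
// class is expected to treat '|' as its field delimiter.
class DelimiterRoundTripSketch {
  public static void main(String[] args) {
    String hiveRow = "1\001alice\00130";                   // ^A-separated Hive row
    String written = hiveRow.replaceAll("\001", "\\|\\|"); // as emitted by the writer
    String parsed = written.replaceAll("\\|\\|", "|");     // as seen by recordImpl.parse()
    System.out.println(written); // 1||alice||30
    System.out.println(parsed);  // 1|alice|30
  }
}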