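自定义一个 OutputFormat,用于输出 <Text, MapWritable> 格式的数据。
其中 MapWritable 的内容为 [Text : LongWritable],即每个键是 Text,对应的值是 LongWritable。
每条记录输出为一行,格式为:url<TAB>url2:times2,url3:times3,...,(键与映射内容之间用制表符分隔;每个 "键:值" 后面都跟一个逗号,包括最后一个)。
实现参考了 Hadoop 的 TextOutputFormat,并在其基础上做了简化。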
/**
 * Output format that writes {@code <Text, MapWritable>} records as plain text,
 * one record per line.
 *
 * <p>Each line has the shape
 * {@code key<TAB>k1:v1,k2:v2,...,} followed by a newline: the record key, a tab,
 * then every map entry rendered as {@code entryKey:entryValue} and terminated
 * by a comma (including the last entry).
 *
 * <p>The map is expected to hold {@code Text} keys and {@code LongWritable}
 * values; any other entry type makes {@code write} fail with a
 * {@code ClassCastException}.
 */
public class TextAndMapWritableOutputFormat extends
        FileOutputFormat<Text, MapWritable> {

    /**
     * Creates the record writer for one task's output file.
     *
     * @param ignored  unused; the file system is resolved from the output path
     * @param job      job configuration used to resolve the task output path
     * @param name     name of the output file within the task output directory
     * @param progress progress callback handed to the file-creation call
     * @return a writer that emits records to the newly created file
     * @throws IOException if the output file cannot be created
     */
    @Override
    public RecordWriter<Text, MapWritable> getRecordWriter(FileSystem ignored,
            JobConf job, String name, Progressable progress) throws IOException {
        Path file = FileOutputFormat.getTaskOutputPath(job, name);
        FileSystem fs = file.getFileSystem(job);
        FSDataOutputStream fileOut = fs.create(file, progress);
        return new TextAndMapWritableRecordWriter(fileOut);
    }

    /**
     * Writes {@code <Text, MapWritable>} records to a stream as UTF-8 text.
     * {@code write} and {@code close} are synchronized on the writer instance.
     */
    protected static class TextAndMapWritableRecordWriter implements RecordWriter<Text, MapWritable> {
        private static final String utf8 = "UTF-8";
        private static final byte[] newline;
        private static final byte[] keyValueSeparator;
        private static final byte[] colon;
        private static final byte[] comma;

        // Encode the fixed delimiters once instead of on every record.
        static {
            try {
                newline = "\n".getBytes(utf8);
                keyValueSeparator = "\t".getBytes(utf8);
                colon = ":".getBytes(utf8);
                comma = ",".getBytes(utf8);
            } catch (UnsupportedEncodingException uee) {
                throw new IllegalArgumentException("can't find " + utf8
                        + " encoding");
            }
        }

        /** Destination stream; closed by {@link #close(Reporter)}. */
        protected DataOutputStream out;

        /**
         * @param out stream that receives the formatted records
         */
        public TextAndMapWritableRecordWriter(DataOutputStream out) {
            this.out = out;
        }

        /**
         * Writes one record as a single line:
         * {@code key<TAB>k1:v1,k2:v2,...,<NEWLINE>}.
         *
         * @param key   record key, written first
         * @param value map whose entries are written after the tab separator;
         *              keys are cast to {@code Text}, values to {@code LongWritable}
         * @throws IOException if writing to the underlying stream fails
         */
        @Override
        public synchronized void write(Text key, MapWritable value)
                throws IOException {
            out.write(key.getBytes(), 0, key.getLength());
            out.write(keyValueSeparator);
            // Single pass over the entries avoids a second lookup per key.
            for (java.util.Map.Entry<Writable, Writable> entry : value.entrySet()) {
                Text entryKey = (Text) entry.getKey();
                LongWritable count = (LongWritable) entry.getValue();
                // Text.getBytes() exposes the backing buffer, which may be longer
                // than the logical content; only the first getLength() bytes are
                // valid, so bound the write exactly as is done for the record key.
                out.write(entryKey.getBytes(), 0, entryKey.getLength());
                out.write(colon);
                out.write(count.toString().getBytes(utf8));
                // A comma follows every entry, including the last one.
                out.write(comma);
            }
            out.write(newline);
        }

        /**
         * Closes the underlying output stream.
         *
         * @param reporter unused
         * @throws IOException if closing the stream fails
         */
        @Override
        public synchronized void close(Reporter reporter) throws IOException {
            out.close();
        }
    }
}