自定义一个OutputFormat,用于输出<Text, MapWritable>格式的数据
MapWritable的内容是 [Text: LongWritable]
输出格式:每条记录占一行,形如 [url<TAB>url2:times2,url3:times3,...,],即键与值列表之间以制表符分隔,每个 url:times 条目之后都跟一个逗号
参考TextOutputFormat,修改简化之
- public class TextAndMapWritableOutputFormat extends
- FileOutputFormat<Text, MapWritable> {
- @Override
- public RecordWriter<Text, MapWritable> getRecordWriter(FileSystem ignored,
- JobConf job, String name, Progressable progress) throws IOException {
- Path file = FileOutputFormat.getTaskOutputPath(job, name);
- FileSystem fs = file.getFileSystem(job);
- FSDataOutputStream fileOut = fs.create(file, progress);
- return new TextAndMapWritableRecordWriter(fileOut);
- }
- protected static class TextAndMapWritableRecordWriter implements RecordWriter<Text, MapWritable> {
- private static final String utf8 = "UTF-8";
- private static final byte[] newline;
- private static final byte[] keyValueSeparator;
- private static final byte[] colon;
- private static final byte[] comma;
- static {
- try {
- newline = "\n".getBytes(utf8);
- keyValueSeparator = "\t".getBytes(utf8);
- colon = ":".getBytes(utf8);
- comma = ",".getBytes(utf8);
- } catch (UnsupportedEncodingException uee) {
- throw new IllegalArgumentException("can't find " + utf8
- + " encoding");
- }
- }
- protected DataOutputStream out;
- public TextAndMapWritableRecordWriter(DataOutputStream out) {
- this.out = out;
- }
- @Override
- public synchronized void write(Text key, MapWritable value)
- throws IOException {
- out.write(key.getBytes(), 0, key.getLength());
- out.write(keyValueSeparator);
- Iterator<Writable> it = value.keySet().iterator();
- while (it.hasNext()) {
- Writable k = it.next();
- LongWritable v = (LongWritable) value.get(k);
- out.write(((Text) k).getBytes());
- out.write(colon);
- out.write(v.toString().getBytes(utf8));
- out.write(comma);
- }
- out.write(newline);
- }
- @Override
- public synchronized void close(Reporter reporter) throws IOException {
- out.close();
- }
- }
- }