1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
|
import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.RecordWriter;
import org.apache.hadoop.mapred.TextOutputFormat;
import org.apache.hadoop.mapred.lib.MultipleOutputFormat;
import org.apache.hadoop.util.Progressable;
/**
 * Splits reducer output into per-letter files: a key whose first character is
 * an ASCII letter goes to "&lt;letter&gt;.txt", every other key goes to "other.txt".
 *
 * <p>Register it on the job with
 * {@code conf.setOutputFormat(MultipleOutputFormatTest.class)}.
 */
public class MultipleOutputFormatTest extends MultipleOutputFormat<Text, IntWritable> {

    /**
     * Chooses the leaf file name for one key/value pair.
     *
     * <p>Bug fix: the original declared this with a {@code Configuration} third
     * parameter, which does not match the framework hook
     * {@code generateFileNameForKeyValue(K, V, String)} — so it never overrode
     * anything and the per-letter split never ran.
     *
     * @param key   record key; only its first character is inspected
     * @param value record value (not used in the naming decision)
     * @param name  default leaf name proposed by the framework (not used)
     * @return "a.txt".."z.txt" for keys starting with a letter, else "other.txt"
     */
    @Override
    protected String generateFileNameForKeyValue(Text key, IntWritable value, String name) {
        String s = key.toString();
        // Guard: an empty key would make charAt(0) throw StringIndexOutOfBoundsException.
        if (s.isEmpty()) {
            return "other.txt";
        }
        // Locale-independent lowercasing of the single routing character.
        char c = Character.toLowerCase(s.charAt(0));
        if (c >= 'a' && c <= 'z') {
            return c + ".txt";
        }
        return "other.txt";
    }

    /**
     * Creates the writer that actually produces each underlying file.
     *
     * <p>The original stub returned {@code null}, which crashes with an NPE on
     * the first record written. Delegating to the standard
     * {@link TextOutputFormat} emits the usual "key&lt;TAB&gt;value" text lines.
     *
     * @throws IOException if the underlying writer cannot be created
     */
    @Override
    protected RecordWriter<Text, IntWritable> getBaseRecordWriter(
            FileSystem fs, JobConf job, String name, Progressable progress)
            throws IOException {
        return new TextOutputFormat<Text, IntWritable>().getRecordWriter(fs, job, name, progress);
    }
}
|
教程中说只需重写 generateFileNameForKeyValue 方法就能实现按 key 分文件输出的效果；但在实践中，由于 MultipleOutputFormat 是抽象类，还必须实现它的另一个抽象方法 getBaseRecordWriter。
getBaseRecordWriter 负责创建真正写出每个文件的 RecordWriter，通常直接委托给 TextOutputFormat 即可。
最后通过 conf.setOutputFormat(MultipleOutputFormatTest.class) 把这个输出格式设置到作业配置上。
本文转自 拖鞋崽 51CTO博客,原文链接:http://blog.51cto.com/1992mrwang/1206459