import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.io.IntWritable;
// Fixed: the key type must be Hadoop's Text; the original org.w3c.dom.Text
// import pulled in the unrelated XML DOM interface.
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.RecordWriter;
import org.apache.hadoop.mapred.TextOutputFormat;
import org.apache.hadoop.mapred.lib.MultipleOutputFormat;
import org.apache.hadoop.util.Progressable;
/**
 * Splits reducer output into one file per leading letter of the key: keys
 * starting with an ASCII letter go to "&lt;letter&gt;.txt", everything else to
 * "other.txt".
 *
 * <p>Uses the old {@code org.apache.hadoop.mapred} API. Activate with
 * {@code jobConf.setOutputFormat(MultipleOutputFormatTest.class)}.
 */
public class MultipleOutputFormatTest extends MultipleOutputFormat<Text, IntWritable> {

    /** Writes the actual records; the framework asks for one writer per generated file name. */
    private final TextOutputFormat<Text, IntWritable> delegate =
            new TextOutputFormat<Text, IntWritable>();

    /**
     * Maps a record to the leaf file it should be written to.
     *
     * <p>Bug fix: the framework hook's third parameter is the default leaf
     * file name ({@code String}), not a {@code Configuration}. The original
     * version merely overloaded instead of overriding, so it was never
     * invoked and no per-letter splitting happened.
     *
     * @param key   record key; its first character selects the output file
     * @param value record value (not used for routing)
     * @param name  default leaf file name supplied by the framework (unused)
     * @return "&lt;letter&gt;.txt" when the key starts with an ASCII letter,
     *         otherwise "other.txt" (also used for empty keys)
     */
    @Override
    protected String generateFileNameForKeyValue(Text key, IntWritable value, String name) {
        String keyText = key.toString();
        if (keyText.isEmpty()) {
            return "other.txt"; // guard: charAt(0) on an empty key would throw
        }
        // Locale.ROOT keeps the lower-casing deterministic regardless of the JVM's default locale.
        char first = keyText.toLowerCase(java.util.Locale.ROOT).charAt(0);
        if (first >= 'a' && first <= 'z') {
            return first + ".txt";
        }
        return "other.txt";
    }

    /**
     * Supplies the writer for one concrete output file.
     *
     * <p>Bug fix: the original stub returned {@code null}, which would throw a
     * {@link NullPointerException} on the first record written. Delegating to
     * a plain {@link TextOutputFormat} produces standard "key\tvalue" lines.
     *
     * @throws IOException if the underlying writer cannot be created
     */
    @Override
    protected RecordWriter<Text, IntWritable> getBaseRecordWriter(
            FileSystem fs, JobConf job, String name, Progressable progress)
            throws IOException {
        return delegate.getRecordWriter(fs, job, name, progress);
    }
}
教程中通常只需要重写 generateFileNameForKeyValue 就能达到按 key 分文件输出的效果；
但在实践中还必须重写 getBaseRecordWriter：
它负责为每个生成的文件名创建真正写记录的 RecordWriter，
默认实现若返回 null，写入第一条记录时就会抛出空指针异常。
jobConf.setOutputFormat(MultipleOutputFormatTest.class); // 通过这个设定作业的输出格式
本文出自 “某人说我技术宅” 博客,请务必保留此出处http://1992mrwang.blog.51cto.com/3265935/1206459