3.驱动类中job.setOutputFormatClass(MyOutputFormat.class);
注意:
即使自定义了 OutputFormat,在 Driver 中也仍然要调用 FileOutputFormat.setOutputPath()
指定输出路径,因为 MapReduce 框架本身还要在该路径下输出一些额外的文件(如 _SUCCESS 标记文件和 .crc 校验文件)
相关代码(以WordCount为例)
MyOutputFormat.java
package MapReduceCustomOutputFormat;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import java.io.IOException;
/**
 * Custom {@link FileOutputFormat} for the word-count example.
 *
 * <p>Its only job is to hand each task a {@link MyRecordWriter}, which routes
 * records to custom output files instead of the default part-r-xxxxx files.
 */
public class MyOutputFormat extends FileOutputFormat<Text, IntWritable> {

    /**
     * Returns the per-task writer that performs the actual record routing.
     *
     * @param job the task attempt context, forwarded to the writer so it can
     *            obtain the job {@link org.apache.hadoop.conf.Configuration}
     * @return a new {@link MyRecordWriter} bound to this task
     */
    @Override
    public RecordWriter<Text, IntWritable> getRecordWriter(TaskAttemptContext job)
            throws IOException, InterruptedException {
        MyRecordWriter writer = new MyRecordWriter(job);
        return writer;
    }
}
MyRecordWriter.java
package MapReduceCustomOutputFormat;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
public class MyRecordWriter extends RecordWriter<Text, IntWritable> {
FSDataOutputStream aout;
FSDataOutputStream bout;
public MyRecordWriter(TaskAttemptContext job) throws IOException {
Configuration conf = job.getConfiguration();
FileSystem fs = FileSystem.get(conf);
aout = fs.create(new Path("F:\\Codes\\JavaCodes\\MapReduceLearning\\testdata\\aout.txt"));
bout = fs.create(new Path("F:\\Codes\\JavaCodes\\MapReduceLearning\\testdata\\bout.txt"));
}
@Override
public void write(Text key, IntWritable value) throws IOException {
if(key.toString().contains("guapi") || key.toString().contains("chengzi")){
aout.write((key.toString() + "\t" + value.get() + "\n").getBytes());
}
else{
bout.write((key.toString() + "\t" + value.get() + "\n").getBytes());
}
}
@Override
public void close(TaskAttemptContext context) throws IOException {
aout.close();
bout.close();
}
}