MapReduce ---> Custom OutputFormat
This example plugs a custom OutputFormat into a MapReduce job to route each output record to one of two files: lines whose third space-separated field is at most 2 go to output_good, and all other lines go to output_bad.
Code
MyOutputFormat
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import java.io.IOException;

// Custom OutputFormat: only getRecordWriter needs to be overridden;
// everything else is inherited from FileOutputFormat.
public class MyOutputFormat extends FileOutputFormat<Text, NullWritable> {
    @Override
    public RecordWriter<Text, NullWritable> getRecordWriter(TaskAttemptContext taskAttemptContext) throws IOException, InterruptedException {
        // Hand each output record to the custom writer below.
        return new MyRecordWriter(taskAttemptContext);
    }
}
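Extending FileOutputFormat instead of implementing OutputFormat from scratch keeps all of the stock output handling: the framework still verifies that the driver's output path does not already exist and still writes the _SUCCESS marker there, so only the record writing itself changes.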
MyRecordWriter
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;

import java.io.IOException;

public class MyRecordWriter extends RecordWriter<Text, NullWritable> {
    // One output stream per target file.
    private FSDataOutputStream goodOut;
    private FSDataOutputStream badOut;

    public MyRecordWriter(TaskAttemptContext taskAttemptContext) throws IOException {
        // Open both target files up front; the paths are hard-coded for this local demo.
        FileSystem fileSystem = FileSystem.get(taskAttemptContext.getConfiguration());
        goodOut = fileSystem.create(new Path("D:\\OutputFormat\\output_good"));
        badOut = fileSystem.create(new Path("D:\\OutputFormat\\output_bad"));
    }

    @Override
    public void write(Text text, NullWritable nullWritable) throws IOException, InterruptedException {
        // Assumes every line has at least three space-separated fields
        // and that the third field parses as an integer.
        String[] words = text.toString().split(" ");
        if (Integer.parseInt(words[2]) <= 2) {
            // Third field <= 2: route the line to output_good.
            goodOut.write(text.toString().getBytes());
            goodOut.write("\n".getBytes());
        } else {
            // Anything larger goes to output_bad.
            badOut.write(text.toString().getBytes());
            badOut.write("\n".getBytes());
        }
    }

    @Override
    public void close(TaskAttemptContext taskAttemptContext) throws IOException, InterruptedException {
        // Release both streams when the task finishes.
        goodOut.close();
        badOut.close();
    }
}
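Note that write() assumes well-formed input: a blank or malformed line would fail the task with an ArrayIndexOutOfBoundsException or NumberFormatException. Hypothetical input lines (invented here for illustration; the original post does not show its data set) that this parser accepts:

item_a detail 1   → routed to output_good
item_b detail 5   → routed to output_bad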
Map
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

import java.io.IOException;

// Identity mapper: forwards each whole input line as the key, with a
// NullWritable value, so all routing logic stays in the RecordWriter.
public class MAP extends Mapper<LongWritable, Text, Text, NullWritable> {
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        context.write(value, NullWritable.get());
    }
}
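Because the driver below never registers a Reducer, Hadoop runs its default identity Reducer with a single reduce task, so every line passes unchanged from this mapper through the shuffle to MyRecordWriter. Calling job.setNumReduceTasks(0) would make the job map-only and skip the shuffle entirely, a common variant for pass-through jobs like this one.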
Driver
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class DRI {
    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance(new Configuration());
        job.setJarByClass(DRI.class);
        job.setMapperClass(MAP.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(NullWritable.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(NullWritable.class);
        // Plug in the custom OutputFormat in place of the default TextOutputFormat.
        job.setOutputFormatClass(MyOutputFormat.class);
        FileInputFormat.setInputPaths(job, "D:\\OutputFormat\\input");
        // The real records go to the paths opened in MyRecordWriter;
        // this directory receives only the _SUCCESS marker.
        FileOutputFormat.setOutputPath(job, new Path("D:\\OutputFormat\\output"));
        boolean b = job.waitForCompletion(true);
        System.exit(b ? 0 : 1);
    }
}
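The D:\ paths baked into MyRecordWriter only make sense for this local demo. As a minimal sketch, assuming you would rather resolve both files against the output directory configured in the driver (the file names good.txt and bad.txt are invented for illustration), the constructor could look like this:

// Hypothetical variant of the MyRecordWriter constructor: derive both
// target files from the directory set via FileOutputFormat.setOutputPath,
// instead of hard-coding D:\ paths.
public MyRecordWriter(TaskAttemptContext taskAttemptContext) throws IOException {
    FileSystem fileSystem = FileSystem.get(taskAttemptContext.getConfiguration());
    Path outDir = FileOutputFormat.getOutputPath(taskAttemptContext);
    goodOut = fileSystem.create(new Path(outDir, "good.txt")); // invented name
    badOut = fileSystem.create(new Path(outDir, "bad.txt"));   // invented name
}

This keeps every path in one place (the driver) and lets the same classes run unchanged against HDFS.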