// ===== OutputFormat class (MyOutputFormat) =====
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import java.io.IOException;
/**
 * Custom output format that splits review records into two files:
 * one for good reviews and one for neutral/bad reviews. The actual
 * routing decision is made by {@link MyRW}.
 */
public class MyOutputFormat extends FileOutputFormat<Text, NullWritable> {
    /**
     * Opens the two destination streams and wraps them in a {@link MyRW}.
     *
     * @param context task attempt context supplying the job configuration
     * @return a record writer that routes each record to one of two files
     * @throws IOException if the filesystem or output streams cannot be opened
     */
    @Override
    public RecordWriter<Text, NullWritable> getRecordWriter(TaskAttemptContext context) throws IOException, InterruptedException {
        Configuration conf = context.getConfiguration();
        // Derive the destination files from the output directory configured in
        // the driver via setOutputPath(), instead of the unfilled placeholder
        // paths the original code shipped with.
        Path outputDir = FileOutputFormat.getOutputPath(context);
        // Obtain the filesystem that actually owns the output path (works for
        // both local and HDFS paths), rather than the default FileSystem.
        FileSystem fileSystem = outputDir.getFileSystem(conf);
        // NOTE(review): fixed file names collide if the job runs more than one
        // writing task; fine for this single-task tutorial job — confirm before
        // scaling out.
        FSDataOutputStream goodStream = fileSystem.create(new Path(outputDir, "good.txt"));
        FSDataOutputStream badStream = fileSystem.create(new Path(outputDir, "bad.txt"));
        return new MyRW(goodStream, badStream);
    }
}
// ===== RecordWriter class (MyRW) =====
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import java.io.IOException;
/**
 * Record writer that routes each tab-separated line to one of two output
 * streams based on field index 9: "0" means a good review, anything else
 * (including malformed lines) goes to the bad/neutral stream.
 */
public class MyRW extends RecordWriter<Text, NullWritable> {
    // Stream for good reviews (field 9 == "0").
    private FSDataOutputStream fsDataOutputStreamGood = null;
    // Stream for neutral/bad reviews and malformed lines.
    private FSDataOutputStream fsDataOutputStreamBad = null;

    /**
     * Writes one record to the appropriate stream, terminated by CRLF.
     *
     * @param text         the full tab-separated input line
     * @param nullWritable unused placeholder value
     * @throws IOException if the underlying stream write fails
     */
    @Override
    public void write(Text text, NullWritable nullWritable) throws IOException, InterruptedException {
        String line = text.toString();
        String[] split = line.split("\t");
        // Guard against lines with fewer than 10 fields — the original code
        // threw ArrayIndexOutOfBoundsException here; such lines are routed to
        // the "bad" stream instead of failing the task.
        boolean isGood = split.length > 9 && split[9].equals("0");
        FSDataOutputStream out = isGood ? fsDataOutputStreamGood : fsDataOutputStreamBad;
        // Use an explicit charset: the no-arg getBytes() depends on the
        // platform default encoding and is not portable.
        out.write((line + "\r\n").getBytes("UTF-8"));
    }

    /**
     * Closes both output streams; each is null-checked because the no-arg
     * constructor leaves them unset.
     */
    @Override
    public void close(TaskAttemptContext taskAttemptContext) throws IOException, InterruptedException {
        if (fsDataOutputStreamGood != null) {
            fsDataOutputStreamGood.close();
        }
        if (fsDataOutputStreamBad != null) {
            fsDataOutputStreamBad.close();
        }
    }

    /**
     * @param fsDataOutputStreamGood destination for good reviews
     * @param fsDataOutputStreamBad  destination for neutral/bad reviews
     */
    public MyRW(FSDataOutputStream fsDataOutputStreamGood, FSDataOutputStream fsDataOutputStreamBad) {
        this.fsDataOutputStreamGood = fsDataOutputStreamGood;
        this.fsDataOutputStreamBad = fsDataOutputStreamBad;
    }

    /** No-arg constructor required for reflective instantiation. */
    public MyRW() {
    }
}
// ===== Mapper class (MyMap) =====
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import java.io.IOException;
/**
 * Identity mapper: emits each input line unchanged as the output key with a
 * NullWritable value. All routing into good/bad files happens later in the
 * custom output format, not here.
 */
public class MyMap extends Mapper<LongWritable, Text, Text, NullWritable> {
    @Override
    protected void map(LongWritable offset, Text line, Context context) throws IOException, InterruptedException {
        // Forward the whole line untouched.
        context.write(line, NullWritable.get());
    }
}
// ===== Driver class (MyDriver) =====
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
/**
 * Job driver for the review-splitting job.
 *
 * Usage: MyDriver [inputPath] [outputPath]
 * When arguments are omitted the original hard-coded desktop paths are used,
 * so existing invocations keep working.
 */
public class MyDriver {
    public static void main(String[] args) throws Exception {
        // Generalize the previously hard-coded paths: take them from the
        // command line when supplied, fall back to the original defaults.
        String inputPath = args.length > 0 ? args[0]
                : "C:\\Users\\Administrator\\Desktop\\aaa";
        String outputPath = args.length > 1 ? args[1]
                : "C:\\Users\\Administrator\\Desktop\\output";

        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf);
        job.setJarByClass(MyDriver.class);

        // Map stage: identity mapper over plain text lines.
        job.setMapperClass(MyMap.class);
        job.setInputFormatClass(TextInputFormat.class);
        TextInputFormat.addInputPath(job, new Path(inputPath));
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(NullWritable.class);

        // Output stage: custom format splits records into good/bad files
        // under the configured output directory.
        job.setOutputFormatClass(MyOutputFormat.class);
        MyOutputFormat.setOutputPath(job, new Path(outputPath));
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(NullWritable.class);

        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}