MapReduce自定义OutputFormat
数据及需求
- 上面自己随机生成的一些数据,第一个字段是地点(只有北京、南京和上海三个),第二个字段是double类型的数据。要求将这些数据按照地点分组求和,并且分别输出到不同的文件里面,文件名以地点名命名
代码实现
Mapper阶段
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import java.io.IOException;
/**
 * Mapper stage: parses each tab-delimited input line into a (city, amount)
 * pair. Expected value format: "&lt;city&gt;\t&lt;double&gt;". Emits the city name as
 * the key and the parsed amount as the value, to be summed by the reducer.
 */
public class MapTest extends Mapper<LongWritable, Text, Text, DoubleWritable> {
    // Reusable output objects: Hadoop serializes on write(), so a single
    // instance per mapper avoids allocating two objects per input record.
    private final Text outKey = new Text();
    private final DoubleWritable outValue = new DoubleWritable();

    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        String[] fields = value.toString().split("\t");
        outKey.set(fields[0]);
        outValue.set(Double.parseDouble(fields[1]));
        context.write(outKey, outValue);
    }
}
Reduce阶段
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
import java.io.IOException;
/**
 * Reduce stage: sums all double amounts for each city key and emits a single
 * (city, total) pair per key.
 */
public class RedTest extends Reducer<Text, DoubleWritable, Text, DoubleWritable> {
    // Reusable output value; safe because Hadoop serializes it immediately
    // inside context.write().
    private final DoubleWritable total = new DoubleWritable();

    @Override
    protected void reduce(Text key, Iterable<DoubleWritable> values, Context context)
            throws IOException, InterruptedException {
        // Fix: accumulate in a LOCAL variable instead of an instance field.
        // The original kept `sum` as a field and reset it only after writing,
        // so an exception thrown mid-reduce would carry stale state into the
        // next key's reduction.
        double sum = 0;
        for (DoubleWritable value : values) {
            sum += value.get();
        }
        total.set(sum);
        context.write(key, total);
    }
}
自定义OutputFormat
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import java.io.IOException;
/**
 * Custom OutputFormat that routes each (city, total) record to a per-city
 * file via {@link MyRecordWriter}, instead of the default part-r-xxxxx
 * output files.
 */
public class MyOutputFormat extends FileOutputFormat<Text, DoubleWritable> {
    @Override
    public RecordWriter<Text, DoubleWritable> getRecordWriter(TaskAttemptContext job)
            throws IOException, InterruptedException {
        // All actual output work is delegated to the custom record writer,
        // which opens one stream per city from the job's FileSystem.
        RecordWriter<Text, DoubleWritable> writer = new MyRecordWriter(job);
        return writer;
    }
}
自定义RecordWriter
请注意:这里的IO流一定要是通过FileSystem生成的,而不是自己用java.io直接生成的,否则数据不会写到HDFS上
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import java.io.*;
/**
 * RecordWriter that writes each summed (city, total) record to a
 * city-specific file on the job's FileSystem (bj.txt / nj.txt / sh.txt).
 *
 * <p>NOTE: the output streams MUST be created through {@link FileSystem}
 * (not plain java.io), so the data lands on the filesystem configured for
 * the job (e.g. HDFS) rather than the local disk of the task node.
 */
public class MyRecordWriter extends RecordWriter<Text, DoubleWritable> {
    // One stream per destination city. Paths are fixed by the requirement
    // that each output file is named after its city.
    private FSDataOutputStream f1;  // 北京 -> bj.txt
    private FSDataOutputStream f2;  // 南京 -> nj.txt
    private FSDataOutputStream f3;  // 上海 (and anything else) -> sh.txt

    public MyRecordWriter(TaskAttemptContext taskAttemptContext) throws IOException {
        FileSystem fileSystem = FileSystem.get(taskAttemptContext.getConfiguration());
        // overwrite=false: fail fast if a previous run left output behind.
        f1 = fileSystem.create(new Path("/MR/city/bj.txt"), false);
        f2 = fileSystem.create(new Path("/MR/city/nj.txt"), false);
        f3 = fileSystem.create(new Path("/MR/city/sh.txt"), false);
    }

    @Override
    public void write(Text key, DoubleWritable value) throws IOException, InterruptedException {
        // Fix: terminate every record with '\n' — the original wrote raw
        // "key\tvalue" bytes, so multiple records in one file ran together
        // on a single line.
        byte[] record = (key + "\t" + value + "\n").getBytes();
        String city = key.toString();
        if ("北京".equals(city)) {
            f1.write(record);
            f1.flush();
        } else if ("南京".equals(city)) {
            f2.write(record);
            f2.flush();
        } else {
            // Everything else (expected: 上海) goes to the Shanghai file.
            f3.write(record);
            f3.flush();
        }
    }

    @Override
    public void close(TaskAttemptContext context) throws IOException, InterruptedException {
        // Fix: close all three streams even when an earlier close() throws —
        // the original leaked f2/f3 if f1.close() failed. The first failure
        // is re-thrown after all streams have been attempted.
        IOException firstFailure = null;
        for (FSDataOutputStream out : new FSDataOutputStream[] {f1, f2, f3}) {
            try {
                if (out != null) {
                    out.close();
                }
            } catch (IOException e) {
                if (firstFailure == null) {
                    firstFailure = e;
                }
            }
        }
        if (firstFailure != null) {
            throw firstFailure;
        }
    }
}
Driver阶段
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
/**
 * Driver stage: wires the per-city sum job together — {@link MapTest} mapper,
 * {@link RedTest} reducer, and the custom {@link MyOutputFormat} that splits
 * the results into one file per city.
 */
public class DriTest {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Paths without an explicit scheme (e.g. /MR/city/*.txt created by
        // MyRecordWriter) resolve against this namenode.
        conf.set("fs.defaultFS", "hdfs://192.168.0.155:9000");

        Job job = Job.getInstance(conf);
        job.setJarByClass(DriTest.class);
        job.setMapperClass(MapTest.class);
        job.setReducerClass(RedTest.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(DoubleWritable.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(DoubleWritable.class);
        job.setOutputFormatClass(MyOutputFormat.class);

        // Delete any stale output directory first so re-runs do not fail
        // with FileAlreadyExistsException.
        Path outputPath = new Path("file:///home/data/mapreduce/data_out/output");
        if (outputPath.getFileSystem(conf).exists(outputPath)) {
            outputPath.getFileSystem(conf).delete(outputPath, true);
        }

        FileInputFormat.setInputPaths(job, "file:///home/data/mapreduce/data_out/input");
        // Fix: reuse outputPath instead of constructing the same Path from
        // a duplicated string literal, so the cleanup above and the job
        // output can never drift apart.
        FileOutputFormat.setOutputPath(job, outputPath);

        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
运行测试
打包
将框住的地方改成你自己的主类
上传运行
hadoop jar MP.jar Test01.DriTest
结果