如果有下面的数,我们想按照每行两个数的乘积的大小来进行排序,就得如下操作:
1 2
1 1
3 2
2 2
5 1
先自定义排序的 key,注意实现 WritableComparable 这个接口(作为 MapReduce 的 key,还应同时重写 hashCode 和 equals,保证相等的 key 被分到同一个分区):
package cn.edu.bjut.model;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import org.apache.hadoop.io.WritableComparable;
/**
 * Composite MapReduce key holding two longs; keys are ordered by the
 * product {@code first * second} (ascending).
 *
 * <p>NOTE(review): equals/hashCode are defined on the product so they are
 * consistent with {@link #compareTo(Object)} — Hadoop groups reduce input
 * by {@code compareTo == 0}, and HashPartitioner partitions by hashCode,
 * so the two must agree or equal keys may land on different reducers.
 */
public class DataSortable implements WritableComparable<Object> {
    // The two factors of the sort key.
    private long first, second;

    /** No-arg constructor required by Hadoop's reflection-based deserialization. */
    public DataSortable() {
        super();
    }

    /**
     * Builds a key from two numeric strings (as read from an input line).
     *
     * @throws NumberFormatException if either string is not a valid long
     */
    public DataSortable(String first, String second) {
        super();
        this.first = Long.parseLong(first);
        this.second = Long.parseLong(second);
    }

    @Override
    public void readFields(DataInput in) throws IOException {
        // Field order must exactly mirror write().
        first = in.readLong();
        second = in.readLong();
    }

    @Override
    public void write(DataOutput out) throws IOException {
        out.writeLong(first);
        out.writeLong(second);
    }

    /**
     * Orders by the product of the two fields.
     * NOTE(review): the product can overflow for very large inputs — acceptable
     * for the small sample data, but worth a guard if inputs grow.
     */
    @Override
    public int compareTo(Object o) {
        DataSortable other = (DataSortable) o;
        return Long.compare(this.first * this.second,
                other.getFirst() * other.getSecond());
    }

    /** Equal iff the products are equal — consistent with compareTo. */
    @Override
    public boolean equals(Object o) {
        if (this == o) {
            return true;
        }
        if (!(o instanceof DataSortable)) {
            return false;
        }
        DataSortable other = (DataSortable) o;
        return this.first * this.second == other.first * other.second;
    }

    /** Hash of the product, so keys that compare equal hash equal. */
    @Override
    public int hashCode() {
        return Long.valueOf(first * second).hashCode();
    }

    /** Tab-separated form, convenient in logs and debug output. */
    @Override
    public String toString() {
        return first + "\t" + second;
    }

    public long getFirst() {
        return first;
    }

    public void setFirst(long first) {
        this.first = first;
    }

    public long getSecond() {
        return second;
    }

    public void setSecond(long second) {
        this.second = second;
    }
}
然后主方法里面这么写:
package cn.edu.bjut.model;
import java.io.IOException;
import java.net.URI;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
/**
 * MapReduce driver that sorts pairs of numbers by the product of each pair.
 * Mapper emits {@link DataSortable} keys (no value); the shuffle sorts them;
 * the reducer writes the pairs back out in sorted order.
 */
public class NumSort {
    // HDFS locations; the output directory is wiped before each run.
    static final String INPUT_DIR = "hdfs://172.21.15.189:9000/input";
    static final String OUTPUT_DIR = "hdfs://172.21.15.189:9000/output";

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Path path = new Path(OUTPUT_DIR);
        // MapReduce refuses to start if the output dir exists — delete stale output.
        FileSystem fileSystem = FileSystem.get(new URI(OUTPUT_DIR), conf);
        if (fileSystem.exists(path)) {
            fileSystem.delete(path, true);
        }
        // NOTE(review): on Hadoop 2+ prefer Job.getInstance(conf, "NumSort").
        Job job = new Job(conf, "NumSort");
        FileInputFormat.setInputPaths(job, INPUT_DIR);  // 设置输入路径
        FileOutputFormat.setOutputPath(job, path);      // 设置输出路径
        // Driver class, so Hadoop ships the containing jar to the cluster.
        job.setJarByClass(NumSort.class);
        job.setMapperClass(MyMapper.class);             // 设置自定义的mapper类
        job.setMapOutputKeyClass(DataSortable.class);
        job.setMapOutputValueClass(NullWritable.class);
        job.setReducerClass(MyReducer.class);           // 设置自定义的reduce类
        job.setOutputKeyClass(LongWritable.class);      // 设置输出的key的类型
        job.setOutputValueClass(LongWritable.class);    // 设置输出的value类型
        // Propagate job success/failure to the shell instead of always exiting 0.
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }

    /**
     * Parses one "a b" line into a DataSortable key.
     * Blank or malformed lines are skipped instead of crashing the task.
     *
     * @author Gary
     */
    static class MyMapper extends Mapper<LongWritable, Text, DataSortable, NullWritable> {
        @Override
        protected void map(
                LongWritable key,
                Text value,
                Mapper<LongWritable, Text, DataSortable, NullWritable>.Context context)
                throws IOException, InterruptedException {
            String line = value.toString().trim();
            if (line.isEmpty()) {
                return; // skip blank lines
            }
            // \\s+ tolerates tabs and runs of spaces, not just a single space.
            String[] nums = line.split("\\s+");
            if (nums.length < 2) {
                return; // skip malformed lines rather than throwing AIOOBE
            }
            context.write(new DataSortable(nums[0], nums[1]), NullWritable.get());
        }
    }

    /**
     * Writes each input pair back out; keys arrive in product order.
     *
     * @author Gary
     */
    static class MyReducer extends Reducer<DataSortable, NullWritable, LongWritable, LongWritable> {
        @Override
        protected void reduce(
                DataSortable key,
                Iterable<NullWritable> value,
                Reducer<DataSortable, NullWritable, LongWritable, LongWritable>.Context context)
                throws IOException, InterruptedException {
            // Hadoop groups records whose keys compare equal into ONE reduce call,
            // so writing the key once would drop duplicates and product ties.
            // Iterating the values emits one line per original input record; the
            // key object's fields are updated to each record's key as we advance.
            for (NullWritable ignored : value) {
                context.write(new LongWritable(key.getFirst()),
                        new LongWritable(key.getSecond()));
            }
        }
    }
}