- 源码下载
源码下载
- 源数据,排序之前
3 12
1 23
2 34
32 2
12 12
32 3
1 5
23 9
23 3
1 2
2 44
3 12
4 22
- 排序之后需要的效果;这里两列都是升序排列(源数据中重复的行 3 12 在结果中只输出一次)
IntPair [first=1, second=2]
IntPair [first=1, second=5]
IntPair [first=1, second=23]
IntPair [first=2, second=34]
IntPair [first=2, second=44]
IntPair [first=3, second=12]
IntPair [first=4, second=22]
IntPair [first=12, second=12]
IntPair [first=23, second=3]
IntPair [first=23, second=9]
IntPair [first=32, second=2]
IntPair [first=32, second=3]
- 源码
- SecondSort extends Configured implements Tool
package com.bipt.model.wether;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
/**
 * Secondary sort: orders two-column integer records by the first column and,
 * for equal first columns, by the second column.
 *
 * <p>Each input line is wrapped in an {@link IntPair} that is emitted as the
 * map output key; the ordering itself comes from {@code IntPair.compareTo}
 * during the shuffle. Example input:
 *
 * <pre>
 * 3 12
 * 1 23
 * 2 34
 * 32 2
 * 12 12
 * 32 3
 * </pre>
 *
 * <p>NOTE(review): the job does not set the number of reduce tasks. With more
 * than one reducer each output file is sorted on its own, not globally;
 * confirm the cluster default if a single sorted file is required.
 */
public class SecondSort extends Configured implements Tool {

    /** Status returned by {@link #run(String[])} when too few arguments are given. */
    private static final int EXIT_USAGE = 2;

    /**
     * Parses each line into an {@link IntPair} key with no value.
     */
    public static class SecondSortMap
            extends Mapper<LongWritable, Text, IntPair, NullWritable> {

        /** Reused for every record to avoid allocating one key per line. */
        final IntPair k = new IntPair();

        /**
         * Emits one {@code (IntPair, NullWritable)} record per non-blank line.
         *
         * @param key     input key supplied by the input format
         * @param value   one line holding two whitespace-separated integers
         * @param context sink for the map output
         * @throws IOException           if the line has fewer than two fields,
         *                               or the framework fails to write
         * @throws NumberFormatException if a field is not a valid int
         */
        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            // Trim first: a leading blank would otherwise produce an empty
            // first token and fail in Integer.parseInt.
            String line = value.toString().trim();
            if (line.isEmpty()) {
                // Blank lines (e.g. a trailing newline at end of file) carry no record.
                return;
            }

            String[] splits = line.split("\\s+");
            if (splits.length < 2) {
                throw new IOException(
                        "Expected two whitespace-separated integers at input key "
                                + key + " but got: \"" + line + "\"");
            }

            int first = Integer.parseInt(splits[0]);
            int second = Integer.parseInt(splits[1]);
            k.set(first, second);

            // NullWritable is a placeholder singleton that serializes to nothing;
            // the whole record lives in the key. It is only obtainable via get().
            context.write(k, NullWritable.get());
        }
    }

    /**
     * Writes each distinct key once, in the order the framework delivers them.
     */
    public static class SecondSortReducer
            extends Reducer<IntPair, NullWritable, IntPair, NullWritable> {

        /**
         * Emits the key a single time, so records that were grouped under the
         * same key collapse into one output line.
         *
         * @param key     the pair shared by this group
         * @param values  placeholder values, not read
         * @param context sink for the reduce output
         */
        @Override
        protected void reduce(IntPair key, Iterable<NullWritable> values, Context context)
                throws IOException, InterruptedException {
            context.write(key, NullWritable.get());
        }
    }

    /**
     * Configures and runs the job.
     *
     * @param args {@code args[0]} is the input path, {@code args[1]} the output path
     * @return 0 if the job succeeded, 1 if it failed, 2 if too few arguments were given
     * @throws Exception if job setup or execution fails
     */
    @Override
    public int run(String[] args) throws Exception {
        if (args.length < 2) {
            System.out.println("usage ... <in>...<out>");
            System.err.println("你的输入有误...");
            // Return instead of System.exit so a caller embedding this Tool
            // keeps its JVM; main() still turns the status into the exit code.
            return EXIT_USAGE;
        }

        Configuration conf = getConf();
        Job job = Job.getInstance(conf);
        job.setJobName("SecondSort");
        job.setJarByClass(SecondSort.class);

        job.setMapOutputKeyClass(IntPair.class);
        job.setMapOutputValueClass(NullWritable.class);
        job.setOutputKeyClass(IntPair.class);
        job.setOutputValueClass(NullWritable.class);

        job.setMapperClass(SecondSortMap.class);
        job.setReducerClass(SecondSortReducer.class);

        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        return job.waitForCompletion(true) ? 0 : 1;
    }

    /**
     * Command-line entry point; exits with the status returned by {@link #run(String[])}.
     *
     * @param args input path followed by output path
     */
    public static void main(String[] args) throws Exception {
        System.exit(ToolRunner.run(new SecondSort(), args));
    }
}
-
- IntPair implements WritableComparable<IntPair>
package com.bipt.model.wether;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import org.apache.hadoop.io.WritableComparable;
/**
 * Composite key for the MapReduce secondary sort.
 *
 * <p>Both columns of an input record are packed into this one key, which
 * implements {@link WritableComparable} so it can be serialized and ordered.
 * The natural order is ascending by {@code first}, then ascending by
 * {@code second}.
 *
 * <p>{@link #hashCode()} and {@link #equals(Object)} are value-based so that
 * equal pairs produce the same hash in every JVM, which hash-based
 * partitioning relies on.
 */
public class IntPair implements WritableComparable<IntPair> {

    /** First column; primary sort field. */
    int first = 0;

    /** Second column; tie-breaker when {@code first} is equal. */
    int second = 0;

    /** Creates the pair (0, 0); the no-arg constructor is required for deserialization. */
    public IntPair() {
    }

    /**
     * Creates a pair with the given values.
     *
     * @param first  value of the first column
     * @param second value of the second column
     */
    public IntPair(int first, int second) {
        this.first = first;
        this.second = second;
    }

    /**
     * Overwrites both fields so that one instance can be reused across records.
     *
     * @param left  new value of the first column
     * @param right new value of the second column
     */
    public void set(int left, int right) {
        this.first = left;
        this.second = right;
    }

    /** @return the first column */
    public int getFirst() {
        return first;
    }

    /** @return the second column */
    public int getSecond() {
        return second;
    }

    /**
     * Deserializes the pair: reads two ints in the order {@link #write} wrote them.
     */
    @Override
    public void readFields(DataInput in) throws IOException {
        first = in.readInt();
        second = in.readInt();
    }

    /**
     * Serializes the pair as two consecutive ints, {@code first} then {@code second}.
     */
    @Override
    public void write(DataOutput out) throws IOException {
        out.writeInt(first);
        out.writeInt(second);
    }

    /**
     * Orders pairs ascending by {@code first}, then ascending by {@code second}.
     *
     * <p>Swap the arguments of either {@code Integer.compare} call to make that
     * field sort descending. {@code Integer.compare} is used rather than
     * subtraction because {@code a - b} overflows for operands of opposite
     * sign and large magnitude and then reports the wrong order.
     *
     * @param o the pair to compare against
     * @return a negative, zero, or positive value as this pair is less than,
     *         equal to, or greater than {@code o}
     */
    @Override
    public int compareTo(IntPair o) {
        int byFirst = Integer.compare(this.first, o.first);
        if (byFirst != 0) {
            return byFirst;
        }
        // First columns are equal, so the second column decides.
        return Integer.compare(this.second, o.second);
    }

    /**
     * Determines the form and content of each line in the final output.
     */
    @Override
    public String toString() {
        return "IntPair [first=" + first + ", second=" + second + "]";
    }

    /**
     * Value-based hash, consistent with {@link #equals(Object)}.
     */
    @Override
    public int hashCode() {
        return first * 157 + second;
    }

    /**
     * Two pairs are equal when both fields match, consistent with
     * {@link #compareTo(IntPair)} returning zero.
     */
    @Override
    public boolean equals(Object right) {
        if (this == right) {
            return true;
        }
        if (!(right instanceof IntPair)) {
            return false;
        }
        IntPair r = (IntPair) right;
        return r.first == first && r.second == second;
    }
}
注意
compareTo 方法控制着升序还是降序,
toString 方法决定输出的格式
/*
 * Comparable.compareTo works like Comparator.compare: "this" plays the role
 * of o1 and the argument "o" plays the role of o2.
 * Comparing (this, o) gives ascending order; comparing (o, this) gives
 * descending order.
 */
@Override
public int compareTo(IntPair o) {
    // Integer.compare has the same sign as "this - o" but cannot overflow
    // when the operands have opposite signs and large magnitudes.
    int byFirst = Integer.compare(this.first, o.first);
    if (byFirst != 0) {
        return byFirst;
    }
    // First columns are equal, so the second column decides.
    return Integer.compare(this.second, o.second);
}
@Override
public String toString() {
    // Determines the form and content of each line in the final output.
    StringBuilder text = new StringBuilder("IntPair [first=");
    text.append(first);
    text.append(", second=");
    text.append(second);
    text.append("]");
    return text.toString();
}
-
正序排列
public int compare(int o1, int o2) {
return o1 - o2;
} -
逆序排列
public int compare(int o1, int o2) {
return o2 - o1;
} -
Comparable接口的compareTo方法和上面Comparator接口的compare方法类似,这里的this即上面的o1,o即上面的o2
/**
 * Ascending order by the field i. Same sign as "this.i - o.i", but
 * Integer.compare cannot overflow.
 */
@Override
public int compareTo(Test o) {
    return Integer.compare(this.i, o.i);
}