hadoop 中的另一种定制的排序手段就是二次排序(即对 value 进行排序:做法是把 value 并入 key 构成组合 key,借助框架对 key 的排序间接实现对 value 的排序)
二次排序步骤如下
准备工作
对一个文本中数据进行排序,找出每年的最高气温
/**
 * Generates the sample input file: 6000 lines of "&lt;year&gt; &lt;temp&gt;".
 * Years fall in [1970, 2069], temperatures in [-30, 69].
 *
 * @throws IOException if the file cannot be written
 */
public void makeData() throws IOException {
    // Reuse one Random instead of allocating a new one per value.
    Random random = new Random();
    // try-with-resources guarantees the writer is closed even if a write fails.
    try (FileWriter fw = new FileWriter("F:/hadoop/temp.txt")) {
        for (int i = 0; i < 6000; i++) {
            int year = 1970 + random.nextInt(100);
            int temp = -30 + random.nextInt(100);
            fw.write("" + year + " " + temp + "\r\n");
        }
    }
}
1. 自定义组合key(将key和value组合在一起)
/**
* 自定义组合key
*/
/**
 * Composite key pairing a year with a temperature so the MapReduce shuffle
 * can order records by (year ascending, temperature descending).
 */
public class ComboKey implements WritableComparable<ComboKey> {
    private int year;
    private int temp;

    public int getYear() {
        return year;
    }

    public void setYear(int year) {
        this.year = year;
    }

    public int getTemp() {
        return temp;
    }

    public void setTemp(int temp) {
        this.temp = temp;
    }

    /**
     * Orders keys by year ascending; within the same year, by temperature
     * descending so the hottest record of each year sorts first.
     */
    @Override
    public int compareTo(ComboKey o) {
        int y0 = o.getYear();
        int t0 = o.getTemp();
        if (year == y0) {
            // Integer.compare avoids the int-overflow risk of subtraction;
            // arguments swapped to get descending temperature order.
            return Integer.compare(t0, temp);
        } else {
            // Ascending year order.
            return Integer.compare(year, y0);
        }
    }

    /**
     * Serializes this key: year first, then temperature.
     */
    @Override
    public void write(DataOutput out) throws IOException {
        out.writeInt(year);
        out.writeInt(temp);
    }

    /**
     * Deserializes in the same field order as {@link #write(DataOutput)}.
     */
    @Override
    public void readFields(DataInput in) throws IOException {
        year = in.readInt();
        temp = in.readInt();
    }
}
2. 自定义分区类,按照年份进行分区
/**
* map端执行:
* 自定义分区类
* > 将年份相同的分到同一个区
*/
/**
 * Runs on the map side: routes all records with the same year to the same
 * partition (and thus the same reducer).
 */
public class YearPartitioner extends Partitioner<ComboKey, NullWritable> {
    @Override
    public int getPartition(ComboKey key, NullWritable nullWritable, int numPartitions) {
        // floorMod keeps the result in [0, numPartitions) even for a
        // negative year; identical to % for the positive years this job uses.
        return Math.floorMod(key.getYear(), numPartitions);
    }
}
3. 定义分组对比器,按照年份进行分组
/**
* 自定义对比器
* > 按照年份进行分组的对比器实现
*/
public class YearGroupComparator extends WritableComparator {
protected YearGroupComparator() {
super(ComboKey.class, true);
}
public int compare(WritableComparable a, WritableComparable b) {
ComboKey k1 = (ComboKey)a ;
ComboKey k2 = (ComboKey)b ;
return k1.getYear() - k2.getYear() ;
}
}
4. 定义key排序对比器
/**
* 排序对比器
*/
public class ComboKeyComparator extends WritableComparator {
protected ComboKeyComparator() {
super(ComboKey.class, true);
}
public int compare(WritableComparable a, WritableComparable b) {
ComboKey k1 = (ComboKey) a;
ComboKey k2 = (ComboKey) b;
return k1.compareTo(k2);
}
}
5. Mapper
/**
 * Parses each input line "&lt;year&gt; &lt;temp&gt;" into a ComboKey; the value is
 * NullWritable because both fields travel inside the key.
 */
public class MaxTempMapper extends Mapper<LongWritable, Text, ComboKey, NullWritable> {
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        String line = value.toString().trim();
        // Skip blank lines instead of failing on parseInt.
        if (line.isEmpty()) {
            return;
        }
        // Split on any run of whitespace so extra spaces don't break parsing.
        String[] arr = line.split("\\s+");
        ComboKey keyOut = new ComboKey();
        keyOut.setYear(Integer.parseInt(arr[0]));
        keyOut.setTemp(Integer.parseInt(arr[1]));
        context.write(keyOut, NullWritable.get());
    }
}
6. Reducer
/**
 * Emits (year, maxTemp). Because keys sort by (year asc, temp desc) and the
 * grouping comparator groups by year, the key passed to reduce() already
 * holds the year's maximum temperature — no value iteration is needed.
 */
public class MaxTempReducer extends Reducer<ComboKey, NullWritable, IntWritable, IntWritable> {
    @Override
    protected void reduce(ComboKey key, Iterable<NullWritable> values, Context context) throws IOException, InterruptedException {
        // Read both fields before any iteration could mutate the key object.
        int year = key.getYear();
        int temp = key.getTemp();
        context.write(new IntWritable(year), new IntWritable(temp));
    }
}
7. 主函数
/**
 * Driver for the secondary-sort max-temperature job.
 * Usage: MaxTempApp &lt;inputPath&gt; &lt;outputPath&gt;
 */
public class MaxTempApp {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Run against the local filesystem.
        conf.set("fs.defaultFS", "file:///");
        Job job = Job.getInstance(conf);

        // Basic job identity and packaging.
        job.setJobName("SecondarySortApp");
        job.setJarByClass(MaxTempApp.class);

        // Input format and paths.
        job.setInputFormatClass(TextInputFormat.class);
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        job.setMapperClass(MaxTempMapper.class);
        job.setReducerClass(MaxTempReducer.class);

        // Map output types (composite key, null value).
        job.setMapOutputKeyClass(ComboKey.class);
        job.setMapOutputValueClass(NullWritable.class);
        // Reduce output types (year, temperature).
        job.setOutputKeyClass(IntWritable.class);
        job.setOutputValueClass(IntWritable.class);

        // Secondary-sort wiring: partition by year, group by year,
        // sort by (year asc, temp desc).
        job.setPartitionerClass(YearPartitioner.class);
        job.setGroupingComparatorClass(YearGroupComparator.class);
        job.setSortComparatorClass(ComboKeyComparator.class);

        job.setNumReduceTasks(3);
        // Propagate job success/failure to the process exit code.
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}