在上一篇文章统计手机用户流量的基础之上,再对统计结果进行排序。
若要让自定义类型作为key参与排序,则该类必须实现(implements)WritableComparable接口。
map和reduce阶段进行排序时,比较的是k2。v2是不参与排序比较的。如果要想让v2也进行排序,需要把k2和v2组装成新的类,作为k2,才能参与比较。
分组时也是按照k2进行比较的。
示例代码:
package com.zz.hadoop.dc.mr;
import java.io.IOException;
import java.util.Map;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Partitioner;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import com.zz.hadoop.dc.factory.Factory;
import com.zz.hadoop.dc.po.DataInfo;
/**
 * MapReduce driver that sorts phone traffic records and splits the sorted
 * output into partitions by the phone number's three-digit prefix.
 *
 * <p>Sorting is done by the framework on the map output key, so the whole
 * {@link DataInfo} record is emitted as the key and {@link NullWritable} is
 * used as the (unused) value.
 */
public class DataSort {

    /**
     * Configures and runs the sort job.
     *
     * @param args {@code args[0]} input path, {@code args[1]} output path,
     *             {@code args[2]} number of reduce tasks
     * @throws IllegalArgumentException if fewer than three arguments are given
     *             or the reduce task count is not an integer
     */
    public static void main(String[] args) throws IllegalArgumentException,
            IOException, ClassNotFoundException, InterruptedException {
        if (args == null || args.length < 3) {
            throw new IllegalArgumentException(
                    "Usage: DataSort <input path> <output path> <num reduce tasks>");
        }

        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf);
        job.setJarByClass(DataSort.class);

        job.setMapperClass(SortMapper.class);
        job.setMapOutputKeyClass(DataInfo.class);
        job.setMapOutputValueClass(NullWritable.class);
        FileInputFormat.setInputPaths(job, new Path(args[0]));

        job.setReducerClass(SortReduce.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(DataInfo.class);
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        job.setPartitionerClass(SortPartitioner.class);
        job.setNumReduceTasks(Integer.parseInt(args[2]));

        // Surface a failed job to the calling shell instead of returning normally.
        if (!job.waitForCompletion(true)) {
            System.exit(1);
        }
    }

    /**
     * Emits every input line as a {@link DataInfo} key so that the framework
     * sorts the records by the ordering defined in the key class.
     *
     * <p>The value is not needed for sorting, so it is {@link NullWritable}.
     * Declared {@code static} because the framework creates the mapper
     * reflectively and cannot supply an enclosing {@code DataSort} instance.
     */
    public static class SortMapper
            extends Mapper<LongWritable, Text, DataInfo, NullWritable> {

        /** Reused output key; its contents are overwritten for every record. */
        private final DataInfo k2 = new DataInfo();

        /**
         * Parses one tab-separated line: column 1 is used as the phone number,
         * columns 8 and 9 as the upstream and downstream traffic.
         */
        @Override
        protected void map(LongWritable k1, Text v1, Context context)
                throws IOException, InterruptedException {
            String line = v1.toString();
            String[] fields = line.split("\t");
            k2.set(fields[1], Long.parseLong(fields[8]),
                    Long.parseLong(fields[9]));
            context.write(k2, NullWritable.get());
        }
    }

    /**
     * Writes the already sorted records as (phone number, record) pairs.
     *
     * <p>Declared {@code static} for the same reflective-instantiation reason
     * as {@link SortMapper}.
     */
    public static class SortReduce
            extends Reducer<DataInfo, NullWritable, Text, DataInfo> {

        /** Reused output key holding the phone number. */
        private final Text k3 = new Text();

        @Override
        protected void reduce(DataInfo k2, Iterable<NullWritable> v2,
                Context context) throws IOException, InterruptedException {
            // Write once per value rather than once per group, so that records
            // whose keys compare equal are not collapsed into a single line.
            for (NullWritable ignored : v2) {
                k3.set(k2.getTel());
                context.write(k3, k2);
            }
        }
    }

    /**
     * Routes each sorted record to a partition chosen by the first three
     * digits of its phone number.
     *
     * <p>The partitioner runs on the map output, so its type parameters are
     * the map output key and value types ({@link DataInfo},
     * {@link NullWritable}), not the reducer's output types.
     */
    public static class SortPartitioner
            extends Partitioner<DataInfo, NullWritable> {

        /** Length of the phone number prefix used for the lookup. */
        private static final int PREFIX_LENGTH = 3;

        /** Partition used for unknown prefixes and as the fallback. */
        private static final int DEFAULT_PARTITION = 0;

        /** Phone number prefix to partition code. */
        private static final Map<String, Integer> provider = Factory.getMap();

        // Sample prefix ranges for three carriers and their partition codes.
        static {
            provider.put("138", 1);
            provider.put("139", 1);
            provider.put("155", 2);
            provider.put("156", 2);
            provider.put("180", 3);
            provider.put("181", 3);
        }

        /**
         * Returns the partition for the record's phone number prefix.
         *
         * @param k2 map output key carrying the phone number
         * @param v2 unused
         * @param numPartition number of partitions configured for the job
         * @return the mapped partition code, or partition 0 when the number is
         *         missing, shorter than three characters, has an unmapped
         *         prefix, or maps to a code outside {@code [0, numPartition)}
         */
        @Override
        public int getPartition(DataInfo k2, NullWritable v2, int numPartition) {
            String tel = k2.getTel();
            if (tel == null || tel.length() < PREFIX_LENGTH) {
                return DEFAULT_PARTITION;
            }
            Integer code = provider.get(tel.substring(0, PREFIX_LENGTH));
            if (code == null || code < 0 || code >= numPartition) {
                // An out-of-range index would be rejected by the framework as
                // an illegal partition, so fall back to the default one.
                return DEFAULT_PARTITION;
            }
            return code;
        }
    }
}
bean
package com.zz.hadoop.dc.po;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import org.apache.hadoop.io.WritableComparable;
/**
 * Traffic record for one phone number, usable as a MapReduce key.
 *
 * <p>Natural ordering: total traffic descending, then upstream traffic
 * descending, then phone number ascending. The ordering is consistent with
 * {@link #equals(Object)}.
 */
public class DataInfo implements WritableComparable<DataInfo> {

    /** Phone number; kept non-null by default so serialization cannot fail on a fresh instance. */
    private String tel = "";
    private long upPayLoad;
    private long downPayLoad;
    /** Always {@code upPayLoad + downPayLoad} when populated through {@link #set}. */
    private long totalPayLoad;

    /**
     * Populates the record and derives the total traffic.
     *
     * @param tel phone number
     * @param upPayLoad upstream traffic
     * @param downPayLoad downstream traffic
     */
    public void set(String tel, long upPayLoad, long downPayLoad) {
        this.tel = tel;
        this.upPayLoad = upPayLoad;
        this.downPayLoad = downPayLoad;
        this.totalPayLoad = upPayLoad + downPayLoad;
    }

    /** @return the phone number */
    public String getTel() {
        return tel;
    }

    /** @return the upstream traffic */
    public long getUpPayLoad() {
        return upPayLoad;
    }

    /** @return the downstream traffic */
    public long getDownPayLoad() {
        return downPayLoad;
    }

    /** @return the total traffic */
    public long getTotalPayLoad() {
        return totalPayLoad;
    }

    /** Reads the fields in exactly the order {@link #write(DataOutput)} emits them. */
    @Override
    public void readFields(DataInput in) throws IOException {
        this.tel = in.readUTF();
        this.upPayLoad = in.readLong();
        this.downPayLoad = in.readLong();
        this.totalPayLoad = in.readLong();
    }

    /** Writes the phone number followed by the upstream, downstream and total traffic. */
    @Override
    public void write(DataOutput out) throws IOException {
        out.writeUTF(this.tel);
        out.writeLong(this.upPayLoad);
        out.writeLong(this.downPayLoad);
        out.writeLong(this.totalPayLoad);
    }

    /**
     * Orders by total traffic (largest first); ties are broken by upstream
     * traffic (largest first) and then by phone number (ascending).
     *
     * @return a negative value if this record sorts before {@code o}, zero if
     *         both records are equal in every compared field, a positive value
     *         if this record sorts after {@code o}
     */
    @Override
    public int compareTo(DataInfo o) {
        // Arguments are swapped on purpose: larger values must sort first.
        int byTotal = Long.compare(o.totalPayLoad, this.totalPayLoad);
        if (byTotal != 0) {
            return byTotal;
        }
        int byUp = Long.compare(o.upPayLoad, this.upPayLoad);
        if (byUp != 0) {
            return byUp;
        }
        // Final tie-break keeps different phone numbers from comparing equal.
        if (this.tel == null) {
            return o.tel == null ? 0 : -1;
        }
        return o.tel == null ? 1 : this.tel.compareTo(o.tel);
    }

    /** Two records are equal when phone number and all traffic values match. */
    @Override
    public boolean equals(Object obj) {
        if (this == obj) {
            return true;
        }
        if (!(obj instanceof DataInfo)) {
            return false;
        }
        DataInfo other = (DataInfo) obj;
        boolean sameTel = this.tel == null ? other.tel == null : this.tel.equals(other.tel);
        return sameTel
                && this.upPayLoad == other.upPayLoad
                && this.downPayLoad == other.downPayLoad
                && this.totalPayLoad == other.totalPayLoad;
    }

    @Override
    public int hashCode() {
        int result = tel == null ? 0 : tel.hashCode();
        result = 31 * result + (int) (upPayLoad ^ (upPayLoad >>> 32));
        result = 31 * result + (int) (downPayLoad ^ (downPayLoad >>> 32));
        result = 31 * result + (int) (totalPayLoad ^ (totalPayLoad >>> 32));
        return result;
    }

    /** Tab-separated upstream, downstream and total traffic, for text output. */
    @Override
    public String toString() {
        return this.upPayLoad + "\t" + this.downPayLoad + "\t" + this.totalPayLoad;
    }
}