在用户总流量分析中我们成功得到了数据如下:
13480253104 180 180 360
13502468823 7335 110349 117684
13560439658 2034 5892 7926
13600217502 1080 186852 187932
13602846565 1938 2910 4848
13660577991 6960 690 7650
13719199419 240 0 240
13726230503 2481 24681 27162
13760778710 120 120 240
13823070001 360 180 540
13826544101 264 0 264
13922314466 3008 3720 6728
13925057413 11058 48243 59301
13926251106 240 0 240
13926435656 132 1512 1644
15013685858 3659 3538 7197
15920133257 3156 2936 6092
15989002119 1938 180 2118
18211575961 1527 2106 3633
18320173382 9531 2412 11943
84138413 4116 1432 5548
接下来我们按照总流量(最后一列)倒序排序:
package cn.itcast.hadoop.flowsum.sort;
import cn.itcast.hadoop.flowsum.Flowbean;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import java.io.IOException;
public class FlowSumSort {

    /**
     * Mapper: emits the Flowbean as the map-output key and the phone number
     * as the value, so the MapReduce framework sorts records by the bean
     * (descending total flow, per Flowbean.compareTo).
     */
    public static class FlowSumSortmapper extends Mapper<LongWritable, Text, Flowbean, Text> {
        // Reused across map() calls to avoid per-record allocation (standard Hadoop idiom).
        Text v = new Text();
        Flowbean k = new Flowbean();

        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            // Input line format: phone \t upFlow \t downFlow \t sumFlow
            String line = value.toString();
            String[] fields = line.split("\t");
            String phoneNum = fields[0];
            long upFlow = Long.parseLong(fields[1]);
            long downFlow = Long.parseLong(fields[2]);
            k.set(upFlow, downFlow);
            v.set(phoneNum);
            context.write(k, v);
        }
    }

    /**
     * Reducer: swaps key and value so the final output is &lt;phone, Flowbean&gt;.
     *
     * NOTE: keys that compare equal are grouped into ONE reduce call, so a
     * single Flowbean key may carry several phone numbers (the sample data
     * has three phones whose total flow is 240). We must therefore iterate
     * over ALL values, not just the first one.
     */
    public static class FlowSumSortReducer extends Reducer<Flowbean, Text, Text, Flowbean> {
        @Override
        protected void reduce(Flowbean bean, Iterable<Text> phones, Context context)
                throws IOException, InterruptedException {
            // BUGFIX: the original wrote only phones.iterator().next(), silently
            // dropping every phone number whose total flow tied with another's.
            for (Text phone : phones) {
                context.write(phone, bean);
            }
        }
    }

    /**
     * Job driver. Input/output paths default to the original hard-coded
     * locations but may be overridden on the command line:
     * args[0] = input path, args[1] = output path.
     */
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Run in local mode (no YARN cluster required).
        conf.set("mapreduce.framework.name", "local");
        Job job = Job.getInstance(conf);
        // Locate the jar containing this job's classes.
        job.setJarByClass(FlowSumSort.class);
        // Wire up the mapper and reducer implementations.
        job.setMapperClass(FlowSumSortmapper.class);
        job.setReducerClass(FlowSumSortReducer.class);
        // Map-output kv types: the sort key is the Flowbean, the value is the phone number.
        job.setMapOutputKeyClass(Flowbean.class);
        job.setMapOutputValueClass(Text.class);
        // Final output kv types: <phone number, Flowbean>.
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Flowbean.class);
        // Paths may be supplied as arguments; fall back to the original defaults.
        String input = args.length > 0 ? args[0] : "E:\\Flowsum\\output";
        String output = args.length > 1 ? args[1] : "E:\\Flowsum\\outputsort";
        FileInputFormat.setInputPaths(job, input);
        FileOutputFormat.setOutputPath(job, new Path(output));
        // Submit the job and block until it finishes; exit code reflects success.
        boolean res = job.waitForCompletion(true);
        System.exit(res ? 0 : 1);
    }
}
在Flowbean类中的compareTo方法自定义实现倒序排序。
代码中Mapper思路:将Flowbean类型(总流量)作为key,Text类型(手机号)作为value。这样做的原因是MapReduce框架在shuffle阶段只按key排序,把总流量放进key,即可利用Flowbean中自定义的compareTo方法实现按总流量倒序。
Reduce思路:注意总流量并不一定唯一——样例数据中总流量240就对应三个手机号(13719199419、13760778710、13926251106)。compareTo结果相等的key会被合并到同一次reduce调用中,迭代器里可能包含多个手机号。因此不能只取第一个值对调kv输出,而应遍历整个迭代器,对每个手机号都输出一条记录:
for (Text phone : values) { context.write(phone, key); }
最终输出kv为<Text(手机号),Flowbean(总流量) >
运行结果: