Requirement: sort the records by upstream traffic in descending order.
Analysis: define a custom bean, FlowSortBean, and use it as the map output key, with the phone number as the map output value, because the MapReduce framework sorts the map output by key.
Raw data:
Columns from left to right:
phone number, upstream traffic, downstream traffic, upstream packets, downstream packets
13480253104 3 180 3 180
13502468823 57 110349 102 7335
13560439658 33 5892 24 2034
13600217502 37 203704 266 2257
13602846565 15 2910 12 1938
13660577991 24 690 9 6960
13719199419 4 0 0 240
13726230503 24 24681 27 2481
13760778710 2 120 2 120
13823070001 6 180 3 360
13826544101 4 0 0 264
13922314466 12 3720 12 3008
13925057413 69 48243 63 11058
13926251106 4 0 0 240
13926435656 2 1512 4 132
15013685858 28 3538 27 3659
15920133257 20 2936 20 3156
15989002119 3 180 3 1938
18211575961 15 2106 12 1527
18320173382 21 2412 18 9531
19984138413 20 1432 16 4116
Step 1: define FlowSortBean implementing WritableComparable so the beans can be compared and sorted
A note on Java's compareTo method:
- compareTo compares the current object (the receiver) with the method argument.
- It returns 0 if the two are equal.
- It returns a negative integer if the receiver is less than the argument (and so sorts before it).
- It returns a positive integer if the receiver is greater than the argument (and so sorts after it).
For example, if o1.compareTo(o2) returns a positive number, the current object (the receiver o1) is placed after the compared object (the argument o2); if it returns a negative number, o1 is placed first. When the values being compared are numbers, we can simply subtract: returning "argument minus receiver" is positive exactly when the argument is larger, which places larger values first and therefore gives us a descending sort.
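As a quick standalone illustration of the subtraction trick (plain Java, independent of Hadoop; the numbers are arbitrary):
import java.util.Arrays;
import java.util.List;
public class DescendingDemo {
    public static void main(String[] args) {
        List<Integer> flows = Arrays.asList(24, 69, 3, 57);
        // "argument minus receiver" (b - a) sorts in descending order,
        // mirroring the compareTo implementation below
        flows.sort((a, b) -> b - a);
        System.out.println(flows); // [69, 57, 24, 3]
    }
}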
package org.example.mapreduce.FlowSort;
import org.apache.hadoop.io.WritableComparable;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
public class FlowSortBean implements WritableComparable<FlowSortBean> {
private Integer upFlow; //upstream traffic (the sort key)
private Integer downFlow; //downstream traffic
private Integer upCountFlow; //upstream packet count
private Integer downCountFlow; //downstream packet count
public Integer getUpFlow() { return upFlow; }
public void setUpFlow(Integer upFlow) {
this.upFlow = upFlow;
}
public Integer getDownFlow() {
return downFlow;
}
public void setDownFlow(Integer downFlow) {
this.downFlow = downFlow;
}
public Integer getUpCountFlow() {
return upCountFlow;
}
public void setUpCountFlow(Integer upCountFlow) {
this.upCountFlow = upCountFlow;
}
public Integer getDownCountFlow() {
return downCountFlow;
}
public void setDownCountFlow(Integer downCountFlow) {
this.downCountFlow = downCountFlow;
}
@Override
public String toString() {
return upFlow + "\t" + downFlow + "\t" + upCountFlow + "\t" + downCountFlow;
}
//Sorting: compare on upstream traffic; "other minus this" yields descending order
@Override
public int compareTo(FlowSortBean flowSortBean) {
//subtraction is safe for these small values; Integer.compare(flowSortBean.upFlow, this.upFlow) would avoid any overflow risk
return flowSortBean.upFlow - this.upFlow;
}
//Serialization
@Override
public void write(DataOutput dataOutput) throws IOException {
dataOutput.writeInt(upFlow);
dataOutput.writeInt(downFlow);
dataOutput.writeInt(upCountFlow);
dataOutput.writeInt(downCountFlow);
}
//Deserialization
@Override
public void readFields(DataInput dataInput) throws IOException {
this.upFlow = dataInput.readInt();
this.downFlow = dataInput.readInt();
this.upCountFlow = dataInput.readInt();
this.downCountFlow = dataInput.readInt();
}
}
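As a quick sanity check of the ordering (plain Java, outside the MapReduce job; the upFlow values are taken from the data above):
public class CompareToCheck {
    public static void main(String[] args) {
        FlowSortBean a = new FlowSortBean();
        a.setUpFlow(57); // 13502468823
        FlowSortBean b = new FlowSortBean();
        b.setUpFlow(3);  // 13480253104
        // negative result: a sorts before b, i.e. the larger upFlow comes first
        System.out.println(a.compareTo(b)); // 3 - 57 = -54
    }
}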
Step 2: define the FlowSortMapper class
package org.example.mapreduce.FlowSort;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import java.io.IOException;
public class FlowSortMapper extends Mapper<LongWritable, Text,FlowSortBean,Text> {
@Override
protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
//K1: the byte offset of the line
//V1: one line of text; first split it into fields
String[] split = value.toString().split("\t");
String phone = split[0];
//create the FlowSortBean object
FlowSortBean flowSortBean = new FlowSortBean();
//convert the string fields to numbers
flowSortBean.setUpFlow(Integer.parseInt(split[1]));
flowSortBean.setDownFlow(Integer.parseInt(split[2]));
flowSortBean.setUpCountFlow(Integer.parseInt(split[3]));
flowSortBean.setDownCountFlow(Integer.parseInt(split[4]));
context.write(flowSortBean,new Text(phone));
}
}
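If the input might contain blank or malformed lines, a slightly more defensive map method (a sketch, not part of the original job; it assumes the five-column layout above) can skip them instead of failing the task:
@Override
protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
    String[] split = value.toString().split("\t");
    if (split.length < 5) {
        return; // skip blank or malformed lines
    }
    try {
        FlowSortBean flowSortBean = new FlowSortBean();
        flowSortBean.setUpFlow(Integer.parseInt(split[1]));
        flowSortBean.setDownFlow(Integer.parseInt(split[2]));
        flowSortBean.setUpCountFlow(Integer.parseInt(split[3]));
        flowSortBean.setDownCountFlow(Integer.parseInt(split[4]));
        context.write(flowSortBean, new Text(split[0]));
    } catch (NumberFormatException e) {
        // skip lines whose numeric fields do not parse
    }
}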
Step 3: define the FlowSortReducer class
package org.example.mapreduce.FlowSort;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
import java.io.IOException;
public class FlowSortReducer extends Reducer<FlowSortBean,Text,Text,FlowSortBean> {
@Override
protected void reduce(FlowSortBean key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
//After the shuffle, the data looks like:
// K2 V2
// flowSortBean1 <phone1, phone2, phone3>
// flowSortBean2 <phone4, phone5>
//beans with equal upFlow compare as 0, so their phone numbers arrive together in one reduce call;
//we therefore iterate over the values and write each (phone, bean) pair to the context
for (Text value:values) {
context.write(value, key);
}
}
}
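Because compareTo looks only at upFlow, two beans with the same upstream traffic compare as equal, which is exactly why several phone numbers can share one key above. A tiny illustration using a plain TreeMap, which, like the shuffle, treats compareTo == 0 as "the same key" (not Hadoop code, just the comparison semantics; the values come from the data above):
import java.util.TreeMap;
public class TieDemo {
    public static void main(String[] args) {
        TreeMap<FlowSortBean, String> map = new TreeMap<>();
        FlowSortBean a = new FlowSortBean();
        a.setUpFlow(4);
        FlowSortBean b = new FlowSortBean();
        b.setUpFlow(4);
        map.put(a, "13719199419");
        map.put(b, "13826544101"); // replaces the first entry: compareTo == 0
        System.out.println(map.size()); // 1
    }
}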
Step 4: the program entry point, FlowSortJobMain
package org.example.mapreduce.FlowSort;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
public class FlowSortJobMain extends Configured implements Tool {
@Override
public int run(String[] strings) throws Exception {
//Step 1: create a Job object
Job job = Job.getInstance(super.getConf(),"mapreduce_flowSort");
//Step 2: configure the Job object
//1. specify how and where the input is read
job.setInputFormatClass(TextInputFormat.class);
TextInputFormat.addInputPath(job,new Path("file:///C:\\Myprogram\\IN\\FlowCount"));
//2. specify the map-phase class and its output types
job.setMapperClass(FlowSortMapper.class);
//set the data type of K2 (the map output key)
job.setMapOutputKeyClass(FlowSortBean.class);
//set the data type of V2 (the map output value)
job.setMapOutputValueClass(Text.class);
//3. partition  4. sort  5. combine  6. group: all left at their defaults; the sort uses FlowSortBean.compareTo
//7. specify the reduce-phase class and its output types
job.setReducerClass(FlowSortReducer.class);
//set the data type of K3 (the reduce output key)
job.setOutputKeyClass(Text.class);
//set the data type of V3 (the reduce output value)
job.setOutputValueClass(FlowSortBean.class);
//8. set the output format and path
job.setOutputFormatClass(TextOutputFormat.class);
TextOutputFormat.setOutputPath(job,new Path("file:///C:\\Myprogram\\OUT\\flowsort_out5"));
boolean over = job.waitForCompletion(true);
return over ? 0 : 1;
}
public static void main(String[] args) throws Exception {
Configuration configuration = new Configuration();
int run = ToolRunner.run(configuration,new FlowSortJobMain(),args);
System.exit(run);
}
}
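Running the job on the data above should produce an output file (part-r-00000) whose first lines look like this, ordered by descending upstream traffic (phone number followed by the bean's toString, all tab-separated):
13925057413	69	48243	63	11058
13502468823	57	110349	102	7335
13600217502	37	203704	266	2257
13560439658	33	5892	24	2034
...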
Perfect, confetti time!