Requirement: sort the records by upstream traffic in descending order.
Analysis: define a custom bean, FlowSortBean, and use it as the map output key, with the phone number as the map output value, because the MapReduce framework sorts the map output by key.
Raw data:
Columns from left to right:
phone number, upstream traffic, downstream traffic, upstream packets, downstream packets
13480253104 3 180 3 180
13502468823 57 110349 102 7335
13560439658 33 5892 24 2034
13600217502 37 203704 266 2257
13602846565 15 2910 12 1938
13660577991 24 690 9 6960
13719199419 4 0 0 240
13726230503 24 24681 27 2481
13760778710 2 120 2 120
13823070001 6 180 3 360
13826544101 4 0 0 264
13922314466 12 3720 12 3008
13925057413 69 48243 63 11058
13926251106 4 0 0 240
13926435656 2 1512 4 132
15013685858 28 3538 27 3659
15920133257 20 2936 20 3156
15989002119 3 180 3 1938
18211575961 15 2106 12 1527
18320173382 21 2412 18 9531
19984138413 20 1432 16 4116
Step 1: define FlowSortBean implementing WritableComparable so the beans can be compared and sorted
A note on Java's compareTo method:
- compareTo compares the current object (the receiver) with the method argument.
- It returns 0 if the two are equal.
- It returns a negative integer if the receiver is less than the argument (and so sorts before it).
- It returns a positive integer if the receiver is greater than the argument (and so sorts after it).
For example, if o1.compareTo(o2) returns a positive number, the current object (the receiver o1) is placed after the compared object (the argument o2); if it returns a negative number, o1 is placed first. When the values being compared are numbers, we can simply subtract: returning "argument minus receiver" is positive exactly when the argument is larger, which places larger values first and therefore gives us a descending sort.
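As a quick standalone illustration of the subtraction trick (plain Java, independent of Hadoop; the numbers are arbitrary):
import java.util.Arrays;
import java.util.List;
public class DescendingDemo {
    public static void main(String[] args) {
        List<Integer> flows = Arrays.asList(24, 69, 3, 57);
        // "argument minus receiver" (b - a) sorts in descending order,
        // mirroring the compareTo implementation below
        flows.sort((a, b) -> b - a);
        System.out.println(flows); // [69, 57, 24, 3]
    }
}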
package org.example.mapreduce.FlowSort;
import org.apache.hadoop.io.WritableComparable;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
public class FlowSortBean implements WritableComparable<FlowSortBean> {
private Integer upFlow; //upstream traffic (the sort key)
private Integer downFlow; //downstream traffic
private Integer upCountFlow; //upstream packet count
private Integer downCountFlow; //downstream packet count
public Integer getUpFlow() { return upFlow; }
public void setUpFlow(Integer upFlow) {
this.upFlow = upFlow;
}
public Integer getDownFlow() {
return downFlow;
}
public void setDownFlow(Integer downFlow) {
this.downFlow = downFlow;
}
public Integer getUpCountFlow() {
return upCountFlow;
}
public void setUpCountFlow(Integer upCountFlow) {
this.upCountFlow = upCountFlow;
}
public Integer getDownCountFlow() {
return downCountFlow;
}
public void setDownCountFlow(Integer downCountFlow) {
this.downCountFlow = downCountFlow;
}
@Override
public String toString() {
return upFlow + "\t" + downFlow + "\t" + upCountFlow + "\t" + downCountFlow;
}
//Sorting: compare on upstream traffic; "other minus this" yields descending order
@Override
public int compareTo(FlowSortBean flowSortBean) {
//subtraction is safe for these small values; Integer.compare(flowSortBean.upFlow, this.upFlow) would avoid any overflow risk
return flowSortBean.upFlow - this.upFlow;
}
//Serialization
@Override
public void write(DataOutput dataOutput) throws IOException {
dataOutput.writeInt(upFlow);
dataOutput.writeInt(downFlow);
dataOutput.writeInt(upCountFlow);
dataOutput.writeInt(downCountFlow);
}
//Deserialization
@Override
public void readFields(DataInput dataInput) throws IOException {
this.upFlow = dataInput.readInt();
this.downFlow = dataInput.readInt();
this.upCountFlow = dataInput.readInt();
this.downCountFlow = dataInput.readInt();
}
}
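As a quick sanity check of the ordering (plain Java, outside the MapReduce job; the upFlow values are taken from the data above):
public class CompareToCheck {
    public static void main(String[] args) {
        FlowSortBean a = new FlowSortBean();
        a.setUpFlow(57); // 13502468823
        FlowSortBean b = new FlowSortBean();
        b.setUpFlow(3);  // 13480253104
        // negative result: a sorts before b, i.e. the larger upFlow comes first
        System.out.println(a.compareTo(b)); // 3 - 57 = -54
    }
}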
Step 2: define the FlowSortMapper class
package org.example.mapreduce.FlowSort;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import java.io.IOException;
public class FlowSortMapper extends Mapper<LongWritable, Text,FlowSortBean,Text> {
@Override
protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
//K1: the byte offset of the line
//V1: one line of text; first split it into fields
String[] split = value.toString().split("\t");
String phone = split[0];
//create the FlowSortBean object
FlowSortBean flowSortBean = new FlowSortBean();
//convert the string fields to numbers
flowSortBean.setUpFlow(Integer.parseInt(split[1]));
flowSortBean.setDownFlow(Integer.parseInt(split[2]));
flowSortBean.setUpCountFlow(Integer.parseInt(split[3]));
flowSortBean.setDownCountFlow(Integer.parseInt(split[4]));
context.write(flowSortBean,new Text(phone));
}
}
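If the input might contain blank or malformed lines, a slightly more defensive map method (a sketch, not part of the original job; it assumes the five-column layout above) can skip them instead of failing the task:
@Override
protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
    String[] split = value.toString().split("\t");
    if (split.length < 5) {
        return; // skip blank or malformed lines
    }
    try {
        FlowSortBean flowSortBean = new FlowSortBean();
        flowSortBean.setUpFlow(Integer.parseInt(split[1]));
        flowSortBean.setDownFlow(Integer.parseInt(split[2]));
        flowSortBean.setUpCountFlow(Integer.parseInt(split[3]));
        flowSortBean.setDownCountFlow(Integer.parseInt(split[4]));
        context.write(flowSortBean, new Text(split[0]));
    } catch (NumberFormatException e) {
        // skip lines whose numeric fields do not parse
    }
}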
Step 3: define the FlowSortReducer class
package org.example.mapreduce.FlowSort;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
import java.io.IOException;
public class FlowSortReducer extends Reducer<FlowSortBean,Text,Text,FlowSortBean> {
@Override
protected void reduce(FlowSortBean key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
//After the shuffle, the data looks like:
// K2 V2
// flowSortBean1 <phone1, phone2, phone3>
// flowSortBean2 <phone4, phone5>
//beans with equal upFlow compare as 0, so their phone numbers arrive together in one reduce call;
//we therefore iterate over the values and write each (phone, bean) pair to the context
for (Text value:values) {
context.write(value, key);
}
}
}
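Because compareTo looks only at upFlow, two beans with the same upstream traffic compare as equal, which is exactly why several phone numbers can share one key above. A tiny illustration using a plain TreeMap, which, like the shuffle, treats compareTo == 0 as "the same key" (not Hadoop code, just the comparison semantics; the values come from the data above):
import java.util.TreeMap;
public class TieDemo {
    public static void main(String[] args) {
        TreeMap<FlowSortBean, String> map = new TreeMap<>();
        FlowSortBean a = new FlowSortBean();
        a.setUpFlow(4);
        FlowSortBean b = new FlowSortBean();
        b.setUpFlow(4);
        map.put(a, "13719199419");
        map.put(b, "13826544101"); // replaces the first entry: compareTo == 0
        System.out.println(map.size()); // 1
    }
}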
Step 4: the program entry point, FlowSortJobMain
package org.example.mapreduce.FlowSort;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
public class FlowSortJobMain extends Configured implements Tool {
@Override
public int run(String[] strings) throws Exception {
//Step 1: create a Job object
Job job = Job.getInstance(super.getConf(),"mapreduce_flowSort");
//Step 2: configure the Job object
//1. specify how and where the input is read
job.setInputFormatClass(TextInputFormat.class);
TextInputFormat.addInputPath(job,new Path("file:///C:\\Myprogram\\IN\\FlowCount"));
//2. specify the map-phase class and its output types
job.setMapperClass(FlowSortMapper.class);
//set the data type of K2 (the map output key)
job.setMapOutputKeyClass(FlowSortBean.class);
//set the data type of V2 (the map output value)
job.setMapOutputValueClass(Text.class);
//3. partition  4. sort  5. combine  6. group: all left at their defaults; the sort uses FlowSortBean.compareTo
//7. specify the reduce-phase class and its output types
job.setReducerClass(FlowSortReducer.class);
//set the data type of K3 (the reduce output key)
job.setOutputKeyClass(Text.class);
//set the data type of V3 (the reduce output value)
job.setOutputValueClass(FlowSortBean.class);
//8. set the output format and path
job.setOutputFormatClass(TextOutputFormat.class);
TextOutputFormat.setOutputPath(job,new Path("file:///C:\\Myprogram\\OUT\\flowsort_out5"));
boolean over = job.waitForCompletion(true);
return over ? 0 : 1;
}
public static void main(String[] args) throws Exception {
Configuration configuration = new Configuration();
int run = ToolRunner.run(configuration,new FlowSortJobMain(),args);
System.exit(run);
}
}
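Running the job on the data above should produce an output file (part-r-00000) whose first lines look like this, ordered by descending upstream traffic (phone number followed by the bean's toString, all tab-separated):
13925057413	69	48243	63	11058
13502468823	57	110349	102	7335
13600217502	37	203704	266	2257
13560439658	33	5892	24	2034
...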
Perfect, confetti time!