mapreduce(JAVA)实现（大数据）电话号码对应的流量排序(倒序)

最新推荐文章于 2023-05-30 22:53:01 发布

SunmonDong

最新推荐文章于 2023-05-30 22:53:01 发布

阅读量748

点赞数

分类专栏： mapreduce 文章标签： mapreduce 大数据 java

本文链接：https://blog.csdn.net/s646575997/article/details/51111900

版权

mapreduce 专栏收录该内容

5 篇文章 0 订阅

订阅专栏

仅供参考：
使用自定义类在mapreduce中的传递，详细解释：使用wordcount详解mapreduce使用
实体类源码：

package com.sfd.vo;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableComparable;
/**
 * 自定义的包装类：包装的电话号码，上限流量，下限流量，总流量;
 * 该类实现了WritableComparable接口，从而是该类可以在map和reduce
 * 之间序列化传递，并且可通过compareTo 方法实现排序
 * 
 * @author sfd
 *
 */
public class FlowBean implements WritableComparable<FlowBean>{

    private String phone;
    private  long up_flow;
    private long d_flow;
    private long s_flow;
    public FlowBean(String phone, long up_flow, long d_flow) {
        super();
        this.phone = phone;
        this.up_flow = up_flow;
        this.d_flow = d_flow;
        this .s_flow=up_flow+d_flow;
    }
    public FlowBean() {
        super();
        // TODO Auto-generated constructor stub
    }
    public String getPhone() {
        return phone;
    }
    public void setPhone(String phone) {
        this.phone = phone;
    }
    public long getUp_flow() {
        return up_flow;
    }
    public void setUp_flow(long up_flow) {
        this.up_flow = up_flow;
    }
    public long getD_flow() {
        return d_flow;
    }
    public void setD_flow(long d_flow) {
        this.d_flow = d_flow;
    }
    public long getS_flow() {
        return s_flow;
    }
    public void setS_flow(long s_flow) {
        this.s_flow = s_flow;
    }
    public String toString() {
        return phone+"\t"+up_flow+"\t"+d_flow+"\t"+s_flow;
    }

    /**
     * 序列化传递
     */
    public void write(DataOutput out) throws IOException {

        out.writeUTF(phone);
        out.writeLong(up_flow);
        out.writeLong(d_flow);      
        out.writeLong(s_flow);
    }
    /**
     * 反序列化接受
     */
    public void readFields(DataInput in) throws IOException {
        phone=in.readUTF();
        up_flow=in.readLong();
        d_flow=in.readLong();
        s_flow=in.readLong();
    }
    /**
     * 用来比较FlowBean对象的大小用来排序
     */
    public int compareTo(FlowBean fb) {
        //当传入map中的电话号码相同时判断
        if(phone.equals(fb.phone)){
            d_flow+=fb.d_flow;
            s_flow+=fb.s_flow;
            up_flow+=fb.up_flow;
            return 0;
        }
        //用来倒序排列
        return fb.s_flow<s_flow?-1:1;
    }
}

map源码：

package com.sfd.flow;

import java.io.IOException;

import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

import com.sfd.vo.FlowBean;

/**
 * Mapper that turns each tab-separated input line into a {@link FlowBean} key.
 *
 * <p>The phone number is read from column 1 and the upstream and downstream
 * traffic from columns 7 and 8 (zero-based). The bean is emitted as the key with
 * a {@link NullWritable} value, so all information travels in the key.
 *
 * <p>Lines that have too few columns or whose traffic columns are not valid
 * numbers are skipped and counted in the {@code FlowMap} counter group instead
 * of failing the whole task.
 *
 * @author sfd
 */
public class FlowMap extends Mapper<LongWritable,Text,FlowBean, NullWritable>{

    /** Zero-based column holding the phone number. */
    private static final int PHONE_INDEX = 1;
    /** Zero-based column holding the upstream traffic. */
    private static final int UP_FLOW_INDEX = 7;
    /** Zero-based column holding the downstream traffic. */
    private static final int DOWN_FLOW_INDEX = 8;
    /** Counter group under which skipped lines are reported. */
    private static final String COUNTER_GROUP = "FlowMap";

    /**
     * Parses one input line and emits a {@link FlowBean} for it.
     *
     * @param key     input key supplied by the framework; not used
     * @param value   one line of input text
     * @param context sink for the output record and the counters
     * @throws IOException          if writing the output fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    protected void map(LongWritable key, Text value,Context context)
            throws IOException, InterruptedException {

        // NOTE(review): StringUtils.split appears to merge adjacent separators, so a
        // line with an empty column would shift the indexes below - confirm against the data.
        String[] fields = StringUtils.split(value.toString(), "\t");

        // Blank or truncated lines cannot supply all three columns.
        if (fields.length <= DOWN_FLOW_INDEX) {
            context.getCounter(COUNTER_GROUP, "MALFORMED_LINES").increment(1);
            return;
        }

        long upFlow;
        long downFlow;
        try {
            upFlow = Long.parseLong(fields[UP_FLOW_INDEX].trim());
            downFlow = Long.parseLong(fields[DOWN_FLOW_INDEX].trim());
        } catch (NumberFormatException ignored) {
            // A header or corrupted line: report it through a counter and move on.
            context.getCounter(COUNTER_GROUP, "NON_NUMERIC_FLOW").increment(1);
            return;
        }

        String phone = fields[PHONE_INDEX].trim();
        context.write(new FlowBean(phone, upFlow, downFlow), NullWritable.get());
    }
}

reduce源码：

package com.sfd.flow;

import java.io.IOException;

import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.Reducer;

import com.sfd.vo.FlowBean;
/**
 * Reducer that passes each incoming key through unchanged.
 *
 * <p>For every call it writes the {@link FlowBean} key exactly once together with
 * a {@link NullWritable} value; the incoming values are not iterated.
 *
 * @author sfd
 */
public class FlowReduce extends Reducer<FlowBean, NullWritable, FlowBean, NullWritable>{

        /**
         * Emits the key of the current group once.
         *
         * @param fb      key of the current group
         * @param values  values of the group; ignored
         * @param context sink for the output record
         * @throws IOException          if writing the output fails
         * @throws InterruptedException if the task is interrupted while writing
         */
        @Override
        protected void reduce(FlowBean fb, Iterable<NullWritable> values,Context context)
                throws IOException, InterruptedException {

            // The value carries no data; all information is in the key.
            final NullWritable placeholder = NullWritable.get();
            context.write(fb, placeholder);
        }
}

作业的调度类：

package com.sfd.flow;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

import com.sfd.vo.FlowBean;
/**
 *  用来描述一个特定的作业，（标准形式）
 * @author sfd
 *
 */
public class FlowRunner extends Configured implements Tool{

    public int run(String[] args) throws Exception {

        Configuration conf=new Configuration();
        Job flowJob=Job.getInstance(conf);

        flowJob.setJarByClass(FlowRunner.class);

        flowJob.setMapperClass(FlowMap.class);
        flowJob.setReducerClass(FlowReduce.class);

        flowJob.setOutputKeyClass(FlowBean.class);
        flowJob.setOutputValueClass(NullWritable.class);

        FileInputFormat.setInputPaths(flowJob, new Path(args[0]));
        FileOutputFormat.setOutputPath(flowJob, new Path(args[1]));
        //运行错误时 返回值为0 错误退出
        //运行正确时 返回值为1 正确退出
        return flowJob.waitForCompletion(true)?0:1;
    }

    public static void main(String[] args) throws Exception{

        int result=ToolRunner.run(new Configuration(), new FlowRunner(), args);
        System.exit(result);

    }


}