Implementing Mobile Data Traffic Statistics with MapReduce

FlowCount.java

package cn.itheima.bigdata.hadoop.mr.flowcount;

import java.io.IOException;

import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;


// Hadoop uses its own serialization mechanism, which differs from JDK serialization: it is more compact

public class FlowCount {


    public static class FlowCountMapper extends Mapper<LongWritable, Text, Text, FlowBean>{
        private FlowBean flowBean = new FlowBean();

        @Override
        protected void map(LongWritable key, Text value,Context context)
                throws IOException, InterruptedException {
            try {
                // Take one line of input
                String line = value.toString();
                // Split the line into fields
                String[] fields = StringUtils.split(line, "\t");
                // Pick out the fields we need: phone number, upstream and downstream traffic
                String phoneNbr = fields[1];
                long up_flow = Long.parseLong(fields[fields.length - 3]);
                long d_flow = Long.parseLong(fields[fields.length - 2]);
                // Pack the fields into a FlowBean
                flowBean.set(phoneNbr, up_flow, d_flow);

                // Emit the traffic data keyed by phone number
                context.write(new Text(phoneNbr), flowBean);
            } catch (Exception e) {
                // Skip malformed lines but report them so they can be inspected
                System.out.println("exception occurred in mapper, skipping line: " + value.toString());
            }

        }
    }


    public static class FlowCountReducer extends Reducer<Text, FlowBean, Text, FlowBean>{
        private FlowBean flowBean = new FlowBean();

        @Override
        protected void reduce(Text key, Iterable<FlowBean> values,Context context)
                throws IOException, InterruptedException {

            long up_flow_sum = 0;
            long d_flow_sum = 0;

            // Accumulate the upstream and downstream traffic for this phone number
            for (FlowBean bean : values) {
                up_flow_sum += bean.getUp_flow();
                d_flow_sum += bean.getD_flow();
            }

            // Reuse a single FlowBean and emit the totals keyed by phone number
            flowBean.set(key.toString(), up_flow_sum, d_flow_sum);

            context.write(key, flowBean);

        }

    }


    public static void main(String[] args) throws Exception {

        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf,"flowjob");

        job.setJarByClass(FlowCount.class);

        job.setMapperClass(FlowCountMapper.class);
        job.setReducerClass(FlowCountReducer.class);

        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(FlowBean.class);

        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(FlowBean.class);

        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(TextOutputFormat.class);

        FileInputFormat.setInputPaths(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        // Exit with a non-zero status if the job fails
        System.exit(job.waitForCompletion(true) ? 0 : 1);


    }

}


FlowCountSort.java

package cn.itheima.bigdata.hadoop.mr.flowcount;

import java.io.IOException;

import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;

public class FlowCountSort {

    public static class FlowCountSortMapper extends Mapper<LongWritable, Text, FlowBean, NullWritable>{
        FlowBean bean =  new FlowBean();
        @Override
        protected void map(LongWritable key, Text value,Context context)
                throws IOException, InterruptedException {

            // Each input line is one record of FlowCount's output: phone \t up_flow \t d_flow \t sum_flow
            String line = value.toString();

            String[] fields = StringUtils.split(line, "\t");

            String phoneNbr = fields[0];
            long up_flow = Long.parseLong(fields[1]);
            long d_flow = Long.parseLong(fields[2]);

            // Use the FlowBean itself as the key so the shuffle sorts records by FlowBean.compareTo
            bean.set(phoneNbr, up_flow, d_flow);
            context.write(bean, NullWritable.get());

        }


    }

    public static class FlowCountSortReducer extends Reducer<FlowBean, NullWritable, Text, FlowBean>{


        @Override
        protected void reduce(FlowBean bean, Iterable<NullWritable> values,Context context)
                throws IOException, InterruptedException {


            // Keys arrive already sorted by total traffic; re-emit them keyed by phone number
            context.write(new Text(bean.getPhoneNbr()), bean);


        }
    }


    public static void main(String[] args) throws Exception {


        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf,"sortjob");

        job.setJarByClass(FlowCountSort.class);

        job.setMapperClass(FlowCountSortMapper.class);
        job.setReducerClass(FlowCountSortReducer.class);

        job.setMapOutputKeyClass(FlowBean.class);
        job.setMapOutputValueClass(NullWritable.class);

        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(FlowBean.class);

        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(TextOutputFormat.class);

        FileInputFormat.setInputPaths(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        // Exit with a non-zero status if the job fails
        System.exit(job.waitForCompletion(true) ? 0 : 1);

    }
}
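
In practice the two jobs above run back to back: FlowCount aggregates per-phone traffic into an intermediate directory, and FlowCountSort then reads that directory and sorts the records by total traffic. The original post does not include a combined driver, so the class below is only a minimal sketch of how the chaining could look; the class name FlowCountDriver and the three-argument path layout (input, intermediate, output) are assumptions made for this example.

package cn.itheima.bigdata.hadoop.mr.flowcount;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

// Illustrative sketch only: chains FlowCount and FlowCountSort via an intermediate directory.
public class FlowCountDriver {

    public static void main(String[] args) throws Exception {

        Path input = new Path(args[0]);
        Path intermediate = new Path(args[1]);   // per-phone totals produced by the first job
        Path output = new Path(args[2]);

        Configuration conf = new Configuration();

        // Job 1: sum upstream/downstream traffic per phone number
        Job countJob = Job.getInstance(conf, "flowjob");
        countJob.setJarByClass(FlowCount.class);
        countJob.setMapperClass(FlowCount.FlowCountMapper.class);
        countJob.setReducerClass(FlowCount.FlowCountReducer.class);
        countJob.setMapOutputKeyClass(Text.class);
        countJob.setMapOutputValueClass(FlowBean.class);
        countJob.setOutputKeyClass(Text.class);
        countJob.setOutputValueClass(FlowBean.class);
        FileInputFormat.setInputPaths(countJob, input);
        FileOutputFormat.setOutputPath(countJob, intermediate);
        if (!countJob.waitForCompletion(true)) {
            System.exit(1);
        }

        // Job 2: sort the aggregated records by total traffic (descending, via FlowBean.compareTo)
        Job sortJob = Job.getInstance(conf, "sortjob");
        sortJob.setJarByClass(FlowCountSort.class);
        sortJob.setMapperClass(FlowCountSort.FlowCountSortMapper.class);
        sortJob.setReducerClass(FlowCountSort.FlowCountSortReducer.class);
        sortJob.setMapOutputKeyClass(FlowBean.class);
        sortJob.setMapOutputValueClass(NullWritable.class);
        sortJob.setOutputKeyClass(Text.class);
        sortJob.setOutputValueClass(FlowBean.class);
        FileInputFormat.setInputPaths(sortJob, intermediate);
        FileOutputFormat.setOutputPath(sortJob, output);
        System.exit(sortJob.waitForCompletion(true) ? 0 : 1);
    }
}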


FlowBean.java

package cn.itheima.bigdata.hadoop.mr.flowcount;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

import org.apache.hadoop.io.WritableComparable;

public class FlowBean implements WritableComparable<FlowBean> {

    private String phoneNbr;
    private long up_flow;
    private long d_flow;
    private long sum_flow;

    public void set(String phoneNbr, long up_flow, long d_flow) {

        this.phoneNbr = phoneNbr;
        this.up_flow = up_flow;
        this.d_flow = d_flow;
        this.sum_flow = up_flow + d_flow;

    }

    public String getPhoneNbr() {
        return phoneNbr;
    }

    public void setPhoneNbr(String phoneNbr) {
        this.phoneNbr = phoneNbr;
    }

    public long getUp_flow() {
        return up_flow;
    }

    public void setUp_flow(long up_flow) {
        this.up_flow = up_flow;
    }

    public long getD_flow() {
        return d_flow;
    }

    public void setD_flow(long d_flow) {
        this.d_flow = d_flow;
    }

    public long getSum_flow() {
        return sum_flow;
    }

    public void setSum_flow(long sum_flow) {
        this.sum_flow = sum_flow;
    }

    /**
     * Serialization: write the data fields out as a byte stream.
     */
    @Override
    public void write(DataOutput out) throws IOException {

        out.writeUTF(phoneNbr);
        out.writeLong(up_flow);
        out.writeLong(d_flow);
        out.writeLong(sum_flow);

    }

    /**
     * Deserialization: read each data field back from the byte stream.
     * The fields must be read in the same order they were written.
     */
    @Override
    public void readFields(DataInput in) throws IOException {
        phoneNbr = in.readUTF();
        up_flow = in.readLong();
        d_flow = in.readLong();
        sum_flow = in.readLong();
    }

    @Override
    public String toString() {

        return up_flow + "\t" + d_flow + "\t" + sum_flow;
    }

    @Override
    public int compareTo(FlowBean o) {
        // Sort by total traffic in descending order; break ties by phone number
        // so that distinct records never compare as equal
        if (sum_flow != o.getSum_flow()) {
            return sum_flow > o.getSum_flow() ? -1 : 1;
        }
        return phoneNbr.compareTo(o.getPhoneNbr());
    }


}
MapReduce is a programming model for processing large data sets. In this mobile-traffic case, MapReduce can also be used to compute how much traffic users consume in different time periods.

First, the input data is split into chunks. The Map function then turns each record in a chunk into key-value pairs; the key can be the time period and the value the traffic figure.

Next, the Reduce function merges and computes over the mapped data: values that share the same key are combined, for example by summation, which yields the total traffic for each time period.

To illustrate, take a single day divided into time periods. Suppose we have a file of user traffic records in which every line describes one user's traffic usage at a particular point in time.

In the Map phase, each line is parsed into a key-value pair: the key is the time period (morning, noon, afternoon, evening, and so on) and the value is the traffic figure. Regular expressions or other string handling can be used to extract the time period and the traffic value.

In the Reduce phase, the traffic values belonging to the same time period are combined. For the "morning" key, for instance, all of its values are summed to give the total morning traffic.

Finally, the results can be written to an output file or stored in a database for further analysis and use.

With the MapReduce framework we can efficiently process large volumes of mobile traffic data and extract useful information, for example the traffic consumed in each time period or the peak-traffic hours, which helps carriers design more precise network optimization strategies. The same approach also applies to other big-data processing and analysis scenarios.
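
To make the time-period variant described above a little more concrete, the classes below are a minimal, illustrative sketch rather than part of the original case study. They assume each input line has the form phone \t HH:mm:ss \t traffic, and the names TimeSlotFlowCount, TimeSlotMapper and TimeSlotReducer are invented for this example.

package cn.itheima.bigdata.hadoop.mr.flowcount;

import java.io.IOException;

import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;

// Illustrative sketch only: aggregates traffic per time period instead of per phone number.
public class TimeSlotFlowCount {

    public static class TimeSlotMapper extends Mapper<LongWritable, Text, Text, LongWritable> {
        private final Text slot = new Text();
        private final LongWritable traffic = new LongWritable();

        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            // Assumed record layout: phone \t HH:mm:ss \t traffic
            String[] fields = StringUtils.split(value.toString(), "\t");
            int hour = Integer.parseInt(fields[1].substring(0, 2));
            // Map the hour of day to a coarse time period
            String period = hour < 6 ? "night" : hour < 12 ? "morning" : hour < 18 ? "afternoon" : "evening";
            slot.set(period);
            traffic.set(Long.parseLong(fields[2]));
            context.write(slot, traffic);
        }
    }

    public static class TimeSlotReducer extends Reducer<Text, LongWritable, Text, LongWritable> {
        @Override
        protected void reduce(Text key, Iterable<LongWritable> values, Context context)
                throws IOException, InterruptedException {
            long sum = 0;
            for (LongWritable v : values) {
                sum += v.get();   // total traffic for this time period
            }
            context.write(key, new LongWritable(sum));
        }
    }
}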