Implementing Mobile Data Traffic Statistics with MapReduce

FlowCount.java

package cn.itheima.bigdata.hadoop.mr.flowcount;

import java.io.IOException;

import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;


// Hadoop uses its own serialization mechanism, which differs from JDK serialization: it is more compact

public class FlowCount {


    public static class FlowCountMapper extends Mapper<LongWritable, Text, Text, FlowBean>{
        private FlowBean flowBean = new FlowBean();

        @Override
        protected void map(LongWritable key, Text value,Context context)
                throws IOException, InterruptedException {
            try {
                // Take one line of input
                String line = value.toString();
                // Split the line into fields
                String[] fields = StringUtils.split(line, "\t");
                // Pick out the fields we need: phone number, upstream and downstream traffic
                String phoneNbr = fields[1];
                long up_flow = Long.parseLong(fields[fields.length - 3]);
                long d_flow = Long.parseLong(fields[fields.length - 2]);
                // Pack the fields into a FlowBean
                flowBean.set(phoneNbr, up_flow, d_flow);

                // Emit the traffic data keyed by phone number
                context.write(new Text(phoneNbr), flowBean);
            } catch (Exception e) {
                // Skip malformed lines but report them so they can be inspected
                System.out.println("exception occurred in mapper, skipping line: " + value.toString());
            }

        }
    }


    public static class FlowCountReducer extends Reducer<Text, FlowBean, Text, FlowBean>{
        private FlowBean flowBean = new FlowBean();

        @Override
        protected void reduce(Text key, Iterable<FlowBean> values,Context context)
                throws IOException, InterruptedException {

            long up_flow_sum = 0;
            long d_flow_sum = 0;

            // Accumulate the upstream and downstream traffic for this phone number
            for (FlowBean bean : values) {
                up_flow_sum += bean.getUp_flow();
                d_flow_sum += bean.getD_flow();
            }

            // Reuse a single FlowBean and emit the totals keyed by phone number
            flowBean.set(key.toString(), up_flow_sum, d_flow_sum);

            context.write(key, flowBean);

        }

    }


    public static void main(String[] args) throws Exception {

        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf,"flowjob");

        job.setJarByClass(FlowCount.class);

        job.setMapperClass(FlowCountMapper.class);
        job.setReducerClass(FlowCountReducer.class);

        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(FlowBean.class);

        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(FlowBean.class);

        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(TextOutputFormat.class);

        FileInputFormat.setInputPaths(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        // Exit with a non-zero status if the job fails
        System.exit(job.waitForCompletion(true) ? 0 : 1);


    }

}


FlowCountSort.java

package cn.itheima.bigdata.hadoop.mr.flowcount;

import java.io.IOException;

import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;

public class FlowCountSort {

    public static class FlowCountSortMapper extends Mapper<LongWritable, Text, FlowBean, NullWritable>{
        FlowBean bean =  new FlowBean();
        @Override
        protected void map(LongWritable key, Text value,Context context)
                throws IOException, InterruptedException {

            // Each input line is one record of FlowCount's output: phone \t up_flow \t d_flow \t sum_flow
            String line = value.toString();

            String[] fields = StringUtils.split(line, "\t");

            String phoneNbr = fields[0];
            long up_flow = Long.parseLong(fields[1]);
            long d_flow = Long.parseLong(fields[2]);

            // Use the FlowBean itself as the key so the shuffle sorts records by FlowBean.compareTo
            bean.set(phoneNbr, up_flow, d_flow);
            context.write(bean, NullWritable.get());

        }


    }

    public static class FlowCountSortReducer extends Reducer<FlowBean, NullWritable, Text, FlowBean>{


        @Override
        protected void reduce(FlowBean bean, Iterable<NullWritable> values,Context context)
                throws IOException, InterruptedException {


            // Keys arrive already sorted by total traffic; re-emit them keyed by phone number
            context.write(new Text(bean.getPhoneNbr()), bean);


        }
    }


    public static void main(String[] args) throws Exception {


        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf,"sortjob");

        job.setJarByClass(FlowCountSort.class);

        job.setMapperClass(FlowCountSortMapper.class);
        job.setReducerClass(FlowCountSortReducer.class);

        job.setMapOutputKeyClass(FlowBean.class);
        job.setMapOutputValueClass(NullWritable.class);

        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(FlowBean.class);

        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(TextOutputFormat.class);

        FileInputFormat.setInputPaths(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        // Exit with a non-zero status if the job fails
        System.exit(job.waitForCompletion(true) ? 0 : 1);

    }
}
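
In practice the two jobs above run back to back: FlowCount aggregates per-phone traffic into an intermediate directory, and FlowCountSort then reads that directory and sorts the records by total traffic. The original post does not include a combined driver, so the class below is only a minimal sketch of how the chaining could look; the class name FlowCountDriver and the three-argument path layout (input, intermediate, output) are assumptions made for this example.

package cn.itheima.bigdata.hadoop.mr.flowcount;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

// Illustrative sketch only: chains FlowCount and FlowCountSort via an intermediate directory.
public class FlowCountDriver {

    public static void main(String[] args) throws Exception {

        Path input = new Path(args[0]);
        Path intermediate = new Path(args[1]);   // per-phone totals produced by the first job
        Path output = new Path(args[2]);

        Configuration conf = new Configuration();

        // Job 1: sum upstream/downstream traffic per phone number
        Job countJob = Job.getInstance(conf, "flowjob");
        countJob.setJarByClass(FlowCount.class);
        countJob.setMapperClass(FlowCount.FlowCountMapper.class);
        countJob.setReducerClass(FlowCount.FlowCountReducer.class);
        countJob.setMapOutputKeyClass(Text.class);
        countJob.setMapOutputValueClass(FlowBean.class);
        countJob.setOutputKeyClass(Text.class);
        countJob.setOutputValueClass(FlowBean.class);
        FileInputFormat.setInputPaths(countJob, input);
        FileOutputFormat.setOutputPath(countJob, intermediate);
        if (!countJob.waitForCompletion(true)) {
            System.exit(1);
        }

        // Job 2: sort the aggregated records by total traffic (descending, via FlowBean.compareTo)
        Job sortJob = Job.getInstance(conf, "sortjob");
        sortJob.setJarByClass(FlowCountSort.class);
        sortJob.setMapperClass(FlowCountSort.FlowCountSortMapper.class);
        sortJob.setReducerClass(FlowCountSort.FlowCountSortReducer.class);
        sortJob.setMapOutputKeyClass(FlowBean.class);
        sortJob.setMapOutputValueClass(NullWritable.class);
        sortJob.setOutputKeyClass(Text.class);
        sortJob.setOutputValueClass(FlowBean.class);
        FileInputFormat.setInputPaths(sortJob, intermediate);
        FileOutputFormat.setOutputPath(sortJob, output);
        System.exit(sortJob.waitForCompletion(true) ? 0 : 1);
    }
}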


FlowBean.java

package cn.itheima.bigdata.hadoop.mr.flowcount;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

import org.apache.hadoop.io.WritableComparable;

public class FlowBean implements WritableComparable<FlowBean> {

    private String phoneNbr;
    private long up_flow;
    private long d_flow;
    private long sum_flow;

    public void set(String phoneNbr, long up_flow, long d_flow) {

        this.phoneNbr = phoneNbr;
        this.up_flow = up_flow;
        this.d_flow = d_flow;
        this.sum_flow = up_flow + d_flow;

    }

    public String getPhoneNbr() {
        return phoneNbr;
    }

    public void setPhoneNbr(String phoneNbr) {
        this.phoneNbr = phoneNbr;
    }

    public long getUp_flow() {
        return up_flow;
    }

    public void setUp_flow(long up_flow) {
        this.up_flow = up_flow;
    }

    public long getD_flow() {
        return d_flow;
    }

    public void setD_flow(long d_flow) {
        this.d_flow = d_flow;
    }

    public long getSum_flow() {
        return sum_flow;
    }

    public void setSum_flow(long sum_flow) {
        this.sum_flow = sum_flow;
    }

    /**
     * Serialization: write the data fields out as a byte stream.
     */
    @Override
    public void write(DataOutput out) throws IOException {

        out.writeUTF(phoneNbr);
        out.writeLong(up_flow);
        out.writeLong(d_flow);
        out.writeLong(sum_flow);

    }

    /**
     * Deserialization: read each data field back from the byte stream.
     * The fields must be read in the same order they were written.
     */
    @Override
    public void readFields(DataInput in) throws IOException {
        phoneNbr = in.readUTF();
        up_flow = in.readLong();
        d_flow = in.readLong();
        sum_flow = in.readLong();
    }

    @Override
    public String toString() {

        return up_flow + "\t" + d_flow + "\t" + sum_flow;
    }

    @Override
    public int compareTo(FlowBean o) {
        // Sort by total traffic in descending order; break ties by phone number
        // so that distinct records never compare as equal
        if (sum_flow != o.getSum_flow()) {
            return sum_flow > o.getSum_flow() ? -1 : 1;
        }
        return phoneNbr.compareTo(o.getPhoneNbr());
    }


}
MapReduce is a programming model for processing large data sets. In this mobile-traffic case, MapReduce can also be used to compute how much traffic users consume in different time periods.

First, the input data is split into chunks. The Map function then turns each record in a chunk into key-value pairs; the key can be the time period and the value the traffic figure.

Next, the Reduce function merges and computes over the mapped data: values that share the same key are combined, for example by summation, which yields the total traffic for each time period.

To illustrate, take a single day divided into time periods. Suppose we have a file of user traffic records in which every line describes one user's traffic usage at a particular point in time.

In the Map phase, each line is parsed into a key-value pair: the key is the time period (morning, noon, afternoon, evening, and so on) and the value is the traffic figure. Regular expressions or other string handling can be used to extract the time period and the traffic value.

In the Reduce phase, the traffic values belonging to the same time period are combined. For the "morning" key, for instance, all of its values are summed to give the total morning traffic.

Finally, the results can be written to an output file or stored in a database for further analysis and use.

With the MapReduce framework we can efficiently process large volumes of mobile traffic data and extract useful information, for example the traffic consumed in each time period or the peak-traffic hours, which helps carriers design more precise network optimization strategies. The same approach also applies to other big-data processing and analysis scenarios.
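
To make the time-period variant described above a little more concrete, the classes below are a minimal, illustrative sketch rather than part of the original case study. They assume each input line has the form phone \t HH:mm:ss \t traffic, and the names TimeSlotFlowCount, TimeSlotMapper and TimeSlotReducer are invented for this example.

package cn.itheima.bigdata.hadoop.mr.flowcount;

import java.io.IOException;

import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;

// Illustrative sketch only: aggregates traffic per time period instead of per phone number.
public class TimeSlotFlowCount {

    public static class TimeSlotMapper extends Mapper<LongWritable, Text, Text, LongWritable> {
        private final Text slot = new Text();
        private final LongWritable traffic = new LongWritable();

        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            // Assumed record layout: phone \t HH:mm:ss \t traffic
            String[] fields = StringUtils.split(value.toString(), "\t");
            int hour = Integer.parseInt(fields[1].substring(0, 2));
            // Map the hour of day to a coarse time period
            String period = hour < 6 ? "night" : hour < 12 ? "morning" : hour < 18 ? "afternoon" : "evening";
            slot.set(period);
            traffic.set(Long.parseLong(fields[2]));
            context.write(slot, traffic);
        }
    }

    public static class TimeSlotReducer extends Reducer<Text, LongWritable, Text, LongWritable> {
        @Override
        protected void reduce(Text key, Iterable<LongWritable> values, Context context)
                throws IOException, InterruptedException {
            long sum = 0;
            for (LongWritable v : values) {
                sum += v.get();   // total traffic for this time period
            }
            context.write(key, new LongWritable(sum));
        }
    }
}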