Hadoop案例二(用户流量统计)

用户流量统计

一、问题描述

现有一hdfs文件/input/phoneCount/phone_date.txt,文件中包含多个用户的网站访问情况。为了计算用户的流量费用,现需要统计出文件中每个用户的总上行流量、下行流量、总流量。结果保存到/output/phoneCount目录下。
phone_date.txt:

  13726230503    00-FD-07-A4-72-B8:CMCC    120.196.100.82    i02.c.aliimg.com        24    27    2481    24681    200
  13826544101    5C-0E-8B-C7-F1-E0:CMCC    120.197.40.4            4    0    264    0    200
  13926435656    20-10-7A-28-CC-0A:CMCC    120.196.100.99            2    4    132    1512    200
  13926251106    5C-0E-8B-8B-B1-50:CMCC    120.197.40.4            4    0    240    0    200
  18211575961    94-71-AC-CD-E6-18:CMCC-EASY    120.196.100.99    iface.qiyi.com    视频网站    15    12   1527  2106 200
  84138413    5C-0E-8B-8C-E8-20:7DaysInn    120.197.40.4    122.72.52.12        20    16    4116    1432    200
  13560439658    C4-17-FE-BA-DE-D9:CMCC    120.196.100.99            18    15    1116    954    200
  15920133257    5C-0E-8B-C7-BA-20:CMCC    120.197.40.4    sug.so.360.cn    信息安全    20    20    3156    2936    200
  13719199419    68-A1-B7-03-07-B1:CMCC-EASY    120.196.100.82            4    0    240    0    200
  13660577991    5C-0E-8B-92-5C-20:CMCC-EASY    120.197.40.4    s19.cnzz.com    站点统计    24    9    6960    690    200
  15013685858    5C-0E-8B-C7-F7-90:CMCC    120.197.40.4    rank.ie.sogou.com    搜索引擎    28    27    3659    3538  200
  15989002119    E8-99-C4-4E-93-E0:CMCC-EASY    120.196.100.99    www.umeng.com    站点统计    3    3    1938    180  200
    13560439658    C4-17-FE-BA-DE-D9:CMCC    120.196.100.99            15    9    918    4938    200
    13480253104    5C-0E-8B-C7-FC-80:CMCC-EASY    120.197.40.4            3    3    180    180    200
    13602846565    5C-0E-8B-8B-B6-00:CMCC    120.197.40.4    2052.flash2-http.qq.com    综合门户    15    12    1938    2910    200
    13922314466    00-FD-07-A2-EC-BA:CMCC    120.196.100.82    img.qfc.cn        12    12    3008    3720    200
13502468823    5C-0A-5B-6A-0B-D4:CMCC-EASY    120.196.100.99   y0.ifengimg.com  综合门户    57   102   7335 110349    200
18320173382    84-25-DB-4F-10-1A:CMCC-EASY  120.196.100.99 input.shouji.sogou.com  搜索引擎  21  18   9531    2412    200
13925057413 00-1F-64-E1-E6-9A:CMCC    120.196.100.55    t3.baidu.com    搜索引擎    69    63    11058    48243    200
    13760778710    00-FD-07-A4-7B-08:CMCC    120.196.100.82            2    2    120    120    200
    13560436666    00-FD-07-A4-72-B8:CMCC    120.196.100.82    i02.c.aliimg.com        24    27    2481    24681    200
    13560436666    C4-17-FE-BA-DE-D9:CMCC    120.196.100.99            18    15    1116    954    200

二、问题分析

  • 有的行是9列,有的是10列,长度不统一。但是我们用到的列相对位置是固定的。电话号码在第一列,上行流量在倒数第三列,下行流量在倒数第二列。
  • 列之间的空格分隔符个数不统一。

三、代码

  1. 自定义Writable类,PhoneFlow
package com.example.demo.FlowBean;

import org.apache.hadoop.io.Writable;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

/**
 * Hadoop {@link Writable} value holding the traffic figures of one phone number:
 * upstream traffic, downstream traffic and their total.
 *
 * <p>Fields are serialized in the order number, upStream, downStream, sumStream;
 * {@link #readFields(DataInput)} reads them back in exactly that order.
 */
public class PhoneFlow implements Writable {
    private String number;
    private long upStream;
    private long downStream;
    private long sumStream;

    /** No-arg constructor; it is needed when the object is deserialized. */
    public PhoneFlow(){}

    /**
     * Creates a fully populated record.
     *
     * @param number     phone number the figures belong to
     * @param upStream   upstream traffic
     * @param downStream downstream traffic
     * @param sumStream  total traffic; stored as given, not recomputed here
     */
    public PhoneFlow(String number, long upStream, long downStream,long sumStream) {
        this.number = number;
        this.upStream = upStream;
        this.downStream = downStream;
        this.sumStream=sumStream;
    }

    /**
     * Serializes this object to the data stream.
     *
     * @param dataOutput stream to write to
     * @throws IOException if writing to the stream fails
     */
    @Override
    public void write(DataOutput dataOutput) throws IOException {
        // writeUTF(null) throws NullPointerException, and the no-arg constructor
        // leaves number unset, so a missing number is written as the empty string.
        dataOutput.writeUTF(number == null ? "" : number);
        dataOutput.writeLong(upStream);
        dataOutput.writeLong(downStream);
        dataOutput.writeLong(sumStream);
    }

    /** @return the phone number, or {@code null} if it was never set */
    public String getNumber() {
        return number;
    }

    /** @param number the phone number to store */
    public void setNumber(String number) {
        this.number = number;
    }

    /** @return the upstream traffic */
    public long getUpStream() {
        return upStream;
    }

    /** @param upStream the upstream traffic to store */
    public void setUpStream(long upStream) {
        this.upStream = upStream;
    }

    /** @return the downstream traffic */
    public long getDownStream() {
        return downStream;
    }

    /** @param downStream the downstream traffic to store */
    public void setDownStream(long downStream) {
        this.downStream = downStream;
    }

    /** @return the total traffic */
    public long getSumStream() {
        return sumStream;
    }

    /** @param sumStream the total traffic to store */
    public void setSumStream(long sumStream) {
        this.sumStream = sumStream;
    }

    /**
     * Restores this object's fields from the data stream.
     * The read order must match the field order used by {@link #write(DataOutput)}.
     *
     * @param dataInput stream to read from
     * @throws IOException if reading from the stream fails
     */
    @Override
    public void readFields(DataInput dataInput) throws IOException {
        this.number=dataInput.readUTF();
        this.upStream=dataInput.readLong();
        this.downStream=dataInput.readLong();
        this.sumStream=dataInput.readLong();
    }

    /**
     * Renders upstream, downstream and total traffic separated by two spaces.
     * The phone number is not included.
     */
    @Override
    public String toString() {
        return " "+upStream+"  "+downStream+"  "+sumStream+"  ";
    }
}
  2. mapper类
package com.example.demo.map;

import com.example.demo.FlowBean.PhoneFlow;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

import java.io.IOException;

/**
 * Mapper that converts one input line into a (phone number, {@link PhoneFlow}) pair.
 *
 * <p>Columns are separated by one or more whitespace characters and the column count
 * varies between lines, so fields are located by position: the phone number is the
 * first column, upstream traffic is third from the end and downstream traffic is
 * second from the end.
 */
public class PhoneMapper extends Mapper<LongWritable, Text, Text, PhoneFlow> {
    /** Fewest columns a usable line can have: phone number, upstream, downstream, final column. */
    private static final int MIN_COLUMNS = 4;

    /**
     * Parses one line and emits the phone number with its traffic figures.
     * Blank lines and lines with fewer than {@code MIN_COLUMNS} columns are skipped.
     *
     * @param key     input key supplied by the framework (not used)
     * @param value   one line of the input file
     * @param context sink for the emitted pair
     * @throws NumberFormatException if a traffic column is not a valid long
     */
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        // Trim first: a leading blank would otherwise make split() return an empty
        // first element and shift the phone number from index 0 to index 1.
        String line = value.toString().trim();
        if (line.isEmpty()) {
            return;
        }
        String[] columns = line.split("\\s+"); // one or more whitespace characters separate columns
        if (columns.length < MIN_COLUMNS) {
            // Too short for the positional lookups below to address real traffic columns.
            return;
        }
        String number = columns[0];
        long upStream = Long.parseLong(columns[columns.length - 3]);
        long downStream = Long.parseLong(columns[columns.length - 2]);
        context.write(new Text(number), new PhoneFlow(number, upStream, downStream, upStream + downStream));
    }
}
  3. reducer
package com.example.demo.reduce;

import com.example.demo.FlowBean.PhoneFlow;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

import java.io.IOException;

/**
 * Reducer that adds up all traffic records sharing one key and emits a single
 * {@link PhoneFlow} holding the totals.
 */
public class PhoneReducer extends Reducer<Text, PhoneFlow,Text,PhoneFlow> {
    /**
     * Sums upstream and downstream traffic over {@code values} and writes the result under {@code key}.
     *
     * @param key     the key the records are grouped by; also stored as the result's number
     * @param values  the records grouped under {@code key}
     * @param context sink for the aggregated record
     */
    @Override
    protected void reduce(Text key, Iterable<PhoneFlow> values, Context context) throws IOException, InterruptedException {
        long totalUp = 0L;
        long totalDown = 0L;
        for (PhoneFlow record : values) {
            totalUp += record.getUpStream();
            totalDown += record.getDownStream();
        }
        // The total is derived from the two accumulated sums rather than from each record's own sumStream.
        PhoneFlow aggregate = new PhoneFlow(key.toString(), totalUp, totalDown, totalUp + totalDown);
        context.write(key, aggregate);
    }
}
  4. driver类
package com.example.demo.driver;

import com.example.demo.FlowBean.PhoneFlow;
import com.example.demo.map.PhoneMapper;
import com.example.demo.map.WordCountMap;
import com.example.demo.reduce.PhoneReducer;
import com.example.demo.reduce.WordCountReduce;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Component;

import java.io.IOException;

/**
 * Spring component that configures and runs the phone-traffic MapReduce job,
 * wiring {@link PhoneMapper} and {@link PhoneReducer} together.
 */
@Component
public class PhoneCountDriver {
    @Autowired
    private Configuration configuration;

    /**
     * Builds the job, waits for it to finish, reports the outcome on standard output
     * and terminates the JVM with exit code 0 on success or 1 on failure.
     *
     * @param args {@code args[0]} is the input path, {@code args[1]} is the output directory
     * @throws IllegalArgumentException if fewer than two arguments are supplied
     * @throws IOException            propagated from job setup or execution
     * @throws ClassNotFoundException propagated from job execution
     * @throws InterruptedException   propagated from job execution
     */
    public void phoneCountDriver(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        if (args == null || args.length < 2) {
            throw new IllegalArgumentException("Expected 2 arguments: <input path> <output directory>");
        }

        // NOTE(review): the job name looks like a leftover from a word-count example; confirm before renaming.
        Job job = Job.getInstance(configuration, "wordcount01");
        // Driver class of the job
        job.setJarByClass(PhoneCountDriver.class);
        // Mapper and reducer
        job.setMapperClass(PhoneMapper.class);
        job.setReducerClass(PhoneReducer.class);

        // Map output types
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(PhoneFlow.class);
        // Reducer output types
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(PhoneFlow.class);
        // Input file
        FileInputFormat.setInputPaths(job,args[0]);
        // Output directory for the result
        FileOutputFormat.setOutputPath(job,new Path(args[1]));

        boolean result = job.waitForCompletion(true);
        // Report success only when the job actually succeeded.
        if (result) {
            System.out.println("***** ok!!!");
        } else {
            System.out.println("-------------------失败!!!");
        }
        System.exit(result?0:1);
    }
}

四、运行结果

13480253104      180  180  360
13502468823      7335  110349  117684
13560436666      3597  25635  29232
13560439658      2034  5892  7926
13602846565      1938  2910  4848
13660577991      6960  690  7650
13719199419      240  0  240
13726230503      2481  24681  27162
13760778710      120  120  240
13826544101      264  0  264
13922314466      3008  3720  6728
13925057413      11058  48243  59301
13926251106      240  0  240
13926435656      132  1512  1644
15013685858      3659  3538  7197
15920133257      3156  2936  6092
15989002119      1938  180  2118
18211575961      1527  2106  3633
18320173382      9531  2412  11943
84138413         4116  1432  5548
  • 0
    点赞
  • 3
    收藏
    觉得还不错? 一键收藏
  • 2
    评论
评论 2
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值