6 ,mr 八步练习 :上网流量统计

1 ,需求 :

统计每个用户的上传,下载的流量的和

2 ,数据 :data_flow.dat

1363157985066	13726230503	00-FD-07-A4-72-B8:CMCC	120.196.100.82	i02.c.aliimg.com	游戏娱乐	1	1	1	1	200
1363157995052	13726230503	5C-0E-8B-C7-F1-E0:CMCC	120.197.40.4	jd.com	京东购物	1	1	1	1	200
1363157991076	13726230503	20-10-7A-28-CC-0A:CMCC	120.196.100.99	taobao.com	淘宝购物	1	1	1	1	200
1363154400022	13926251106	5C-0E-8B-8B-B1-50:CMCC	120.197.40.4	cnblogs.com	技术门户	1	1	1	1	200
1363157993044	13926251106	94-71-AC-CD-E6-18:CMCC-EASY	120.196.100.99	iface.qiyi.com	视频网站	1	1	1	1	200
1363157995074	13926251106	5C-0E-8B-8C-E8-20:7DaysInn	120.197.40.4	122.72.52.12	未知	1	1	1	1	200
1363157993055	13926251106	C4-17-FE-BA-DE-D9:CMCC	120.196.100.99	sougou.com	综合门户	1	1	1	1	200
1363157995033	15920133257	5C-0E-8B-C7-BA-20:CMCC	120.197.40.4	sug.so.360.cn	信息安全	1	1	1	1	200
1363157983019	15920133257	68-A1-B7-03-07-B1:CMCC-EASY	120.196.100.82	baidu.com	综合搜索	1	1	1	1	200
1363157984041	13660577991	5C-0E-8B-92-5C-20:CMCC-EASY	120.197.40.4	s19.cnzz.com	站点统计	1	1	1	1	200
1363157973098	15920133257	5C-0E-8B-C7-F7-90:CMCC	120.197.40.4	rank.ie.sogou.com	搜索引擎	1	1	1	1	200
1363157986029	13660577991	E8-99-C4-4E-93-E0:CMCC-EASY	120.196.100.99	www.umeng.com	站点统计	1	1	1	1	200
1363157992093	13660577991	C4-17-FE-BA-DE-D9:CMCC	120.196.100.99	zhilian.com	招聘门户	1	1	1	1	200
1363157986041	13922314466	5C-0E-8B-C7-FC-80:CMCC-EASY	120.197.40.4	csdn.net	技术门户	1	1	1	1	200
1363157984040	13602846565	5C-0E-8B-8B-B6-00:CMCC	120.197.40.4	2052.flash2-http.qq.com	综合门户	1	1	1	1	200
1363157995093	13922314466	00-FD-07-A2-EC-BA:CMCC	120.196.100.82	img.qfc.cn	图片大全	1	1	1	1	200
1363157982040	13823070001	5C-0A-5B-6A-0B-D4:CMCC-EASY	120.196.100.99	y0.ifengimg.com	综合门户	1	1	1	1	200
1363157986072	13823070001	84-25-DB-4F-10-1A:CMCC-EASY	120.196.100.99	input.shouji.sogou.com	搜索引擎	1	1	1	1	200
1363157990043	13600217502	00-1F-64-E1-E6-9A:CMCC	120.196.100.55	t3.baidu.com	搜索引擎	1	1	1	1	200
1363157988072	13600217502	00-FD-07-A4-7B-08:CMCC	120.196.100.82	http://youku.com/	视频网站	1	1	1	1	200
1363157985079	13823070001	20-7C-8F-70-68-1F:CMCC	120.196.100.99	img.qfc.cn	图片浏览	1	1	1	1	200
1363157985069	13600217502	00-1F-64-E2-E8-B1:CMCC	120.196.100.55	www.baidu.com	综合门户	1	1	1	1	200

3 ,数据解析 :

(原文此处为一张字段说明图片,已缺失。)每行数据以制表符分隔;由后面的代码可知:下标 1 是手机号,下标 6、7、8、9 依次对应 upPackage、downPackage、upData、downData。

4 ,思路 :

  1. 明确需求 : 统计每个人 ( 手机号码 ) 下标为 6,7,8,9 的四个字段 ( upPackage,downPackage,upData,downData ) ,分别求和。
  2. 具体做法 : 手机号是 k2 ,其他的数据都是 v2

5 ,pojo :

package day02.ll;

import org.apache.hadoop.io.Writable;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

/**
 * Traffic counters carried as the MapReduce value for one record (or one summed group).
 *
 * <p>The four counters are serialized as four consecutive longs in the order
 * upPackage, downPackage, upData, downData, and {@link #readFields(DataInput)} reads
 * them back in that same order.
 */
public class LlPojo implements Writable {
    // The counters default to 0 instead of null so that an instance created with the
    // no-arg constructor can be passed to write() without a NullPointerException
    // (write() unboxes every field).
    private Long upPackage = 0L;
    private Long downPackage = 0L;
    private Long upData = 0L;
    private Long downData = 0L;

    /** Creates an instance with all four counters set to 0. */
    public LlPojo() {}

    /**
     * Creates an instance with the given counters.
     *
     * @param upPackage   value for the upPackage counter
     * @param downPackage value for the downPackage counter
     * @param upData      value for the upData counter
     * @param downData    value for the downData counter
     */
    public LlPojo(Long upPackage, Long downPackage, Long upData, Long downData) {
        this.upPackage = upPackage;
        this.downPackage = downPackage;
        this.upData = upData;
        this.downData = downData;
    }

    public Long getUpPackage() {
        return upPackage;
    }

    public void setUpPackage(Long upPackage) {
        this.upPackage = upPackage;
    }

    public Long getDownPackage() {
        return downPackage;
    }

    public void setDownPackage(Long downPackage) {
        this.downPackage = downPackage;
    }

    public Long getUpData() {
        return upData;
    }

    public void setUpData(Long upData) {
        this.upData = upData;
    }

    public Long getDownData() {
        return downData;
    }

    public void setDownData(Long downData) {
        this.downData = downData;
    }

    /** Returns the four counters in the form {@code LlPojo{upPackage=..., ...}}. */
    @Override
    public String toString() {
        return "LlPojo{" +
                "upPackage=" + upPackage +
                ", downPackage=" + downPackage +
                ", upData=" + upData +
                ", downData=" + downData +
                '}';
    }

    /**
     * Serializes the four counters as longs.
     *
     * @param dataOutput sink the counters are written to
     * @throws IOException if writing fails
     * @throws NullPointerException if a counter was explicitly set to {@code null}
     */
    @Override
    public void write(DataOutput dataOutput) throws IOException {
        dataOutput.writeLong(this.upPackage);
        dataOutput.writeLong(this.downPackage);
        dataOutput.writeLong(this.upData);
        dataOutput.writeLong(this.downData);
    }

    /**
     * Restores the four counters; the read order must match {@link #write(DataOutput)}.
     *
     * @param dataInput source the counters are read from
     * @throws IOException if reading fails
     */
    @Override
    public void readFields(DataInput dataInput) throws IOException {
        this.upPackage = dataInput.readLong();
        this.downPackage = dataInput.readLong();
        this.upData = dataInput.readLong();
        this.downData = dataInput.readLong();
    }
}

6 , map :

package day02.ll;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

import java.io.IOException;

//  k1,v1,k2,v2
//  line offset, one line of text, phone number, traffic counters (LlPojo)
/**
 * Mapper that turns one tab-separated log line into (phone number, traffic counters).
 *
 * <p>Example line:
 * 1363157985066	13726230503	00-FD-07-A4-72-B8:CMCC	120.196.100.82	i02.c.aliimg.com	游戏娱乐	24	27	2481	24681	200
 * Column 1 becomes the key; columns 6, 7, 8 and 9 become the value.
 */
public class LlMap extends Mapper<LongWritable,Text,Text,LlPojo> {
    /** Highest column index read is 9, so a usable line has at least 10 columns. */
    private static final int MIN_FIELDS = 10;

    // Reused across map() calls; context.write serializes the current contents.
    private final Text k2 = new Text();
    private final LlPojo v2 = new LlPojo();

    /**
     * Parses one line and emits its phone number with the four counters.
     *
     * <p>Lines with fewer than {@value #MIN_FIELDS} columns are skipped and counted in the
     * {@code LlMap / MALFORMED_LINES} counter instead of failing the task with an
     * ArrayIndexOutOfBoundsException.
     *
     * @param key     byte offset of the line (unused)
     * @param value   the line text
     * @param context task context used to emit the output pair
     * @throws NumberFormatException if columns 6-9 are not valid longs
     */
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        // Split the line into its columns.
        String[] arr = value.toString().split("\t");
        if (arr.length < MIN_FIELDS) {
            context.getCounter("LlMap", "MALFORMED_LINES").increment(1);
            return;
        }
        k2.set(arr[1]);
        // Each column is parsed exactly once; the original repeated these four calls
        // once per column inside a loop whose variable was never used.
        v2.setUpPackage(Long.parseLong(arr[6]));
        v2.setDownPackage(Long.parseLong(arr[7]));
        v2.setUpData(Long.parseLong(arr[8]));
        v2.setDownData(Long.parseLong(arr[9]));
        context.write(k2,v2);
    }
}

7 , reduce :

package day02.ll;

import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

import java.io.IOException;

//  k2,v2,k3,v3
/**
 * Reducer that adds up the four traffic counters of all values sharing one key and
 * emits the key with the totals.
 */
public class LlReduce extends Reducer<Text,LlPojo,Text,LlPojo> {
    // Output holder reused for every key.
    private LlPojo result = new LlPojo();

    /**
     * Sums upPackage, downPackage, upData and downData over {@code values}.
     *
     * @param key     the grouping key, written through unchanged
     * @param values  all counters recorded for that key
     * @param context task context used to emit the totals
     */
    @Override
    protected void reduce(Text key, Iterable<LlPojo> values, Context context) throws IOException, InterruptedException {
        // Slots: 0 = upPackage, 1 = downPackage, 2 = upData, 3 = downData.
        long[] sums = new long[4];
        for (LlPojo record : values) {
            sums[0] = sums[0] + record.getUpPackage();
            sums[1] = sums[1] + record.getDownPackage();
            sums[2] = sums[2] + record.getUpData();
            sums[3] = sums[3] + record.getDownData();
        }

        result.setUpPackage(sums[0]);
        result.setDownPackage(sums[1]);
        result.setUpData(sums[2]);
        result.setDownData(sums[3]);
        context.write(key, result);
    }
}

8 ,job :

package day02.ll;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

/**
 * Driver that wires {@link LlMap} and {@link LlReduce} into one job and runs it.
 *
 * <p>Usage: {@code LlJob [inputPath [outputPath]]}. When an argument is omitted the
 * original hard-coded local path is used, so running without arguments behaves as before.
 */
public class LlJob extends Configured implements Tool {
    /** Input used when no first argument is given. */
    private static final String DEFAULT_INPUT =
            "C:\\Users\\86182\\Desktop\\hadoop\\day03 -- mr 高级\\03 -- 流量统计\\data_flow.dat";
    /** Output directory used when no second argument is given. */
    private static final String DEFAULT_OUTPUT =
            "C:\\Users\\86182\\Desktop\\hadoop\\day03 -- mr 高级\\03 -- 流量统计\\out";

    /**
     * Configures and runs the job.
     *
     * @param strings optional arguments: {@code [0]} input path, {@code [1]} output path
     * @return 0 if the job succeeded, 1 otherwise
     */
    @Override
    public int run(String[] strings) throws Exception {
        int argCount = strings == null ? 0 : strings.length;
        String inputPath = argCount > 0 ? strings[0] : DEFAULT_INPUT;
        String outputPath = argCount > 1 ? strings[1] : DEFAULT_OUTPUT;

        // Main class
        Job job = Job.getInstance(super.getConf(), "ll");
        job.setJarByClass(this.getClass());
        // 1. input
        job.setInputFormatClass(TextInputFormat.class);
        TextInputFormat.addInputPath(job, new Path(inputPath));
        // 2. map
        job.setMapperClass(LlMap.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(LlPojo.class);
        // 3. partition, 4. sort, 5. combine, 6. group: defaults are used
        // 7. reduce
        job.setReducerClass(LlReduce.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(LlPojo.class);
        // 8. output (the path must not exist yet)
        job.setOutputFormatClass(TextOutputFormat.class);
        TextOutputFormat.setOutputPath(job, new Path(outputPath));
        // Run and map success/failure to exit code 0/1.
        boolean succeeded = job.waitForCompletion(true);
        return succeeded ? 0 : 1;
    }

    public static void main(String[] args) throws Exception {
        int exitCode = ToolRunner.run(new Configuration(), new LlJob(), args);
        System.exit(exitCode);
    }
}

9 ,执行查看结果 :

本地执行即可

10 ,如果想要排序 : 按照流量排序

思路 : 先按照第一列排序,如果数值一样,就按照第二列,再一样,就第三列,第四列…

11 ,代码 : 只需要修改 pojo ,然后,用 pojo 来做 k2

package day02.ll;

import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableComparable;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

/**
 * Traffic counters that can also serve as a sortable MapReduce key.
 *
 * <p>Serialized as four consecutive longs in the order upPackage, downPackage, upData,
 * downData. Ordering compares those four counters in that same order.
 * {@link #equals(Object)} and {@link #hashCode()} are value-based so that they agree
 * with {@link #compareTo(LlPojo)}; the inherited identity hash differs between JVMs,
 * which would make hash-based partitioning of equal keys inconsistent.
 */
public class LlPojo implements WritableComparable<LlPojo> {
    // The counters default to 0 instead of null so that an instance created with the
    // no-arg constructor can be serialized and compared without a NullPointerException.
    private Long upPackage = 0L;
    private Long downPackage = 0L;
    private Long upData = 0L;
    private Long downData = 0L;

    /** Creates an instance with all four counters set to 0. */
    public LlPojo() {}

    /**
     * Creates an instance with the given counters.
     *
     * @param upPackage   value for the upPackage counter
     * @param downPackage value for the downPackage counter
     * @param upData      value for the upData counter
     * @param downData    value for the downData counter
     */
    public LlPojo(Long upPackage, Long downPackage, Long upData, Long downData) {
        this.upPackage = upPackage;
        this.downPackage = downPackage;
        this.upData = upData;
        this.downData = downData;
    }

    public Long getUpPackage() {
        return upPackage;
    }

    public void setUpPackage(Long upPackage) {
        this.upPackage = upPackage;
    }

    public Long getDownPackage() {
        return downPackage;
    }

    public void setDownPackage(Long downPackage) {
        this.downPackage = downPackage;
    }

    public Long getUpData() {
        return upData;
    }

    public void setUpData(Long upData) {
        this.upData = upData;
    }

    public Long getDownData() {
        return downData;
    }

    public void setDownData(Long downData) {
        this.downData = downData;
    }

    /** Returns the four counters in the form {@code LlPojo{upPackage=..., ...}}. */
    @Override
    public String toString() {
        return "LlPojo{" +
                "upPackage=" + upPackage +
                ", downPackage=" + downPackage +
                ", upData=" + upData +
                ", downData=" + downData +
                '}';
    }

    /**
     * Serializes the four counters as longs.
     *
     * @param dataOutput sink the counters are written to
     * @throws IOException if writing fails
     * @throws NullPointerException if a counter was explicitly set to {@code null}
     */
    @Override
    public void write(DataOutput dataOutput) throws IOException {
        dataOutput.writeLong(this.upPackage);
        dataOutput.writeLong(this.downPackage);
        dataOutput.writeLong(this.upData);
        dataOutput.writeLong(this.downData);
    }

    /**
     * Restores the four counters; the read order must match {@link #write(DataOutput)}.
     *
     * @param dataInput source the counters are read from
     * @throws IOException if reading fails
     */
    @Override
    public void readFields(DataInput dataInput) throws IOException {
        this.upPackage = dataInput.readLong();
        this.downPackage = dataInput.readLong();
        this.upData = dataInput.readLong();
        this.downData = dataInput.readLong();
    }

    /**
     * Orders by upPackage, then downPackage, then upData, then downData, each ascending.
     * A later counter is only compared when all earlier ones are equal.
     *
     * @param o the instance to compare against
     * @return negative, zero or positive as this instance sorts before, with or after {@code o}
     * @throws NullPointerException if a compared counter is {@code null}
     */
    @Override
    public int compareTo(LlPojo o) {
        int order = this.getUpPackage().compareTo(o.getUpPackage());
        if (order != 0) {
            return order;
        }
        order = this.getDownPackage().compareTo(o.getDownPackage());
        if (order != 0) {
            return order;
        }
        order = this.getUpData().compareTo(o.getUpData());
        if (order != 0) {
            return order;
        }
        return this.getDownData().compareTo(o.getDownData());
    }

    /** Two instances are equal when all four counters are equal. */
    @Override
    public boolean equals(Object obj) {
        if (this == obj) {
            return true;
        }
        if (!(obj instanceof LlPojo)) {
            return false;
        }
        LlPojo other = (LlPojo) obj;
        return sameValue(upPackage, other.upPackage)
                && sameValue(downPackage, other.downPackage)
                && sameValue(upData, other.upData)
                && sameValue(downData, other.downData);
    }

    /** Hash derived only from the four counter values, so it is stable across JVMs. */
    @Override
    public int hashCode() {
        int hash = 17;
        hash = 31 * hash + hashOf(upPackage);
        hash = 31 * hash + hashOf(downPackage);
        hash = 31 * hash + hashOf(upData);
        hash = 31 * hash + hashOf(downData);
        return hash;
    }

    // Null-safe equality of two boxed counters.
    private static boolean sameValue(Long a, Long b) {
        return a == null ? b == null : a.equals(b);
    }

    // Null-safe hash of one boxed counter.
    private static int hashOf(Long value) {
        return value == null ? 0 : value.hashCode();
    }
}
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值