Hadoop Custom Serialization

Requirement

For each phone number, compute the total upstream traffic, total downstream traffic, and overall total traffic consumed.

The sample data:

phone number   ip               upstream  downstream  status
13012345678    153.31.146.171   995       800         200
13112345678    193.179.2.134    1024      2501        200
13512345678    227.39.131.173   1232      890         200
13312345678    236.15.230.17    1231      257         200
13112345678    84.227.134.147   444       894         200
13412345678    193.179.2.134    567       123         200
13212345678    193.179.2.134    7655      534         200
13512345678    84.227.134.147   123       123         200
☁  input  pwd
/Users/ylj/demo/input
☁  input  cat mobile.txt
13012345678 153.31.146.171 995 800 200
13112345678 193.179.2.134 1024 2501 200
13512345678 227.39.131.173 1232 890 200
13312345678 236.15.230.17 1231 257 200
13112345678 84.227.134.147 444 894 200
13412345678 193.179.2.134 567 123 200
13212345678 193.179.2.134 7655 534 200
13512345678 84.227.134.147 123 123 200

Analysis

Input format:

phone number   ip               upstream  downstream  status
13012345678    153.31.146.171   995       800         200

Output format:

phone number   upstream  downstream  total
13012345678    995       800         1795

The mapper emits the phone number as the key and a FlowBean holding the upstream and downstream traffic as the value; the reducer sums those beans per phone number and derives the total.
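For example, 13112345678 appears twice in the input, with (1024, 2501) and (444, 894); the reducer therefore emits upstream 1024 + 444 = 1468, downstream 2501 + 894 = 3395, and total 1468 + 3395 = 4863, which matches the final output below.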

Implementation

Write the bean object for the flow statistics. To be passed between the mapper and the reducer, the bean must implement Hadoop's Writable interface and provide a no-arg constructor so the framework can instantiate it via reflection.
package com.yljphp.mapreduce.flowsum;

import org.apache.hadoop.io.Writable;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

public class FlowBean implements Writable {

    // upstream traffic
    private long upFlow;
    // downstream traffic
    private long downFlow;
    // total traffic
    private long sumFlow;

    // no-arg constructor, required so the framework can instantiate the bean via reflection
    public FlowBean() {
        super();
    }

    public FlowBean(long upFlow, long downFlow) {
        this.upFlow = upFlow;
        this.downFlow = downFlow;
        this.sumFlow = upFlow + downFlow;
    }

    // serialization: write the fields to the output stream
    @Override
    public void write(DataOutput out) throws IOException {
        out.writeLong(upFlow);
        out.writeLong(downFlow);
        out.writeLong(sumFlow);
    }

    // deserialization: read the fields back in exactly the order they were written
    @Override
    public void readFields(DataInput in) throws IOException {
        this.upFlow = in.readLong();
        this.downFlow = in.readLong();
        this.sumFlow = in.readLong();
    }

    // toString determines how the bean is rendered in the output file
    @Override
    public String toString() {
        return upFlow + "\t" + downFlow + "\t" + sumFlow;
    }

    public long getUpFlow() {
        return upFlow;
    }

    public void setUpFlow(long upFlow) {
        this.upFlow = upFlow;
    }

    public long getDownFlow() {
        return downFlow;
    }

    public void setDownFlow(long downFlow) {
        this.downFlow = downFlow;
    }
    
    // reset all three fields at once so a single bean instance can be reused
    public void set(long upFlow, long downFlow) {
        this.upFlow = upFlow;
        this.downFlow = downFlow;
        this.sumFlow = upFlow + downFlow;
    }
}
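
The Writable contract requires readFields to consume exactly the bytes that write produced, in the same order. A quick way to sanity-check the bean is an in-memory round trip; a minimal sketch (the FlowBeanRoundTrip class is illustrative, not part of the job):

package com.yljphp.mapreduce.flowsum;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;

public class FlowBeanRoundTrip {

    public static void main(String[] args) throws IOException {
        FlowBean original = new FlowBean(995, 800);

        // serialize into an in-memory buffer, as the framework does during the shuffle
        ByteArrayOutputStream buffer = new ByteArrayOutputStream();
        original.write(new DataOutputStream(buffer));

        // deserialize into a fresh instance created via the no-arg constructor
        FlowBean copy = new FlowBean();
        copy.readFields(new DataInputStream(new ByteArrayInputStream(buffer.toByteArray())));

        // both lines should print: 995	800	1795
        System.out.println(original);
        System.out.println(copy);
    }
}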

Write the mapper
package com.yljphp.mapreduce.flowsum;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

import java.io.IOException;


public class FlowCountMapper extends Mapper<LongWritable, Text, Text, FlowBean> {

    FlowBean v = new FlowBean();
    Text k = new Text();

    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {

        // 1. read one line of input
        String line = value.toString();

        // 2. split the line; the fields in the source file are tab-separated
        String[] fields = line.split("\t");

        // 3. wrap the fields into the output key/value
        // phone number is the first field
        String phoneNum = fields[0];
        // upstream and downstream traffic are the 3rd and 4th fields
        long upFlow = Long.parseLong(fields[2]);
        long downFlow = Long.parseLong(fields[3]);

        //v.setUpFlow(upFlow);
        //v.setDownFlow(downFlow);
        v.set(upFlow,downFlow);
        
        k.set(phoneNum);

        context.write(k, v);
    }
}
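
Note that k and v are created once as instance fields and reused on every call to map(). This is safe because context.write() serializes the key and value immediately, so the next call can overwrite them, and it avoids allocating two new objects per input line.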
Write the reducer
package com.yljphp.mapreduce.flowsum;

import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

import java.io.IOException;

public class FlowCountReducer extends Reducer<Text, FlowBean, Text, FlowBean> {

    FlowBean v = new FlowBean();

    @Override
    protected void reduce(Text key, Iterable<FlowBean> values, Context context) throws IOException, InterruptedException {

        long sumUpFlow = 0;
        long sumDownFlow = 0;

        // 1. iterate over all beans for this phone number, accumulating upstream and downstream traffic separately
        for (FlowBean value : values) {
            sumUpFlow += value.getUpFlow();
            sumDownFlow += value.getDownFlow();
        }
        // 2. wrap the totals into the reusable output bean
        //v.setUpFlow(sumUpFlow);
        //v.setDownFlow(sumDownFlow);
        v.set(sumUpFlow,sumDownFlow);

        // 3. write out the result
        context.write(key, v);
    }
}
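
One caveat on the value side: Hadoop reuses a single FlowBean instance while iterating over values, deserializing each record into it in turn. Accumulating the primitive fields inside the loop, as done here, is correct; collecting the FlowBean references themselves into a list would leave you with copies of only the last record.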

Write the driver
package com.yljphp.mapreduce.flowsum;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import java.io.IOException;

public class FlowCountDriver {

    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {

        // 1. get the configuration and create the job instance
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf);

        // 2. specify the jar that contains this program
        job.setJarByClass(FlowCountDriver.class);

        // 3. specify the Mapper and Reducer classes for this job
        job.setMapperClass(FlowCountMapper.class);
        job.setReducerClass(FlowCountReducer.class);

        // 4. specify the key/value types of the mapper output
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(FlowBean.class);

        // 5. specify the key/value types of the final output
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(FlowBean.class);

        // 6. specify the job's input and output paths
        FileInputFormat.setInputPaths(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        // 7. submit the job (its configuration plus the jar of classes it uses) and wait for completion
        boolean result = job.waitForCompletion(true);
        System.exit(result ? 0 : 1);
    }
}
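
To run the job, package the classes into a jar and pass the input and output paths as the two program arguments; the output directory must not exist beforehand. A typical invocation (the jar name flowsum.jar is just an example):

hadoop jar flowsum.jar com.yljphp.mapreduce.flowsum.FlowCountDriver /Users/ylj/demo/input /Users/ylj/demo/output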

Output

☁  output  pwd
/Users/ylj/demo/output
☁  output  cat part-r-00000
13012345678	995	800	1795
13112345678	1468	3395	4863
13212345678	7655	534	8189
13312345678	1231	257	1488
13412345678	567	123	690
13512345678	1355	1013	2368
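
A quick cross-check against the input: 13512345678 appears twice, with (1232, 890) and (123, 123), giving 1232 + 123 = 1355 upstream, 890 + 123 = 1013 downstream, and a total of 1355 + 1013 = 2368, as shown above.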