Classifying multiple records by key (mobile-phone traffic example)
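
The input file itself is not shown in the post. Judging from how the Mapper below indexes the fields, each line is tab-separated, with the phone number in the second field and the upstream and downstream traffic in the third- and second-to-last fields. A hypothetical input line and the matching output line (the numbers and URL are made up for illustration) could look like:

1	13736230513	192.196.100.1	www.atguigu.com	2481	24681	200
13736230513	FlowBean{upFlow=2481, downFlow=24681, sumFlow=27162}

The job groups all records by phone number and sums each number's upstream, downstream, and total traffic.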

Mapper

package com.atguigu.mr.writable2;
/*
 * The Map phase runs MapTasks, and each MapTask calls the Mapper class.
 * Purpose: implement in this class the business logic that needs to run inside the MapTask.
 * */

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

import java.io.IOException;

/*
 * Mapper<KEYIN,VALUEIN,KEYOUT,VALUEOUT>:
 *   First pair of type parameters:
 *       KEYIN: type of the byte offset at which the data is read
 *       VALUEIN: type of the input data, read one line at a time
 *   Second pair of type parameters:
 *       KEYOUT: type of the key written out (here the phone number)
 *       VALUEOUT: type of the value written out (here the FlowBean holding that phone number's traffic)
 * */
public class FlowMapper extends Mapper<LongWritable, Text,Text,FlowBean> {
    private Text outKey = new Text();           // reusable output key
    private FlowBean outValue = new FlowBean(); // reusable output value
    /**
     * 1. Implement in the map method the business logic that needs to run in the MapTask.
     * 2. The method is called in a loop, once per line of input.
     * @param key the byte offset at which the data was read
     * @param value the input data (one line at a time)
     * @param context the context (used here to write out the key/value pair)
     * @throws IOException
     * @throws InterruptedException
     */
    @Override
    protected void map(LongWritable key, Text value, Mapper<LongWritable, Text, Text, FlowBean>.Context context) throws IOException, InterruptedException {
//        super.map(key, value, context);
        //1. Split the line on tabs
        String[] phoneInfo = value.toString().split("\t");
        //2. Populate the key and the value
        //Key: the phone number (second field)
        outKey.set(phoneInfo[1]);
        //Value: upstream and downstream flow (3rd- and 2nd-to-last fields) plus their sum
        outValue.setUpFlow(Long.parseLong(phoneInfo[phoneInfo.length-3]));
        outValue.setDownFlow(Long.parseLong(phoneInfo[phoneInfo.length-2]));
        outValue.setSumFlow(outValue.getUpFlow()+outValue.getDownFlow());
        //3. Write out the key/value pair
        context.write(outKey,outValue);
    }
}
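
With the hypothetical input line above, split("\t") produces the fields, outKey is set to 13736230513, and outValue becomes FlowBean{upFlow=2481, downFlow=24681, sumFlow=27162}; one such key/value pair is written out for every input line.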

Reducer

package com.atguigu.mr.writable2;


import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

import java.io.IOException;

/*
 * The Reduce phase runs ReduceTasks, and each ReduceTask calls the Reducer class.
 * Purpose: implement in this class the business logic that needs to run inside the ReduceTask.
 * */
/*
 * Reducer<KEYIN,VALUEIN,KEYOUT,VALUEOUT>
 * First pair of type parameters:
 *   KEYIN: type of the key read in (the key type the Mapper wrote out)
 *   VALUEIN: type of the values read in (the value type the Mapper wrote out)
 * Second pair of type parameters:
 *   KEYOUT: type of the key written out (here the phone number)
 *   VALUEOUT: type of the value written out (here the FlowBean with the summed traffic)
 * */
public class FlowReducer extends Reducer<Text,FlowBean,Text,FlowBean> {
    private FlowBean outValue = new FlowBean(); // reusable output value
    /**
     * 1. Implement in the reduce method the business logic that needs to run in the ReduceTask.
     * 2. The method is called in a loop, once per group of records (here, records with the same key form one group).
     * @param key the key that was read
     * @param values all the values belonging to this key
     * @param context the context (used here to write out the key/value pair)
     * @throws IOException
     * @throws InterruptedException
     */
    @Override
    protected void reduce(Text key, Iterable<FlowBean> values, Reducer<Text, FlowBean, Text, FlowBean>.Context context) throws IOException, InterruptedException {
        long sumUpFlow = 0;
        long sumDownFlow = 0;
//        super.reduce(key, values, context);
        //1. Iterate over all the values for this key
        for (FlowBean value : values){
            //Accumulate the upstream flow
            sumUpFlow += value.getUpFlow();
            //Accumulate the downstream flow
            sumDownFlow += value.getDownFlow();
        }
        //2. Populate the output value
        outValue.setUpFlow(sumUpFlow);
        outValue.setDownFlow(sumDownFlow);
        outValue.setSumFlow(outValue.getUpFlow()+outValue.getDownFlow());
        //3. Write out the key/value pair
        context.write(key,outValue);
    }
}
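
As a concrete (again hypothetical) example: if the key 13736230513 arrives with two FlowBean values carrying up/down flows of (2481, 24681) and (100, 200), the loop accumulates sumUpFlow = 2581 and sumDownFlow = 24881, and the reducer writes a single output line of 13736230513 followed by FlowBean{upFlow=2581, downFlow=24881, sumFlow=27462}.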

FlowBean

package com.atguigu.mr.writable2;

import org.apache.hadoop.io.Writable;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

/*
* Custom JavaBean used as a Hadoop value type:
* 1. Define the class and implement the Writable interface
* 2. Override the write and readFields methods
* */
public class FlowBean implements Writable {
    private long upFlow;
    private long downFlow;
    private long sumFlow;
    // A no-argument constructor is required so Hadoop can create instances reflectively during deserialization
    public FlowBean(){

    }
    /*
    * Called when the object is serialized
    * */
    @Override
    public void write(DataOutput out) throws IOException {
        out.writeLong(upFlow);
        out.writeLong(downFlow);
        out.writeLong(sumFlow);

    }

    /*
    * Called when the object is deserialized
    * Note: fields must be read in exactly the same order they were written
    * */
    @Override
    public void readFields(DataInput in) throws IOException {
        upFlow = in.readLong();
        downFlow = in.readLong();
        sumFlow = in.readLong();
    }

    public long getUpFlow() {
        return upFlow;
    }

    public void setUpFlow(long upFlow) {
        this.upFlow = upFlow;
    }

    public long getDownFlow() {
        return downFlow;
    }

    public void setDownFlow(long downFlow) {
        this.downFlow = downFlow;
    }

    public long getSumFlow() {
        return sumFlow;
    }

    public void setSumFlow(long sumFlow) {
        this.sumFlow = sumFlow;
    }

    @Override
    public String toString() {
        return "FlowBean{" +
                "upFlow=" + upFlow +
                ", downFlow=" + downFlow +
                ", sumFlow=" + sumFlow +
                '}';
    }
}
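
The write/readFields pair above is what Hadoop calls when FlowBean objects are shuffled between the map and reduce sides. A quick local sanity check (this class is not part of the original post, just a sketch to show that the two methods agree) serializes a bean to bytes and reads it back:

package com.atguigu.mr.writable2;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;

public class FlowBeanRoundTrip {
    public static void main(String[] args) throws IOException {
        FlowBean original = new FlowBean();
        original.setUpFlow(1116);
        original.setDownFlow(954);
        original.setSumFlow(original.getUpFlow() + original.getDownFlow());

        // Serialize: write() emits the three longs in order (upFlow, downFlow, sumFlow)
        ByteArrayOutputStream buffer = new ByteArrayOutputStream();
        original.write(new DataOutputStream(buffer));

        // Deserialize: readFields() must read them back in the same order
        FlowBean copy = new FlowBean();
        copy.readFields(new DataInputStream(new ByteArrayInputStream(buffer.toByteArray())));

        // Prints: FlowBean{upFlow=1116, downFlow=954, sumFlow=2070}
        System.out.println(copy);
    }
}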

Driver

package com.atguigu.mr.writable2;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import java.io.IOException;

public class FlowDriver {
    public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
        //1. Create the Job instance
        Job job = Job.getInstance(new Configuration());
        //2. Configure the Job
        //2.1 Associate the jar of this program; optional when running locally, required when running on a cluster
        job.setJarByClass(FlowDriver.class);
        //2.2 Set the Mapper and Reducer classes
        job.setMapperClass(FlowMapper.class);
        job.setReducerClass(FlowReducer.class);
        //2.3 Set the key/value types of the Mapper output
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(FlowBean.class);
        //2.4 Set the key/value types of the final output (here, the types the Reducer writes out)
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(FlowBean.class);

        //2.5 Set the input and output paths
        FileInputFormat.setInputPaths(job,new Path("E:\\文档\\大数据\\尚硅谷_大数据\\test\\2MR1\\input"));
        FileOutputFormat.setOutputPath(job,new Path("E:\\文档\\大数据\\尚硅谷_大数据\\test\\2MR1\\output3"));
        //3. Run the Job
        /*
         * boolean waitForCompletion(boolean verbose)
         * verbose: whether to print progress information
         * return value: true if the job completed successfully
         * */
        boolean b = job.waitForCompletion(true);
        System.out.println("b====" + b);
    }
}
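
One practical note: the paths are hard-coded above, and FileOutputFormat will fail the job if the output directory already exists. When rerunning the job, or submitting it to a cluster with something like hadoop jar flow.jar com.atguigu.mr.writable2.FlowDriver /input /output (the jar name and paths here are hypothetical), a common variation is to take the paths from args and delete the old output first. A sketch of that change inside main, assuming args[0] is the input path and args[1] the output path (it also needs an import of org.apache.hadoop.fs.FileSystem):

        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf);
        // ... same Mapper/Reducer and key/value type settings as above ...

        Path inputPath = new Path(args[0]);
        Path outputPath = new Path(args[1]);

        // Remove the previous output directory so repeated runs do not fail
        FileSystem fs = FileSystem.get(conf);
        if (fs.exists(outputPath)) {
            fs.delete(outputPath, true);
        }

        FileInputFormat.setInputPaths(job, inputPath);
        FileOutputFormat.setOutputPath(job, outputPath);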
