MapReduce 计算定位时间问题-面试题

/**数据
 *   userA,locationA,2018-01-01 08:00:00,60
     userA,locationA,2018-01-01 09:00:00,60
     userA,locationB,2018-01-01 10:00:00,60
     userA,locationA,2018-01-01 11:00:00,60
 *
 *
 最终结果
 userA,locationA,2018-01-01 08:00:00,120
 userA,locationB,2018-01-01 10:00:00,60   //注意时间依旧正序
 userA,locationA,2018-01-01 11:00:00,60
 *
 * 基本思路,需要两个mr
 *  mr1 :
 *      ①按用户 ,位置,时间,正序排序,
 *      ②按用户 ,位置  分组
 *      ③在reducer中遍历Iterable:取第一条数据的时间+分钟(换算成时间戳),与下一条数据的时间(换算成时间戳)对比,
 *       相同说明时间连续,累加分钟后继续循环;不相同则输出之前累计的记录,并把当前这条数据作为新起点继续对比
 *      分组后输出效果
 *      userA,locationA,2018-01-01 08:00:00,120
        userA,locationA,2018-01-01 11:00:00,60  //将同一分组的数据放到一起,整体不再按时间正序
        userA,locationB,2018-01-01 10:00:00,60

 *  mr2 :
 *      根据上次输出的数据再跑一次mr,这次按时间正序排序即可(不用分组)
 *
 */

代码如下

定义bean对象

package com.dxt.dingwei;

import org.apache.hadoop.io.WritableComparable;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

public class DingWeiBean  implements WritableComparable<DingWeiBean>{
    private String user;
    private String location;
    private String time;
    private int minuit;

    @Override
    public String toString() {
        return  user + "," +location+","+ time + "," + minuit;
    }
    public void set(String[] split){

        this.setUser(split[0]);
        this.setLocation(split[1]);
        this.setTime(split[2]);
        this.setMinuit(Integer.parseInt(split[3]));
    }
    public void set(DingWeiBean dwb){
        this.setUser(dwb.getUser());
        this.setLocation(dwb.getLocation() );
        this.setTime(dwb.getTime());
        this.setMinuit(dwb.getMinuit());
    }
    public DingWeiBean() {
        super();
    }

    public String getUser() {
        return user;
    }

    public void setUser(String user) {
        this.user = user;
    }

    public String getLocation() {
        return location;
    }

    public void setLocation(String location) {
        this.location = location;
    }

    public String getTime() {
        return time;
    }

    public void setTime(String time) {
        this.time = time;
    }

    public int getMinuit() {
        return minuit;
    }

    public void setMinuit(int minuit) {
        this.minuit = minuit;
    }

    /**
     * 比较 用户名,位置,时间 ,升序排序
     * @param o
     * @return
     */
    public int compareTo(DingWeiBean o) {
        int comp_user=o.user.compareTo(this.user);
        if (comp_user==0){
             int comp_loc=o.location.compareTo(this.location);
             if(comp_loc==0){

              int comp_time=o.time.compareTo(this.time);
              if(comp_time==0){
                  return 0;
              }else {
                  return comp_time>0?-1:1;
              }
             }else {
                 return comp_loc>0?-1:1;
             }
        }else{
            return comp_user>0?-1:1;
        }
    }

    public void write(DataOutput out) throws IOException {
        out.writeUTF(this.user);
        out.writeUTF(this.location);
        out.writeUTF(this.time);
        out.writeInt(this.minuit);
    }
    public void readFields(DataInput in) throws IOException {
        this.user=in.readUTF();
        this.location=in.readUTF();
        this.time=in.readUTF();
        this.minuit=in.readInt();
    }
}

 定义分组条件

package com.dxt.dingwei;

import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.io.WritableComparator;

/**
 * Grouping comparator: two keys belong to the same reduce group when their user and
 * location are equal; the time and minute fields are ignored.
 *
 * <p>The result is ascending by user, then location, i.e. the same direction as the first
 * two levels of {@code DingWeiBean.compareTo}.
 */
public class DingWeiCompartor extends WritableComparator {

    /** Registers {@code DingWeiBean} as the key class and asks for key instances to be created. */
    public DingWeiCompartor() {
        super(DingWeiBean.class, true);
    }

    /**
     * Compares two keys by user and then by location.
     *
     * @param a first key; must be a {@code DingWeiBean}
     * @param b second key; must be a {@code DingWeiBean}
     * @return 0 when user and location both match, otherwise a negative or positive value
     *         following the ascending string order of user, then location
     */
    @Override
    @SuppressWarnings("rawtypes") // raw parameter types are dictated by the overridden method
    public int compare(WritableComparable a, WritableComparable b) {
        DingWeiBean left = (DingWeiBean) a;
        DingWeiBean right = (DingWeiBean) b;

        int byUser = left.getUser().compareTo(right.getUser());
        if (byUser != 0) {
            return byUser;
        }
        return left.getLocation().compareTo(right.getLocation());
    }
}

mapper

package com.dxt.dingwei;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

import java.io.IOException;

/**
 * Parses each comma-separated input line into a {@code DingWeiBean} and emits it as the map
 * output key with a {@code NullWritable} value.
 *
 * <p>Expected line layout (user, location, start time, minutes), for example:
 * <pre>
 * userA,locationA,2018-01-01 08:00:00,60
 * userA,locationB,2018-01-01 10:00:00,60
 * </pre>
 *
 * <p>Blank lines are skipped. Lines with fewer than four fields or a non-numeric minutes field
 * are skipped as well and counted under the {@code DingWei / MALFORMED_LINES} job counter
 * instead of failing the task.
 */
public class DingWeiMapper extends Mapper<LongWritable, Text, DingWeiBean, NullWritable> {

    private static final int FIELD_COUNT = 4;
    private static final String COUNTER_GROUP = "DingWei";
    private static final String MALFORMED_COUNTER = "MALFORMED_LINES";

    /** Reused for every record; it is fully overwritten before each write. */
    private final DingWeiBean dwb = new DingWeiBean();

    /**
     * Converts one input line into a key and writes it.
     *
     * @param key     input key supplied by the input format (unused)
     * @param value   one line of input text
     * @param context task context used to emit the record and update counters
     */
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {

        String line = value.toString();
        if (line.trim().isEmpty()) {
            return;
        }

        String[] split = line.split(",");
        if (split.length < FIELD_COUNT) {
            context.getCounter(COUNTER_GROUP, MALFORMED_COUNTER).increment(1);
            return;
        }

        try {
            dwb.set(split);
        } catch (NumberFormatException e) {
            // the minutes field is not an integer: record it and move on
            context.getCounter(COUNTER_GROUP, MALFORMED_COUNTER).increment(1);
            return;
        }

        context.write(dwb,NullWritable.get());
    }
}

Reducer

package com.dxt.dingwei;

import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.Reducer;

import java.io.IOException;
import java.text.ParseException;
import java.text.SimpleDateFormat;

/**
 * Merges consecutive stay records of one reduce group into a single record.
 *
 * <p>Within a group the keys arrive one per value. A record is treated as a continuation of the
 * current run when its start time equals the run's start time plus the run's accumulated
 * minutes; its minutes are then added to the run. Otherwise the run is written out and the
 * record starts a new run. Example for one group:
 * <pre>
 * userA,locationA,2018-01-01 08:00:00,60
 * userA,locationA,2018-01-01 09:00:00,60   -> merged into the 08:00 run (120 minutes)
 * userA,locationA,2018-01-01 11:00:00,60   -> gap, starts a new run
 * </pre>
 *
 * <p>The loop relies on {@code key} reflecting the record that belongs to the value currently
 * being iterated, which is why {@code key} is re-read on every iteration.
 */
public class DingweiReducer extends Reducer<DingWeiBean,NullWritable,DingWeiBean,NullWritable> {

    /** Kept as a long so the minutes-to-milliseconds product cannot overflow int. */
    private static final long MILLIS_PER_MINUTE = 60L * 1000L;

    /** The run currently being accumulated; reused across calls. */
    DingWeiBean dwb=new DingWeiBean();
    SimpleDateFormat sdf=new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");

    /**
     * Collapses the records of one group into runs of back-to-back stays and writes each run.
     *
     * @param key     current key of the group; read again for every value
     * @param values  one placeholder value per record in the group
     * @param context task context used to emit the merged records
     * @throws IOException if a record's time cannot be parsed or the write fails
     */
    @Override
    protected void reduce(DingWeiBean key, Iterable<NullWritable> values, Context context) throws IOException, InterruptedException {

        boolean runStarted = false;
        for (NullWritable ignored : values) {
            if (!runStarted) {
                // the first record of the group opens the first run
                dwb.set(key);
                runStarted = true;
                continue;
            }

            long currentStart = toMillis(key.getTime());
            long runEnd = toMillis(dwb.getTime()) + dwb.getMinuit() * MILLIS_PER_MINUTE;

            if (currentStart == runEnd) {
                // starts exactly where the run ends: extend the run
                dwb.setMinuit(dwb.getMinuit() + key.getMinuit());
            } else {
                // not contiguous: flush the finished run and start a new one here
                context.write(dwb, NullWritable.get());
                dwb.set(key);
            }
        }

        // flush the last run; skipped when no value was seen so stale state is never emitted
        if (runStarted) {
            context.write(dwb, NullWritable.get());
        }
    }

    /**
     * Parses a {@code yyyy-MM-dd HH:mm:ss} time string into epoch milliseconds.
     *
     * @throws IOException wrapping the {@link ParseException} when the text does not match
     */
    private long toMillis(String time) throws IOException {
        try {
            return sdf.parse(time).getTime();
        } catch (ParseException e) {
            throw new IOException("Unparseable time '" + time + "', expected yyyy-MM-dd HH:mm:ss", e);
        }
    }
}

Driver

/**
 * Driver for the first MapReduce job: configures the mapper, reducer, key/value classes and
 * grouping comparator, runs the job, and exits with 0 on success or 1 on failure.
 *
 * <p>Usage: {@code DingWei [inputPath [outputPath]]}. When an argument is omitted the
 * corresponding default path ({@code E:\input}, {@code E:\outDabwei}) is used.
 */
public class DingWei {

    private static final String DEFAULT_INPUT = "E:\\input";
    private static final String DEFAULT_OUTPUT = "E:\\outDabwei";

    public static void main(String[] args) {
        String inputPath = args.length > 0 ? args[0] : DEFAULT_INPUT;
        String outputPath = args.length > 1 ? args[1] : DEFAULT_OUTPUT;

        // pessimistic default so that any exception below yields a non-zero exit status
        int exitCode = 1;
        try {
            Configuration configuration = new Configuration();
            Job job = Job.getInstance(configuration);
            // class used to locate the job jar
            job.setJarByClass(DingWei.class);

            job.setMapperClass(DingWeiMapper.class);
            job.setReducerClass(DingweiReducer.class);

            // map output types
            job.setMapOutputKeyClass(DingWeiBean.class);
            job.setMapOutputValueClass(NullWritable.class);

            // final (reducer) output types; the reducer emits NullWritable values
            job.setOutputKeyClass(DingWeiBean.class);
            job.setOutputValueClass(NullWritable.class);

            // group reduce input by user + location
            job.setGroupingComparatorClass(DingWeiCompartor.class);

            // input and output locations
            FileInputFormat.setInputPaths(job, new Path(inputPath));
            FileOutputFormat.setOutputPath(job, new Path(outputPath));

            exitCode = job.waitForCompletion(true) ? 0 : 1;
        } catch (Exception e) {
            e.printStackTrace();
        }

        System.exit(exitCode);
    }
}

mr1执行完成后的输出如下:同一分组的数据聚在一起,整体没有按时间正序,因此还不符合要求

userA,locationA,2018-01-01 08:00:00,120
userA,locationA,2018-01-01 11:00:00,60
userA,locationB,2018-01-01 10:00:00,60

mr2的主要流程是在mr1的基础上对时间排序,mr1的输出作为mr2的输入

//bean对象自定义时间排序
    /**
     * Orders beans by their time string only, ascending.
     *
     * <p>This comparison never yields 0: when both time strings are equal the result is 1.
     * NOTE(review): presumably this keeps records with identical times from being treated as
     * the same key downstream; confirm against the job's reducer before changing it, since
     * it does not follow the usual {@code compareTo} contract.
     *
     * @param o the bean to compare against
     * @return -1 when this bean's time sorts before {@code o}'s time, otherwise 1
     */
    public int compareTo(DingweiBean2 o) {
        boolean otherIsLater = o.time.compareTo(this.time) > 0;
        if (otherIsLater) {
            return -1;
        }
        return 1;
    }

//map,reducer直接write

最终数据 

userA,locationA,2018-01-01 08:00:00,120
userA,locationB,2018-01-01 10:00:00,60
userA,locationA,2018-01-01 11:00:00,60
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值