MapReduce 之-- 某个用户在某个位置从某个时刻开始停留了多长时间--升级版

package kaoshi831;

import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;


public class MRsort {
    /**
     * 
     *
        数据意义:某个用户在某个位置从某个时刻开始停留了多长时间
        处理逻辑:
        对同一个用户,在同一个位置,连续的多条记录进行合并
        合并原则:开始时间取最早的,停留时长加和
        用户ID,位置ID,开始时间,停留时长(分钟)
     *  user_a,location_a,2018-01-01 08:00:00,60
     */
    static class MyMapper extends Mapper<LongWritable, Text, SortOwn, Text>{
        private Text mv=new Text();
        private SortOwn so = new SortOwn();
        @Override
        protected void map(LongWritable key,
                Text value, 
                Mapper<LongWritable, Text, SortOwn, Text>.Context context)
                throws IOException, InterruptedException {
            //user_a,location_a,2018-01-01 08:00:00,60
            String[] sp = value.toString().split(",");  //读取每行的元素   
            so.setUlid(sp[0]+"\t"+sp[1]);           //将字段,(用户ID,位置ID,开始时间)添加到包装类中
            so.setTime(sp[2]);                      //按照,(用户ID,位置ID)分组,(开始时间)排序
            mv.set(sp[3]);
            context.write(so, mv);
        }
    }
    static class MyReducer extends Reducer<SortOwn,Text , Text, IntWritable>{
        private Text outkey = new Text();
        private IntWritable outvalue= new IntWritable();
        @Override
        protected void reduce(SortOwn key,
                Iterable<Text> values, 
                Context context)
                throws IOException, InterruptedException {
            //user_a,location_a,2018-01-01 08:00:00,60
            int sum=0;  
            for(Text v:values){
                String sp = v.toString();
                sum+=Integer.parseInt(sp);
                System.out.println(key);        //测试key的位置--(包装类中我将"开始时间"按照倒序排列),所有取最后一个key.getTime();
            }                                   //values是个迭代器,类似指针的遍历方式
            String k=key.getUlid()+"\t"+key.getTime();
            outkey.set(k);
            outvalue.set(sum);
            context.write(outkey, outvalue);
        }
    }

    public static void main(String[] args) throws IllegalArgumentException, IOException, URISyntaxException, ClassNotFoundException, InterruptedException {
        System.setProperty("HADOOP_USER_NAME", "hadoop");
        Configuration conf=new Configuration();
        Job job=Job.getInstance(conf);

        job.setJarByClass(kaoshi831.MRsort.class);

        job.setMapperClass(MyMapper.class);
        job.setReducerClass(MyReducer.class);

        job.setMapOutputKeyClass(SortOwn.class);
        job.setMapOutputValueClass(Text.class);

        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);

        //添加分组
        job.setGroupingComparatorClass(MyGroup.class);

        FileInputFormat.addInputPath(job, new Path("hdfs://hadoop01:9000/ksin"));

        FileSystem fs = FileSystem.get(new URI("hdfs://hadoop01:9000"), conf);
        Path path=new Path("hdfs://hadoop01:9000/ksout01");
        if(fs.exists(path)){
            fs.delete(path, true);
        }
        FileOutputFormat.setOutputPath(job,path);

        job.waitForCompletion(true);

    }

}

//自定义包装类,实现WritableComparable

package kaoshi831;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

import org.apache.hadoop.io.WritableComparable;

/**
 * Composite map-output key: (user+location identifier, start time).
 *
 * Sort order: ascending by {@code ulid}, then DESCENDING by {@code time},
 * so within one group the last key seen by the reducer carries the earliest
 * start time.
 */
public class SortOwn implements WritableComparable<SortOwn> {
    // "userID\tlocationID" — grouping/partitioning part of the key.
    private String ulid;
    // Start time, e.g. "2018-01-01 08:00:00"; lexicographic order matches
    // chronological order for this fixed-width format.
    private String time;

    public SortOwn() {
        super();
    }

    public SortOwn(String ulid, String time) {
        super();
        this.ulid = ulid;
        this.time = time;
    }

    @Override
    public String toString() {
        return ulid + "," + time;
    }

    public String getUlid() {
        return ulid;
    }

    public void setUlid(String ulid) {
        this.ulid = ulid;
    }

    public String getTime() {
        return time;
    }

    public void setTime(String time) {
        this.time = time;
    }

    /** Serializes the two fields; order must match {@link #readFields}. */
    @Override
    public void write(DataOutput out) throws IOException {
        out.writeUTF(ulid);
        out.writeUTF(time);
    }

    @Override
    public void readFields(DataInput in) throws IOException {
        this.ulid = in.readUTF();
        this.time = in.readUTF();
    }

    /** Ascending by ulid; within the same ulid, descending by time. */
    @Override
    public int compareTo(SortOwn o) {
        int tmp = this.ulid.compareTo(o.ulid);
        if (tmp == 0) {
            return o.time.compareTo(this.time); // reversed => descending
        }
        return tmp;
    }

    /**
     * FIX: without this override the default HashPartitioner used the
     * identity hash, so records with the same (user, location) could be sent
     * to different reducers and never merged. Hashing on {@code ulid} only
     * guarantees one partition per group, and remains consistent with
     * {@link #equals} (equal objects share the same ulid).
     */
    @Override
    public int hashCode() {
        return ulid == null ? 0 : ulid.hashCode();
    }

    @Override
    public boolean equals(Object obj) {
        if (this == obj) {
            return true;
        }
        if (!(obj instanceof SortOwn)) {
            return false;
        }
        SortOwn other = (SortOwn) obj;
        boolean sameUlid = (ulid == null) ? other.ulid == null : ulid.equals(other.ulid);
        boolean sameTime = (time == null) ? other.time == null : time.equals(other.time);
        return sameUlid && sameTime;
    }

}

//自定义分组 继承WritableComparator

package kaoshi831;

import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.io.WritableComparator;

/**
 * Grouping comparator: collapses reducer input groups down to the
 * (user, location) part of the key, ignoring the time component, so that
 * all records of one user/location pair arrive in a single reduce() call.
 */
public class MyGroup extends WritableComparator {

    public MyGroup() {
        // createInstances=true lets the framework deserialize SortOwn keys
        // before handing them to compare().
        super(SortOwn.class, true);
    }

    @Override
    public int compare(WritableComparable a, WritableComparable b) {
        SortOwn left = (SortOwn) a;
        SortOwn right = (SortOwn) b;
        return left.getUlid().compareTo(right.getUlid());
    }
}
  • 1
    点赞
  • 5
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值