MapReduce 分析网站基本指标

  1. 理解【网站基本指标】的几个概念
    1. PV(Page View):网页浏览量 -> 可按每天、每周、每月统计
      -> 用户每打开一次页面就记录 1 次
    2. UV(Unique Visitor):独立访客数 -> userID
      -> cookie -> 过期时间
    3. VV(Visit View):访客的访问次数
    4. IP:独立 IP 数
  2. 分析需求,依据MapReduce 编程模板编程PV程序

    1. 具体代码

      package com.ibeifeng.bigdata.senior.hadoop.mapreduce;
      
      import java.io.IOException;
      
      import org.apache.commons.lang.StringUtils;
      import org.apache.hadoop.conf.Configuration;
      import org.apache.hadoop.conf.Configured;
      import org.apache.hadoop.fs.Path;
      import org.apache.hadoop.io.IntWritable;
      import org.apache.hadoop.io.LongWritable;
      import org.apache.hadoop.io.Text;
      import org.apache.hadoop.mapreduce.Job;
      import org.apache.hadoop.mapreduce.Mapper;
      import org.apache.hadoop.mapreduce.Reducer;
      import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
      import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
      import org.apache.hadoop.util.Tool;
      import org.apache.hadoop.util.ToolRunner;
      
      /**
       * MapReduce job that counts page views (PV) per province id.
       *
       * <p>Each input line is a tab-separated log record. The mapper emits
       * {@code (provinceId, 1)} for every well-formed record and the reducer
       * (also registered as the combiner) sums the ones per province id.
       * Malformed records are skipped and tallied in custom counters.
       *
       * <p>Usage: {@code WebPvMapReduce <input path> <output path>}. When
       * {@link #main(String[])} is started without any argument the built-in
       * default HDFS paths are used.
       *
       * @author beifeng
       */
      public class WebPvMapReduce extends Configured implements Tool {

          /**
           * Counter group for skipped records. The spelling is kept exactly as
           * it was so that the counter names reported by the job do not change.
           */
          private static final String COUNTER_GROUP = "WEBPVMAPPRE_COUNTERS";

          /** Records with fewer tab-separated fields than this are skipped. */
          private static final int MIN_FIELD_COUNT = 30;

          /** Index of the url field within a record. */
          private static final int URL_FIELD_INDEX = 1;

          /** Index of the province id field within a record. */
          private static final int PROVINCE_ID_FIELD_INDEX = 23;

          /** Input path used by {@link #main(String[])} when no argument is given. */
          private static final String DEFAULT_INPUT_PATH =
                  "hdfs://hadoop-senior01.ibeifeng.com:8020/user/beifeng/webpv/input";

          /** Output path used by {@link #main(String[])} when no argument is given. */
          private static final String DEFAULT_OUTPUT_PATH =
                  "hdfs://hadoop-senior01.ibeifeng.com:8020/user/beifeng/output7";

          /** Exit code returned by {@link #run(String[])} on a usage error. */
          private static final int EXIT_USAGE = 2;

          // step 1 : Mapper Class

          /**
           * Emits {@code (provinceId, 1)} for every valid record; invalid records
           * only increment a counter in {@link #COUNTER_GROUP}.
           */
          public static class WebPvMapper extends
                  Mapper<LongWritable, Text, IntWritable, IntWritable> {

              // Writable instances are reused across map() calls.
              private final IntWritable mapOutputValue = new IntWritable(1);
              private final IntWritable mapOutputKey = new IntWritable();

              /**
               * Validates one log line and emits its province id with a count of 1.
               *
               * @param key     input key supplied by the framework (unused)
               * @param value   one tab-separated log line
               * @param context task context used for counters and output
               * @throws IOException          if writing the output fails
               * @throws InterruptedException if the task is interrupted
               */
              @Override
              public void map(LongWritable key, Text value, Context context)
                      throws IOException, InterruptedException {
                  String[] values = value.toString().split("\t");

                  if (values.length < MIN_FIELD_COUNT) {
                      context.getCounter(COUNTER_GROUP, "LENGTH_LT_30_COUNTER")
                              .increment(1L);
                      return;
                  }

                  // A record without a url is not counted as a page view.
                  if (StringUtils.isBlank(values[URL_FIELD_INDEX])) {
                      context.getCounter(COUNTER_GROUP, "URL_BLANK_COUNTER")
                              .increment(1L);
                      return;
                  }

                  String provinceIdValue = values[PROVINCE_ID_FIELD_INDEX];
                  if (StringUtils.isBlank(provinceIdValue)) {
                      context.getCounter(COUNTER_GROUP, "PROVINCEID_BLANK_COUNTER")
                              .increment(1L);
                      return;
                  }

                  int provinceId;
                  try {
                      // parseInt avoids the needless boxing of Integer.valueOf.
                      provinceId = Integer.parseInt(provinceIdValue);
                  } catch (NumberFormatException e) {
                      // Expected for dirty data: count the record and skip it.
                      context.getCounter(COUNTER_GROUP,
                              "PROVINCEID_NOT_NUMBER_COUNTER").increment(1L);
                      return;
                  }

                  mapOutputKey.set(provinceId);
                  context.write(mapOutputKey, mapOutputValue);
              }
          }

          // step 2 : Reducer Class

          /**
           * Sums the counts of one province id. Input and output types are the
           * same, which is what allows {@link #run(String[])} to register this
           * class as the combiner as well.
           */
          public static class WebPvReducer extends
                  Reducer<IntWritable, IntWritable, IntWritable, IntWritable> {

              private final IntWritable outputValue = new IntWritable();

              /**
               * Writes {@code (provinceId, total)} for one key.
               *
               * @param key     province id
               * @param values  partial counts for that province id
               * @param context task context used for output
               * @throws IOException          if writing the output fails
               * @throws InterruptedException if the task is interrupted
               */
              @Override
              protected void reduce(IntWritable key, Iterable<IntWritable> values,
                      Context context) throws IOException, InterruptedException {
                  // NOTE(review): the total is accumulated in an int; a province
                  // with more than Integer.MAX_VALUE page views would overflow.
                  int sum = 0;
                  for (IntWritable value : values) {
                      sum += value.get();
                  }

                  outputValue.set(sum);
                  context.write(key, outputValue);
              }
          }

          // step 3 : Driver

          /**
           * Configures and submits the job, then waits for it to finish.
           *
           * @param args {@code args[0]} is the input path, {@code args[1]} the
           *             output path
           * @return 0 if the job succeeded, 1 if it failed, 2 if fewer than two
           *         arguments were supplied
           * @throws Exception if the job cannot be set up or submitted
           */
          @Override
          public int run(String[] args) throws Exception {
              if (args == null || args.length < 2) {
                  System.err.println("Usage: " + getClass().getSimpleName()
                          + " <input path> <output path>");
                  return EXIT_USAGE;
              }

              Configuration configuration = this.getConf();

              Job job = Job.getInstance(configuration, this.getClass()
                      .getSimpleName());
              job.setJarByClass(WebPvMapReduce.class);

              // input
              FileInputFormat.addInputPath(job, new Path(args[0]));

              // output
              FileOutputFormat.setOutputPath(job, new Path(args[1]));

              // Mapper
              job.setMapperClass(WebPvMapper.class);
              job.setMapOutputKeyClass(IntWritable.class);
              job.setMapOutputValueClass(IntWritable.class);

              // =================shuffle==================
              // 1.partition
              // job.setPartitionerClass(cls);

              // 2.sort
              // job.setSortComparatorClass(cls);

              // 3.combiner
              job.setCombinerClass(WebPvReducer.class);

              // 4.group
              // job.setGroupingComparatorClass(cls);

              // =================shuffle==================

              // Reducer
              job.setReducerClass(WebPvReducer.class);
              job.setOutputKeyClass(IntWritable.class);
              job.setOutputValueClass(IntWritable.class);

              // submit job
              boolean isSuccess = job.waitForCompletion(true);

              return isSuccess ? 0 : 1;
          }

          /**
           * Command-line entry point.
           *
           * @param args input path and output path; when no argument at all is
           *             given the default HDFS paths are used
           * @throws Exception if the job cannot be run
           */
          public static void main(String[] args) throws Exception {
              Configuration configuration = new Configuration();

              // Fall back to the default paths only when nothing was passed, so
              // that paths given on the command line are no longer ignored.
              String[] jobArgs = args;
              if (args == null || args.length == 0) {
                  jobArgs = new String[] { DEFAULT_INPUT_PATH, DEFAULT_OUTPUT_PATH };
              }

              // run job
              int status = ToolRunner.run(configuration, new WebPvMapReduce(),
                      jobArgs);

              // exit program
              System.exit(status);
          }

      }
    2. 运行

      1. 创建目录:bin/hdfs dfs -mkdir -p webpv/input
      2. 将两个文件都上传

        bin/hdfs dfs -put /opt/datas/2015082818 /user/beifeng/webpv/input
                    bin/hdfs dfs -put /opt/datas/2015082819 /user/beifeng/webpv/input
      3. 一次性读取两个文件,使用combiner

        // 3combiner
        job.setCombinerClass(WebPvReducer.class);

        这里写图片描述

      4. Combiner 在 Map 端先做局部汇总,减少传给 Reduce 的数据量,节省性能

        这里写图片描述

  3. 自定义计数器(Counter),对MapReduce进行DEBUG测试

    1. 在每个return前加入Counter;如下

      // Counter
                      context.getCounter("WEBPVMAPPRE_COUNTERS", "PROVINCEID_NOT_NUMBER_COUNTER").increment(1L);
    2. 结果显示:
      这里写图片描述

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值