MapReduce处理PV问题

最新推荐文章于 2023-11-30 20:43:23 发布

weixin_39953756

最新推荐文章于 2023-11-30 20:43:23 发布

阅读量229

点赞数

本文链接：https://blog.csdn.net/weixin_39953756/article/details/80948080

版权

package com.ibeifeng.hadoop19_copy;

import java.io.IOException;
import java.util.StringTokenizer;

import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Mapper.Context;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.jboss.netty.util.internal.StringUtil;

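// Processes the log records produced when users visit the website.
// Each line of the log is one record.
// A record consists of 36 tab-separated fields; see the "track" data dictionary for the meaning of each field.
// Goal: count the page views (PV) of each province in one log file. Only two fields are needed: url (the 2nd field)
// and provinceID (the 24th field). Neither url nor provinceID may be empty.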

/**
 * MapReduce job that counts page views (PV) per province from tab-separated web access logs.
 *
 * <p>The mapper reads one log record per line, validates it, and emits {@code (provinceId, 1)}.
 * The reducer (also used as the combiner) sums the ones for every province id. Records that
 * fail validation are not emitted; each rejection reason is tracked by a counter in the
 * {@code Count_NUMBER} counter group.
 */
public class WebPV {

    /** Counter group under which all record-rejection counters are reported. */
    private static final String COUNTER_GROUP = "Count_NUMBER";

    /** Records with fewer fields than this are rejected. */
    private static final int MIN_FIELD_COUNT = 30;

    /** Zero-based index of the url field (2nd field of the record). */
    private static final int URL_INDEX = 1;

    /** Zero-based index of the provinceID field (24th field of the record). */
    private static final int PROVINCE_ID_INDEX = 23;

    /** Input path used when no first command-line argument is given. */
    private static final String DEFAULT_INPUT_PATH = "hdfs://node-1:8020/2015082818";

    /** Output path used when no second command-line argument is given. */
    private static final String DEFAULT_OUTPUT_PATH = "hdfs://node-1:8020/test";

    /**
     * Emits {@code (provinceId, 1)} for every valid log record.
     */
    public static class PVMap extends Mapper<LongWritable, Text, IntWritable, IntWritable> {
        // Writable instances are reused across map() calls to avoid per-record allocation.
        private final IntWritable mapoutkey = new IntWritable();
        private final IntWritable mapoutvalue = new IntWritable(1);

        /**
         * Validates one log line and, if valid, writes {@code (provinceId, 1)}.
         *
         * @param key     input key supplied by the input format (not used)
         * @param value   one tab-separated log record
         * @param context task context used for counters and output
         * @throws IOException          if writing the output fails
         * @throws InterruptedException if the task is interrupted while writing
         */
        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            // A negative limit keeps trailing empty fields; the one-argument split() drops
            // them, which made records with empty tail columns look shorter than they are.
            String[] fields = value.toString().split("\t", -1);

            if (fields.length < MIN_FIELD_COUNT) {
                context.getCounter(COUNTER_GROUP, "Length_short_than_30").increment(1L);
                return;
            }

            if (StringUtils.isBlank(fields[URL_INDEX])) {
                context.getCounter(COUNTER_GROUP, "url_is_blank").increment(1L);
                return;
            }

            String provinceId = fields[PROVINCE_ID_INDEX];
            if (StringUtils.isBlank(provinceId)) {
                context.getCounter(COUNTER_GROUP, "provinceID_is_blank").increment(1L);
                return;
            }

            final int province;
            try {
                province = Integer.parseInt(provinceId);
            } catch (NumberFormatException ignored) {
                // A non-numeric province id is a data-quality issue, not a failure: the
                // counter records it, and no stack trace is printed per bad record.
                context.getCounter(COUNTER_GROUP, "Transe").increment(1L);
                return;
            }

            mapoutkey.set(province);
            context.write(mapoutkey, mapoutvalue);
        }
    }

    /**
     * Sums the partial counts of one province id. Input and output types are identical and
     * the operation is an associative sum, so this class is also registered as the combiner.
     */
    public static class PVReduce extends Reducer<IntWritable, IntWritable, IntWritable, IntWritable> {
        private final IntWritable reduceoutvalue = new IntWritable();

        /**
         * Writes {@code (provinceId, total)} for one province id.
         *
         * @param key     province id
         * @param values  partial counts for that province id
         * @param context task context used for output
         * @throws IOException          if the total does not fit in an {@code int} or writing fails
         * @throws InterruptedException if the task is interrupted while writing
         */
        @Override
        protected void reduce(IntWritable key, Iterable<IntWritable> values, Context context)
                throws IOException, InterruptedException {
            // Accumulate in a long so an overflow is detected instead of silently wrapping.
            long sum = 0L;
            for (IntWritable value : values) {
                sum += value.get();
            }
            if (sum > Integer.MAX_VALUE) {
                throw new IOException("PV count " + sum + " for province " + key.get()
                        + " does not fit in an int");
            }
            reduceoutvalue.set((int) sum);
            context.write(key, reduceoutvalue);
        }
    }

    /**
     * Configures and runs the job, then exits with status 0 on success and 1 on failure.
     *
     * @param args optional: {@code args[0]} is the input path and {@code args[1]} the output
     *             path; when absent, the built-in default paths are used
     * @throws IOException            if the file system or job submission fails
     * @throws ClassNotFoundException if a job class cannot be found
     * @throws InterruptedException   if waiting for the job is interrupted
     */
    public static void main(String[] args)
            throws IOException, ClassNotFoundException, InterruptedException {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "JinNan");
        job.setJarByClass(WebPV.class);

        // Input
        Path inpath = new Path(args.length > 0 ? args[0] : DEFAULT_INPUT_PATH);
        FileInputFormat.setInputPaths(job, inpath);

        // Map phase. The original called setOutputValueClass here, which only worked because
        // the map output value type happens to equal the final output value type.
        job.setMapperClass(PVMap.class);
        job.setMapOutputKeyClass(IntWritable.class);
        job.setMapOutputValueClass(IntWritable.class);

        // Pre-aggregate on the map side to reduce the amount of shuffled data.
        job.setCombinerClass(PVReduce.class);

        // Reduce phase
        job.setReducerClass(PVReduce.class);
        job.setOutputKeyClass(IntWritable.class);
        job.setOutputValueClass(IntWritable.class);

        // Output: an existing output directory is removed recursively so the job can be re-run.
        Path outpath = new Path(args.length > 1 ? args[1] : DEFAULT_OUTPUT_PATH);
        FileSystem fs = outpath.getFileSystem(conf);
        if (fs.exists(outpath)) {
            fs.delete(outpath, true);
        }
        FileOutputFormat.setOutputPath(job, outpath);

        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}

weixin_39953756

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
MapReduce处理PV问题

package com.ibeifeng.hadoop19_copy;import java.io.IOException;import java.util.StringTokenizer;import org.apache.commons.lang.StringUtils;import org.apache.hadoop.conf.Configuration;import org.apache....
复制链接

扫一扫