Hadoop(5):MapReduce编程统计PV

7 篇文章 0 订阅

一、目的

对于用户访问日志,使用MapReduce进行编程分析,统计每个省份(按日志中的省份ID字段分组)的访问量(PV)。

二、实现环境

1.CDH 5.7.0

2.CentOS 7.4

3.集群外部Hadoop开发环境部署

参考博客:https://blog.csdn.net/u010886217/article/details/89278390

三、实现代码

1.研究日志格式

日志的每一行是一条以制表符(\t)分隔的访问记录,字段约30个;本文只用到下标为1的url字段和下标为23的省份ID(pro_id)字段。

2.MapReduce实现统计日志代码

package Hadoop;
import java.io.IOException;
import java.util.Iterator;
import java.util.StringTokenizer;

import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;

import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.partition.HashPartitioner;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

public class PVMapReduce extends Configured implements Tool{
    /**
     * Mapper that turns each well-formed access-log line into a
     * {@code (province id, 1)} pair.
     *
     * <p>The type arguments of {@code Mapper<KEYIN, VALUEIN, KEYOUT, VALUEOUT>} are:
     * input key (as supplied by the input format), input value (one log line),
     * output key (province id) and output value (the constant 1).
     *
     * <p>Lines that fail validation are dropped and tallied in the counter group
     * {@code "我的计数器"} so that data-quality problems are visible in the job report.
     */
    public static class MyPVMapper extends Mapper<LongWritable, Text, IntWritable, IntWritable>{
        /** A line with fewer tab-separated fields than this is treated as incomplete. */
        private static final int MIN_FIELD_COUNT = 30;
        /** Index of the url field within a split line. */
        private static final int URL_INDEX = 1;
        /** Index of the province id field within a split line. */
        private static final int PROVINCE_ID_INDEX = 23;

        // Reused across map() calls to avoid allocating a new Writable per record.
        IntWritable province_id = new IntWritable();
        IntWritable mr_value = new IntWritable(1);

        /**
         * Validates one log line and emits {@code (province id, 1)} for it.
         *
         * @param key     input key of the record; not used
         * @param value   one line of the access log, fields separated by tabs
         * @param context used to emit the output pair and to increment counters
         * @throws IOException          if writing the output pair fails
         * @throws InterruptedException if the task is interrupted while writing
         */
        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            // Split the whole line into its fields.
            String[] fields = value.toString().split("\t");

            // Fewer than 30 fields: the record is missing columns, discard it.
            // (A record with exactly 30 fields is complete and must be kept.)
            if (fields.length < MIN_FIELD_COUNT) {
                context.getCounter("我的计数器","长度小于30的数据").increment(1);
                return;
            }

            // Real-world data is often incomplete, so clean it here: a record
            // without a url is not counted.
            String url = fields[URL_INDEX];
            if (StringUtils.isBlank(url)) {
                context.getCounter("我的计数器","url为空的数据").increment(1);
                return;
            }

            // The province id must be a valid integer; otherwise discard the record.
            int provinceId;
            try {
                provinceId = Integer.parseInt(fields[PROVINCE_ID_INDEX]);
            } catch (NumberFormatException e) {
                context.getCounter("我的计数器","数值转换异常的数据").increment(1);
                return;
            }

            // Inspect the job output afterwards; further filtering may still be needed.
            province_id.set(provinceId);
            context.write(province_id, mr_value);
        }
    }


    /**
     * Combiner that pre-aggregates the map output by summing the partial
     * counts of each province id before they are sent to the reducer.
     *
     * <p>The sum must start at zero: the framework may run a combiner zero, one
     * or several times per key, so any other start value would make the final
     * totals depend on how often it happened to run.
     */
    public static class MyPVcombiner extends Reducer<IntWritable, IntWritable, IntWritable, IntWritable>{
        // Reused output value to avoid allocating a new Writable per key.
        IntWritable total = new IntWritable();

        /**
         * Sums the partial counts of one province id and emits the subtotal.
         *
         * @param key     province id
         * @param values  partial counts for this province id
         * @param context used to emit {@code (province id, subtotal)}
         * @throws IOException          if writing the output pair fails
         * @throws InterruptedException if the task is interrupted while writing
         */
        @Override
        protected void reduce(IntWritable key, Iterable<IntWritable> values, Context context)
                throws IOException, InterruptedException {
            int sum = 0;
            for (IntWritable partial : values) {
                sum += partial.get();
            }
            total.set(sum);
            context.write(key, total);
        }

    }

    /**
     * Reducer that produces the final page-view total of each province id.
     *
     * <p>Its input types are the mapper's output types; the type arguments are
     * input key, input value, output key and output value.
     */
    public static class MyPVReducer extends Reducer<IntWritable, IntWritable, IntWritable, IntWritable>{
        // Reused output value to avoid allocating a new Writable per key.
        IntWritable total = new IntWritable();

        /**
         * Sums all counts received for one province id and emits the total.
         *
         * @param key     province id
         * @param values  counts (or combiner subtotals) for this province id
         * @param context used to emit {@code (province id, total)}
         * @throws IOException          if writing the output pair fails
         * @throws InterruptedException if the task is interrupted while writing
         */
        @Override
        protected void reduce(IntWritable key, Iterable<IntWritable> values, Context context)
                throws IOException, InterruptedException {
            // Start from zero so the emitted total equals the sum of the inputs.
            int sum = 0;
            for (IntWritable partial : values) {
                sum += partial.get();
            }
            total.set(sum);
            context.write(key, total);
        }

    }
    /**
     * Configures and runs the PV job. Called through {@code ToolRunner} from
     * {@code main}, which passes on the arguments it received.
     *
     * @param args {@code args[0]} is the input path, {@code args[1]} is the
     *             output path; an existing output path is deleted recursively
     *             before the job starts
     * @return 0 if the job completed successfully, 1 if it failed, 2 if fewer
     *         than two arguments were supplied
     * @throws Exception if the file system cannot be accessed or job
     *                   submission/execution fails
     */
    @Override
    public int run(String[] args) throws Exception {
        // Fail with a usage message instead of an ArrayIndexOutOfBoundsException.
        if (args == null || args.length < 2) {
            System.err.println("Usage: " + this.getClass().getSimpleName()
                    + " <input path> <output path>");
            return 2;
        }

        // Configuration handed in from outside (set by ToolRunner via setConf).
        Configuration conf = this.getConf();

        Job job = Job.getInstance(conf, this.getClass().getSimpleName());
        job.setJarByClass(PVMapReduce.class);

        // Input path
        Path inpath = new Path(args[0]);
        FileInputFormat.addInputPath(job, inpath);

        // Output path
        Path outpath = new Path(args[1]);
        FileOutputFormat.setOutputPath(job, outpath);

        // The job refuses to start if the output path already exists, so remove it first.
        FileSystem fs = outpath.getFileSystem(conf);
        if (fs.exists(outpath)) {
            fs.delete(outpath, true);
        }

        // Map settings
        job.setMapperClass(MyPVMapper.class);
        job.setMapOutputKeyClass(IntWritable.class);
        job.setMapOutputValueClass(IntWritable.class);

        // Shuffle settings: pre-aggregate map output with the combiner
        job.setCombinerClass(MyPVcombiner.class);

        // Reduce settings
        job.setReducerClass(MyPVReducer.class);
        job.setOutputKeyClass(IntWritable.class);
        job.setOutputValueClass(IntWritable.class);

        // Block until the job finishes and translate the result into an exit code.
        return job.waitForCompletion(true) ? 0 : 1;
    }


    /**
     * Command-line entry point.
     *
     * <p>Uses the input and output paths given on the command line; when fewer
     * than two arguments are supplied it falls back to the built-in test paths.
     * The process exit status is the job's result code (non-zero on failure).
     *
     * @param args optional {@code <input path> <output path>}
     */
    public static void main(String[] args) {
        Configuration conf = new Configuration();

        // Only fall back to the built-in test paths when none were supplied,
        // so that paths passed on the command line are actually honoured.
        if (args == null || args.length < 2) {
            args = new String[]{
                    "hdfs://hadoop01:8020/20150812",
                    "hdfs://hadoop01:8020/out"
            };
        }

        // Assume failure until the job reports otherwise.
        int exitCode = 1;
        try {
            exitCode = ToolRunner.run(conf, new PVMapReduce(), args);
            System.out.println("isSuccess"+exitCode);
        } catch (Exception e) {
            e.printStackTrace();
        }

        // Propagate the result so shells and schedulers can detect a failed job.
        System.exit(exitCode);
    }


}

 

 

 

 

  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值