MapReduce Feature Implementation 9: PV and UV

MapReduce Feature Implementation Series
      MapReduce Feature Implementation 1: Converting data between HBase and HDFS
      MapReduce Feature Implementation 2: Sorting
      MapReduce Feature Implementation 3: Top N
      MapReduce Feature Implementation 4: A small exercise (read data from HBase, aggregate it, and output the Top 3 to HDFS in descending order)
      MapReduce Feature Implementation 5: Distinct and Count
      MapReduce Feature Implementation 6: Max, Sum, and Avg
      MapReduce Feature Implementation 7: A small exercise (computing an average with several jobs chained in series)
      MapReduce Feature Implementation 8: Partition
      MapReduce Feature Implementation 9: PV and UV
      MapReduce Feature Implementation 10: Inverted Index
      MapReduce Feature Implementation 11: Join
 

Preface: The code given here targets Hadoop 1. If you have mastered the material in the earlier parts of the series, I believe you can write the Hadoop 2 version yourself.

1. PV:

[hadoop@h71 q1]$ vi ip.txt
192.168.1.1
192.168.2.2
192.168.3.3
192.168.2.2
1.1.1.1
[hadoop@h71 q1]$ hadoop fs -put ip.txt /input

Java code:

import java.io.IOException;
import java.util.Iterator;
 
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.TextInputFormat;
import org.apache.hadoop.mapred.TextOutputFormat;
 
public class IpPv {
 
    public static class IpPvUvMap extends MapReduceBase implements Mapper<LongWritable, Text, Text, Text> {
        @Override
        public void map(LongWritable longWritable, Text text, OutputCollector<Text, Text>
                outputCollector, Reporter reporter) throws IOException {
            // Every input record counts as one page view, so emit the constant key "pv" with count 1
            outputCollector.collect(new Text("pv"), new Text("1"));
        }
    }
 
    public static class IpPvUvReduce extends MapReduceBase implements Reducer<Text, Text, Text, Text> {
        @Override
        public void reduce(Text key, Iterator<Text> iterator, OutputCollector<Text, Text>
                outputCollector, Reporter reporter) throws IOException {
            long sum = 0;
            while(iterator.hasNext()){
                sum = sum + Long.parseLong(iterator.next().toString());
            }
            outputCollector.collect(new Text("pv"), new Text(String.valueOf(sum)));
        }
    }
 
    public static void main(String [] args) throws IOException {
        if(args.length < 2){
            System.out.println("Usage: IpPv <input path> <output path>");
            return;
        }
        JobConf conf = new JobConf(IpPv.class);
        //set output key class
        conf.setOutputKeyClass(Text.class);
        conf.setOutputValueClass(Text.class);
 
        //set mapper & reducer class
        conf.setMapperClass(IpPvUvMap.class);
        conf.setCombinerClass(IpPvUvReduce.class);
        conf.setReducerClass(IpPvUvReduce.class);
 
        // set format
        conf.setInputFormat(TextInputFormat.class);
        conf.setOutputFormat(TextOutputFormat.class);
 
        String inputDir = args[0];
        String outputDir = args[1];
 
       // FileInputFormat.setInputPaths(conf, "/user/hadoop/input/");
        FileInputFormat.setInputPaths(conf, inputDir);
        FileOutputFormat.setOutputPath(conf, new Path(outputDir));
        boolean flag = JobClient.runJob(conf).isSuccessful();
    }
}

Run:

[hadoop@h71 q1]$ /usr/jdk1.7.0_25/bin/javac IpPv.java
[hadoop@h71 q1]$ /usr/jdk1.7.0_25/bin/jar cvf xx.jar IpPv*class
[hadoop@h71 q1]$ hadoop jar xx.jar IpPv /input/ip.txt /output

View the result:

[hadoop@h71 q1]$ hadoop fs -cat /output/part-00000
pv      5

  OutputCollector and Reporter are APIs from Hadoop 0.19 and earlier; from Hadoop 0.20.2 onward they were replaced by Context, which combines the functionality of both OutputCollector and Reporter.
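  For reference, a minimal sketch of the same PV job written against the newer org.apache.hadoop.mapreduce API might look like the following (the class name IpPvNewApi and the job name are my own; treat this as an outline under those assumptions, not tested code from the series):

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class IpPvNewApi {

    public static class PvMapper extends Mapper<LongWritable, Text, Text, Text> {
        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            // Every input line counts as one page view
            context.write(new Text("pv"), new Text("1"));
        }
    }

    public static class PvReducer extends Reducer<Text, Text, Text, Text> {
        @Override
        protected void reduce(Text key, Iterable<Text> values, Context context)
                throws IOException, InterruptedException {
            long sum = 0;
            for (Text v : values) {
                sum += Long.parseLong(v.toString());
            }
            context.write(key, new Text(String.valueOf(sum)));
        }
    }

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "ip pv");
        job.setJarByClass(IpPvNewApi.class);
        job.setMapperClass(PvMapper.class);
        job.setCombinerClass(PvReducer.class);
        job.setReducerClass(PvReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}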

  We can also customize the key/value output separator in code by adding the following line to the main method:
conf.set("mapred.textoutputformat.separator", ";"); // use ";" as the separator
Output: pv;5
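  If you port the job to the newer mapreduce API, the equivalent setting is, to the best of my knowledge, reached through a differently named property:
conf.set("mapreduce.output.textoutputformat.separator", ";"); // new-API name of the same setting (my assumption; check against your Hadoop version)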
 

2. UV:

[hadoop@h71 q1]$ vi ii.txt 
1.1.1.1 a
2.2.2.2 a
1.1.1.1 b
1.1.1.1 c
2.2.2.2 f
3.3.3.3 gg
[hadoop@h71 q1]$ hadoop fs -put ii.txt /input

Java code:

import java.io.IOException;
import java.util.Iterator;
 
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.TextInputFormat;
import org.apache.hadoop.mapred.TextOutputFormat;
 
/**
 * Compute UV with two chained jobs: job 1 deduplicates IPs on the reduce side,
 * and job 2 counts the distinct IPs left by job 1.
 */
public class IpUv {
 
    public static  class IpUvMapper1 extends MapReduceBase implements Mapper<LongWritable, Text, Text, Text> {
        @Override
        public void map(LongWritable longWritable, Text text, 
        		OutputCollector<Text, Text> outputCollector, Reporter reporter) throws IOException {
            String ip = text.toString().split(" ", 5)[0];
            outputCollector.collect(new Text(ip.trim()), new Text("1"));
        }
    }
 
    public static class IpUvReducer1 extends MapReduceBase implements Reducer<Text, Text, Text, Text> {
        @Override
        public void reduce(Text text, Iterator<Text> iterator, 
        		OutputCollector<Text, Text> outputCollector, Reporter reporter) throws IOException {
            // Each key arriving here is a distinct IP; emit it exactly once to deduplicate
            outputCollector.collect(text, new Text("1"));
        }
    }
 
    public static class IpUvMapper2 extends MapReduceBase implements Mapper<LongWritable, Text, Text, Text>{
        @Override
        public void map(LongWritable longWritable, Text text, 
        		OutputCollector<Text, Text> outputCollector, Reporter reporter) throws IOException {
            // Each line of job 1's output holds one distinct IP, so emit the constant key "uv" with count 1
            outputCollector.collect(new Text("uv"), new Text("1"));
        }
    }
 
    public static class IpUvReducer2 extends MapReduceBase implements Reducer<Text, Text, Text, Text>{
        @Override
        public void reduce(Text text, Iterator<Text> iterator, 
        		OutputCollector<Text, Text> outputCollector, Reporter reporter) throws IOException {
            long sum = 0;
            /**
             * Input to this reducer: key "uv", values [1, 1, 1] (one "1" per distinct IP from job 1)
             */
            while(iterator.hasNext()){
                sum = sum + Long.parseLong(iterator.next().toString());
            }
            outputCollector.collect(new Text("uv"), new Text(String.valueOf(sum)));
        }
    }
 
    public static void main(String [] args) throws IOException {
        if(args.length < 2){
            System.out.println("Usage: IpUv <input path> <output path>");
            return;
        }
        JobConf conf = new JobConf(IpUv.class);
        conf.setOutputKeyClass(Text.class);
        conf.setOutputValueClass(Text.class);
 
        //set mapper & reducer class
        conf.setMapperClass(IpUvMapper1.class);
        conf.setReducerClass(IpUvReducer1.class);
 
        // set format
        conf.setInputFormat(TextInputFormat.class);
        conf.setOutputFormat(TextOutputFormat.class);
 
        String inputDir = args[0];
        String outputDir = args[1];
 
        // FileInputFormat.setInputPaths(conf, "/user/hadoop/rongxin/locationinput/");
        FileInputFormat.setInputPaths(conf, inputDir);
        FileOutputFormat.setOutputPath(conf, new Path(outputDir));
        boolean flag = JobClient.runJob(conf).isSuccessful();
 
        if(flag){
            JobConf conf1 = new JobConf(IpUv.class);
            conf1.setOutputKeyClass(Text.class);
            conf1.setOutputValueClass(Text.class);
 
            //set mapper & reducer class
            conf1.setMapperClass(IpUvMapper2.class);
            conf1.setReducerClass(IpUvReducer2.class);
 
            // set format
            conf1.setInputFormat(TextInputFormat.class);
            conf1.setOutputFormat(TextOutputFormat.class);
 
            // FileInputFormat.setInputPaths(conf, "/user/hadoop/rongxin/locationinput/");
            FileInputFormat.setInputPaths(conf1, outputDir);
            FileOutputFormat.setOutputPath(conf1, new Path(outputDir + "-2"));
            boolean flag1 = JobClient.runJob(conf1).isSuccessful();
            System.out.println(flag1);
        }
    }
}

Run:

[hadoop@h71 q1]$ /usr/jdk1.7.0_25/bin/javac IpUv.java 
[hadoop@h71 q1]$ /usr/jdk1.7.0_25/bin/jar cvf xx.jar IpUv*class
[hadoop@h71 q1]$ hadoop jar xx.jar IpUv /input/ii.txt /output

View the result:

[hadoop@h71 q1]$ hadoop fs -cat /output/part-00000
1.1.1.1 1
2.2.2.2 1
3.3.3.3 1
[hadoop@h71 q1]$ hadoop fs -cat /output-2/part-00000
uv      3
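  For comparison, UV can also be computed in a single job: send every IP to one reducer and count how many distinct keys arrive there, since each reduce() call corresponds to exactly one distinct IP. The sketch below uses the newer mapreduce API; the class names are my own, it assumes a single reducer is acceptable for your data volume, and it is only an illustration of the alternative, not code from the original article:

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class IpUvSingleJob {

    public static class IpMapper extends Mapper<LongWritable, Text, Text, NullWritable> {
        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            // Emit the IP (first field of the line) as the key; duplicates collapse in the shuffle
            String ip = value.toString().split(" ")[0];
            context.write(new Text(ip.trim()), NullWritable.get());
        }
    }

    public static class UvReducer extends Reducer<Text, NullWritable, Text, Text> {
        private long distinctIps = 0;

        @Override
        protected void reduce(Text ip, Iterable<NullWritable> values, Context context) {
            // One reduce() call per distinct IP
            distinctIps++;
        }

        @Override
        protected void cleanup(Context context) throws IOException, InterruptedException {
            // Write the total once, after all distinct IPs have been counted
            context.write(new Text("uv"), new Text(String.valueOf(distinctIps)));
        }
    }

    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance(new Configuration(), "ip uv single job");
        job.setJarByClass(IpUvSingleJob.class);
        job.setMapperClass(IpMapper.class);
        job.setReducerClass(UvReducer.class);
        job.setNumReduceTasks(1); // all IPs must reach one reducer for a global distinct count
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(NullWritable.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}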