MapReduce Feature Implementation 9: PV and UV

MapReduce Feature Implementation Series
      MapReduce Feature Implementation 1: Converting data between HBase and HDFS
      MapReduce Feature Implementation 2: Sorting
      MapReduce Feature Implementation 3: Top N
      MapReduce Feature Implementation 4: A small exercise (read data from HBase, aggregate it, and output the Top 3 to HDFS in descending order)
      MapReduce Feature Implementation 5: Distinct and Count
      MapReduce Feature Implementation 6: Max, Sum, and Avg
      MapReduce Feature Implementation 7: A small exercise (computing an average with several jobs chained in series)
      MapReduce Feature Implementation 8: Partition
      MapReduce Feature Implementation 9: PV and UV
      MapReduce Feature Implementation 10: Inverted Index
      MapReduce Feature Implementation 11: Join
 

Preface: The code given here targets Hadoop 1. If you have mastered the material in the earlier parts of the series, I believe you can write the Hadoop 2 version yourself.

1. PV:

[hadoop@h71 q1]$ vi ip.txt
192.168.1.1
192.168.2.2
192.168.3.3
192.168.2.2
1.1.1.1
[hadoop@h71 q1]$ hadoop fs -put ip.txt /input

Java code:

import java.io.IOException;
import java.util.Iterator;
 
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.TextInputFormat;
import org.apache.hadoop.mapred.TextOutputFormat;
 
public class IpPv {
 
    public static class IpPvUvMap extends MapReduceBase implements Mapper<LongWritable, Text, Text, Text> {
        @Override
        public void map(LongWritable longWritable, Text text, OutputCollector<Text, Text>
                outputCollector, Reporter reporter) throws IOException {
            // Every input record counts as one page view, so emit the constant key "pv" with count 1
            outputCollector.collect(new Text("pv"), new Text("1"));
        }
    }
 
    public static class IpPvUvReduce extends MapReduceBase implements Reducer<Text, Text, Text, Text> {
        @Override
        public void reduce(Text key, Iterator<Text> iterator, OutputCollector<Text, Text>
                outputCollector, Reporter reporter) throws IOException {
            long sum = 0;
            while(iterator.hasNext()){
                sum = sum + Long.parseLong(iterator.next().toString());
            }
            outputCollector.collect(new Text("pv"), new Text(String.valueOf(sum)));
        }
    }
 
    public static void main(String [] args) throws IOException {
        if(args.length < 2){
            System.out.println("Usage: IpPv <input path> <output path>");
            return;
        }
        JobConf conf = new JobConf(IpPv.class);
        //set output key class
        conf.setOutputKeyClass(Text.class);
        conf.setOutputValueClass(Text.class);
 
        //set mapper & reducer class
        conf.setMapperClass(IpPvUvMap.class);
        conf.setCombinerClass(IpPvUvReduce.class);
        conf.setReducerClass(IpPvUvReduce.class);
 
        // set format
        conf.setInputFormat(TextInputFormat.class);
        conf.setOutputFormat(TextOutputFormat.class);
 
        String inputDir = args[0];
        String outputDir = args[1];
 
       // FileInputFormat.setInputPaths(conf, "/user/hadoop/input/");
        FileInputFormat.setInputPaths(conf, inputDir);
        FileOutputFormat.setOutputPath(conf, new Path(outputDir));
        boolean flag = JobClient.runJob(conf).isSuccessful();
    }
}

Run:

[hadoop@h71 q1]$ /usr/jdk1.7.0_25/bin/javac IpPv.java
[hadoop@h71 q1]$ /usr/jdk1.7.0_25/bin/jar cvf xx.jar IpPv*class
[hadoop@h71 q1]$ hadoop jar xx.jar IpPv /input/ip.txt /output

View the result:

[hadoop@h71 q1]$ hadoop fs -cat /output/part-00000
pv      5

  OutputCollector and Reporter are APIs from Hadoop 0.19 and earlier; from Hadoop 0.20.2 onward they were replaced by Context, which combines the functionality of both OutputCollector and Reporter.
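  For reference, a minimal sketch of the same PV job written against the newer org.apache.hadoop.mapreduce API might look like the following (the class name IpPvNewApi and the job name are my own; treat this as an outline under those assumptions, not tested code from the series):

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class IpPvNewApi {

    public static class PvMapper extends Mapper<LongWritable, Text, Text, Text> {
        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            // Every input line counts as one page view
            context.write(new Text("pv"), new Text("1"));
        }
    }

    public static class PvReducer extends Reducer<Text, Text, Text, Text> {
        @Override
        protected void reduce(Text key, Iterable<Text> values, Context context)
                throws IOException, InterruptedException {
            long sum = 0;
            for (Text v : values) {
                sum += Long.parseLong(v.toString());
            }
            context.write(key, new Text(String.valueOf(sum)));
        }
    }

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "ip pv");
        job.setJarByClass(IpPvNewApi.class);
        job.setMapperClass(PvMapper.class);
        job.setCombinerClass(PvReducer.class);
        job.setReducerClass(PvReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}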

  We can also customize the key/value output separator in code by adding the following line to the main method:
conf.set("mapred.textoutputformat.separator", ";"); // use ";" as the separator
Output: pv;5
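  If you port the job to the newer mapreduce API, the equivalent setting is, to the best of my knowledge, reached through a differently named property:
conf.set("mapreduce.output.textoutputformat.separator", ";"); // new-API name of the same setting (my assumption; check against your Hadoop version)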
 

2. UV:

[hadoop@h71 q1]$ vi ii.txt 
1.1.1.1 a
2.2.2.2 a
1.1.1.1 b
1.1.1.1 c
2.2.2.2 f
3.3.3.3 gg
[hadoop@h71 q1]$ hadoop fs -put ii.txt /input

Java code:

import java.io.IOException;
import java.util.Iterator;
 
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.TextInputFormat;
import org.apache.hadoop.mapred.TextOutputFormat;
 
/**
 * Compute UV with two chained jobs: job 1 deduplicates IPs on the reduce side,
 * and job 2 counts the distinct IPs left by job 1.
 */
public class IpUv {
 
    public static  class IpUvMapper1 extends MapReduceBase implements Mapper<LongWritable, Text, Text, Text> {
        @Override
        public void map(LongWritable longWritable, Text text, 
        		OutputCollector<Text, Text> outputCollector, Reporter reporter) throws IOException {
            String ip = text.toString().split(" ", 5)[0];
            outputCollector.collect(new Text(ip.trim()), new Text("1"));
        }
    }
 
    public static class IpUvReducer1 extends MapReduceBase implements Reducer<Text, Text, Text, Text> {
        @Override
        public void reduce(Text text, Iterator<Text> iterator, 
        		OutputCollector<Text, Text> outputCollector, Reporter reporter) throws IOException {
            // Each key arriving here is a distinct IP; emit it exactly once to deduplicate
            outputCollector.collect(text, new Text("1"));
        }
    }
 
    public static class IpUvMapper2 extends MapReduceBase implements Mapper<LongWritable, Text, Text, Text>{
        @Override
        public void map(LongWritable longWritable, Text text, 
        		OutputCollector<Text, Text> outputCollector, Reporter reporter) throws IOException {
            // Each line of job 1's output holds one distinct IP, so emit the constant key "uv" with count 1
            outputCollector.collect(new Text("uv"), new Text("1"));
        }
    }
 
    public static class IpUvReducer2 extends MapReduceBase implements Reducer<Text, Text, Text, Text>{
        @Override
        public void reduce(Text text, Iterator<Text> iterator, 
        		OutputCollector<Text, Text> outputCollector, Reporter reporter) throws IOException {
            long sum = 0;
            /**
             * Input to this reducer: key "uv", values [1, 1, 1] (one "1" per distinct IP from job 1)
             */
            while(iterator.hasNext()){
                sum = sum + Long.parseLong(iterator.next().toString());
            }
            outputCollector.collect(new Text("uv"), new Text(String.valueOf(sum)));
        }
    }
 
    public static void main(String [] args) throws IOException {
        if(args.length < 2){
            System.out.println("Usage: IpUv <input path> <output path>");
            return;
        }
        JobConf conf = new JobConf(IpUv.class);
        conf.setOutputKeyClass(Text.class);
        conf.setOutputValueClass(Text.class);
 
        //set mapper & reducer class
        conf.setMapperClass(IpUvMapper1.class);
        conf.setReducerClass(IpUvReducer1.class);
 
        // set format
        conf.setInputFormat(TextInputFormat.class);
        conf.setOutputFormat(TextOutputFormat.class);
 
        String inputDir = args[0];
        String outputDir = args[1];
 
        // FileInputFormat.setInputPaths(conf, "/user/hadoop/rongxin/locationinput/");
        FileInputFormat.setInputPaths(conf, inputDir);
        FileOutputFormat.setOutputPath(conf, new Path(outputDir));
        boolean flag = JobClient.runJob(conf).isSuccessful();
 
        if(flag){
            JobConf conf1 = new JobConf(IpUv.class);
            conf1.setOutputKeyClass(Text.class);
            conf1.setOutputValueClass(Text.class);
 
            //set mapper & reducer class
            conf1.setMapperClass(IpUvMapper2.class);
            conf1.setReducerClass(IpUvReducer2.class);
 
            // set format
            conf1.setInputFormat(TextInputFormat.class);
            conf1.setOutputFormat(TextOutputFormat.class);
 
            // FileInputFormat.setInputPaths(conf, "/user/hadoop/rongxin/locationinput/");
            FileInputFormat.setInputPaths(conf1, outputDir);
            FileOutputFormat.setOutputPath(conf1, new Path(outputDir + "-2"));
            boolean flag1 = JobClient.runJob(conf1).isSuccessful();
            System.out.println(flag1);
        }
    }
}

Run:

[hadoop@h71 q1]$ /usr/jdk1.7.0_25/bin/javac IpUv.java 
[hadoop@h71 q1]$ /usr/jdk1.7.0_25/bin/jar cvf xx.jar IpUv*class
[hadoop@h71 q1]$ hadoop jar xx.jar IpUv /input/ii.txt /output

View the result:

[hadoop@h71 q1]$ hadoop fs -cat /output/part-00000
1.1.1.1 1
2.2.2.2 1
3.3.3.3 1
[hadoop@h71 q1]$ hadoop fs -cat /output-2/part-00000
uv      3
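  For comparison, UV can also be computed in a single job: send every IP to one reducer and count how many distinct keys arrive there, since each reduce() call corresponds to exactly one distinct IP. The sketch below uses the newer mapreduce API; the class names are my own, it assumes a single reducer is acceptable for your data volume, and it is only an illustration of the alternative, not code from the original article:

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class IpUvSingleJob {

    public static class IpMapper extends Mapper<LongWritable, Text, Text, NullWritable> {
        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            // Emit the IP (first field of the line) as the key; duplicates collapse in the shuffle
            String ip = value.toString().split(" ")[0];
            context.write(new Text(ip.trim()), NullWritable.get());
        }
    }

    public static class UvReducer extends Reducer<Text, NullWritable, Text, Text> {
        private long distinctIps = 0;

        @Override
        protected void reduce(Text ip, Iterable<NullWritable> values, Context context) {
            // One reduce() call per distinct IP
            distinctIps++;
        }

        @Override
        protected void cleanup(Context context) throws IOException, InterruptedException {
            // Write the total once, after all distinct IPs have been counted
            context.write(new Text("uv"), new Text(String.valueOf(distinctIps)));
        }
    }

    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance(new Configuration(), "ip uv single job");
        job.setJarByClass(IpUvSingleJob.class);
        job.setMapperClass(IpMapper.class);
        job.setReducerClass(UvReducer.class);
        job.setNumReduceTasks(1); // all IPs must reach one reducer for a global distinct count
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(NullWritable.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}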