MapReduce Counters

Counters are defined and incremented mainly in the map and reduce phases.

Map

 Counter counter = context.getCounter("MR_Count", "MapReduceCounter");	// the first argument is the counter group name, the second is the counter name
 counter.increment(1L);	// increment the counter by 1 each time the map method runs

Reduce

// Custom counter: defined with an enum

public static enum MyCount{
    REDUCE_INPUT_KEY_RECORDS,
    REDUCE_INPUT_VALUE_RECORDS
}		// count how many keys and how many values are seen, respectively

context.getCounter(MyCount.REDUCE_INPUT_KEY_RECORDS).increment(1L); 		// number of keys seen
context.getCounter(MyCount.REDUCE_INPUT_VALUE_RECORDS).increment(1L);		// number of values seen
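
After the job finishes, the counters are printed along with the job's completion output, and they can also be read back from the Job object in the driver. A minimal sketch (not part of the original code; it assumes the job variable from JobMain after waitForCompletion(true) has returned):

import org.apache.hadoop.mapreduce.Counters;

Counters counters = job.getCounters();

// counter defined with a group/name pair in the mapper
long mapCalls = counters.findCounter("MR_Count", "MapReduceCounter").getValue();

// counters defined with the enum in the reducer
long reduceKeys = counters.findCounter(SortReducer.MyCount.REDUCE_INPUT_KEY_RECORDS).getValue();
long reduceValues = counters.findCounter(SortReducer.MyCount.REDUCE_INPUT_VALUE_RECORDS).getValue();

System.out.println("map calls: " + mapCalls + ", reduce keys: " + reduceKeys + ", reduce values: " + reduceValues);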

Complete code

SortMapper.java

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Counter;
import org.apache.hadoop.mapreduce.Mapper;

import java.io.IOException;


public class SortMapper extends Mapper<LongWritable, Text,PairWritable,Text> {

    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {

        // Custom counter
        Counter counter = context.getCounter("MR_Count", "MapReduceCounter");
        counter.increment(1L);

        // 1. Split each line and wrap the fields in a PairWritable object to use as k2
        String[] split = value.toString().split("\t");
        PairWritable pairWritable = new PairWritable();
        pairWritable.setFirst(split[0]);
        pairWritable.setSecond(Integer.parseInt(split[1]));

        // 2. Write k2 and v2 to the context
        context.write(pairWritable,value);

    }
}
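
For reference, the mapper splits each line on a tab, so it assumes input lines of the form word\tcount, for example "a\t1" or "b\t2" (hypothetical sample values): the first field becomes PairWritable.first and the second field, parsed as an int, becomes PairWritable.second.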

SortReducer.java


import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

import java.io.IOException;

public class SortReducer extends Reducer<PairWritable, Text,PairWritable, NullWritable>{

    // Custom counter: defined with an enum
    public static enum MyCount{
        REDUCE_INPUT_KEY_RECORDS,
        REDUCE_INPUT_VALUE_RECORDS
    }

    @Override
    protected void reduce(PairWritable key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
        // Count the number of keys in the reduce phase
        context.getCounter(MyCount.REDUCE_INPUT_KEY_RECORDS).increment(1L);
        // Identical keys are grouped, so one key may arrive with several values, e.g. <a 1, a 1>; write the key once per value so no record is lost
        for (Text value : values) {
            // Count the number of values in the reduce phase
            context.getCounter(MyCount.REDUCE_INPUT_VALUE_RECORDS).increment(1L);
            context.write(key,NullWritable.get());
        }

    }
}

PairWritable.java


import org.apache.hadoop.io.WritableComparable;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

public class PairWritable implements WritableComparable<PairWritable> {

    // first is the word on the left:      string  a  b  a  b  a
    // second is the number on the right:  int     1  1  9  2  3
    private String first;
    private int second;

    // Define the sort order
    @Override
    public int compareTo(PairWritable other) {

        // Compare first; only if the first values are equal is second compared
        int result = this.first.compareTo(other.first);   // string comparison: returns a positive value if this.first sorts after other.first and a negative value if it sorts before,
        // based on the characters' code values

        // If the first values are equal, compare second
        if (result == 0){
            // second is an int, so subtraction gives the comparison result directly
            return this.second - other.second;
        }

        // Otherwise first already decides the order, so return result to the comparator
        return result;
    }

    // Serialization
    @Override
    public void write(DataOutput dataOutput) throws IOException {

        dataOutput.writeUTF(first);
        dataOutput.writeInt(second);


    }

    // Deserialization
    @Override
    public void readFields(DataInput dataInput) throws IOException {

        this.first = dataInput.readUTF();
        this.second = dataInput.readInt();
    }

    public String getFirst() {
        return first;
    }

    public void setFirst(String first) {
        this.first = first;
    }

    public int getSecond() {
        return second;
    }

    public void setSecond(int second) {
        this.second = second;
    }

    @Override
    public String toString() {
        return  first + '\t' + second ;
    }
}
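
To see the effect of compareTo outside a job, a quick local check can sort a few PairWritable objects directly (a minimal sketch with hypothetical sample values; since WritableComparable extends Comparable, Arrays.sort applies the rule above):

import java.util.Arrays;

public class PairWritableSortDemo {
    public static void main(String[] args) {
        String[] words = {"b", "a", "a"};
        int[] counts = {2, 9, 1};

        PairWritable[] pairs = new PairWritable[words.length];
        for (int i = 0; i < pairs.length; i++) {
            PairWritable p = new PairWritable();
            p.setFirst(words[i]);
            p.setSecond(counts[i]);
            pairs[i] = p;
        }

        // uses compareTo: first ascending, then second ascending
        Arrays.sort(pairs);

        for (PairWritable p : pairs) {
            System.out.println(p);      // expected order: a 1, a 9, b 2
        }
    }
}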

JobMain.java

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

public class JobMain extends Configured implements Tool {
    @Override
    public int run(String[] strings) throws Exception {

        Job job = Job.getInstance(super.getConf(),"mapreduce_sort");
        job.setJarByClass(JobMain.class);

        // Input
        job.setInputFormatClass(TextInputFormat.class);
        TextInputFormat.addInputPath(job,new Path("hdfs://slave1:9000/sort"));

        //map
        job.setMapperClass(SortMapper.class);
        job.setMapOutputKeyClass(PairWritable.class);
        job.setMapOutputValueClass(Text.class);

        //reduce
        job.setReducerClass(SortReducer.class);
        job.setOutputKeyClass(PairWritable.class);
        job.setOutputValueClass(NullWritable.class);

        // Output
        job.setOutputFormatClass(TextOutputFormat.class);
        TextOutputFormat.setOutputPath(job,new Path("hdfs://slave1:9000/outsort"));

        boolean b = job.waitForCompletion(true);
        return b?0:1;
    }

    public static void main(String[] args) throws Exception {
        Configuration configuration = new Configuration();
        int run = ToolRunner.run(configuration,new JobMain(),args);
        System.exit(run);
    }
}
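
To try the job out, package these classes into a jar and submit it with the hadoop command, for example (sort.jar is a hypothetical jar name; the output directory hdfs://slave1:9000/outsort must not already exist, otherwise the job fails at startup):

hadoop jar sort.jar JobMain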
