MapReduce Counters

Counters are defined and incremented mainly in the map and reduce phases.

Map

 Counter counter = context.getCounter("MR_Count", "MapReduceCounter");	// the first argument is the counter group name, the second is the counter name
 counter.increment(1L);	// increment the counter by 1 each time the map method runs

Reduce

// Custom counter: defined with an enum

public static enum MyCount{
    REDUCE_INPUT_KEY_RECORDS,
    REDUCE_INPUT_VALUE_RECORDS
}		// count how many keys and how many values are seen, respectively

context.getCounter(MyCount.REDUCE_INPUT_KEY_RECORDS).increment(1L); 		// number of keys seen
context.getCounter(MyCount.REDUCE_INPUT_VALUE_RECORDS).increment(1L);		// number of values seen
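
After the job finishes, the counters are printed along with the job's completion output, and they can also be read back from the Job object in the driver. A minimal sketch (not part of the original code; it assumes the job variable from JobMain after waitForCompletion(true) has returned):

import org.apache.hadoop.mapreduce.Counters;

Counters counters = job.getCounters();

// counter defined with a group/name pair in the mapper
long mapCalls = counters.findCounter("MR_Count", "MapReduceCounter").getValue();

// counters defined with the enum in the reducer
long reduceKeys = counters.findCounter(SortReducer.MyCount.REDUCE_INPUT_KEY_RECORDS).getValue();
long reduceValues = counters.findCounter(SortReducer.MyCount.REDUCE_INPUT_VALUE_RECORDS).getValue();

System.out.println("map calls: " + mapCalls + ", reduce keys: " + reduceKeys + ", reduce values: " + reduceValues);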

Complete code

SortMapper.java

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Counter;
import org.apache.hadoop.mapreduce.Mapper;

import java.io.IOException;


public class SortMapper extends Mapper<LongWritable, Text,PairWritable,Text> {

    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {

        // Custom counter
        Counter counter = context.getCounter("MR_Count", "MapReduceCounter");
        counter.increment(1L);

        // 1. Split each line and wrap the fields in a PairWritable object to use as k2
        String[] split = value.toString().split("\t");
        PairWritable pairWritable = new PairWritable();
        pairWritable.setFirst(split[0]);
        pairWritable.setSecond(Integer.parseInt(split[1]));

        // 2. Write k2 and v2 to the context
        context.write(pairWritable,value);

    }
}
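
For reference, the mapper splits each line on a tab, so it assumes input lines of the form word\tcount, for example "a\t1" or "b\t2" (hypothetical sample values): the first field becomes PairWritable.first and the second field, parsed as an int, becomes PairWritable.second.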

SortReducer.java


import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

import java.io.IOException;

public class SortReducer extends Reducer<PairWritable, Text,PairWritable, NullWritable>{

    // Custom counter: defined with an enum
    public static enum MyCount{
        REDUCE_INPUT_KEY_RECORDS,
        REDUCE_INPUT_VALUE_RECORDS
    }

    @Override
    protected void reduce(PairWritable key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
        // Count the number of keys in the reduce phase
        context.getCounter(MyCount.REDUCE_INPUT_KEY_RECORDS).increment(1L);
        // Identical keys are grouped, so one key may arrive with several values, e.g. <a 1, a 1>; write the key once per value so no record is lost
        for (Text value : values) {
            // Count the number of values in the reduce phase
            context.getCounter(MyCount.REDUCE_INPUT_VALUE_RECORDS).increment(1L);
            context.write(key,NullWritable.get());
        }

    }
}

PairWritable.java


import org.apache.hadoop.io.WritableComparable;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

public class PairWritable implements WritableComparable<PairWritable> {

    // first is the word on the left:      string  a  b  a  b  a
    // second is the number on the right:  int     1  1  9  2  3
    private String first;
    private int second;

    // Define the sort order
    @Override
    public int compareTo(PairWritable other) {

        // Compare first; only if the first values are equal is second compared
        int result = this.first.compareTo(other.first);   // string comparison: returns a positive value if this.first sorts after other.first and a negative value if it sorts before,
        // based on the characters' code values

        // If the first values are equal, compare second
        if (result == 0){
            // second is an int, so subtraction gives the comparison result directly
            return this.second - other.second;
        }

        // Otherwise first already decides the order, so return result to the comparator
        return result;
    }

    // Serialization
    @Override
    public void write(DataOutput dataOutput) throws IOException {

        dataOutput.writeUTF(first);
        dataOutput.writeInt(second);


    }

    // Deserialization
    @Override
    public void readFields(DataInput dataInput) throws IOException {

        this.first = dataInput.readUTF();
        this.second = dataInput.readInt();
    }

    public String getFirst() {
        return first;
    }

    public void setFirst(String first) {
        this.first = first;
    }

    public int getSecond() {
        return second;
    }

    public void setSecond(int second) {
        this.second = second;
    }

    @Override
    public String toString() {
        return  first + '\t' + second ;
    }
}
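
To see the effect of compareTo outside a job, a quick local check can sort a few PairWritable objects directly (a minimal sketch with hypothetical sample values; since WritableComparable extends Comparable, Arrays.sort applies the rule above):

import java.util.Arrays;

public class PairWritableSortDemo {
    public static void main(String[] args) {
        String[] words = {"b", "a", "a"};
        int[] counts = {2, 9, 1};

        PairWritable[] pairs = new PairWritable[words.length];
        for (int i = 0; i < pairs.length; i++) {
            PairWritable p = new PairWritable();
            p.setFirst(words[i]);
            p.setSecond(counts[i]);
            pairs[i] = p;
        }

        // uses compareTo: first ascending, then second ascending
        Arrays.sort(pairs);

        for (PairWritable p : pairs) {
            System.out.println(p);      // expected order: a 1, a 9, b 2
        }
    }
}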

JobMain.java

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

public class JobMain extends Configured implements Tool {
    @Override
    public int run(String[] strings) throws Exception {

        Job job = Job.getInstance(super.getConf(),"mapreduce_sort");
        job.setJarByClass(JobMain.class);

        // Input
        job.setInputFormatClass(TextInputFormat.class);
        TextInputFormat.addInputPath(job,new Path("hdfs://slave1:9000/sort"));

        //map
        job.setMapperClass(SortMapper.class);
        job.setMapOutputKeyClass(PairWritable.class);
        job.setMapOutputValueClass(Text.class);

        //reduce
        job.setReducerClass(SortReducer.class);
        job.setOutputKeyClass(PairWritable.class);
        job.setOutputValueClass(NullWritable.class);

        // Output
        job.setOutputFormatClass(TextOutputFormat.class);
        TextOutputFormat.setOutputPath(job,new Path("hdfs://slave1:9000/outsort"));

        boolean b = job.waitForCompletion(true);
        return b?0:1;
    }

    public static void main(String[] args) throws Exception {
        Configuration configuration = new Configuration();
        int run = ToolRunner.run(configuration,new JobMain(),args);
        System.exit(run);
    }
}
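
To try the job out, package these classes into a jar and submit it with the hadoop command, for example (sort.jar is a hypothetical jar name; the output directory hdfs://slave1:9000/outsort must not already exist, otherwise the job fails at startup):

hadoop jar sort.jar JobMain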
