Counters are mainly defined in the map and reduce phases.
map
Counter counter = context.getCounter("MR_Count", "MapReduceCounter"); // the first argument is the counter group name, the second is the counter name
counter.increment(1L); // add 1 every time map() runs
reduce
// Custom counters defined with an enum
public static enum MyCount{
    REDUCE_INPUT_KEY_RECORDS,
    REDUCE_INPUT_VALUE_RECORDS
} // count the number of keys and values respectively
context.getCounter(MyCount.REDUCE_INPUT_KEY_RECORDS).increment(1L); // number of keys
context.getCounter(MyCount.REDUCE_INPUT_VALUE_RECORDS).increment(1L); // number of values
Complete code
SortMapper.java
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Counter;
import org.apache.hadoop.mapreduce.Mapper;
import java.io.IOException;
public class SortMapper extends Mapper<LongWritable, Text, PairWritable, Text> {
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        // Custom counter: incremented once per map() call
        Counter counter = context.getCounter("MR_Count", "MapReduceCounter");
        counter.increment(1L);
        // 1. Split each line and wrap the fields in a PairWritable, which becomes k2
        String[] split = value.toString().split("\t");
        PairWritable pairWritable = new PairWritable();
        pairWritable.setFirst(split[0]);
        pairWritable.setSecond(Integer.parseInt(split[1]));
        // 2. Write k2 and v2 to the context
        context.write(pairWritable, value);
    }
}
SortReducer.java
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
import java.io.IOException;
public class SortReducer extends Reducer<PairWritable, Text, PairWritable, NullWritable> {
    // Custom counters defined with an enum
    public static enum MyCount {
        REDUCE_INPUT_KEY_RECORDS,
        REDUCE_INPUT_VALUE_RECORDS
    }
    @Override
    protected void reduce(PairWritable key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
        // Count the number of keys seen in the reduce phase
        context.getCounter(MyCount.REDUCE_INPUT_KEY_RECORDS).increment(1L);
        // Identical keys may be grouped with several values, e.g. <a 1, a 1>,
        // so write the key once per value to avoid dropping duplicate records
        for (Text value : values) {
            // Count the number of values seen in the reduce phase
            context.getCounter(MyCount.REDUCE_INPUT_VALUE_RECORDS).increment(1L);
            context.write(key, NullWritable.get());
        }
    }
}
PairWritable.java
import org.apache.hadoop.io.WritableComparable;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
public class PairWritable implements WritableComparable<PairWritable> {
    // first is the word in the left column,    String: a b a b a
    // second is the number in the right column, int:   1 1 9 2 3
    private String first;
    private int second;
    // Sorting rule
    @Override
    public int compareTo(PairWritable other) {
        // Compare first; if the firsts are equal, then compare second.
        // String comparison is lexicographic (by character code points) and
        // returns a positive, zero, or negative int.
        int result = this.first.compareTo(other.first);
        // If the first words are equal, compare the second values
        if (result == 0) {
            // second is an int, so subtraction directly yields the int for the comparator
            return this.second - other.second;
        }
        // If result is non-zero, return it to the comparator as-is
        return result;
    }
    // Serialization
    @Override
    public void write(DataOutput dataOutput) throws IOException {
        dataOutput.writeUTF(first);
        dataOutput.writeInt(second);
    }
    // Deserialization
    @Override
    public void readFields(DataInput dataInput) throws IOException {
        this.first = dataInput.readUTF();
        this.second = dataInput.readInt();
    }
    public String getFirst() {
        return first;
    }
    public void setFirst(String first) {
        this.first = first;
    }
    public int getSecond() {
        return second;
    }
    public void setSecond(int second) {
        this.second = second;
    }
    @Override
    public String toString() {
        return first + '\t' + second;
    }
}
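For a quick sanity check of the sorting rule, here is a minimal local sketch (not part of the MapReduce job) that sorts a few PairWritable objects with the compareTo method above. PairWritableSortDemo is a hypothetical helper class added only for illustration; the sample words and numbers mirror the a/b and 1 1 9 2 3 values mentioned in the comments.
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
public class PairWritableSortDemo {
    public static void main(String[] args) {
        // Build the sample pairs: (a,1) (b,1) (a,9) (b,2) (a,3)
        String[] words = {"a", "b", "a", "b", "a"};
        int[] numbers = {1, 1, 9, 2, 3};
        List<PairWritable> pairs = new ArrayList<>();
        for (int i = 0; i < words.length; i++) {
            PairWritable p = new PairWritable();
            p.setFirst(words[i]);
            p.setSecond(numbers[i]);
            pairs.add(p);
        }
        // Sort with PairWritable.compareTo: first by word, then by number
        Collections.sort(pairs);
        // Expected order: a 1, a 3, a 9, b 1, b 2
        for (PairWritable p : pairs) {
            System.out.println(p);
        }
    }
}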
JobMain.java
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
public class JobMain extends Configured implements Tool {
    @Override
    public int run(String[] strings) throws Exception {
        Job job = Job.getInstance(super.getConf(), "mapreduce_sort");
        job.setJarByClass(JobMain.class);
        // Input
        job.setInputFormatClass(TextInputFormat.class);
        TextInputFormat.addInputPath(job, new Path("hdfs://slave1:9000/sort"));
        // Map
        job.setMapperClass(SortMapper.class);
        job.setMapOutputKeyClass(PairWritable.class);
        job.setMapOutputValueClass(Text.class);
        // Reduce
        job.setReducerClass(SortReducer.class);
        job.setOutputKeyClass(PairWritable.class);
        job.setOutputValueClass(NullWritable.class);
        // Output
        job.setOutputFormatClass(TextOutputFormat.class);
        TextOutputFormat.setOutputPath(job, new Path("hdfs://slave1:9000/outsort"));
        boolean b = job.waitForCompletion(true);
        return b ? 0 : 1;
    }
    public static void main(String[] args) throws Exception {
        Configuration configuration = new Configuration();
        int run = ToolRunner.run(configuration, new JobMain(), args);
        System.exit(run);
    }
}
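Besides appearing in the job's final counter report, the counter values can also be read back in the driver once the job finishes. The following is a hedged sketch of an optional addition inside run(), placed after job.waitForCompletion(true); it uses Job.getCounters() and Counters.findCounter(), and the printed labels are only illustrative.
// Optional addition inside run(), after job.waitForCompletion(true)
// (requires: import org.apache.hadoop.mapreduce.Counters;)
Counters counters = job.getCounters();
long mapCalls = counters.findCounter("MR_Count", "MapReduceCounter").getValue();
long reduceKeys = counters.findCounter(SortReducer.MyCount.REDUCE_INPUT_KEY_RECORDS).getValue();
long reduceValues = counters.findCounter(SortReducer.MyCount.REDUCE_INPUT_VALUE_RECORDS).getValue();
System.out.println("map() calls = " + mapCalls
        + ", reduce keys = " + reduceKeys
        + ", reduce values = " + reduceValues);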