1、Counters的作用
可以用来记录处理了多少条数据、错误数据有多少条等等
2、原理解析
在map端的setup方法和reduce端的setup方法里都有一个参数Context,通过它可以生成多个counter计数器,直接调用其increment(1)进行计数,程序结束时会在控制台打印各计数器的值
3、代码
-
1、CounterMapper.class
import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.io.NullWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapreduce.Counter; import org.apache.hadoop.mapreduce.Mapper; import java.io.IOException; /** * [@Author](https://my.oschina.net/arthor) liufu */ public class CounterMapper extends Mapper<LongWritable, Text, LongWritable, Text>{ Counter secuss = null; Counter fail = null; [@Override](https://my.oschina.net/u/1162528) protected void setup(Context context) throws IOException, InterruptedException { secuss = context.getCounter("CounterTest", "secuss"); fail = context.getCounter("CounterTest", "fail"); } [@Override](https://my.oschina.net/u/1162528) protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException { String line = value.toString(); String[] fields = line.split("\t"); try { long age = Long.parseLong(fields[0]); } catch (Exception e){ fail.increment(1); context.write(key, value); return; } secuss.increment(1); context.write(key, value); } [@Override](https://my.oschina.net/u/1162528) protected void cleanup(Context context) throws IOException, InterruptedException { secuss = null; fail = null; } }
-
2、CounterReducer.class
import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.io.NullWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapreduce.Counter; import org.apache.hadoop.mapreduce.Mapper; import org.apache.hadoop.mapreduce.Reducer; import java.io.IOException; /** * [@Author](https://my.oschina.net/arthor) liufu */ public class CounterReducer extends Reducer<LongWritable, Text, Text, Text> { Counter secuss = null; Counter fail = null; @Override protected void setup(Context context) throws IOException, InterruptedException { secuss = context.getCounter("CounterTest", "secuss"); fail = context.getCounter("CounterTest", "fail"); } @Override protected void reduce(LongWritable key, Iterable<Text> values, Context context) throws IOException, InterruptedException { for (Text value : values) { String line = value.toString(); String[] fields = line.split("\t"); try { long age = Long.parseLong(fields[0]); } catch (Exception e) { fail.increment(1); context.write(value, value); return; } secuss.increment(1); context.write(value, value); } } @Override protected void cleanup(Context context) throws IOException, InterruptedException { secuss = null; fail = null; } }
-
3、CounterTestRun.class
import com.bigdata.surfilter.JoinInMapper.*; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapreduce.Job; import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; import org.apache.hadoop.mapreduce.lib.input.TextInputFormat; import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat; import java.io.IOException; import java.net.URI; import java.net.URISyntaxException; /** * @Author liufu */ public class CounterTestRun { public static void main(String[] args) throws IOException, URISyntaxException { Configuration conf = new Configuration(); Job job = new Job(conf, "joinInMapper"); //通过classpath中主类找到jar job.setJarByClass(CounterTestRun.class); //job的map端和reduce端代码 job.setMapperClass(CounterMapper.class); job.setReducerClass(CounterReducer.class); //设置map端和reduce输出的类型,这样才能够做反射得到对应的类 job.setMapOutputKeyClass(LongWritable.class); job.setMapOutputValueClass(Text.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(Text.class); //job 如何读取数据,如何写出数据 job.setInputFormatClass(TextInputFormat.class); job.setOutputFormatClass(TextOutputFormat.class); //job 的数据从哪里来; 绑定输入目录,可以使用setInputPaths, 也可以使用 addInputPaths FileInputFormat.setInputPaths(job, new Path("/counter/input1/"),new Path("/counter/input2/")); //写到哪里去 FileOutputFormat.setOutputPath(job, new Path("/counter/output/")); try { boolean b = job.waitForCompletion(true); System.exit(b == true ? 0 : 1); } catch (InterruptedException e) { e.printStackTrace(); } catch (ClassNotFoundException e) { e.printStackTrace(); } } }