Hadoop读书笔记系列文章:http://blog.csdn.net/caicongyang/article/category/2166855
1.MapReduce 计数器的作用
2.MapReduce自带的计数器
14/11/26 22:28:51 INFO mapred.JobClient: Counters: 19
14/11/26 22:28:51 INFO mapred.JobClient: File Output Format Counters
14/11/26 22:28:51 INFO mapred.JobClient: Bytes Written=25
14/11/26 22:28:51 INFO mapred.JobClient: FileSystemCounters
14/11/26 22:28:51 INFO mapred.JobClient: FILE_BYTES_READ=343
14/11/26 22:28:51 INFO mapred.JobClient: HDFS_BYTES_READ=42
14/11/26 22:28:51 INFO mapred.JobClient: FILE_BYTES_WRITTEN=128056
14/11/26 22:28:51 INFO mapred.JobClient: HDFS_BYTES_WRITTEN=25
14/11/26 22:28:51 INFO mapred.JobClient: File Input Format Counters
14/11/26 22:28:51 INFO mapred.JobClient: Bytes Read=21
14/11/26 22:28:51 INFO mapred.JobClient: Map-Reduce Framework
14/11/26 22:28:51 INFO mapred.JobClient: Map output materialized bytes=47
14/11/26 22:28:51 INFO mapred.JobClient: Map input records=2
14/11/26 22:28:51 INFO mapred.JobClient: Reduce shuffle bytes=0
14/11/26 22:28:51 INFO mapred.JobClient: Spilled Records=4
14/11/26 22:28:51 INFO mapred.JobClient: Map output bytes=37
14/11/26 22:28:51 INFO mapred.JobClient: Total committed heap usage (bytes)=366034944
14/11/26 22:28:51 INFO mapred.JobClient: SPLIT_RAW_BYTES=97
14/11/26 22:28:51 INFO mapred.JobClient: Combine input records=0
14/11/26 22:28:51 INFO mapred.JobClient: Reduce input records=2
14/11/26 22:28:51 INFO mapred.JobClient: Reduce input groups=2
14/11/26 22:28:51 INFO mapred.JobClient: Combine output records=0
14/11/26 22:28:51 INFO mapred.JobClient: Reduce output records=2
14/11/26 22:28:51 INFO mapred.JobClient: Map output records=2
3.自定义计数器
package counter;
import java.io.IOException;
import java.net.URI;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Counter;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.mapreduce.lib.partition.HashPartitioner;
/**
*
* <p>
* Title: WordCount.java
* Package counter
* </p>
* <p>
* Description: 自定义计数器
* <p>
* @author Tom.Cai
* @created 2014-11-26 下午10:47:32
* @version V1.0
*
*/
public class WordCount {
private static final String INPUT_PATH = "hdfs://192.168.80.100:9000/hello";
private static final String OUT_PATH = "hdfs://192.168.80.100:9000/out";
public static void main(String[] args) throws Exception {
Configuration conf = new Configuration();
FileSystem fileSystem = FileSystem.get(new URI(INPUT_PATH), conf);
Path outPath = new Path(OUT_PATH);
if (fileSystem.exists(outPath)) {
fileSystem.delete(outPath, true);
}
Job job = new Job(conf, WordCount.class.getSimpleName());
FileInputFormat.setInputPaths(job, INPUT_PATH);
job.setInputFormatClass(TextInputFormat.class);
job.setMapperClass(MyMapper.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(LongWritable.class);
job.setPartitionerClass(HashPartitioner.class);
job.setNumReduceTasks(1);
job.setReducerClass(MyReducer.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(LongWritable.class);
FileOutputFormat.setOutputPath(job, new Path(OUT_PATH));
job.setOutputFormatClass(TextOutputFormat.class);
job.waitForCompletion(true);
}
static class MyMapper extends Mapper<LongWritable, Text, Text, LongWritable> {
@Override
protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
<span style="color:#ff0000;"> /**
* 计数器的使用
*/
Counter mycounter = context.getCounter("MyCounter", "hello");
if (value.toString().contains("hello")) {
mycounter.increment(1L);
}</span>
String[] splited = value.toString().split("\t");
for (String word : splited) {
context.write(new Text(word), new LongWritable(1));
}
}
}
static class MyReducer extends Reducer<Text, LongWritable, Text, LongWritable> {
@Override
protected void reduce(Text key, Iterable<LongWritable> value, Context context) throws IOException, InterruptedException {
long count = 0L;
for (LongWritable times : value) {
count += times.get();
}
context.write(key, new LongWritable(count));
}
}
}
4.自定义计数器后输出
14/11/26 22:45:38 INFO mapred.JobClient: Counters: 20
14/11/26 22:45:38 INFO mapred.JobClient: File Output Format Counters
14/11/26 22:45:38 INFO mapred.JobClient: Bytes Written=25
14/11/26 22:45:38 INFO mapred.JobClient: MyCounter
14/11/26 22:45:38 INFO mapred.JobClient: hello=2
14/11/26 22:45:38 INFO mapred.JobClient: FileSystemCounters
14/11/26 22:45:38 INFO mapred.JobClient: FILE_BYTES_READ=343
14/11/26 22:45:38 INFO mapred.JobClient: HDFS_BYTES_READ=42
14/11/26 22:45:38 INFO mapred.JobClient: FILE_BYTES_WRITTEN=128036
14/11/26 22:45:38 INFO mapred.JobClient: HDFS_BYTES_WRITTEN=25
14/11/26 22:45:38 INFO mapred.JobClient: File Input Format Counters
14/11/26 22:45:38 INFO mapred.JobClient: Bytes Read=21
14/11/26 22:45:38 INFO mapred.JobClient: Map-Reduce Framework
14/11/26 22:45:38 INFO mapred.JobClient: Map output materialized bytes=47
14/11/26 22:45:38 INFO mapred.JobClient: Map input records=2
14/11/26 22:45:38 INFO mapred.JobClient: Reduce shuffle bytes=0
14/11/26 22:45:38 INFO mapred.JobClient: Spilled Records=4
14/11/26 22:45:38 INFO mapred.JobClient: Map output bytes=37
14/11/26 22:45:38 INFO mapred.JobClient: Total committed heap usage (bytes)=366034944
14/11/26 22:45:38 INFO mapred.JobClient: SPLIT_RAW_BYTES=97
14/11/26 22:45:38 INFO mapred.JobClient: Combine input records=0
14/11/26 22:45:38 INFO mapred.JobClient: Reduce input records=2
14/11/26 22:45:38 INFO mapred.JobClient: Reduce input groups=2
14/11/26 22:45:38 INFO mapred.JobClient: Combine output records=0
14/11/26 22:45:38 INFO mapred.JobClient: Reduce output records=2
14/11/26 22:45:38 INFO mapred.JobClient: Map output records=2
欢迎大家一起讨论学习!
有用的自己收!
记录与分享,让你我共成长!欢迎查看我的其他博客;我的博客地址:http://blog.csdn.net/caicongyang