1. Hadoop的Counter可以使用枚举和字符串两种方式定义
枚举:
// Define the enum first; each constant identifies one counter.
public static enum LOG_COUNTER{
BAD_RECORDS;
}
// Use the counter: fetch it from the context by its enum constant, then add 1 to it.
Counter counter = context.getCounter(LOG_COUNTER.BAD_RECORDS);
counter.increment(1);
字符串
// Fetch the counter from the context by two strings (group name, counter name), then add 1 to it.
Counter counter2 = context.getCounter("log_counter", "bad_records");
counter2.increment(1);
综合demo如下
mapreduce程序
package com.lijie.conter;
import java.io.IOException;
import java.util.List;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.compress.GzipCodec;
import org.apache.hadoop.mapreduce.Counter;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
/**
 * Demo MapReduce job showing both ways of addressing a Hadoop counter: by enum
 * constant and by (group, name) string pair.
 *
 * <p>Each input line is handed to {@code ParseTVData.transData}; lines that yield
 * no parsed records bump both bad-record counters, all other lines are emitted
 * one parsed record per output key with an empty value. No reducer class is
 * configured, and the job output is gzip-compressed.
 */
public class CounterMapReduce extends Configured implements Tool {

    /**
     * Entry point. The command-line {@code args} are not used; the job always runs
     * against the fixed input and output paths below.
     *
     * @param args ignored
     * @throws Exception if the tool fails to run
     */
    public static void main(String[] args) throws Exception {
        String[] args1 = {"hdfs://lijie:9000/middle/*", "hdfs://lijie:9000/middle/out"};
        int run = ToolRunner.run(new Configuration(), new CounterMapReduce(), args1);
        System.exit(run);
    }

    /** Enum-based counters reported by {@link CounterMap}. */
    public static enum LOG_COUNTER {
        BAD_RECORDS;
    }

    /** Mapper that emits every parsed record as a key and counts unparsable lines. */
    public static class CounterMap extends Mapper<LongWritable, Text, Text, Text> {

        // Reused for every output pair to avoid allocating two Text objects per record.
        private final Text outKey = new Text();
        private final Text emptyValue = new Text("");

        /**
         * Parses one input line and either writes its records or counts it as bad.
         *
         * @param key     input key supplied by the input format (unused)
         * @param value   one line of input text
         * @param context task context used for output and counters
         * @throws IOException          if writing output fails
         * @throws InterruptedException if the task is interrupted while writing
         */
        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            List<String> list = ParseTVData.transData(value.toString());
            if (list == null || list.isEmpty()) {
                // Same event recorded twice, once per counter-addressing style.
                Counter counter = context.getCounter(LOG_COUNTER.BAD_RECORDS);
                counter.increment(1);
                Counter counter2 = context.getCounter("log_counter", "bad_records");
                counter2.increment(1);
                return;
            }
            for (String record : list) {
                outKey.set(record);
                context.write(outKey, emptyValue);
            }
        }
    }

    /**
     * Configures and runs the job.
     *
     * @param arg0 {@code arg0[0]} is the input path, {@code arg0[1]} the output path
     * @return 0 if the job completed successfully, 1 otherwise
     * @throws Exception if the job cannot be set up or submitted
     */
    @Override
    public int run(String[] arg0) throws Exception {
        // Use the configuration injected by ToolRunner so generic options (-D, -conf, ...)
        // take effect; fall back to a fresh one when run() is called directly.
        Configuration conf = getConf();
        if (conf == null) {
            conf = new Configuration();
        }

        // Remove the output directory left over from a previous run, if any.
        Path path = new Path(arg0[1]);
        FileSystem fs = path.getFileSystem(conf);
        if (fs.isDirectory(path)) {
            fs.delete(path, true);
        }

        Job job = Job.getInstance(conf, "counter");
        job.setJarByClass(CounterMapReduce.class);
        FileInputFormat.addInputPath(job, new Path(arg0[0]));
        FileOutputFormat.setOutputPath(job, path);
        job.setMapperClass(CounterMap.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);

        // Compress the job output with gzip.
        FileOutputFormat.setCompressOutput(job, true);
        FileOutputFormat.setOutputCompressorClass(job, GzipCodec.class);

        // Propagate job failure to the process exit code instead of always reporting success.
        return job.waitForCompletion(true) ? 0 : 1;
    }
}
其中 ParseTVData.transData(value.toString()) 只是使用 jsoup 对输入记录中的标签(WIC、A)进行解析,具体如下:
package com.lijie.conter;
import java.util.ArrayList;
import java.util.List;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
/**
 * Parses one raw record with jsoup and flattens it into "@"-separated lines.
 */
public class ParseTVData {

    /**
     * Extracts one output line per {@code A} element found in {@code text}.
     *
     * <p>The set-top box number ({@code stbNum}) and date are read from the first
     * {@code WIC} element; each {@code A} element contributes its channel name
     * ({@code sn}), start time ({@code s}) and end time ({@code e}). Every line has
     * the form {@code stbNum@date@sn@s@e}.
     *
     * @param text raw record text to parse
     * @return the extracted lines; an empty list when there is no {@code WIC}
     *         element, when {@code stbNum} is empty, when there are no {@code A}
     *         elements, or when parsing fails. Never {@code null}.
     */
    public static List<String> transData(String text) {
        List<String> list = new ArrayList<String>();
        try {
            Document doc = Jsoup.parse(text);

            // Check explicitly for a missing WIC element rather than relying on
            // get(0) throwing and the catch block below absorbing it.
            Elements content = doc.getElementsByTag("WIC");
            if (content.isEmpty()) {
                return list;
            }
            Element wic = content.get(0);

            String stbNum = wic.attr("stbNum"); // set-top box number
            if (stbNum.equals("")) {
                return list;
            }
            String date = wic.attr("date"); // date

            for (Element el : doc.getElementsByTag("A")) {
                String e = el.attr("e");   // end time
                String s = el.attr("s");   // start time
                String sn = el.attr("sn"); // channel name
                list.add(stbNum + "@" + date + "@" + sn + "@" + s + "@" + e);
            }
        } catch (RuntimeException ex) {
            // Best effort: a record that cannot be parsed yields whatever was collected
            // so far (normally nothing). Print the whole exception because getMessage()
            // may be null.
            System.err.println("ParseTVData.transData failed: " + ex);
        }
        return list;
    }
}