mapper程序
public static class Map extends Mapper<Object, Text, Text, Text>{
// Reusable output holders — avoids allocating a new Text per record.
private final Text requestKey = new Text();
private final Text addrValue = new Text();
/**
 * Parses one log line via KPI.filterIPs and, when the line is valid,
 * emits (request, remote_addr). Invalid lines are not emitted; they
 * bump the LINESKIP counter instead.
 */
@Override
protected void map(Object key, Text value,
Context context)
throws IOException, InterruptedException {
KPI kpi = KPI.filterIPs(value.toString());
if (!kpi.isValid()) {
// Malformed / filtered line: count it and skip.
context.getCounter(Counter.LINESKIP).increment(1);
return;
}
requestKey.set(kpi.getRequest());
addrValue.set(kpi.getRemote_addr());
context.write(requestKey, addrValue);
}
}
Reducer程序
public static class Reduce extends Reducer<Text, Text, Text, LongWritable>{
/**
 * Emits, for each request key, the number of DISTINCT client IPs that
 * accessed it. A Set expresses "distinct count" directly; the previous
 * Map<String,Integer> stored values that were never read.
 * (Fully-qualified java.util names because the sibling mapper class is
 * named Map, shadowing java.util.Map in this scope.)
 */
@Override
protected void reduce(Text key, Iterable<Text> values,
Context context)
throws IOException, InterruptedException {
java.util.Set<String> distinctIps = new java.util.HashSet<>();
for (Text text : values) {
distinctIps.add(text.toString());
}
context.write(key, new LongWritable(distinctIps.size()));
}
}
main函数
@Override
public int run(String[] args) throws Exception {
// args[0] = input dir, args[1] = output dir (output must not exist,
// or existsFile() is expected to deal with it).
INPUT_PATH=args[0];
OUTPUT_PATH=args[1];
Configuration conf = new Configuration();
existsFile(conf);
Job job = new Job(conf, "KPIIPS");
job.setJarByClass(KPIIPS.class);
FileInputFormat.addInputPath(job, new Path(INPUT_PATH));
FileOutputFormat.setOutputPath(job, new Path(OUTPUT_PATH));
job.setMapperClass(Map.class);
job.setReducerClass(Reduce.class);
job.setOutputFormatClass(TextOutputFormat.class);
// The mapper emits (Text, Text) while the job's final output is
// (Text, LongWritable). Without these two lines the framework assumes
// the map output types equal the job output types and fails with
// "Type mismatch in value from map: expected LongWritable, recieved Text".
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(Text.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(LongWritable.class);
job.waitForCompletion(true);
// Report how many malformed lines the mapper skipped.
Counters counters=job.getCounters();
org.apache.hadoop.mapreduce.Counter counter=counters.findCounter(Counter.LINESKIP);
long value=counter.getValue();
System.err.println("跳过了--------::::"+value);
return job.isSuccessful()?0:1;
}
/**
 * Entry point: runs the job via ToolRunner (which parses generic Hadoop
 * options) and propagates the job's status as the process exit code so
 * shell scripts / schedulers can detect failure — previously the code was
 * only printed and the JVM always exited 0.
 */
public static void main(String[] args) throws Exception {
int run = ToolRunner.run(new Configuration(), new KPIIPS(), args);
System.out.println(run);
System.exit(run);
}
// HDFS input/output directories, filled from the command line in run().
// NOTE(review): public mutable statics — callers elsewhere may read these,
// so they are documented rather than made private/final here.
public static String INPUT_PATH = "";
public static String OUTPUT_PATH = "";
// Hadoop counter group: LINESKIP counts log lines rejected by KPI.filterIPs.
enum Counter{
LINESKIP,
}
执行时出现下面异常
14/10/07 19:51:18 WARN mapred.LocalJobRunner: job_local_0001
java.io.IOException: Type mismatch in value from map: expected org.apache.hadoop.io.LongWritable, recieved org.apache.hadoop.io.Text
at org.apache.hadoop.mapred.MapTask$MapOutputBuffer.collect(MapTask.java:1019)
at org.apache.hadoop.mapred.MapTask$NewOutputCollector.write(MapTask.java:691)
at org.apache.hadoop.mapreduce.TaskInputOutputContext.write(TaskInputOutputContext.java:80)
at hadoop.kpi.KPIIPS$Map.map(KPIIPS.java:109)
at hadoop.kpi.KPIIPS$Map.map(KPIIPS.java:1)
at org.apache.hadoop.mapreduce.Mapper.run(Mapper.java:144)
at org.apache.hadoop.mapred.MapTask.runNewMapper(MapTask.java:764)
at org.apache.hadoop.mapred.MapTask.run(MapTask.java:370)
at org.apache.hadoop.mapred.LocalJobRunner$Job.run(LocalJobRunner.java:214)
14/10/07 19:51:19 INFO mapred.JobClient: map 0% reduce 0%
14/10/07 19:51:19 INFO mapred.JobClient: Job complete: job_local_0001
14/10/07 19:51:19 INFO mapred.JobClient: Counters: 0
解决方法:
job.setOutputKeyClass/setOutputValueClass 默认同时约束 map 和 reduce 两个阶段的输出类型；本例中 map 输出是 (Text, Text)，而 reduce 最终输出是 (Text, LongWritable)，两者不一致，因此必须单独显式设置 map 的输出 key 和 value 类型：
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(Text.class);