Appendix 1: Source code of WordCount1.java and CounterThread.java
// WordCount1.java code
package mypackage;
import java.io.IOException;
import java.util.StringTokenizer;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;
public class WordCount1 {
    public static class TokenizerMapper
            extends Mapper<Object, Text, Text, IntWritable> {
        private final static IntWritable one = new IntWritable(1); // IntWritable constant with the fixed value 1
        private Text word = new Text();                             // Text variable that holds each incoming word
        public void map(Object key, Text value, Context context)
                throws IOException, InterruptedException {
            StringTokenizer itr = new StringTokenizer(value.toString()); // split the input line into words
            while (itr.hasMoreTokens()) {
                word.set(itr.nextToken());   // assign the next token to word
                context.write(word, one);    // emit the key-value pair (word, one)
            }
            //System.out.println("read lines:" + context.getCounter("org.apache.hadoop.mapred.Task$Counter", "MAP_INPUT_RECORDS").getValue());
            //System.out.println("Input records: " + context.getCounters().findCounter("org.apache.hadoop.mapred.Task$Counter", "MAP_INPUT_RECORDS").getValue());
            //System.out.println("Input records: " + context.getCounters().findCounter("", "MAP_INPUT_RECORDS").getValue());
        }
    }
    public static class IntSumReducer
            extends Reducer<Text, IntWritable, Text, IntWritable> {
        private IntWritable result = new IntWritable(); // holds the summed count for each key
        public void reduce(Text key, Iterable<IntWritable> values,
                           Context context)
                throws IOException, InterruptedException {
            int sum = 0;                    // running total, initial value 0
            for (IntWritable val : values) {
                sum += val.get();           // accumulate every value that belongs to this key
            }
            result.set(sum);                // store the sum in result
            context.write(key, result);     // emit the key-result pair
        }
    }
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        //String[] newArgs = new String[]{"hdfs://localhost:9000/data/tmpfile","hdfs://localhost:9000/data/wc_output"};
        String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
        if (otherArgs.length != 2) {
            System.err.println("Usage: wordcount <in> <out>");
            System.exit(2);
        }
        Job job = new Job(conf, "WordCount1");            // create a new job
        job.setJarByClass(WordCount1.class);
        job.setMapperClass(TokenizerMapper.class);        // set the mapper class
        job.setCombinerClass(IntSumReducer.class);        // set the combiner class
        job.setReducerClass(IntSumReducer.class);         // set the reducer class
        job.setOutputKeyClass(Text.class);                // output key type
        job.setOutputValueClass(IntWritable.class);       // output value type
        FileInputFormat.addInputPath(job, new Path(otherArgs[0]));    // input and output paths (taken from the arguments)
        FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
        CounterThread ct = new CounterThread(job);        // start the thread that polls the counter while the job runs
        ct.start();
        job.waitForCompletion(true);
        System.exit(0);
        //System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
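
The commented-out lines inside map() above are attempts to read the MAP_INPUT_RECORDS counter from inside the mapper. In the Hadoop 2.x API the same counter can be read more directly from the Job object through the TaskCounter enum once the job has been submitted. The following is a minimal sketch of that approach; the class and method names are illustrative and not part of the original appendix.

// CounterExample.java (illustrative sketch, Hadoop 2.x API assumed)
package mypackage;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.TaskCounter;
public class CounterExample {
    // Prints the total number of records read by all map tasks of the given job.
    public static void printInputRecords(Job job) throws Exception {
        long records = job.getCounters()
                          .findCounter(TaskCounter.MAP_INPUT_RECORDS)
                          .getValue();
        System.out.println("Input records: " + records);
    }
}

For example, printInputRecords(job) could be called right after job.waitForCompletion(true) in main() instead of polling from inside the map tasks.
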
// CounterThread.java code
package mypackage;
import java.io.IOException;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.JobStatus;
public class CounterThread extends Thread {
    public CounterThread(Job job) {
        _job = job;
    }
    public void run() {
        // poll every 5 seconds; the loop ends only when main() calls System.exit()
        while (true) {
            try {
                Thread.sleep(1000 * 5);
            } catch (InterruptedException e1) {
                e1.printStackTrace();
            }
            try {
                // while the job is still running, print how many map input records have been read so far
                if (_job.getStatus().getState() == JobStatus.State.RUNNING) {
                    System.out.println("Input records: "
                            + _job.getCounters().findCounter("org.apache.hadoop.mapred.Task$Counter",
                                                             "MAP_INPUT_RECORDS").getValue());
                }
            } catch (IOException e) {
                e.printStackTrace();
            } catch (InterruptedException e) {
                e.printStackTrace();
            }
        }
    }
    private Job _job;
}
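
Note that run() above loops forever and relies on System.exit(0) in main() to shut down the JVM (and with it the counter thread). If a cleaner shutdown is preferred, one option is to mark the thread as a daemon and stop polling once the job reports completion. The sketch below shows such a variant; it assumes the Hadoop 2.x Job API, the class name CounterThread2 is illustrative, and, like the original, it relies on the job having been submitted by the time the first poll runs (the initial five-second sleep gives main() time to reach waitForCompletion).

// CounterThread2.java (illustrative sketch, Hadoop 2.x API assumed)
package mypackage;
import java.io.IOException;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.TaskCounter;
public class CounterThread2 extends Thread {
    private final Job job;
    public CounterThread2(Job job) {
        this.job = job;
        setDaemon(true);                // a daemon thread never keeps the JVM alive on its own
    }
    public void run() {
        try {
            while (true) {
                Thread.sleep(1000 * 5); // poll every 5 seconds, as in the original
                if (job.isComplete()) { // leave the loop once the job has finished
                    break;
                }
                System.out.println("Input records: "
                        + job.getCounters().findCounter(TaskCounter.MAP_INPUT_RECORDS).getValue());
            }
        } catch (IOException | InterruptedException e) {
            e.printStackTrace();
        }
    }
}

Because this variant is a daemon thread and exits on its own, main() could then use the commented-out line System.exit(job.waitForCompletion(true) ? 0 : 1) instead of the unconditional System.exit(0).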