Writing a Makefile for a Hadoop MapReduce Program
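
Before the appendix, a minimal makefile sketch for compiling the two source files listed below and running the job. The jar name wordcount.jar and the target names are assumptions, the HDFS paths in the run target echo the commented-out example in WordCount1.main(), and the compile-time classpath is resolved with Hadoop's own `hadoop classpath` command:

# Minimal makefile sketch; jar name and target names are assumptions, not a tested build.
JAR     = wordcount.jar
SOURCES = mypackage/WordCount1.java mypackage/CounterThread.java

# Resolve the compile-time classpath via Hadoop's `hadoop classpath` command.
HADOOP_CP := $(shell hadoop classpath)

$(JAR): $(SOURCES)
	javac -classpath "$(HADOOP_CP)" $(SOURCES)
	jar cf $(JAR) mypackage/*.class

# HDFS paths below echo the commented-out example in WordCount1.main().
run: $(JAR)
	hadoop jar $(JAR) mypackage.WordCount1 hdfs://localhost:9000/data/tmpfile hdfs://localhost:9000/data/wc_output

clean:
	rm -f $(JAR) mypackage/*.class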

Appendix 1: Source code of WordCount1.java and CounterThread.java

// WordCount1.java

package mypackage;

import java.io.IOException;
import java.util.StringTokenizer;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;

public class WordCount1 {

    public static class TokenizerMapper
            extends Mapper<Object, Text, Text, IntWritable> {

        private final static IntWritable one = new IntWritable(1); // constant count of 1, emitted with every word
        private Text word = new Text();                            // reusable Text holding the current word

        public void map(Object key, Text value, Context context
                        ) throws IOException, InterruptedException {
            StringTokenizer itr = new StringTokenizer(value.toString()); // split the input line into whitespace-delimited tokens
            while (itr.hasMoreTokens()) {
                word.set(itr.nextToken());   // load the next token into word
                context.write(word, one);    // emit the key-value pair (word, 1)
            }
            //System.out.println("read lines:" + context.getCounter("org.apache.hadoop.mapred.Task$Counter", "MAP_INPUT_RECORDS").getValue());
        }
    }

    public static class IntSumReducer
            extends Reducer<Text, IntWritable, Text, IntWritable> {

        private IntWritable result = new IntWritable(); // reusable IntWritable for the per-key total

        public void reduce(Text key, Iterable<IntWritable> values,
                           Context context
                           ) throws IOException, InterruptedException {
            int sum = 0;                  // running total, starting at 0
            for (IntWritable val : values) {
                sum += val.get();         // accumulate every value emitted for this key
            }
            result.set(sum);              // store the total in result
            context.write(key, result);   // emit the key-result pair
        }
    }

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        //String[] newArgs = new String[]{"hdfs://localhost:9000/data/tmpfile", "hdfs://localhost:9000/data/wc_output"};
        String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
        if (otherArgs.length != 2) {
            System.err.println("Usage: wordcount <in> <out>");
            System.exit(2);
        }
        Job job = new Job(conf, "WordCount1");         // create the job (deprecated constructor; see the note after this listing)
        job.setJarByClass(WordCount1.class);
        job.setMapperClass(TokenizerMapper.class);     // set the mapper class
        job.setCombinerClass(IntSumReducer.class);     // set the combiner class
        job.setReducerClass(IntSumReducer.class);      // set the reducer class
        job.setOutputKeyClass(Text.class);             // output key type
        job.setOutputValueClass(IntWritable.class);    // output value type
        FileInputFormat.addInputPath(job, new Path(otherArgs[0]));   // input and output paths from the command-line arguments
        FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
        CounterThread ct = new CounterThread(job);     // start the monitoring thread before waiting on the job
        ct.start();
        job.waitForCompletion(true);
        System.exit(0);
        //System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
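
A side note on the driver above: the `new Job(conf, "WordCount1")` constructor is deprecated in the Hadoop 2.x mapreduce API. A minimal sketch of the preferred form, which also reports the job's success through the process exit code (the variant kept as a comment above); the rest of main() is unchanged:

// Sketch, assuming the Hadoop 2.x mapreduce API.
Job job = Job.getInstance(conf, "WordCount1");     // factory method replacing the deprecated constructor
// ... mapper, combiner, reducer and path setup as above ...
System.exit(job.waitForCompletion(true) ? 0 : 1);  // exit code reflects whether the job succeeded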

// CounterThread.java

package mypackage;

import java.io.IOException;

import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.JobStatus;

public class CounterThread extends Thread {

    private Job _job;    // the job being monitored

    public CounterThread(Job job) {
        _job = job;
    }

    public void run() {
        // Poll the job every five seconds; the loop runs until the JVM
        // exits (main() calls System.exit after waitForCompletion returns).
        while (true) {
            try {
                Thread.sleep(1000 * 5);
            } catch (InterruptedException e1) {
                e1.printStackTrace();
            }
            try {
                if (_job.getStatus().getState() == JobStatus.State.RUNNING)
                    System.out.println("input records: "
                            + _job.getCounters().findCounter(
                                    "org.apache.hadoop.mapred.Task$Counter",
                                    "MAP_INPUT_RECORDS").getValue());
            } catch (IOException e) {
                e.printStackTrace();
            } catch (InterruptedException e) {
                e.printStackTrace();
            }
        }
    }
}
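
The counter lookup above addresses the map-input-records counter through the old string group name "org.apache.hadoop.mapred.Task$Counter". On Hadoop 2.x the same value is exposed through the org.apache.hadoop.mapreduce.TaskCounter enum; a sketch of the enum-based lookup inside run():

// Sketch, assuming Hadoop 2.x: enum-based lookup instead of the old string group name.
// Requires: import org.apache.hadoop.mapreduce.TaskCounter;
long mapInputRecords =
        _job.getCounters().findCounter(TaskCounter.MAP_INPUT_RECORDS).getValue();
System.out.println("input records: " + mapInputRecords);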
