紧接Hello Hadoop，这一次尝试实现自定义的Mapper和Reducer。
环境沿用上一篇，唯一改变的是实现类：
实现类:
package org.aheroboy.hadoop;
import java.io.IOException;
import java.util.Arrays;
import java.util.StringTokenizer;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;
public class HelloMapperAndReduce {
public static void main(String[] args) throws Exception{
//hadoop will load default configuration files:core-default.xml,core-site.xml
Configuration conf = new Configuration();
//use for parsing parameters from command line.
System.out.println("Parameters: " + args);
String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
if (otherArgs.length != 2) {
System.err.println("Usage: HelloWorld <Input Folder> <Output Folder>");
System.exit(2);
}
//Create a job so hadoop can run it as a job.
Job job = new Job(conf, "Hello World!");
job.setJarByClass(HelloMapperAndReduce.class);
job.setMapperClass(HelloMapper.class);
job.setCombinerClass(HelloReduce.class);
job.setReducerClass(HelloReduce.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(IntWritable.class);
FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
System.exit(job.waitForCompletion(true) ? 0 : 1);
}
public static class HelloMapper extends Mapper<Object, Text, Text, IntWritable>{
private final static IntWritable one = new IntWritable(1);
private Text word = new Text();
public void map(Object key, Text value, Context context) throws IOException, InterruptedException {
StringTokenizer itr = new StringTokenizer(value.toString());
while (itr.hasMoreTokens()) {
word.set(itr.nextToken());
context.write(word, one);
System.out.println("Hello Mapper!");
}
}
}
public static class HelloReduce extends Reducer<Text,IntWritable,Text,IntWritable>{
private IntWritable result = new IntWritable();
public void reduce(Text key, Iterable<IntWritable> values,Context context) throws IOException, InterruptedException {
int sum = 0;
for (IntWritable val : values) {
sum += val.get();
}
result.set(sum);
context.write(key, result);
System.out.println("Hello Reduce!");
}
}
}
准备数据:
hadoop@foreveryy:~$ find -maxdepth 2 >words.txt
hadoop@foreveryy:~$ ls -ltr
total 1
-rw-rw-r-- 1 hadoop hadoop 4757 Nov 24 05:37 words.txt
hadoop@foreveryy:~$ hadoop fs -copyFromLocal words.txt /user/hadoop/input/words.txt
hadoop@foreveryy:~$ hadoop fs -ls input
Found 1 items
-rw-r--r-- 3 hadoop supergroup 4757 2012-11-24 05:38 /user/hadoop/input/words.txt
hadoop@foreveryy:~$ hadoop fs -ls
Found 1 items
drwxr-xr-x - hadoop supergroup 0 2012-11-24 05:38 /user/hadoop/input
运行Job:
hadoop@foreveryy:~/tmp$ hadoop jar HelloWrold_V2.jar input output
Parameters: [Ljava.lang.String;@30c221
12/11/24 06:09:28 INFO input.FileInputFormat listStatus(FileInputFormat.java:237): Total input paths to process : 2
12/11/24 06:09:28 WARN util.NativeCodeLoader <clinit>(NativeCodeLoader.java:52): Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
12/11/24 06:09:28 WARN snappy.LoadSnappy <clinit>(LoadSnappy.java:46): Snappy native library not loaded
12/11/24 06:09:28 INFO mapred.JobClient monitorAndPrintJob(JobClient.java:1350): Running job: job_201211240320_0005
12/11/24 06:09:29 INFO mapred.JobClient monitorAndPrintJob(JobClient.java:1363): map 0% reduce 0%
12/11/24 06:09:33 INFO mapred.JobClient monitorAndPrintJob(JobClient.java:1363): map 100% reduce 0%
Job 执行完检查结果:
hadoop@foreveryy:~/tmp$ hadoop fs -ls output
Found 3 items
-rw-r--r-- 3 hadoop supergroup 0 2012-11-24 06:09 /user/hadoop/output/_SUCCESS
drwxr-xr-x - hadoop supergroup 0 2012-11-24 06:09 /user/hadoop/output/_logs
-rw-r--r-- 3 hadoop supergroup 5335 2012-11-24 06:09 /user/hadoop/output/part-r-00000
hadoop@foreveryy:~/tmp$ hadoop fs -copyToLocal output output
hadoop@foreveryy:~/tmp$ lt
total 12
-rw-rw-r-- 1 hadoop hadoop 5018 Nov 24 05:53 HelloWrold_V2.jar
drwxrwxr-x 3 hadoop hadoop 4096 Nov 24 06:17 output
hadoop@foreveryy:~/tmp/output$ tree
.
├── _logs
│ └── history
│ ├── job_201211240320_0005_1353766168619_hadoop_Hello+World%21
│ └── job_201211240320_0005_conf.xml
├── part-r-00000
└── _SUCCESS
2 directories, 4 files
By: ysgheping@gmail.com