1. Start Hadoop
cd /apps/hadoop/sbin
./start-all.sh
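To confirm the daemons started, jps lists the running Java processes; NameNode, DataNode, SecondaryNameNode, ResourceManager, and NodeManager should all appear:
jps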
2. On Linux, create a directory /data/mapreduce1
mkdir -p /data/mapreduce1
3. Change to the /data/mapreduce1 directory and download the text file
cd /data/mapreduce1
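The download URL for buyer_favorite1 depends on the lab environment and is not repeated here. The mapper in step 7 treats each line as tab-separated fields and counts occurrences of the first field (the buyer ID), so if the original file is unavailable, a small tab-separated sample in the same shape (the values below are illustrative only) is enough to test the job:
10001	1001	2020-04-04 16:54:31
10001	1002	2020-04-04 16:55:04
10002	1001	2020-04-04 16:57:18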
4. In the same directory, download the dependency packages
5. Extract the dependency packages
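The archive name depends on what was downloaded in the previous step; assuming a gzipped tarball named hadoop2lib.tar.gz (a hypothetical name), it can be extracted in place with:
tar -zxvf hadoop2lib.tar.gz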
6. Upload the text file to the /mymapreduce1/in directory on HDFS
hadoop fs -mkdir -p /mymapreduce1/in
hadoop fs -put /data/mapreduce1/buyer_favorite1 /mymapreduce1/in
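The upload can be verified by listing the target directory:
hadoop fs -ls /mymapreduce1/in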
7. Write the following code in Eclipse:
package mapreduce;

import java.io.IOException;
import java.util.StringTokenizer;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class WordCount {
    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        Job job = Job.getInstance();
        job.setJobName("WordCount");
        job.setJarByClass(WordCount.class);
        job.setMapperClass(doMapper.class);
        job.setReducerClass(doReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        Path in = new Path("hdfs://localhost:9000/mymapreduce1/in/buyer_favorite1");
        Path out = new Path("hdfs://localhost:9000/mymapreduce1/out");
        FileInputFormat.addInputPath(job, in);
        FileOutputFormat.setOutputPath(job, out);
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }

    // Mapper: for each input line, emit (first tab-separated field, 1)
    public static class doMapper extends Mapper<Object, Text, Text, IntWritable> {
        public static final IntWritable one = new IntWritable(1);
        public static Text word = new Text();

        @Override
        protected void map(Object key, Text value, Context context)
                throws IOException, InterruptedException {
            // Split the line on tabs and use the first field (the buyer ID) as the output key
            StringTokenizer tokenizer = new StringTokenizer(value.toString(), "\t");
            word.set(tokenizer.nextToken());
            context.write(word, one);
        }
    }

    // Reducer: sum the counts emitted for each key
    public static class doReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
        private IntWritable result = new IntWritable();

        @Override
        protected void reduce(Text key, Iterable<IntWritable> values, Context context)
                throws IOException, InterruptedException {
            int sum = 0;
            for (IntWritable value : values) {
                sum += value.get();
            }
            result.set(sum);
            context.write(key, result);
        }
    }
}
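The job can be run directly from Eclipse, or alternatively exported as a jar and submitted from the command line; the jar name below is hypothetical:
hadoop jar WordCount.jar mapreduce.WordCount
Note that MapReduce refuses to start if the output directory already exists, so when re-running the job, remove it first:
hadoop fs -rm -r /mymapreduce1/out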
8. Check the results on HDFS
hadoop fs -ls /mymapreduce1/out
hadoop fs -cat /mymapreduce1/out/*
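Each output line is a key and its total count separated by a tab; with the illustrative sample above, the result would look like:
10001	2
10002	1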