package com.demo.hadoop.mapreduce;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import java.io.IOException;
import java.util.StringTokenizer;

/**
 * WordCount application built with MapReduce: counts how many times each
 * whitespace-separated token occurs in the input.
 */
public class WordCountApp {

    /**
     * Map: splits every input line into tokens and emits {@code (token, 1)}
     * for each of them.
     */
    public static class MyMapper extends Mapper<LongWritable, Text, Text, LongWritable> {

        // Reused for every record instead of allocating one object per token,
        // the same pattern the Hadoop WordCount tutorial uses.
        private final LongWritable one = new LongWritable(1);
        private final Text word = new Text();

        /**
         * @param key     input key supplied by the input format (unused here)
         * @param value   one line of input text
         * @param context sink for the emitted {@code (token, 1)} pairs
         */
        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            // StringTokenizer splits on any run of whitespace and never yields
            // empty tokens, so blank lines and repeated spaces no longer
            // produce a bogus "" word (which split(" ") did).
            StringTokenizer tokens = new StringTokenizer(value.toString());
            while (tokens.hasMoreTokens()) {
                word.set(tokens.nextToken());
                context.write(word, one);
            }
        }
    }

    /**
     * Reduce: sums all counts received for one token.
     */
    public static class MyReducer extends Reducer<Text, LongWritable, Text, LongWritable> {

        // Reused output holder; avoids one allocation per distinct key.
        private final LongWritable total = new LongWritable();

        /**
         * @param key     the token
         * @param values  the partial counts emitted for that token
         * @param context sink for the {@code (token, sum)} pair
         */
        @Override
        protected void reduce(Text key, Iterable<LongWritable> values, Context context)
                throws IOException, InterruptedException {
            long sum = 0;
            for (LongWritable value : values) {
                sum += value.get();
            }
            total.set(sum);
            context.write(key, total);
        }
    }

    /**
     * Driver: configures and submits the job, then exits with 0 on success
     * and 1 on failure.
     *
     * @param args {@code args[0]} is the input path, {@code args[1]} the output path
     */
    public static void main(String[] args) throws Exception {
        // Fail with a readable message instead of an ArrayIndexOutOfBoundsException.
        if (args.length < 2) {
            System.err.println("Usage: WordCountApp <input path> <output path>");
            System.exit(2);
        }

        Configuration configuration = new Configuration();
        Job job = Job.getInstance(configuration, "wordcount");
        job.setJarByClass(WordCountApp.class);

        FileInputFormat.setInputPaths(job, new Path(args[0]));

        job.setMapperClass(MyMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(LongWritable.class);

        job.setReducerClass(MyReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(LongWritable.class);

        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}

启动hadoop
pumbaa@pumbaapc:~/hadoopapp/hadoop-2.6.0-cdh5.7.0/sbin$ ./start-all.sh

打jar包
pumbaa@pumbaapc:~/IdeaProjects/hadooptrain$ mvn clean package

把target下面的包copy到其他目录文件夹
pumbaa@pumbaapc:~/IdeaProjects/hadooptrain/target$ scp hadoop-train-1.0.jar /home/pumbaa/hadooplib

运行jar包
hadoop jar hadoop-train-1.0.jar com.demo.hadoop.mapreduce.WordCountApp hdfs://pumbaapc:8020/hello.txt hdfs://pumbaapc:8020/output/wc

查看输出文件
hadoop fs -text /output/wc/part-r-00000
pumbaa@pumbaapc:~/hadooplib$ hadoop fs -text /output/wc/part-r-00000
18/03/04 17:15:35 WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
hadoop 2
hdfs 2
hello 2
welcome 2

利用partitioner把相似的块分配到同一个reducer中
package com.demo.hadoop.mapreduce; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapreduce.Job; import org.apache.hadoop.mapreduce.Mapper; import org.apache.hadoop.mapreduce.Partitioner; import org.apache.hadoop.mapreduce.Reducer; import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; import java.io.IOException; public class PartitionerApp { /** * * Map:读取输入的文件 * */ public static class MyMapper extends Mapper<LongWritable, Text, Text, LongWritable> { @Override protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException { LongWritable one = new LongWritable(1); String line = value.toString(); String[] words = line.split(" "); context.write(new Text(words[0]), new LongWritable(Long.parseLong(words[1]))); } } /** * Reduce * */ public static class MyReducer extends Reducer<Text, LongWritable, Text, LongWritable> { @Override protected void reduce(Text key, Iterable<LongWritable> values, Context context) throws IOException, InterruptedException { long sum = 0; for(LongWritable value: values) { sum += value.get(); } context.write(key, new LongWritable(sum)); } } public static class Mypartitioner extends Partitioner<Text, LongWritable> { @Override public int getPartition(Text text, LongWritable longWritable, int numPartitions) { if(text.toString().equals("xiaomi")) { return 0; }else if(text.toString().equals("huawei")) { return 1; }else if(text.toString().equals("iphone7")) { return 2; }else { return 3; } } } /** * 定义Driver类 * */ public static void main(String[] args) throws Exception { Configuration configuration = new Configuration(); Job job = Job.getInstance(configuration, "wordcount"); job.setJarByClass(PartitionerApp.class); FileInputFormat.setInputPaths(job, new Path(args[0])); 
job.setMapperClass(PartitionerApp.MyMapper.class); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(LongWritable.class); job.setReducerClass(PartitionerApp.MyReducer.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(LongWritable.class); //job.setCombinerClass(MyReducer.class); job.setPartitionerClass(Mypartitioner.class); job.setNumReduceTasks(4); FileOutputFormat.setOutputPath(job, new Path(args[1])); System.exit(job.waitForCompletion(true) ? 0 : 1); } }