Word Count
Contents of input_file:
hadoop yarn
mapreduce hbase
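For reference, running the finished WordCount job below over this file should produce one line per distinct word. TextOutputFormat (the default output format) writes the key and value separated by a tab, and with a single reducer the keys come out sorted:
hadoop	1
hbase	1
mapreduce	1
yarn	1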
A template for writing MapReduce jobs
The skeleton below fixes everything the jobs in these notes share: a Mapper, a Reducer, and a Tool-based driver. The TODO markers are the only places that change from job to job.
import java.io.IOException;
import java.util.StringTokenizer;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
/**
 * A reusable MapReduce job template: fill in the map() and reduce() bodies.
 */
public class ModuleMapReduce extends Configured implements Tool {
// 1: map class
/**
* public class Mapper<KEYIN, VALUEIN, KEYOUT, VALUEOUT>
*/
// TODO
public static class ModuleMapper extends
Mapper<LongWritable, Text, Text, IntWritable> {
@Override
public void map(LongWritable key, Text value, Context context)
throws IOException, InterruptedException {
// TODO
}
}
// 2: reduce class
// TODO
public static class ModuleReducer extends
Reducer<Text, IntWritable, Text, IntWritable> {
@Override
protected void reduce(Text key, Iterable<IntWritable> values,
Context context) throws IOException, InterruptedException {
// TODO
}
}
// 3: driver class
public int run(String[] args) throws Exception {
// 1. get configuration
Configuration configuration = getConf();
// 2. create job
Job job = Job.getInstance(configuration, this.getClass().getSimpleName());
// locate the jar containing this class so the cluster can run it
job.setJarByClass(this.getClass());
// 3: set job
/**
* input -> map -> reduce -> output
*/
// 3.1 input
Path inPath = new Path(args[0]);
FileInputFormat.addInputPath(job, inPath);
// 3.2 map
job.setMapperClass(ModuleMapper.class);
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(IntWritable.class);
// 3.3 reduce
job.setReducerClass(ModuleReducer.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(IntWritable.class);
// 3.4 output
Path outPath = new Path(args[1]);
FileOutputFormat.setOutputPath(job, outPath);
// 4. submit job
boolean isSuccess = job.waitForCompletion(true);
return isSuccess ? 0 : 1;
}
public static void main(String[] args) throws Exception {
// new configuration
Configuration configuration = new Configuration();
int status = ToolRunner.run(configuration, new ModuleMapReduce(), args);
System.exit(status);
}
}
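A note on the driver: going through ToolRunner instead of calling run() directly means GenericOptionsParser strips Hadoop's standard switches before run() sees the arguments, so configuration can be overridden at submit time without recompiling, e.g. (assuming the class is packaged into module.jar):
hadoop jar module.jar ModuleMapReduce -D mapreduce.job.reduces=2 <input> <output>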
Example 1: WordCount built from the template
import java.io.IOException;
import java.util.StringTokenizer;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
/**
 * WordCount: counts how many times each word occurs in the input files.
 */
public class WordCount extends Configured implements Tool {
// 1: map class
/**
* public class Mapper<KEYIN, VALUEIN, KEYOUT, VALUEOUT>
*/
public static class WordCountMapper extends
Mapper<LongWritable, Text, Text, IntWritable> {
private Text mapOutputKey = new Text();
private final static IntWritable mapOutputValue = new IntWritable(1);
@Override
public void map(LongWritable key, Text value, Context context)
throws IOException, InterruptedException {
// line value
String lineValue = value.toString();
// split
StringTokenizer stringTokenizer = new StringTokenizer(lineValue);
// iterator
while (stringTokenizer.hasMoreTokens()) {
// get word value
String wordval = stringTokenizer.nextToken();
// set value
mapOutputKey.set(wordval);
// output
context.write(mapOutputKey, mapOutputValue);
}
}
}
// 2: reduce class
public static class WordCountReducer extends
Reducer<Text, IntWritable, Text, IntWritable> {
private IntWritable outputValue = new IntWritable();
@Override
protected void reduce(Text key, Iterable<IntWritable> values,
Context context) throws IOException, InterruptedException {
// sum tmp
int sum = 0;
// iterator
for(IntWritable value: values){
// total
sum += value.get();
}
// set value
outputValue.set(sum);
// output
context.write(key, outputValue);
}
}
// 3: driver class
public int run(String[] args) throws Exception {
// 1. get configuration
Configuration configuration = getConf();
// 2. create job
Job job = Job.getInstance(configuration, this.getClass().getSimpleName());
// locate the jar containing this class so the cluster can run it
job.setJarByClass(this.getClass());
// 3: set job
/**
* input -> map -> reduce -> output
*/
// 3.1 input
Path inPath = new Path(args[0]);
FileInputFormat.addInputPath(job, inPath);
// 3.2 map
job.setMapperClass(WordCountMapper.class);
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(IntWritable.class);
// 3.3 reduce
job.setReducerClass(WordCountReducer.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(IntWritable.class);
// 3.4 output
Path outPath = new Path(args[1]);
FileOutputFormat.setOutputPath(job, outPath);
// 4. submit job
boolean isSuccess = job.waitForCompletion(true);
return isSuccess ? 0 : 1;
}
public static void main(String[] args) throws Exception {
// new configuration
Configuration configuration = new Configuration();
//int status = new WordCount().run(args);
int status = ToolRunner.run(configuration, new WordCount(), args);
System.exit(status);
}
}
Example 2: a plain WordCount that does not use the template
Two differences from Example 1: run() builds its own Configuration instead of taking it from getConf(), and main() calls run() directly rather than going through ToolRunner, so Hadoop's generic command-line options (-D, -files, and so on) are not parsed.
import java.io.IOException;
import java.util.StringTokenizer;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.Tool;
/**
 * WordCount again, written without the template.
 */
public class WordCount extends Configured implements Tool {
// 1: map class
/**
* public class Mapper<KEYIN, VALUEIN, KEYOUT, VALUEOUT>
*/
public static class WordCountMapper extends
Mapper<LongWritable, Text, Text, IntWritable> {
private Text mapOutputKey = new Text();
private final static IntWritable mapOutputValue = new IntWritable(1);
@Override
public void map(LongWritable key, Text value, Context context)
throws IOException, InterruptedException {
// line value
String lineValue = value.toString();
// split
StringTokenizer stringTokenizer = new StringTokenizer(lineValue);
// iterator
while (stringTokenizer.hasMoreTokens()) {
// get word value
String wordval = stringTokenizer.nextToken();
// set value
mapOutputKey.set(wordval);
// output
context.write(mapOutputKey, mapOutputValue);
}
}
}
// 2: reduce class
public static class WordCountReducer extends
Reducer<Text, IntWritable, Text, IntWritable> {
private IntWritable outputValue = new IntWritable();
@Override
protected void reduce(Text key, Iterable<IntWritable> values,
Context context) throws IOException, InterruptedException {
// sum tmp
int sum = 0;
// iterator
for(IntWritable value: values){
// total
sum += value.get();
}
// set value
outputValue.set(sum);
// output
context.write(key, outputValue);
}
}
// 3: driver class
public int run(String[] args) throws Exception {
// 1. get configuration
Configuration configuration = new Configuration();
// 2. create job
Job job = Job.getInstance(configuration, this.getClass().getSimpleName());
// locate the jar containing this class so the cluster can run it
job.setJarByClass(this.getClass());
// 3: set job
/**
* input -> map -> reduce -> output
*/
// 3.1 input
Path inPath = new Path(args[0]);
FileInputFormat.addInputPath(job, inPath);
// 3.2 map
job.setMapperClass(WordCountMapper.class);
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(IntWritable.class);
// 3.3 reduce
job.setReducerClass(WordCountReducer.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(IntWritable.class);
// 3.4 output
Path outPath = new Path(args[1]);
FileOutputFormat.setOutputPath(job, outPath);
// 4. submit job
boolean isSuccess = job.waitForCompletion(true);
return isSuccess ? 0 : 1;
}
public static void main(String[] args) throws Exception {
int status = new WordCount().run(args);
System.exit(status);
}
}
Template optimization: adding setup() and cleanup() hooks
setup() runs once per task before the first call to map()/reduce(), and cleanup() runs once after the last, which makes them the natural place for one-time initialization and teardown; a sketch of a typical use follows the listing.
import java.io.IOException;
import java.util.StringTokenizer;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
/**
 * MapReduce job template, extended with setup()/cleanup() hooks.
 */
public class ModuleMapReduce extends Configured implements Tool {
// 1: map class
/**
* public class Mapper<KEYIN, VALUEIN, KEYOUT, VALUEOUT>
*/
// TODO
public static class ModuleMapper extends
Mapper<LongWritable, Text, Text, IntWritable> {
@Override
public void setup(Context context) throws IOException,
InterruptedException {
// Nothing
}
@Override
public void cleanup(Context context) throws IOException,
InterruptedException {
// Nothing
}
@Override
public void map(LongWritable key, Text value, Context context)
throws IOException, InterruptedException {
// TODO
}
}
// 2: reduce class
// TODO
public static class ModuleReducer extends
Reducer<Text, IntWritable, Text, IntWritable> {
@Override
protected void setup(Context context)
throws IOException, InterruptedException {
// Nothing
}
@Override
protected void cleanup(Context context)
throws IOException, InterruptedException {
// Nothing
}
@Override
protected void reduce(Text key, Iterable<IntWritable> values,
Context context) throws IOException, InterruptedException {
// TODO
}
}
// 3: driver class
public int run(String[] args) throws Exception {
// 1. get configuration
Configuration configuration = getConf();
// 2. create job
Job job = Job.getInstance(configuration, this.getClass().getSimpleName());
// locate the jar containing this class so the cluster can run it
job.setJarByClass(this.getClass());
// 3: set job
/**
* input -> map -> reduce -> output
*/
// 3.1 input
Path inPath = new Path(args[0]);
FileInputFormat.addInputPath(job, inPath);
// 3.2 map
job.setMapperClass(ModuleMapper.class);
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(IntWritable.class);
// 3.3 reduce
job.setReducerClass(ModuleReducer.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(IntWritable.class);
// 3.4 output
Path outPath = new Path(args[1]);
FileOutputFormat.setOutputPath(job, outPath);
// 4. submit job
boolean isSuccess = job.waitForCompletion(true);
return isSuccess ? 0 : 1;
}
public static void main(String[] args) throws Exception {
// new configuration
Configuration configuration = new Configuration();
int status = ToolRunner.run(configuration, new ModuleMapReduce(), args);
System.exit(status);
}
}
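As an illustration of what the hooks buy you, here is a hypothetical mapper (not part of the original template; it reuses the imports above) that could stand in for ModuleMapper. It reads a case-folding switch from the job configuration once in setup() instead of on every map() call; the property name wordcount.case.insensitive is made up for this sketch.
public static class CaseFoldingMapper extends
        Mapper<LongWritable, Text, Text, IntWritable> {
    private static final IntWritable ONE = new IntWritable(1);
    private Text word = new Text();
    private boolean caseInsensitive;
    @Override
    public void setup(Context context) {
        // read the switch once per task attempt;
        // "wordcount.case.insensitive" is a hypothetical property name
        caseInsensitive = context.getConfiguration()
                .getBoolean("wordcount.case.insensitive", false);
    }
    @Override
    public void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        StringTokenizer tokens = new StringTokenizer(value.toString());
        while (tokens.hasMoreTokens()) {
            String token = tokens.nextToken();
            word.set(caseInsensitive ? token.toLowerCase() : token);
            context.write(word, ONE);
        }
    }
}
Because the driver goes through ToolRunner, the switch can be flipped at submit time with -D wordcount.case.insensitive=true and picked up via getConf().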
Adding shuffle tuning to the template
Between map and reduce, the shuffle can be tuned at four points: how map output is partitioned across reducers, how keys are sorted, whether a combiner pre-aggregates on the map side, and how values are grouped for each reduce() call. On top of that, the driver below sets the number of reduce tasks, and compression of map output can be switched on in main().
import java.io.IOException;
import java.util.StringTokenizer;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
/**
 * MapReduce job template, extended with shuffle tuning hooks.
 */
public class ModuleMapReduce extends Configured implements Tool {
// 1: map class
/**
* public class Mapper<KEYIN, VALUEIN, KEYOUT, VALUEOUT>
*/
// TODO
public static class ModuleMapper extends
Mapper<LongWritable, Text, Text, IntWritable> {
@Override
public void setup(Context context) throws IOException,
InterruptedException {
// Nothing
}
@Override
public void cleanup(Context context) throws IOException,
InterruptedException {
// Nothing
}
@Override
public void map(LongWritable key, Text value, Context context)
throws IOException, InterruptedException {
// TODO
}
}
// 2: reduce class
// TODO
public static class ModuleReducer extends
Reducer<Text, IntWritable, Text, IntWritable> {
@Override
protected void setup(Context context)
throws IOException, InterruptedException {
// Nothing
}
@Override
protected void cleanup(Context context)
throws IOException, InterruptedException {
// Nothing
}
@Override
protected void reduce(Text key, Iterable<IntWritable> values,
Context context) throws IOException, InterruptedException {
// TODO
}
}
// 3: driver class
public int run(String[] args) throws Exception {
// 1. get configuration
Configuration configuration = getConf();
// 2. create job
Job job = Job.getInstance(configuration, this.getClass().getSimpleName());
// locate the jar containing this class so the cluster can run it
job.setJarByClass(this.getClass());
// 3: set job
/**
* input -> map -> reduce -> output
*/
// 3.1 input
Path inPath = new Path(args[0]);
FileInputFormat.addInputPath(job, inPath);
// 3.2 map
job.setMapperClass(ModuleMapper.class);
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(IntWritable.class);
// -----------------------------------------------------------------
// shuffle settings (all optional; a concrete example follows this listing)
// 1. partition: decides which reducer gets each key (default HashPartitioner)
// job.setPartitionerClass(cls);
// 2. sort: comparator used to order the map output keys
// job.setSortComparatorClass(cls);
// 3. combine: a map-side "mini reduce" that shrinks shuffle traffic
// job.setCombinerClass(cls);
// 4. group: comparator deciding which keys share one reduce() call
// job.setGroupingComparatorClass(cls);
// -----------------------------------------------------------------
// 3.3 reduce
job.setReducerClass(ModuleReducer.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(IntWritable.class);
// set the number of reduce tasks (the default is 1)
job.setNumReduceTasks(2);
// 3.4 output
Path outPath = new Path(args[1]);
FileOutputFormat.setOutputPath(job, outPath);
// 4. submit job
boolean isSuccess = job.waitForCompletion(true);
return isSuccess ? 0 : 1;
}
public static void main(String[] args) throws Exception {
// new configuration
Configuration configuration = new Configuration();
// enable map-output compression (note the property names)
// configuration.set("mapreduce.map.output.compress", "true");
// configuration.set("mapreduce.map.output.compress.codec", "org.apache.hadoop.io.compress.SnappyCodec");
int status = ToolRunner.run(configuration, new ModuleMapReduce(), args);
System.exit(status);
}
}
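To make the commented shuffle hooks concrete, a minimal sketch under two assumptions: the WordCountReducer from Example 1 is available (word counts are associative and commutative, so the reducer can double as the combiner), and FirstCharPartitioner is a made-up class for this illustration.
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Partitioner;
// hypothetical partitioner: routes each word by its first character,
// so each reducer receives a deterministic slice of the key space
public class FirstCharPartitioner extends Partitioner<Text, IntWritable> {
    @Override
    public int getPartition(Text key, IntWritable value, int numReduceTasks) {
        if (key.getLength() == 0) {
            return 0;
        }
        // Text.charAt() returns the Unicode code point at that position
        return (key.charAt(0) & Integer.MAX_VALUE) % numReduceTasks;
    }
}
Wired up in run(), in place of the commented lines:
job.setPartitionerClass(FirstCharPartitioner.class);
job.setCombinerClass(WordCountReducer.class);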