MapReduce Template and Examples

Word Count

Contents of input_file:
hadoop yarn
mapreduce hbase
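
For this input, the expected output (TextOutputFormat writes one tab-separated word/count pair per line, with keys sorted by the shuffle) is:

hadoop	1
hbase	1
mapreduce	1
yarn	1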

A reusable MapReduce template:

import java.io.IOException;
import java.util.StringTokenizer;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

/**
 * A reusable MapReduce job skeleton: fill in the map() and reduce() bodies
 * and adjust the key/value types for the job at hand.
 */
public class ModuleMapReduce extends Configured implements Tool {
        // 1: map class
        /**
         * public class Mapper<KEYIN, VALUEIN, KEYOUT, VALUEOUT>
         */
        // TODO
        public static class ModuleMapper extends
                        Mapper<LongWritable, Text, Text, IntWritable> {
                @Override
                public void map(LongWritable key, Text value, Context context)
                                throws IOException, InterruptedException {
                        // TODO
                }
        }

        // 2: reduce class
        // TODO
        public static class ModuleReducer extends
                        Reducer<Text, IntWritable, Text, IntWritable> {
                @Override
                protected void reduce(Text key, Iterable<IntWritable> values,
                                Context context) throws IOException, InterruptedException {
                        // TODO
                }
        }

        // 3: driver class
        public int run(String[] args) throws Exception {
                // 1. get configuration
                Configuration configuration = getConf();
                // 2. create job
                Job job = Job.getInstance(configuration, this.getClass().getSimpleName());
                // run jar
                job.setJarByClass(this.getClass());
                // 3: set job
                /**
                 * input -> map -> reduce -> output
                 */
                // 3.1 input
                Path inPath = new Path(args[0]);
                FileInputFormat.addInputPath(job, inPath);
                // 3.2 map
                job.setMapperClass(ModuleMapper.class);
                job.setMapOutputKeyClass(Text.class);
                job.setMapOutputValueClass(IntWritable.class);
                // 3.3 reduce
                job.setReducerClass(ModuleReducer.class);
                job.setOutputKeyClass(Text.class);
                job.setOutputValueClass(IntWritable.class);
                // 3.4 output
                Path outPath = new Path(args[1]);
                FileOutputFormat.setOutputPath(job, outPath);
                // 4. submit job
                boolean isSuccess = job.waitForCompletion(true);
                return isSuccess ? 0 : 1;
        }
        public static void main(String[] args) throws Exception {
                //  new configuration
                Configuration configuration = new Configuration();

                int status = ToolRunner.run(configuration, new ModuleMapReduce(), args);

                System.exit(status);
        }
}

Example 1: WordCount written from the template

import java.io.IOException;
import java.util.StringTokenizer;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

/**
 * WordCount: counts how many times each word occurs in the input.
 */
public class WordCount extends Configured implements Tool {
        // 1: map class
        /**
         * public class Mapper<KEYIN, VALUEIN, KEYOUT, VALUEOUT>
         */
        public static class WordCountMapper extends
                        Mapper<LongWritable, Text, Text, IntWritable> {
                private Text mapOutputKey = new Text();
                private final static IntWritable mapOutputValue = new IntWritable(1);
                @Override
                public void map(LongWritable key, Text value, Context context)
                                throws IOException, InterruptedException {
                        // line value
                        String lineValue = value.toString();

                        // split
                        StringTokenizer stringTokenizer = new StringTokenizer(lineValue);

                        // iterator
                        while (stringTokenizer.hasMoreTokens()) {
                                // get word value
                                String wordval = stringTokenizer.nextToken();
                                // set value
                                mapOutputKey.set(wordval);
                                // output
                                context.write(mapOutputKey, mapOutputValue);
                        }
                }
        }

        // 2: reduce class
        public static class WordCountReducer extends
                        Reducer<Text, IntWritable, Text, IntWritable> {
                private IntWritable outputValue = new IntWritable();
                @Override
                protected void reduce(Text key, Iterable<IntWritable> values,
                                Context context) throws IOException, InterruptedException {
                        // sum tmp
                        int sum = 0;
                        // iterator
                        for(IntWritable value: values){
                                // total
                                sum += value.get();
                        }

                        // set value
                        outputValue.set(sum);
                        // output
                        context.write(key, outputValue);
                }
        }

        // 3: driver class
        public int run(String[] args) throws Exception {
                // 1. get configuration
                Configuration configuration = getConf();
                // 2. create job
                Job job = Job.getInstance(configuration, this.getClass().getSimpleName());
                // run jar
                job.setJarByClass(this.getClass());
                // 3: set job
                /**
                 * input -> map -> reduce -> output
                 */
                // 3.1 input
                Path inPath = new Path(args[0]);
                FileInputFormat.addInputPath(job, inPath);
                // 3.2 map
                job.setMapperClass(WordCountMapper.class);
                job.setMapOutputKeyClass(Text.class);
                job.setMapOutputValueClass(IntWritable.class);
                // 3.3 reduce
                job.setReducerClass(WordCountReducer.class);
                job.setOutputKeyClass(Text.class);
                job.setOutputValueClass(IntWritable.class);
                // 3.4 output
                Path outPath = new Path(args[1]);
                FileOutputFormat.setOutputPath(job, outPath);
                // 4. submit job
                boolean isSuccess = job.waitForCompletion(true);
                return isSuccess ? 0 : 1;
        }
        public static void main(String[] args) throws Exception {
                //  new configuration
                Configuration configuration = new Configuration();

                //int status = new WordCount().run(args);

                int status = ToolRunner.run(configuration, new WordCount(), args);

                System.exit(status);
        }
}
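
A minimal sketch of submitting the job (the jar name wordcount.jar and the HDFS paths are placeholders):

hadoop jar wordcount.jar WordCount /user/hadoop/input_file /user/hadoop/output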

Example 2: a plain WordCount written without the template

import java.io.IOException;
import java.util.StringTokenizer;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.Tool;

/**
 * WordCount without the template: run() builds its own Configuration and
 * main() calls run() directly instead of going through ToolRunner.
 */
public class WordCount extends Configured implements Tool {
        // 1: map class
        /**
         * public class Mapper<KEYIN, VALUEIN, KEYOUT, VALUEOUT>
         */
        public static class WordCountMapper extends
                        Mapper<LongWritable, Text, Text, IntWritable> {
                private Text mapOutputKey = new Text();
                private final static IntWritable mapOutputValue = new IntWritable(1);
                @Override
                public void map(LongWritable key, Text value, Context context)
                                throws IOException, InterruptedException {
                        // line value
                        String lineValue = value.toString();

                        // split
                        StringTokenizer stringTokenizer = new StringTokenizer(lineValue);

                        // iterator
                        while (stringTokenizer.hasMoreTokens()) {
                                // get word value
                                String wordval = stringTokenizer.nextToken();
                                // set value
                                mapOutputKey.set(wordval);
                                // output
                                context.write(mapOutputKey, mapOutputValue);
                        }
                }
        }

        // 2: reduce class
        public static class WordCountReducer extends
                        Reducer<Text, IntWritable, Text, IntWritable> {
                private IntWritable outputValue = new IntWritable();
                @Override
                protected void reduce(Text key, Iterable<IntWritable> values,
                                Context context) throws IOException, InterruptedException {
                        // sum tmp
                        int sum = 0;
                        // iterator
                        for(IntWritable value: values){
                                // total
                                sum += value.get();
                        }

                        // set value
                        outputValue.set(sum);
                        // output
                        context.write(key, outputValue);
                }
        }

        // 3: driver class
        public int run(String[] args) throws Exception {
                // 1. get configuration
                Configuration configuration = new Configuration();
                // 2. create job
                Job job = Job.getInstance(configuration, this.getClass().getSimpleName());
                // run jar
                job.setJarByClass(this.getClass());
                // 3: set job
                /**
                 * input -> map -> reduce -> output
                 */
                // 3.1 input
                Path inPath = new Path(args[0]);
                FileInputFormat.addInputPath(job, inPath);
                // 3.2 map
                job.setMapperClass(WordCountMapper.class);
                job.setMapOutputKeyClass(Text.class);
                job.setMapOutputValueClass(IntWritable.class);
                // 3.3 reduce
                job.setReducerClass(WordCountReducer.class);
                job.setOutputKeyClass(Text.class);
                job.setOutputValueClass(IntWritable.class);
                // 3.4 output
                Path outPath = new Path(args[1]);
                FileOutputFormat.setOutputPath(job, outPath);
                // 4. submit job
                boolean isSuccess = job.waitForCompletion(true);
                return isSuccess ? 0 : 1;
        }
        public static void main(String[] args) throws Exception {
                int status = new WordCount().run(args);

                System.exit(status);
        }
}
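
The practical difference from Example 1 is the driver: run() here builds its own Configuration and main() calls run() directly, so Hadoop's generic options are never parsed. With ToolRunner (as in the template), GenericOptionsParser strips options such as -D, -files, and -libjars from the command line and applies them to the Configuration before run() receives the remaining arguments. For example, the ToolRunner version can tune the job from the command line (jar name and paths are placeholders):

hadoop jar wordcount.jar WordCount -D mapreduce.job.reduces=2 /user/hadoop/input_file /user/hadoop/output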

Template optimization: adding setup() and cleanup() hooks

import java.io.IOException;
import java.util.StringTokenizer;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

/**
 * The template extended with setup() and cleanup() hooks, which the framework
 * calls once per task, before the first and after the last input record.
 */
public class ModuleMapReduce extends Configured implements Tool {
        // 1: map class
        /**
         * public class Mapper<KEYIN, VALUEIN, KEYOUT, VALUEOUT>
         */
        // TODO
        public static class ModuleMapper extends
                        Mapper<LongWritable, Text, Text, IntWritable> {

                @Override
                public void setup(Context context) throws IOException,
                                InterruptedException {
                        // Nothing
                }
                @Override
                public void cleanup(Context context) throws IOException,
                                InterruptedException {
                        // Nothing
                }
                @Override
                public void map(LongWritable key, Text value, Context context)
                                throws IOException, InterruptedException {
                        // TODO
                }
        }

        // 2: reduce class
        // TODO
        public static class ModuleReducer extends
                        Reducer<Text, IntWritable, Text, IntWritable> {

                @Override
                protected void setup(Context context)
                                throws IOException, InterruptedException {
                        // Nothing
                }


                @Override
                protected void cleanup(Context context)
                                throws IOException, InterruptedException {
                        // Nothing
                }

                @Override
                protected void reduce(Text key, Iterable<IntWritable> values,
                                Context context) throws IOException, InterruptedException {
                        // TODO
                }
        }

        // 3: driver class
        public int run(String[] args) throws Exception {
                // 1. get configuration
                Configuration configuration = getConf();
                // 2. create job
                Job job = Job.getInstance(configuration, this.getClass().getSimpleName());
                // run jar
                job.setJarByClass(this.getClass());
                // 3: set job
                /**
                 * input -> map -> reduce -> output
                 */
                // 3.1 input
                Path inPath = new Path(args[0]);
                FileInputFormat.addInputPath(job, inPath);
                // 3.2 map
                job.setMapperClass(ModuleMapper.class);
                job.setMapOutputKeyClass(Text.class);
                job.setMapOutputValueClass(IntWritable.class);
                // 3.3 reduce
                job.setReducerClass(ModuleReducer.class);
                job.setOutputKeyClass(Text.class);
                job.setOutputValueClass(IntWritable.class);
                // 3.4 output
                Path outPath = new Path(args[1]);
                FileOutputFormat.setOutputPath(job, outPath);
                // 4. submit job
                boolean isSuccess = job.waitForCompletion(true);
                return isSuccess ? 0 : 1;
        }
        public static void main(String[] args) throws Exception {
                //  new configuration
                Configuration configuration = new Configuration();

                int status = ToolRunner.run(configuration, new ModuleMapReduce(), args);

                System.exit(status);
        }
}
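
As a sketch of what the new hooks are for, the mapper below (hypothetical, not part of the template, using the same imports as the listing above) reads a custom flag from the job Configuration once per task in setup() rather than on every map() call; cleanup() would be the matching place to release any resources:

public static class CaseFoldingWordMapper extends
                Mapper<LongWritable, Text, Text, IntWritable> {
        private boolean caseSensitive;
        private Text word = new Text();
        private final static IntWritable one = new IntWritable(1);

        @Override
        public void setup(Context context) {
                // read the (hypothetical) flag wordcount.case.sensitive once per task
                caseSensitive = context.getConfiguration()
                                .getBoolean("wordcount.case.sensitive", true);
        }

        @Override
        public void map(LongWritable key, Text value, Context context)
                        throws IOException, InterruptedException {
                String line = caseSensitive ? value.toString()
                                : value.toString().toLowerCase();
                StringTokenizer tokens = new StringTokenizer(line);
                while (tokens.hasMoreTokens()) {
                        word.set(tokens.nextToken());
                        context.write(word, one);
                }
        }
}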

Adding shuffle tuning hooks

import java.io.IOException;
import java.util.StringTokenizer;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

/**
 * The template with the shuffle customization points (partition, sort,
 * combine, group) and map-output compression marked in the driver.
 */
public class ModuleMapReduce extends Configured implements Tool {
        // 1: map class
        /**
         * public class Mapper<KEYIN, VALUEIN, KEYOUT, VALUEOUT>
         */
        // TODO
        public static class ModuleMapper extends
                        Mapper<LongWritable, Text, Text, IntWritable> {

                @Override
                public void setup(Context context) throws IOException,
                                InterruptedException {
                        // Nothing
                }
                @Override
                public void cleanup(Context context) throws IOException,
                                InterruptedException {
                        // Nothing
                }
                @Override
                public void map(LongWritable key, Text value, Context context)
                                throws IOException, InterruptedException {
                        // TODO
                }
        }

        // 2: reduce class
        // TODO
        public static class ModuleReducer extends
                        Reducer<Text, IntWritable, Text, IntWritable> {

                @Override
                protected void setup(Context context)
                                throws IOException, InterruptedException {
                        // Nothing
                }


                @Override
                protected void cleanup(Context context)
                                throws IOException, InterruptedException {
                        // Nothing
                }

                @Override
                protected void reduce(Text key, Iterable<IntWritable> values,
                                Context context) throws IOException, InterruptedException {
                        // TODO
                }
        }

        // 3: driver class
        public int run(String[] args) throws Exception {
                // 1. get configuration
                Configuration configuration = getConf();
                // 2. create job
                Job job = Job.getInstance(configuration, this.getClass().getSimpleName());
                // run jar
                job.setJarByClass(this.getClass());
                // 3: set job
                /**
                 * input -> map -> reduce -> output
                 */
                // 3.1 input
                Path inPath = new Path(args[0]);
                FileInputFormat.addInputPath(job, inPath);
                // 3.2 map
                job.setMapperClass(ModuleMapper.class);
                job.setMapOutputKeyClass(Text.class);
                job.setMapOutputValueClass(IntWritable.class);

                // ================= shuffle =================
                // 1. partition: which reduce task each key goes to
                //    (default: HashPartitioner)
                // job.setPartitionerClass(cls);
                // 2. sort: ordering of keys within each partition
                // job.setSortComparatorClass(cls);
                // 3. combine: optional "mini reduce" applied to map output
                // job.setCombinerClass(cls);
                // 4. group: which keys are grouped into one reduce() call
                // job.setGroupingComparatorClass(cls);
                // ===========================================
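                // e.g. for word count the reducer can double as the combiner,
                // because summing the counts is associative and commutative:
                // job.setCombinerClass(ModuleReducer.class);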

                // 3.3 reduce
                job.setReducerClass(ModuleReducer.class);
                job.setOutputKeyClass(Text.class);
                job.setOutputValueClass(IntWritable.class);
                // set the number of reduce tasks; each task writes one output file
                job.setNumReduceTasks(2);
                // 3.4 output
                Path outPath = new Path(args[1]);
                FileOutputFormat.setOutputPath(job, outPath);
                // 4. submit job
                boolean isSuccess = job.waitForCompletion(true);
                return isSuccess ? 0 : 1;
        }
        public static void main(String[] args) throws Exception {
                // new configuration
                Configuration configuration = new Configuration();
                // optionally compress map output to cut shuffle I/O
                // configuration.set("mapreduce.map.output.compress", "true");
                // configuration.set("mapreduce.map.output.compress.codec",
                //                 "org.apache.hadoop.io.compress.SnappyCodec");

                int status = ToolRunner.run(configuration, new ModuleMapReduce(), args);

                System.exit(status);
        }
}
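
The partitioner decides which reduce task receives each map output key; the default HashPartitioner spreads keys by key.hashCode(). A minimal sketch of a custom one (hypothetical class, matching job.setNumReduceTasks(2) above; it needs import org.apache.hadoop.mapreduce.Partitioner):

public static class AlphabetPartitioner extends Partitioner<Text, IntWritable> {
        @Override
        public int getPartition(Text key, IntWritable value, int numPartitions) {
                String word = key.toString();
                if (word.isEmpty() || numPartitions < 2) {
                        return 0;
                }
                // words starting with a-m go to reducer 0, the rest to reducer 1
                char first = Character.toLowerCase(word.charAt(0));
                return (first >= 'a' && first <= 'm') ? 0 : 1;
        }
}

// wired in with: job.setPartitionerClass(AlphabetPartitioner.class);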