MapReduce Basic Template and WordCount Code

1. Basic Template
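
The skeleton below contains the three parts every MapReduce program is built from: a Mapper class, a Reducer class, and a driver (the run method) that configures the Job and submits it. The map and reduce bodies are intentionally left empty; section 2 fills them in for WordCount.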

    package com.ibeifeng.bigdata.senior.hadoop.mapreduce;

    import java.io.IOException;

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Job;
    import org.apache.hadoop.mapreduce.Mapper;
    import org.apache.hadoop.mapreduce.Reducer;
    import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
    import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

    public class WordCountMapReduce {

        // step 1 : Mapper Class
        public static class WordCountMapper extends
                Mapper<LongWritable, Text, Text, IntWritable> {

            @Override
            public void map(LongWritable key, Text value, Context context)
                    throws IOException, InterruptedException {
            }

        }

        // step 2 : Reducer Class
        public static class WordCountReducer extends
                Reducer<Text, IntWritable, Text, IntWritable> {

            @Override
            protected void reduce(Text key, Iterable<IntWritable> values,
                    Context context) throws IOException, InterruptedException {
            }

        }

        // step 3 : Driver
        public int run(String[] args) throws Exception {

            Configuration configuration = new Configuration();

            Job job = Job.getInstance(configuration, this.getClass()
                    .getSimpleName());
            job.setJarByClass(this.getClass());

            // set job
            // input
            Path inpath = new Path(args[0]);
            FileInputFormat.addInputPath(job, inpath);

            // output
            Path outpath = new Path(args[1]);
            FileOutputFormat.setOutputPath(job, outpath);

            // Mapper
            job.setMapperClass(WordCountMapper.class);
            job.setMapOutputKeyClass(Text.class);
            job.setMapOutputValueClass(IntWritable.class);

            // Reducer
            job.setReducerClass(WordCountReducer.class);
            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(IntWritable.class);

            // submit job
            boolean isSuccess = job.waitForCompletion(true);

            return isSuccess ? 0 : 1;

        }

        public static void main(String[] args) throws Exception {
            // run job
            int status = new WordCountMapReduce().run(args);

            System.exit(status);
        }
    }
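
With the map and reduce bodies filled in (see section 2), the job is packaged into a jar and submitted from the command line. A minimal sketch, assuming the jar has been built as wordcount.jar (the name is a placeholder):

    bin/hadoop jar wordcount.jar \
        com.ibeifeng.bigdata.senior.hadoop.mapreduce.WordCountMapReduce \
        /user/beifeng/input /user/beifeng/output

Note that the output directory must not already exist: FileOutputFormat refuses to overwrite an existing path and fails the job with FileAlreadyExistsException.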

2. WordCount
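
The complete implementation below fills the template in: the mapper splits each input line into words and emits a <word, 1> pair per word, and the reducer sums the ones for each word to produce its total count.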

package com.ibeifeng.bigdata.senior.hadoop.mapreduce;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class WordCountMapReduce {

    // step 1 : Mapper Class
    public static class WordCountMapper extends
            Mapper<LongWritable, Text, Text, IntWritable> {

        // the word emitted as the map output key
        private Text mapOutputKey = new Text();
        // each occurrence of a word counts as one
        private IntWritable mapOutputValue = new IntWritable(1);

        @Override
        public void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {

            // debug output: written to the map task's stdout log;
            // remove for production runs
            System.out.println("map-in-0-key: " + key.get() + " -- "
                    + "map-in-value: " + value.toString());
            // line value
            // get the current line of the file as a string
            String lineValue = value.toString();

            // split
            // split the line into words on single spaces
            // (split("\\s+") would be more robust against tabs and repeated spaces)
            String[] strs = lineValue.split(" ");

            // iterator
            // take each word out of the array and emit a <word, 1> pair for it
            for (String str : strs) {
                // set map output key
                mapOutputKey.set(str);

                // output
                // write the final <word, 1> pair
                context.write(mapOutputKey, mapOutputValue);
            }
        }

    }

    // step 2 : Reducer Class
    public static class WordCountReducer extends
            Reducer<Text, IntWritable, Text, IntWritable> {

        private IntWritable outputValue = new IntWritable();

        @Override
        protected void reduce(Text key, Iterable<IntWritable> values,
                Context context) throws IOException, InterruptedException {
            // temp : sum
            // temporary variable for the running total
            int sum = 0;

            // iterator
            // accumulate the values from the iterator; once the loop
            // finishes, sum holds the total count for this word
            for (IntWritable value : values) {
                // total
                sum += value.get();
            }

            // set output value
            outputValue.set(sum);

            // output
            context.write(key, outputValue);
        }

    }

    // step 3 : Driver
    public int run(String[] args) throws Exception {

        Configuration configuration = new Configuration();

        Job job = Job.getInstance(configuration, this.getClass()
                .getSimpleName());
        job.setJarByClass(WordCountMapReduce.class);

        // set job
        // input
        Path inpath = new Path(args[0]);
        FileInputFormat.addInputPath(job, inpath);

        // output
        Path outpath = new Path(args[1]);
        FileOutputFormat.setOutputPath(job, outpath);

        // Mapper
        job.setMapperClass(WordCountMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);

        // Reducer
        job.setReducerClass(WordCountReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);

        // submit job
        boolean isSuccess = job.waitForCompletion(true);

        return isSuccess ? 0 : 1;

    }

    public static void main(String[] args) throws Exception {

        // hard-code the two arguments: input and output paths
        // (note: this overrides anything passed on the command line;
        // drop these lines when submitting the jar with real arguments)
        args = new String[] {
                // argument 1: input path
                "hdfs://hadoop-senior01.ibeifeng.com:8020/user/beifeng/input",
                // argument 2: output path
                "hdfs://hadoop-senior01.ibeifeng.com:8020/user/beifeng/output3" };

        // run job
        int status = new WordCountMapReduce().run(args);

        System.exit(status);
    }
}
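
As a sanity check, consider a hypothetical two-line input file (contents invented for illustration):

    hadoop mapreduce
    hadoop hdfs

The mapper emits <hadoop,1>, <mapreduce,1>, <hadoop,1>, <hdfs,1>; the shuffle groups the pairs by key, and the reducer sums each group, so the output file part-r-00000 contains:

    hadoop	2
    hdfs	1
    mapreduce	1

The result can be inspected with bin/hdfs dfs -text /user/beifeng/output3/part-r-00000 (or -cat).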