5. A First MapReduce Example

Hadoop version 2.7.2 (fully distributed cluster mode on Linux)
JDK 1.8
Maven 3.3.9
The client code runs on Windows 10

This example comes from the web. Requirement: count how many times each letter occurs.
The text file looks like this: a b c d, with a single space between letters.
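
For illustration (a hypothetical sample, not from the original post), an input file containing

a b c a
b a

should yield the counts a=3, b=2, c=1.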

	<properties>
        <hadoop.version>2.7.2</hadoop.version>
    </properties>


    <dependencies>
        <dependency>
            <groupId>junit</groupId>
            <artifactId>junit</artifactId>
            <version>4.11</version>
        </dependency>

        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-client</artifactId>
            <version>${hadoop.version}</version>
        </dependency>

    </dependencies>

1. Define the Mapper

package com.cisco.learn.hadoop.mr.local;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

import java.io.IOException;

/**
 * map function: takes one input key-value pair and, after processing, produces new key-value pairs.
 * LongWritable: input key type (the byte offset of the line)
 * Text: input value type (one line of text)
 * Text: output key type
 * IntWritable: output value type
 */
public class WordMapper extends Mapper<LongWritable,Text, Text, IntWritable> {

    /**
     *
     * @param key the input key
     * @param value the input value
     * @param context the context to which output key-value pairs are written
     * @throws IOException
     * @throws InterruptedException
     */
    @Override
    protected void map(LongWritable key, Text value, Mapper<LongWritable, Text, Text, IntWritable>.Context context)
            throws IOException, InterruptedException {

        //each line of the input split is handed to the mapper as its input
        String line = value.toString();
        String[] words = line.split(" ");

        //emit (word, 1) for every word as the map output;
        //writing to the context passes the pairs on to the next phase, the shuffle
        for(String word : words) {
            context.write(new Text(word), new IntWritable(1));
        }
    }

}
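
The mapper can be checked in isolation with a small unit test. The sketch below is illustrative only: it assumes the org.apache.mrunit:mrunit test dependency (hadoop2 classifier) is added to the pom, which is not part of the original project.

package com.cisco.learn.hadoop.mr.local;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mrunit.mapreduce.MapDriver;
import org.junit.Test;

public class WordMapperTest {

    @Test
    public void mapEmitsOnePairPerWord() throws Exception {
        //requires the MRUnit test dependency, which is not in the pom shown above
        //for the input line "a b a" the mapper should emit (a,1), (b,1), (a,1), in that order
        MapDriver.newMapDriver(new WordMapper())
                .withInput(new LongWritable(0), new Text("a b a"))
                .withOutput(new Text("a"), new IntWritable(1))
                .withOutput(new Text("b"), new IntWritable(1))
                .withOutput(new Text("a"), new IntWritable(1))
                .runTest();
    }
}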

2. Define the Reducer

package com.cisco.learn.hadoop.mr.local;

import java.io.IOException;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;


public class WordReducer extends Reducer<Text, IntWritable, Text, LongWritable> {

    /**
     * The parameters mirror those of map, except that the second parameter, values, is a collection:
     * after the map phase finishes, it holds all of the values grouped under this key.
     */
    @Override
    protected void reduce(Text key, Iterable<IntWritable> values,
                          Reducer<Text, IntWritable, Text, LongWritable>.Context context) throws IOException, InterruptedException {
        long count = 0;

        //the reducer's input is the output of the shuffle
        //the input key is a word; the input value is the list of counts the shuffle delivered for that key
        for(IntWritable v : values) {
            count += v.get();
        }
        context.write(key, new LongWritable(count));
    }

}
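
The reducer can be tested the same way with MRUnit's ReduceDriver (again an illustrative sketch assuming the mrunit test dependency). The shuffle groups the map output by key, so for the key "a" with the values [1, 1] the reducer should emit (a, 2).

package com.cisco.learn.hadoop.mr.local;

import java.util.Arrays;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mrunit.mapreduce.ReduceDriver;
import org.junit.Test;

public class WordReducerTest {

    @Test
    public void reduceSumsTheCountsPerKey() throws Exception {
        //requires the MRUnit test dependency, which is not in the pom shown above
        //the shuffle delivers all values for one key as a list; the reducer sums them
        ReduceDriver.newReduceDriver(new WordReducer())
                .withInput(new Text("a"), Arrays.asList(new IntWritable(1), new IntWritable(1)))
                .withOutput(new Text("a"), new LongWritable(2))
                .runTest();
    }
}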

3. Three Run Modes

The driver code:

package com.cisco.learn.hadoop.mr.local;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import java.io.File;

public class Main {

    static {
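        // On Windows the Hadoop client expects winutils.exe under <hadoop.home.dir>\bin,
        // so hadoop.home.dir must point at a directory that contains it.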
        System.setProperty("hadoop.home.dir", "D:\\soft\\hadoop");
        System.setProperty("HADOOP_USER_NAME", "root") ;
    }


    public static void main(String[] args) throws Exception {
        if(args.length != 3){
            System.err.println("Usage: Main <input path> <output path> <mode: local|cluster>");
            System.exit(-1);
        }
        String inputPath = args[0];
        String outPath = args[1];
        String mode = args[2];
        System.out.println("input:"+inputPath);
        System.out.println("output:"+outPath);
        System.out.println("mode:"+mode);

        Configuration conf = new Configuration();
        if("cluster".equals(mode)){
            //core-site
            conf.set("fs.defaultFS", "hdfs://s130:9000");
            conf.set("mapreduce.job.jar","hadoop-demo\\target\\hadoop-demo-1.0.jar");

            //mapred-site
            conf.set("mapreduce.framework.name","yarn");
            conf.set("mapreduce.app-submission.cross-platform", "true");

            //yarn
            conf.set("yarn.resourcemanager.hostname","s130");
            conf.set("yarn.nodemanager.aux-services","mapreduce_shuffle");

            deleteHdfs(conf,outPath);
        }else{
            File output = new File(outPath);
            if(output.isDirectory()){
                deleteDirectory(outPath);
            }
        }

        Job job = Job.getInstance(conf);

        job.setJarByClass(Main.class);
        job.setMapperClass(WordMapper.class);
        job.setReducerClass(WordReducer.class);

        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(LongWritable.class);

        FileInputFormat.setInputPaths(job,new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        boolean success = job.waitForCompletion(true);
        System.out.println(success ? "success" : "failed");
    }

    public static void deleteHdfs(Configuration configuration,String output) throws Exception{
        FileSystem fs = FileSystem.get(configuration);
        boolean b = fs.delete(new Path(output),true);
        System.out.println("文件夹是否删除:" + b);
        fs.close();
    }

    public static boolean deleteFile(String fileName){
        File file = new File(fileName);
        if(file.isFile() && file.exists()){
            boolean succeedDelete = file.delete();
            if(succeedDelete){
                System.out.println("Deleted file " + fileName);
                return true;
            }
            else{
                System.out.println("Failed to delete file " + fileName);
                return false;
            }
        }else{
            System.out.println("Failed to delete file " + fileName + ": not an existing regular file");
            return false;
        }
    }

    public static boolean deleteDirectory(String dir){
        //append a trailing file separator if dir does not already end with one
        if(!dir.endsWith(File.separator)){
            dir = dir+File.separator;
        }
        File dirFile = new File(dir);
        //bail out if dir does not exist or is not a directory
        if(!dirFile.exists() || !dirFile.isDirectory()){
            System.out.println("Failed to delete directory: " + dir + " does not exist!");
            return false;
        }
        boolean flag = true;
        //delete everything under the directory (including subdirectories)
        File[] files = dirFile.listFiles();
        for(int i=0;i<files.length;i++){
            //delete a child file
            if(files[i].isFile()){
                flag = deleteFile(files[i].getAbsolutePath());
                if(!flag){
                    break;
                }
            }
            //recurse into a child directory
            else{
                flag = deleteDirectory(files[i].getAbsolutePath());
                if(!flag){
                    break;
                }
            }
        }

        if(!flag){
            System.out.println("删除目录失败");
            return false;
        }

        //finally delete the (now empty) directory itself
        if(dirFile.delete()){
            System.out.println("Deleted directory " + dir);
            return true;
        }else{
            System.out.println("Failed to delete directory " + dir);
            return false;
        }
    }
}

3.1 Running in Local Mode

Program arguments:
D:\soft\hadoop\data\input\input.txt D:\soft\hadoop\data\out local

This simulates a Hadoop runtime locally (the job runs in the LocalJobRunner) and writes the result to D:\soft\hadoop\data\out.
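
With the hypothetical sample input shown earlier, D:\soft\hadoop\data\out would contain an empty _SUCCESS marker and a part-r-00000 file with tab-separated counts, for example:

a	3
b	2
c	1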

3.2 Remote Submission

Program arguments:
/apps/icam/documents/hadoop/input.txt /apps/icam/documents/hadoop/output cluster

The directory on HDFS must be created and the input file uploaded first; the last argument, cluster, makes the program submit the MR job remotely.
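
For example, the input can be prepared with the HDFS shell (the paths match the arguments above):

hdfs dfs -mkdir -p /apps/icam/documents/hadoop
hdfs dfs -put input.txt /apps/icam/documents/hadoop/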

Note this carefully: my project is named hadoop-demo, and after running Maven's clean and package goals the jar is generated at target\hadoop-demo-1.0.jar, which is exactly what mapreduce.job.jar must point to:

conf.set("mapreduce.job.jar","hadoop-demo\\target\\hadoop-demo-1.0.jar");

3.3 Packaging the JAR and Running It on the Hadoop Cluster

Upload the jar to the machine running the Hadoop NameNode.

hadoop jar hadoop-demo-1.0.jar com.cisco.learn.hadoop.mr.local.Main /apps/icam/documents/hadoop/input.txt /apps/icam/documents/hadoop/output cluster
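
After the job finishes, the result can be inspected directly on HDFS:

hdfs dfs -cat /apps/icam/documents/hadoop/output/part-r-*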