5. A First MapReduce Example

Hadoop version 2.7.2 (fully distributed cluster mode on Linux)
JDK 1.8
Maven 3.3.9
The client code runs on Windows 10

This example comes from the web. Requirement: count how many times each letter occurs.
The text file looks like this: a b c d, with a single space between letters.
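
For illustration (a hypothetical sample, not from the original post), an input file containing

a b c a
b a

should yield the counts a=3, b=2, c=1.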

	<properties>
        <hadoop.version>2.7.2</hadoop.version>
    </properties>


    <dependencies>
        <dependency>
            <groupId>junit</groupId>
            <artifactId>junit</artifactId>
            <version>4.11</version>
        </dependency>

        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-client</artifactId>
            <version>${hadoop.version}</version>
        </dependency>

    </dependencies>

1. Define the Mapper

package com.cisco.learn.hadoop.mr.local;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

import java.io.IOException;

/**
 * map function: takes one input key-value pair and, after processing, produces new key-value pairs.
 * LongWritable: input key type (the byte offset of the line)
 * Text: input value type (one line of text)
 * Text: output key type
 * IntWritable: output value type
 */
public class WordMapper extends Mapper<LongWritable,Text, Text, IntWritable> {

    /**
     *
     * @param key the input key
     * @param value the input value
     * @param context the context to which output key-value pairs are written
     * @throws IOException
     * @throws InterruptedException
     */
    @Override
    protected void map(LongWritable key, Text value, Mapper<LongWritable, Text, Text, IntWritable>.Context context)
            throws IOException, InterruptedException {

        //each line of the input split is handed to the mapper as its input
        String line = value.toString();
        String[] words = line.split(" ");

        //emit (word, 1) for every word as the map output;
        //writing to the context passes the pairs on to the next phase, the shuffle
        for(String word : words) {
            context.write(new Text(word), new IntWritable(1));
        }
    }

}
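
The mapper can be checked in isolation with a small unit test. The sketch below is illustrative only: it assumes the org.apache.mrunit:mrunit test dependency (hadoop2 classifier) is added to the pom, which is not part of the original project.

package com.cisco.learn.hadoop.mr.local;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mrunit.mapreduce.MapDriver;
import org.junit.Test;

public class WordMapperTest {

    @Test
    public void mapEmitsOnePairPerWord() throws Exception {
        //requires the MRUnit test dependency, which is not in the pom shown above
        //for the input line "a b a" the mapper should emit (a,1), (b,1), (a,1), in that order
        MapDriver.newMapDriver(new WordMapper())
                .withInput(new LongWritable(0), new Text("a b a"))
                .withOutput(new Text("a"), new IntWritable(1))
                .withOutput(new Text("b"), new IntWritable(1))
                .withOutput(new Text("a"), new IntWritable(1))
                .runTest();
    }
}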

2. Define the Reducer

package com.cisco.learn.hadoop.mr.local;

import java.io.IOException;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;


public class WordReducer extends Reducer<Text, IntWritable, Text, LongWritable> {

    /**
     * The parameters mirror those of map, except that the second parameter, values, is a collection:
     * after the map phase finishes, it holds all of the values grouped under this key.
     */
    @Override
    protected void reduce(Text key, Iterable<IntWritable> values,
                          Reducer<Text, IntWritable, Text, LongWritable>.Context context) throws IOException, InterruptedException {
        long count = 0;

        //the reducer's input is the output of the shuffle
        //the input key is a word; the input value is the list of counts the shuffle delivered for that key
        for(IntWritable v : values) {
            count += v.get();
        }
        context.write(key, new LongWritable(count));
    }

}
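
The reducer can be tested the same way with MRUnit's ReduceDriver (again an illustrative sketch assuming the mrunit test dependency). The shuffle groups the map output by key, so for the key "a" with the values [1, 1] the reducer should emit (a, 2).

package com.cisco.learn.hadoop.mr.local;

import java.util.Arrays;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mrunit.mapreduce.ReduceDriver;
import org.junit.Test;

public class WordReducerTest {

    @Test
    public void reduceSumsTheCountsPerKey() throws Exception {
        //requires the MRUnit test dependency, which is not in the pom shown above
        //the shuffle delivers all values for one key as a list; the reducer sums them
        ReduceDriver.newReduceDriver(new WordReducer())
                .withInput(new Text("a"), Arrays.asList(new IntWritable(1), new IntWritable(1)))
                .withOutput(new Text("a"), new LongWritable(2))
                .runTest();
    }
}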

3. Three Run Modes

The driver code:

package com.cisco.learn.hadoop.mr.local;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import java.io.File;

public class Main {

    static {
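        // On Windows the Hadoop client expects winutils.exe under <hadoop.home.dir>\bin,
        // so hadoop.home.dir must point at a directory that contains it.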
        System.setProperty("hadoop.home.dir", "D:\\soft\\hadoop");
        System.setProperty("HADOOP_USER_NAME", "root") ;
    }


    public static void main(String[] args) throws Exception {
        if(args.length != 3){
            System.err.println("Usage: Main <input path> <output path> <mode: local|cluster>");
            System.exit(-1);
        }
        String inputPath = args[0];
        String outPath = args[1];
        String mode = args[2];
        System.out.println("input:"+inputPath);
        System.out.println("output:"+outPath);
        System.out.println("mode:"+mode);

        Configuration conf = new Configuration();
        if("cluster".equals(mode)){
            //core-site
            conf.set("fs.defaultFS", "hdfs://s130:9000");
            conf.set("mapreduce.job.jar","hadoop-demo\\target\\hadoop-demo-1.0.jar");

            //mapred-site
            conf.set("mapreduce.framework.name","yarn");
            conf.set("mapreduce.app-submission.cross-platform", "true");

            //yarn
            conf.set("yarn.resourcemanager.hostname","s130");
            conf.set("yarn.nodemanager.aux-services","mapreduce_shuffle");

            deleteHdfs(conf,outPath);
        }else{
            File output = new File(outPath);
            if(output.isDirectory()){
                deleteDirectory(outPath);
            }
        }

        Job job = Job.getInstance(conf);

        job.setJarByClass(Main.class);
        job.setMapperClass(WordMapper.class);
        job.setReducerClass(WordReducer.class);

        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(LongWritable.class);

        FileInputFormat.setInputPaths(job,new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        boolean success = job.waitForCompletion(true);
        System.out.println(success ? "success" : "failed");
    }

    public static void deleteHdfs(Configuration configuration,String output) throws Exception{
        FileSystem fs = FileSystem.get(configuration);
        boolean b = fs.delete(new Path(output),true);
        System.out.println("文件夹是否删除:" + b);
        fs.close();
    }

    public static boolean deleteFile(String fileName){
        File file = new File(fileName);
        if(file.isFile() && file.exists()){
            boolean succeedDelete = file.delete();
            if(succeedDelete){
                System.out.println("Deleted file " + fileName);
                return true;
            }
            else{
                System.out.println("Failed to delete file " + fileName);
                return false;
            }
        }else{
            System.out.println("Failed to delete file " + fileName + ": not an existing regular file");
            return false;
        }
    }

    public static boolean deleteDirectory(String dir){
        //append a trailing file separator if dir does not already end with one
        if(!dir.endsWith(File.separator)){
            dir = dir+File.separator;
        }
        File dirFile = new File(dir);
        //bail out if dir does not exist or is not a directory
        if(!dirFile.exists() || !dirFile.isDirectory()){
            System.out.println("Failed to delete directory: " + dir + " does not exist!");
            return false;
        }
        boolean flag = true;
        //delete everything under the directory (including subdirectories)
        File[] files = dirFile.listFiles();
        for(int i=0;i<files.length;i++){
            //delete a child file
            if(files[i].isFile()){
                flag = deleteFile(files[i].getAbsolutePath());
                if(!flag){
                    break;
                }
            }
            //recurse into a child directory
            else{
                flag = deleteDirectory(files[i].getAbsolutePath());
                if(!flag){
                    break;
                }
            }
        }

        if(!flag){
            System.out.println("删除目录失败");
            return false;
        }

        //finally delete the (now empty) directory itself
        if(dirFile.delete()){
            System.out.println("Deleted directory " + dir);
            return true;
        }else{
            System.out.println("Failed to delete directory " + dir);
            return false;
        }
    }
}

3.1 Running in Local Mode

Program arguments:
D:\soft\hadoop\data\input\input.txt D:\soft\hadoop\data\out local

This simulates a Hadoop runtime locally (the job runs in the LocalJobRunner) and writes the result to D:\soft\hadoop\data\out.
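
With the hypothetical sample input shown earlier, D:\soft\hadoop\data\out would contain an empty _SUCCESS marker and a part-r-00000 file with tab-separated counts, for example:

a	3
b	2
c	1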

3.2 Remote Submission

Program arguments:
/apps/icam/documents/hadoop/input.txt /apps/icam/documents/hadoop/output cluster

The directory on HDFS must be created and the input file uploaded first; the last argument, cluster, makes the program submit the MR job remotely.
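
For example, the input can be prepared with the HDFS shell (the paths match the arguments above):

hdfs dfs -mkdir -p /apps/icam/documents/hadoop
hdfs dfs -put input.txt /apps/icam/documents/hadoop/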

Note this carefully: my project is named hadoop-demo, and after running Maven's clean and package goals the jar is generated at target\hadoop-demo-1.0.jar, which is exactly what mapreduce.job.jar must point to:

conf.set("mapreduce.job.jar","hadoop-demo\\target\\hadoop-demo-1.0.jar");

3.3 Packaging the JAR and Running It on the Hadoop Cluster

Upload the jar to the machine running the Hadoop NameNode.

hadoop jar hadoop-demo-1.0.jar com.cisco.learn.hadoop.mr.local.Main /apps/icam/documents/hadoop/input.txt /apps/icam/documents/hadoop/output cluster
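
After the job finishes, the result can be inspected directly on HDFS:

hdfs dfs -cat /apps/icam/documents/hadoop/output/part-r-*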