Submitting MapReduce Jobs from a Local Development Environment to a CDH Cluster

1. Environment:

Cluster: CDH 6.2.0 / CentOS Linux release 7.5.1804 (Core) / non-Kerberos

Local: Windows 10

二、部署本地开发环境:

从githup获取winutils-master包

github地址:https://codeload.github.com/steveloughran/winutils/zip/refs/heads/master

解压 winutils-master.zip,解压后的目录如下(根据自己的实际情况选择一个hadoop版本配置本地环境):

Configure the local environment variables:

Note:

HADOOP_USER_NAME is set to hdfs because that user has permission to write to the HDFS filesystem.

HADOOP_HOME and the corresponding bin entry on PATH must also be set, since running locally requires a Hadoop environment. (Both can also be set from code, as sketched below.)
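A minimal sketch of the in-code alternative (the hadoop.home.dir path is a placeholder for wherever you unpacked winutils, and the system-property route for the user name only applies to non-Kerberos clusters):

// Must run before any Hadoop class is initialized.
// Hadoop also honors HADOOP_USER_NAME as a JVM system property when the
// environment variable is absent.
System.setProperty("HADOOP_USER_NAME", "hdfs");
// hadoop.home.dir is the system-property equivalent of HADOOP_HOME;
// the path below is a placeholder for your local winutils directory.
System.setProperty("hadoop.home.dir", "D:\\winutils\\hadoop-3.0.0");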

3. Java code example (Maven project):

Maven version: apache-maven-3.6.3

JDK version: JDK 1.8

Download the YARN client configuration from the cluster, unzip it, and place the yarn-conf directory under the project directory:

The Maven dependencies are as follows:

    <dependency>
      <groupId>org.apache.hadoop</groupId>
      <artifactId>hadoop-client</artifactId>
      <version>3.0.0-cdh6.2.0</version>
    </dependency>

    <dependency>
      <groupId>org.apache.hadoop</groupId>
      <artifactId>hadoop-common</artifactId>
      <version>3.0.0-cdh6.2.0</version>
    </dependency> 

Note:

For the Maven artifact versions that correspond to each CDH release, see:

https://docs.cloudera.com/documentation/enterprise/6/release-notes/topics/rg_cdh_62_maven_artifacts.html#concept_2gp_d8n_yk
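The CDH artifacts are not hosted on Maven Central, so the pom.xml typically also needs Cloudera's repository; a sketch of the usual entry (verify the URL against the Cloudera docs above):

    <repositories>
      <repository>
        <id>cloudera</id>
        <url>https://repository.cloudera.com/artifactory/cloudera-repos/</url>
      </repository>
    </repositories>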

 

package com.cdh.pro.guagua.yarn.conf;

import java.io.File;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.yarn.conf.YarnConfiguration;

/**
 * Builds a Hadoop Configuration from the client config files downloaded from the cluster.
 */
public class InitConfiguration {
    
    public static Configuration initConf(String confPath) {
        Configuration configuration = new YarnConfiguration();
        configuration.addResource(new Path(confPath + File.separator + "core-site.xml"));
        configuration.addResource(new Path(confPath + File.separator + "hdfs-site.xml"));
        configuration.addResource(new Path(confPath + File.separator + "mapred-site.xml"));
        configuration.addResource(new Path(confPath + File.separator + "yarn-site.xml"));
        configuration.setBoolean("dfs.support.append", true);
        configuration.set("fs.hdfs.impl", "org.apache.hadoop.hdfs.DistributedFileSystem");
        configuration.setBoolean("fs.hdfs.impl.disable.cache", true);
        return configuration;
    }

}
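Before wiring up the job, a quick way to confirm the configuration actually reaches the cluster is to list an HDFS path with the standard FileSystem API. A minimal sketch (HdfsSanityCheck is an illustrative name, not part of the original project):

package com.cdh.pro.guagua.yarn.conf;

import java.io.File;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class HdfsSanityCheck {

    public static void main(String[] args) throws Exception {
        Configuration conf = InitConfiguration.initConf(System.getProperty("user.dir")
                + File.separator + "yarndemo" + File.separator + "yarn-conf");
        try (FileSystem fs = FileSystem.get(conf)) {
            // Should list the remote cluster's HDFS root, not the local disk.
            for (FileStatus status : fs.listStatus(new Path("/"))) {
                System.out.println(status.getPath());
            }
        }
    }
}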


 

package com.cdh.pro.guagua.yarn;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

/**
 * Builds the word-count Job: jar location, mapper/reducer classes, output types, and I/O paths.
 */
public class InitMapReduceJob {


    public static Job initWordCountJob(Configuration conf) {
        Job wcjob = null;
        try {
            conf.setBoolean("mapreduce.app-submission.cross-platform", true);  //设置跨平台提交作业
            // 设置job所使用的jar包,使用Configuration对象调用set()方法,设置mapreduce.job.jar wcount.jar
            // mvn clean package 编译获得  guagua-0.0.1-SNAPSHOT.jar 必须设置,否则CDH执行无法找到对应class
            conf.set("mapred.jar", System.getProperty("user.dir") + "/lib/guagua-0.0.1-SNAPSHOT.jar");
            // 创建job对象需要conf对象,conf对象包含的信息是:所用的jar包
            wcjob = Job.getInstance(conf);
            wcjob.setMapperClass(WordCountMapper.class);
            wcjob.setReducerClass(WordCountReducer.class);

            // Key/value types emitted by the mapper
            wcjob.setMapOutputKeyClass(Text.class);
            wcjob.setMapOutputValueClass(LongWritable.class);

            // Key/value types emitted by the reducer
            wcjob.setOutputKeyClass(Text.class);
            wcjob.setOutputValueClass(LongWritable.class);
            // HDFS input and output paths (hard-coded for this demo)
            FileInputFormat.setInputPaths(wcjob, "/test/input");
            FileOutputFormat.setOutputPath(wcjob, new Path("/test/output"));
        } catch (Exception e) {
            e.printStackTrace();
        }
        return wcjob;
    }

}
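One caveat with the hard-coded paths: FileOutputFormat fails the job if /test/output already exists from a previous run. A sketch of deleting it programmatically, which could go right before setOutputPath above (FileSystem needs an extra import of org.apache.hadoop.fs.FileSystem):

            // Remove the previous run's output so FileOutputFormat does not reject the job.
            Path outputPath = new Path("/test/output");
            FileSystem fs = FileSystem.get(conf);
            if (fs.exists(outputPath)) {
                fs.delete(outputPath, true); // true = recursive
            }
            FileOutputFormat.setOutputPath(wcjob, outputPath);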
package com.cdh.pro.guagua.yarn;

import java.io.IOException;

import org.apache.commons.lang3.StringUtils;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

/**
 * Mapper: emits <word, 1> for every word in each input line.
 */
public class WordCountMapper extends Mapper<LongWritable, Text, Text, LongWritable>{

    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        // Read one line of input
        String line = value.toString();
        // Split the line into an array of words
        String[] words = StringUtils.split(line, " ");
        // Emit <word, 1> for each word
        for(String word:words){
            context.write(new Text(word), new LongWritable(1));
        }
    }

}
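Not required for this demo, but a common MapReduce idiom is to reuse the Writable objects across map() calls instead of allocating new ones per word. The same mapper body, sketched with that optimization:

    // Reusable writables: map() may run millions of times, so this cuts GC pressure.
    private final Text word = new Text();
    private static final LongWritable ONE = new LongWritable(1);

    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        for (String w : StringUtils.split(value.toString(), " ")) {
            word.set(w);
            context.write(word, ONE);
        }
    }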
package com.cdh.pro.guagua.yarn;

import java.io.IOException;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

/**
 * Reducer: sums the counts for each word.
 */
public class WordCountReducer extends Reducer<Text, LongWritable, Text, LongWritable>{

    @Override
    protected void reduce(Text key, Iterable<LongWritable> values, Context context)
            throws IOException, InterruptedException {
        long count = 0;
        for (LongWritable value : values) {
            // Extract the primitive long from each count
            count += value.get();
        }
        // Emit the <word, total count> pair
        context.write(key, new LongWritable(count));
    }

}
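Because summing counts is associative and commutative, this reducer can also safely run as a combiner on the map side to shrink shuffle traffic. An optional one-line addition to initWordCountJob:

            // Optional: pre-aggregate map output locally; safe because addition is
            // associative and commutative.
            wcjob.setCombinerClass(WordCountReducer.class);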
package com.cdh.pro.guagua.yarn;

import java.io.File;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;

import com.cdh.pro.guagua.yarn.conf.InitConfiguration;

/**
 * Driver: builds the configuration, sets up the job, and submits it to the cluster.
 */
public class NonKerberosMRTest {

    
    private static String confPath = System.getProperty("user.dir") + File.separator + "yarndemo"
            + File.separator + "yarn-conf";


    public static void main(String[] args) {
        try {
            System.out.println(confPath);
            Configuration conf = InitConfiguration.initConf(confPath);
            Job wcjob = InitMapReduceJob.initWordCountJob(conf);
            wcjob.setJarByClass(NonKerberosMRTest.class);
            wcjob.setJobName("NonKerberosMRTest");

            // Submit the job and block until it completes.
            boolean res = wcjob.waitForCompletion(true);
            System.exit(res ? 0 : 1);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

}
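As an aside, the idiomatic Hadoop driver implements Tool and runs through ToolRunner, which also parses generic options such as -D overrides. A sketch of the same driver in that style (WordCountDriver is an illustrative name, not part of the original project):

package com.cdh.pro.guagua.yarn;

import java.io.File;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

import com.cdh.pro.guagua.yarn.conf.InitConfiguration;

public class WordCountDriver extends Configured implements Tool {

    @Override
    public int run(String[] args) throws Exception {
        // getConf() already carries any -D overrides parsed by ToolRunner.
        Job job = InitMapReduceJob.initWordCountJob(getConf());
        job.setJarByClass(WordCountDriver.class);
        job.setJobName("WordCountDriver");
        return job.waitForCompletion(true) ? 0 : 1;
    }

    public static void main(String[] args) throws Exception {
        Configuration conf = InitConfiguration.initConf(System.getProperty("user.dir")
                + File.separator + "yarndemo" + File.separator + "yarn-conf");
        System.exit(ToolRunner.run(conf, new WordCountDriver(), args));
    }
}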

Run the following on the CDH cluster (create the input directory and file; -p creates the parent /test directory if it does not exist yet):

[root@virtual-cdh01 app]# vim word.txt
wq dsvr dewfw wq pp qq ww qq pp pp
de de ss aa ss dd
[root@virtual-cdh01 app]# hadoop fs -mkdir -p /test/input/
[root@virtual-cdh01 app]# hadoop fs -put word.txt /test/input/

Package the project:

mvn clean package

Copy the jar from the target directory into the project's lib directory:

Note:

Packaging the project and placing the jar in the lib directory is what lets the CDH cluster locate the job's classes at runtime, via the setting shown earlier:

// The jar built by `mvn clean package` (guagua-0.0.1-SNAPSHOT.jar) must be set,
// otherwise the CDH cluster cannot find the corresponding classes.
conf.set("mapred.jar", System.getProperty("user.dir") + "/lib/guagua-0.0.1-SNAPSHOT.jar");

Run the job (the main method of NonKerberosMRTest):

Check the result on the CDH cluster:

[root@virtual-cdh01 app]# hadoop fs -ls /test/output/
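With a single reducer the counts land in one output file, typically named part-r-00000 under the output directory, which can be printed directly:

[root@virtual-cdh01 app]# hadoop fs -cat /test/output/part-r-00000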

The job executed successfully!
