2. AWS SDK for Java: launch an EMR cluster, run steps, shut it down afterward (one step, then two steps)

1. Write a Spark program:

  1. Purpose: testing.
  2. Function: save the result of a SQL query into a specified folder (a sketch of such a class is shown below).
  3. Execution:
spark-submit --master yarn --deploy-mode cluster --num-executors 5 --executor-cores 3 --executor-memory 6144m --class lifecycle01_tool.Tool10_sqlToFile s3://lifecyclebigdata/dataWareHouse/BALABALA/00jar/03_hive/hiveLifeCycle-1.0-SNAPSHOT.jar "select * from balabala.outer_year_season" "lifecyclebigdata/dataWareHouse/BALABALA/09_testData/res01"
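
The post does not include the source of Tool10_sqlToFile, so here is a minimal sketch of what such a class could look like, assuming Spark SQL with Hive support, args[0] as the query, and args[1] as a bucket-relative S3 output folder (all of this is an assumption, not the author's actual code):

package lifecycle01_tool;

import org.apache.spark.sql.SparkSession;

//  Hypothetical sketch: run a SQL query and write the result to a folder.
public class Tool10_sqlToFile {
    public static void main(String[] args) {
        String sql = args[0];                  // e.g. "select * from balabala.outer_year_season"
        String outPath = "s3://" + args[1];    // assumes the second argument is a bucket-relative path
        SparkSession spark = SparkSession.builder()
                .appName("Tool10_sqlToFile")
                .enableHiveSupport()           // needed so spark.sql() can see Hive tables
                .getOrCreate();
        //  write the query result as CSV into the target folder
        spark.sql(sql).write().csv(outPath);
        spark.stop();
    }
}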

2. The step class: HadoopJarStepConfig (a short usage sketch follows the list)

  1. getProperties(): get the list of Java properties set for the step, which are passed through to our main method.
  2. setProperties(): set Java properties for the step at runtime.
  3. withProperties(): same as above, fluent style.
  4. setJar(): set the path of the JAR to run.
  5. getJar(): get the JAR path we set.
  6. withJar(): same as above, fluent style.
  7. setMainClass(): set the main class.
  8. getMainClass(): get the main class we set.
  9. withMainClass(): same as above, fluent style.
  10. setArgs(): set the arguments.
  11. getArgs(): get the arguments.
  12. withArgs(): same as above, fluent style.
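
The set*/get* pairs are plain JavaBean accessors; the with* variants set the same fields but return the object itself, so calls can be chained. A minimal sketch of both styles (the jar path and main class here are hypothetical placeholders):

import com.amazonaws.services.elasticmapreduce.model.HadoopJarStepConfig;
import java.util.Arrays;

public class StepConfigDemo {
    public static void main(String[] args) {
        //  plain setters :
        HadoopJarStepConfig conf = new HadoopJarStepConfig();
        conf.setJar("s3://mybucket/app.jar");        // hypothetical jar path
        conf.setMainClass("com.example.Main");       // hypothetical main class
        conf.setArgs(Arrays.asList("arg1", "arg2"));

        //  the with* variants set the same fields but return the object,
        //  so they chain :
        HadoopJarStepConfig same = new HadoopJarStepConfig()
                .withJar("s3://mybucket/app.jar")
                .withMainClass("com.example.Main")
                .withArgs("arg1", "arg2");

        System.out.println(conf.equals(same)); // true : both describe the same step
    }
}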

3. Run one step, then shut down:

package test01;

import com.amazonaws.AmazonClientException;
import com.amazonaws.auth.AWSCredentials;
import com.amazonaws.auth.AWSStaticCredentialsProvider;
import com.amazonaws.auth.profile.ProfileCredentialsProvider;
import com.amazonaws.regions.Regions;
import com.amazonaws.services.elasticmapreduce.AmazonElasticMapReduce;
import com.amazonaws.services.elasticmapreduce.AmazonElasticMapReduceClientBuilder;
import com.amazonaws.services.elasticmapreduce.model.*;
import java.util.*;

//  Launch a cluster with one step; terminate after the step finishes
public class Test02_Start_Step_Stop {
    public static void main(String[] args) {

        //  1. Create a credentials object: used to access the AWS platform
        AWSCredentials credentials_profile = null;
        try {
            //  name the profile ( the keys are stored in a file )
            //  "default" refers to the profile in the file ( on Windows the file lives under C:\Users\86182\.aws )
            //  file contents :
            /*[default]
            aws_access_key_id = <your-access-key-id>
            aws_secret_access_key = <your-secret-access-key>*/
            //  Windows : if no argument is passed, the file is found automatically ( C:\Users\USERNAME\.aws\credentials )
            //  Linux : if no argument is passed, the file is found automatically ( ~/.aws/credentials )
            //          regular user : /home/ec2-user/.aws/credentials
            //          root user : /root/.aws/credentials
            final ProfileCredentialsProvider dft01 = new ProfileCredentialsProvider("default");
            //  resolve the credentials from the profile
            credentials_profile = dft01.getCredentials();
        } catch (Exception e) {
            throw new AmazonClientException("Cannot load credentials from .aws/credentials file. Make sure that the credentials file exists and that the profile name is defined within it.", e);
        }

        //  2. Create the EMR client: used to create and operate EMR clusters
        AmazonElasticMapReduce emr = AmazonElasticMapReduceClientBuilder.standard()
                .withCredentials(new AWSStaticCredentialsProvider(credentials_profile))
                .withRegion(Regions.CN_NORTHWEST_1)
                .build();

        //  3. Specify the applications to install on the cluster:
        Application hadoop = new Application().withName("Hadoop");
        Application hive = new Application().withName("Hive");
        Application spark = new Application().withName("Spark");

        //  4. Create the configuration object ( configuration for EMR components )
        Configuration configuration = new Configuration();
        configuration.setClassification("hive-site");
        configuration.addPropertiesEntry("javax.jdo.option.ConnectionURL", "jdbc:mysql://sflmysql03.cbgb7etsvnph.rds.cn-northwest-1.amazonaws.com.cn:3306/hivemysql?createDatabaseIfNotExist=true");
        configuration.addPropertiesEntry("javax.jdo.option.ConnectionDriverName", "org.mariadb.jdbc.Driver");
        configuration.addPropertiesEntry("javax.jdo.option.ConnectionUserName", "sfl");
        configuration.addPropertiesEntry("javax.jdo.option.ConnectionPassword", "mypasswd");
        //      store the configuration object in a collection
        List<Configuration> configurations = new LinkedList<Configuration>();
        configurations.add(configuration);

        //      command to run : spark-submit --master yarn --deploy-mode cluster --num-executors 5 --executor-cores 3 --executor-memory 6144m --class lifecycle01_tool.Tool10_sqlToFile s3://lifecyclebigdata/dataWareHouse/BALABALA/00jar/03_hive/hiveLifeCycle-1.0-SNAPSHOT.jar "select * from balabala.outer_year_season" "lifecyclebigdata/dataWareHouse/BALABALA/09_testData/res01"
        //  5. Steps: run the program
        //      list of all steps
        List<StepConfig> stepConfigs = new ArrayList<StepConfig>();

        HadoopJarStepConfig sparkStepConf = new HadoopJarStepConfig()
                //  jar that runs shell commands on the cluster
                .withJar("command-runner.jar")
                //  the shell command to run
                .withArgs("spark-submit","--master","yarn","--deploy-mode","cluster","--num-executors","5","--executor-cores","3",
                        "--executor-memory","6144m","--class","lifecycle01_tool.Tool10_sqlToFile","s3://lifecyclebigdata/dataWareHouse/BALABALA/00jar/03_hive/hiveLifeCycle-1.0-SNAPSHOT.jar","select * from balabala.outer_year_season","lifecyclebigdata/dataWareHouse/BALABALA/09_testData/res01");

        StepConfig sparkStep = new StepConfig()
                //  step name
                .withName("Spark Step")
                //  what to do on failure : continue
                .withActionOnFailure("CONTINUE")
                .withHadoopJarStep(sparkStepConf);

        stepConfigs.add(sparkStep);

        //  6. Configure the cluster:
        RunJobFlowRequest request = new RunJobFlowRequest()
                //  cluster name
                .withName("HadoopHiveSparkCluster")
                //  EMR release
                .withReleaseLabel("emr-5.27.0")
                //  applications on the cluster
                .withApplications(hadoop,hive,spark)
                //  where the cluster logs are written
                .withLogUri("s3://lifecyclebigdata/emrLog/")
                //  EMR service role
                .withServiceRole("python")
                //  Auto Scaling role
                .withAutoScalingRole("EMR_AutoScaling_DefaultRole")
                //  startup configurations
                .withConfigurations(configurations)
                //  add the steps
                .withSteps(stepConfigs)
                //  EBS root volume size ( in GB )
                .withEbsRootVolumeSize(10)
                //  EC2 instance profile
                .withJobFlowRole("python-ec2")
                //  EC2 instance configuration
                .withInstances(new JobFlowInstancesConfig()
                        //  VPC subnet
                        .withEc2SubnetId("subnet-56ba7e2d")
                        //  security groups
                        .withEmrManagedMasterSecurityGroup("sg-3bc38a52")
                        .withEmrManagedSlaveSecurityGroup("sg-3bc38a52")
                        //  EC2 key pair
                        .withEc2KeyName("lifecycle-python")
                        //  4 EC2 instances ( 1 master by default, the rest are slaves )
                        .withInstanceCount(4)
                        //  whether to keep the cluster alive after all steps finish ( true = keep alive, false = terminate )
                        .withKeepJobFlowAliveWhenNoSteps(false)
                        //  EC2 instance types
                        .withMasterInstanceType("c4.xlarge")
                        .withSlaveInstanceType("c4.2xlarge"));
        //      make the cluster visible to all users, otherwise it will not show up in the console
        request.setVisibleToAllUsers(true);
        //  7. Create the cluster:
        RunJobFlowResult result = emr.runJobFlow(request);
        System.out.println("The cluster ID is " + result.getJobFlowId());

    }
}
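
Because withKeepJobFlowAliveWhenNoSteps(false) terminates the cluster once its steps finish, the program above only needs to print the cluster ID and exit. If you want to block until everything is done, you can poll the cluster state; a minimal sketch, assuming the `emr` client and cluster ID from above ( waitForTermination is a hypothetical helper, not part of the SDK ):

import com.amazonaws.services.elasticmapreduce.AmazonElasticMapReduce;
import com.amazonaws.services.elasticmapreduce.model.DescribeClusterRequest;

public class ClusterWait {
    //  Hypothetical helper: block until the cluster reaches a TERMINATED* state.
    public static void waitForTermination(AmazonElasticMapReduce emr, String clusterId)
            throws InterruptedException {
        while (true) {
            String state = emr.describeCluster(
                            new DescribeClusterRequest().withClusterId(clusterId))
                    .getCluster().getStatus().getState();
            System.out.println("Cluster state: " + state);
            if (state.startsWith("TERMINATED")) {
                return; // TERMINATED or TERMINATED_WITH_ERRORS
            }
            Thread.sleep(30_000); // poll every 30 seconds
        }
    }
}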

4. Run two steps, then shut down:

package test01;

import com.amazonaws.services.elasticmapreduce.model.StepConfig;
import lifeCycle01_Tool.Tool01_Cluster;
import lifeCycle01_Tool.Tool02_Step;

import java.util.ArrayList;
import java.util.List;

public class Test03_MyAPI {

    public static void main(String[] args) {

        //  1. Build the list of steps:
        List<StepConfig> listSteps = new ArrayList<StepConfig>();
        //  2. Main class, jar path, main() arguments ( there can be several arguments )
        final StepConfig step01 = Tool02_Step.getSteps("lifecycle01_tool.Tool10_sqlToFile", "s3://lifecyclebigdata/dataWareHouse/BALABALA/00jar/03_hive/hiveLifeCycle-1.0-SNAPSHOT.jar", "select * from balabala.outer_year_season", "lifecyclebigdata/dataWareHouse/BALABALA/09_testData/res01");
        final StepConfig step02 = Tool02_Step.getSteps("lifecycle01_tool.Tool10_sqlToFile", "s3://lifecyclebigdata/dataWareHouse/BALABALA/00jar/03_hive/hiveLifeCycle-1.0-SNAPSHOT.jar", "select * from balabala.outer_year_season", "lifecyclebigdata/dataWareHouse/BALABALA/09_testData/res02");
        listSteps.add(step01);
        listSteps.add(step02);
        //  3. Start the cluster and run the steps:
        final String s = Tool01_Cluster.startStepStop(listSteps);
        System.out.println("Cluster ID: " + s);

    }


    public static void main01(String[] args) {

        //  1. Start a cluster:
        /*final String clusterId = Tool01_Cluster.startEmrCluster();
        System.out.println(clusterId);*/

        //  2. Stop a cluster: the cluster id printed above was j-2G9WFA0XGI6OI
        /*Tool01_Cluster.stopClusterById("j-2G9WFA0XGI6OI");
        System.out.println("Cluster stopped...");*/

        //  3. Merge two arrays:
        /*String[] arr = {"a","b"};
        String[] brr = {"c"};
        final String[] crr = Tool03_Array.arrUnion(arr, brr);
        System.out.println(Arrays.toString(crr));*/

    }

}
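
Tool01_Cluster and Tool02_Step are the author's own helper classes and their source is not shown in this post. Presumably Tool02_Step.getSteps wraps the StepConfig construction from section 3; a hypothetical reconstruction ( the signature, step name, and spark-submit defaults are assumptions ):

package lifeCycle01_Tool;

import com.amazonaws.services.elasticmapreduce.model.HadoopJarStepConfig;
import com.amazonaws.services.elasticmapreduce.model.StepConfig;

//  Hypothetical reconstruction of the helper used above: builds one
//  spark-submit step from a main class, a jar path, and the main() args.
public class Tool02_Step {
    public static StepConfig getSteps(String mainClass, String jar, String... mainArgs) {
        String[] base = {"spark-submit", "--master", "yarn", "--deploy-mode", "cluster",
                "--num-executors", "5", "--executor-cores", "3", "--executor-memory", "6144m",
                "--class", mainClass, jar};
        //  append the main() arguments after the base spark-submit command
        String[] all = new String[base.length + mainArgs.length];
        System.arraycopy(base, 0, all, 0, base.length);
        System.arraycopy(mainArgs, 0, all, base.length, mainArgs.length);
        HadoopJarStepConfig conf = new HadoopJarStepConfig()
                .withJar("command-runner.jar")
                .withArgs(all);
        return new StepConfig()
                .withName("Spark Step " + mainClass)
                .withActionOnFailure("CONTINUE")
                .withHadoopJarStep(conf);
    }
}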