1 ,写一个 spark 程序 :
- 目的 : 测试
- 功能 : 把 sql 结果存储进指定的文件夹
- 执行 :
spark-submit --master yarn --deploy-mode cluster --num-executors 5 --executor-cores 3 --executor-memory 6144m --class lifecycle01_tool.Tool10_sqlToFile s3://lifecyclebigdata/dataWareHouse/BALABALA/00jar/03_hive/hiveLifeCycle-1.0-SNAPSHOT.jar "select * from balabala.outer_year_season" "lifecyclebigdata/dataWareHouse/BALABALA/09_testData/res01"
2 ,步骤类 : HadoopJarStepConfig
- getProperties() : 获取步骤运行时的 java 属性列表,供我们的 main 函数读取
- setProperties() : 步骤运行时,设置 java 属性。
- withProperties() : 同上
- setJar() : jar 包路径
- getJar() : 获取我们设置过的 jar 路径
- withJar() : 同上
- setMainClass() : 设置主类
- getMainClass() : 获取到设置的那个主类
- withMainClass() : 同上
- setArgs() : 设置参数
- getArgs() : 获取参数
- withArgs() : 同上
3 ,执行一个步骤后关闭 :
package test01;
import com.amazonaws.AmazonClientException;
import com.amazonaws.auth.AWSCredentials;
import com.amazonaws.auth.AWSStaticCredentialsProvider;
import com.amazonaws.auth.profile.ProfileCredentialsProvider;
import com.amazonaws.regions.Regions;
import com.amazonaws.services.elasticmapreduce.AmazonElasticMapReduce;
import com.amazonaws.services.elasticmapreduce.AmazonElasticMapReduceClientBuilder;
import com.amazonaws.services.elasticmapreduce.model.*;
import java.util.*;
/**
 * Launches a transient EMR cluster (Hadoop + Hive + Spark), runs a single
 * spark-submit step that exports a SQL result to S3, and lets the cluster
 * terminate when the step finishes (keepJobFlowAliveWhenNoSteps = false).
 */
public class Test02_Start_Step_Stop {

    public static void main(String[] args) {
        AmazonElasticMapReduce emr = buildEmrClient();
        RunJobFlowRequest request = buildRequest();
        RunJobFlowResult result = emr.runJobFlow(request);
        // BUG FIX: result.toString() dumps the whole result object; getJobFlowId()
        // returns the actual cluster (job flow) id.
        System.out.println("The cluster ID is " + result.getJobFlowId());
    }

    /**
     * Builds an EMR client authenticated from the local "default" AWS profile.
     *
     * @throws AmazonClientException if ~/.aws/credentials is missing or the
     *         "default" profile is not defined in it
     */
    private static AmazonElasticMapReduce buildEmrClient() {
        AWSCredentials credentials_profile;
        try {
            credentials_profile = new ProfileCredentialsProvider("default").getCredentials();
        } catch (Exception e) {
            // Preserve the cause so the real failure (missing file, bad profile) is visible.
            throw new AmazonClientException("Cannot load credentials from .aws/credentials file.Make sure that the credentials file exists and that the profile name is defined within it.", e);
        }
        return AmazonElasticMapReduceClientBuilder.standard()
                .withCredentials(new AWSStaticCredentialsProvider(credentials_profile))
                .withRegion(Regions.CN_NORTHWEST_1)
                .build();
    }

    /**
     * hive-site overrides pointing the Hive metastore at an external RDS MySQL.
     * SECURITY NOTE(review): the metastore user/password are hard-coded in source;
     * move them to a secrets store or job parameters.
     */
    private static Configuration hiveSiteConfiguration() {
        Configuration configuration = new Configuration();
        configuration.setClassification("hive-site");
        configuration.addPropertiesEntry("javax.jdo.option.ConnectionURL", "jdbc:mysql://sflmysql03.cbgb7etsvnph.rds.cn-northwest-1.amazonaws.com.cn:3306/hivemysql?createDatabaseIfNotExist=true");
        configuration.addPropertiesEntry("javax.jdo.option.ConnectionDriverName", "org.mariadb.jdbc.Driver");
        configuration.addPropertiesEntry("javax.jdo.option.ConnectionUserName", "sfl");
        configuration.addPropertiesEntry("javax.jdo.option.ConnectionPassword", "mypasswd");
        return configuration;
    }

    /**
     * The single spark-submit step: runs Tool10_sqlToFile, which writes the
     * given SQL's result to the given S3 folder. ActionOnFailure=CONTINUE so a
     * step failure does not kill the cluster before logs are flushed.
     */
    private static StepConfig sparkStep() {
        HadoopJarStepConfig sparkStepConf = new HadoopJarStepConfig()
                .withJar("command-runner.jar")
                .withArgs("spark-submit", "--master", "yarn", "--deploy-mode", "cluster", "--num-executors", "5", "--executor-cores", "3",
                        "--executor-memory", "6144m", "--class", "lifecycle01_tool.Tool10_sqlToFile", "s3://lifecyclebigdata/dataWareHouse/BALABALA/00jar/03_hive/hiveLifeCycle-1.0-SNAPSHOT.jar", "select * from balabala.outer_year_season", "lifecyclebigdata/dataWareHouse/BALABALA/09_testData/res01");
        return new StepConfig()
                .withName("Spark Step")
                .withActionOnFailure("CONTINUE")
                .withHadoopJarStep(sparkStepConf);
    }

    /** Assembles the full cluster definition: apps, roles, instances, steps. */
    private static RunJobFlowRequest buildRequest() {
        List<Configuration> configurations = new LinkedList<Configuration>();
        configurations.add(hiveSiteConfiguration());

        List<StepConfig> stepConfigs = new ArrayList<StepConfig>();
        stepConfigs.add(sparkStep());

        RunJobFlowRequest request = new RunJobFlowRequest()
                .withName("HadoopHiveSparkCluster")
                .withReleaseLabel("emr-5.27.0")
                .withApplications(new Application().withName("Hadoop"),
                        new Application().withName("Hive"),
                        new Application().withName("Spark"))
                .withLogUri("s3://lifecyclebigdata/emrLog/")
                .withServiceRole("python")
                .withAutoScalingRole("EMR_AutoScaling_DefaultRole")
                .withConfigurations(configurations)
                .withSteps(stepConfigs)
                .withEbsRootVolumeSize(10)
                .withJobFlowRole("python-ec2")
                .withInstances(new JobFlowInstancesConfig()
                        .withEc2SubnetId("subnet-56ba7e2d")
                        .withEmrManagedMasterSecurityGroup("sg-3bc38a52")
                        .withEmrManagedSlaveSecurityGroup("sg-3bc38a52")
                        .withEc2KeyName("lifecycle-python")
                        .withInstanceCount(4)
                        // Auto-terminate the cluster once all steps complete.
                        .withKeepJobFlowAliveWhenNoSteps(false)
                        .withMasterInstanceType("c4.xlarge")
                        .withSlaveInstanceType("c4.2xlarge"));
        request.setVisibleToAllUsers(true);
        return request;
    }
}
4 ,两个步骤执行后关闭 :
package test01;
import com.amazonaws.services.elasticmapreduce.model.StepConfig;
import lifeCycle01_Tool.Tool01_Cluster;
import lifeCycle01_Tool.Tool02_Step;
import java.util.ArrayList;
import java.util.List;
/**
 * Submits two Spark SQL-export steps to a transient EMR cluster via the
 * project's Tool01_Cluster / Tool02_Step helpers, then prints the cluster id.
 */
public class Test03_MyAPI {

    public static void main(String[] args) {
        // Both steps run the same exporter class and query; only the output folder differs.
        final String mainClass = "lifecycle01_tool.Tool10_sqlToFile";
        final String jarPath = "s3://lifecyclebigdata/dataWareHouse/BALABALA/00jar/03_hive/hiveLifeCycle-1.0-SNAPSHOT.jar";
        final String query = "select * from balabala.outer_year_season";

        final List<StepConfig> steps = new ArrayList<StepConfig>();
        steps.add(Tool02_Step.getSteps(mainClass, jarPath, query, "lifecyclebigdata/dataWareHouse/BALABALA/09_testData/res01"));
        steps.add(Tool02_Step.getSteps(mainClass, jarPath, query, "lifecyclebigdata/dataWareHouse/BALABALA/09_testData/res02"));

        // Starts the cluster, runs both steps, terminates it; returns the cluster id.
        final String clusterId = Tool01_Cluster.startStepStop(steps);
        System.out.println("集群编号" + clusterId);
    }

    // Unused placeholder entry point; kept (public) for interface compatibility.
    public static void main01(String[] args) {
    }
}