java emr_从Java SDK运行Amazon EMR作业

在尝试使用Java SDK运行Amazon EMR作业时遇到问题,作业无法启动。代码中创建了AWS凭证,初始化了EMR客户端并设置了步骤配置,但未成功执行。解决方案中展示了如何正确设置和监控EMR作业,包括添加步骤、执行Hive脚本,并检查步骤状态直到完成或失败。
摘要由CSDN通过智能技术生成

我试图通过Java SDK运行EMR作业.

但作业根本没有启动。

下面贴出我正在使用的代码。

我也看了官方文档(documentation),但帮助不大。

package com.zedo.aws.emr;

import com.amazonaws.auth.AWSCredentials;

import com.amazonaws.auth.BasicAWSCredentials;

import com.amazonaws.services.elasticmapreduce.AmazonElasticMapReduceClient;

import com.amazonaws.services.elasticmapreduce.model.JobFlowInstancesConfig;

import com.amazonaws.services.elasticmapreduce.model.RunJobFlowRequest;

import com.amazonaws.services.elasticmapreduce.model.RunJobFlowResult;

import com.amazonaws.services.elasticmapreduce.model.StepConfig;

import com.amazonaws.services.elasticmapreduce.util.StepFactory;

/**
 * Minimal example that starts an Amazon EMR job flow through the AWS SDK for Java and
 * submits three steps to it: enable debugging, install Hive, and run a Hive script.
 *
 * <p>The empty strings and the bare {@code "s3://"} URI below are placeholders left by the
 * author; they must be replaced with real credentials, an EC2 key pair name and the S3
 * location of the Hive script before the request can succeed.
 */
public class ExampleEMR {

    /**
     * Builds the step configurations, submits the {@link RunJobFlowRequest} and prints the
     * id of the job flow that was started.
     *
     * @param args command-line arguments; not used
     */
    public static void main(String[] args) {
        // Placeholder access key / secret key.
        AWSCredentials credentials = new BasicAWSCredentials("", "");
        AmazonElasticMapReduceClient emr = new AmazonElasticMapReduceClient(credentials);

        StepFactory stepFactory = new StepFactory();

        StepConfig enableDebugging = new StepConfig()
                .withName("Enable Debugging")
                .withActionOnFailure("TERMINATE_JOB_FLOW")
                .withHadoopJarStep(stepFactory.newEnableDebuggingStep());

        StepConfig installHive = new StepConfig()
                .withName("Install Hive")
                .withActionOnFailure("TERMINATE_JOB_FLOW")
                .withHadoopJarStep(stepFactory.newInstallHiveStep());

        // Placeholder script location.
        StepConfig hiveScript = new StepConfig()
                .withName("Hive Script")
                .withActionOnFailure("TERMINATE_JOB_FLOW")
                .withHadoopJarStep(stepFactory.newRunHiveScriptStep("s3://"));

        JobFlowInstancesConfig instances = new JobFlowInstancesConfig()
                .withEc2KeyName("")
                .withHadoopVersion("0.20")
                .withInstanceCount(5)
                .withKeepJobFlowAliveWhenNoSteps(true)
                .withMasterInstanceType("m1.small")
                .withSlaveInstanceType("m1.small");

        // The Hive script step was previously built but never submitted; it must be part of
        // the request, after the Hive installation step, for the script to run.
        RunJobFlowRequest request = new RunJobFlowRequest()
                .withName("Hive Interactive")
                .withSteps(enableDebugging, installHive, hiveScript)
                .withLogUri("s3://myawsbucket/")
                .withInstances(instances);

        RunJobFlowResult result = emr.runJobFlow(request);

        // Report the id so the launch can be confirmed and the job flow tracked afterwards.
        System.out.println("Started job flow: " + result.getJobFlowId());
    }
}

或者有人能给我提供一些示例链接吗?

解决方法:

以下代码对我有效:

/**
 * Adds one Hive-script step per input folder to an already running EMR job flow and polls
 * each step until it leaves the PENDING/RUNNING states before handling the next folder.
 *
 * <p>Folders whose step completes are marked as processed and removed from
 * {@code listaConcurrente}; folders whose step fails or is cancelled are marked as not
 * processed and kept in {@code listaConcurrente}. Folders that do not exist in the bucket
 * are skipped. If the calling thread is interrupted while waiting, the interrupt flag is
 * restored and the method returns without processing the remaining folders.
 *
 * @param executionDate date passed to {@code getRutasInput} to compute the folders to process
 */
public void runScriptClientes(Calendar executionDate) {
    // Credentials for the AWS client, read from the rb configuration.
    BasicAWSCredentials awsCreds = new BasicAWSCredentials(rb.getString("awsAccessKey"),
            rb.getString("awsSecretKey"));

    // EMR client, bound to the EU_WEST_1 region.
    AmazonElasticMapReduceClient emr = new AmazonElasticMapReduceClient(awsCreds);
    emr.setRegion(Region.getRegion(Regions.EU_WEST_1));

    // Folders to process, keyed by bucket name.
    Map<String, FolderS3> s3DataToProcessInput = getRutasInput(executionDate);

    final long pollIntervalMillis = 5000;
    final String jobFlowId = rb.getString("jobflowID");

    for (Entry<String, FolderS3> bucket_ : s3DataToProcessInput.entrySet()) {
        String nameBucket = bucket_.getKey();
        FolderS3 folderS3 = bucket_.getValue();

        // Only folders that exist in the bucket are submitted.
        if (!folderS3.getExistInBucket()) {
            continue;
        }

        listaConcurrente.add(folderS3);

        StepFactory stepFactory = new StepFactory();
        StepConfig stepHive = new StepConfig()
                // Name of the step to run.
                .withName(rb.getString("nameStepClientesS3") + ":" + nameBucket)
                // Action to take if the step fails.
                .withActionOnFailure(ActionOnFailure.CONTINUE)
                .withHadoopJarStep(
                        stepFactory.newRunHiveScriptStep(rb.getString("scriptClienteS3"),
                                "-d", "s3DataToProcess=s3://" + rb.getString("bucketPropio") + "/"
                                        + rb.getString("ruta_input_c1") + folderS3.getNameKey(),
                                "-d", "s3DataToProcessOut=s3://" + rb.getString("bucketPropioOUT") + "/"
                                        + rb.getString("ruta_output_c1") + folderS3.getOutputFolder(),
                                "-d", "windowTime=tablaparametro"));

        AddJobFlowStepsRequest jobFlow = new AddJobFlowStepsRequest()
                .withJobFlowId(jobFlowId)
                .withSteps(stepHive);

        AddJobFlowStepsResult result = emr.addJobFlowSteps(jobFlow);
        List<String> id = result.getStepIds();

        // Only one step was submitted, so its id is the first (and only) element.
        DescribeStepRequest describe = new DescribeStepRequest().withStepId(id.get(0));
        describe.setClusterId(jobFlowId);
        describe.setRequestCredentials(awsCreds);

        DescribeStepResult res = emr.describeStep(describe);
        StepStatus status = res.getStep().getStatus();
        String stas = status.getState();

        // Keep polling while the step is pending or running.
        while (stas.equals(StepExecutionState.PENDING.name())
                || stas.equals(StepExecutionState.RUNNING.name())) {
            try {
                Thread.sleep(pollIntervalMillis);
            } catch (InterruptedException e) {
                // Restore the flag so callers can observe the interruption, and stop waiting:
                // continuing would make every later sleep throw immediately and spin.
                Thread.currentThread().interrupt();
                log.info("Interrupted while waiting for step; last known state: " + stas);
                return;
            }
            res = emr.describeStep(describe);
            status = res.getStep().getStatus();
            stas = status.getState();
            log.info(stas);
        }

        if (stas.equals(StepExecutionState.COMPLETED.name())) {
            folderS3.setProcessedInput(Boolean.TRUE);
            listaConcurrente.remove(folderS3);
            log.info("Step finalizado ok : " + folderS3);
        } else if (stas.equals(StepExecutionState.FAILED.name())
                || stas.equals(StepExecutionState.CANCELLED.name())) {
            // Remove and re-add so the stored entry carries the updated processed flag.
            listaConcurrente.remove(folderS3);
            folderS3.setProcessedInput(Boolean.FALSE);
            listaConcurrente.add(folderS3);
            log.info("Step Fallo o fue Cancelado : " + folderS3);
        }

        // TODO: read the result data and load it into the database.
    }
}

标签:java,amazon-emr

来源: https://codeday.me/bug/20190625/1285615.html

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值