java emr_在AWS EMR上运行MapReduce的Java示例程序及操作小记

importjava.io.IOException;importjava.util.ArrayList;importjava.util.List;importorg.apache.hadoop.conf.Configuration;importorg.apache.hadoop.conf.Configured;importorg.apache.hadoop.fs.Path;importorg.apache.hadoop.io.LongWritable;importorg.apache.hadoop.io.Text;importorg.apache.hadoop.mapreduce.Job;importorg.apache.hadoop.mapreduce.Mapper;importorg.apache.hadoop.mapreduce.Reducer;importorg.apache.hadoop.mapreduce.lib.input.FileInputFormat;importorg.apache.hadoop.mapreduce.lib.output.FileOutputFormat;importorg.apache.hadoop.record.compiler.generated.ParseException;importorg.apache.hadoop.util.Tool;importorg.apache.hadoop.util.ToolRunner;importcom.amazonaws.auth.AWSCredentials;importcom.amazonaws.auth.BasicAWSCredentials;importcom.amazonaws.services.ec2.model.InstanceType;importcom.amazonaws.services.elasticmapreduce.AmazonElasticMapReduceClient;importcom.amazonaws.services.elasticmapreduce.model.ActionOnFailure;importcom.amazonaws.services.elasticmapreduce.model.Cluster;importcom.amazonaws.services.elasticmapreduce.model.DescribeClusterRequest;importcom.amazonaws.services.elasticmapreduce.model.DescribeClusterResult;importcom.amazonaws.services.elasticmapreduce.model.HadoopJarStepConfig;importcom.amazonaws.services.elasticmapreduce.model.JobFlowInstancesConfig;importcom.amazonaws.services.elasticmapreduce.model.ListStepsRequest;importcom.amazonaws.services.elasticmapreduce.model.ListStepsResult;importcom.amazonaws.services.elasticmapreduce.model.PlacementType;importcom.amazonaws.services.elasticmapreduce.model.RunJobFlowRequest;importcom.amazonaws.services.elasticmapreduce.model.RunJobFlowResult;importcom.amazonaws.services.elasticmapreduce.model.StepConfig;importcom.amazonaws.services.elasticmapreduce.model.StepSummary;importcom.amazonaws.services.elasticmapreduce.model.TerminateJobFlowsRequest;public classAffairClient {private staticAmazonElasticMapReduceClient emr;private static final long SLEEP_TIME = 1000 * 30;private static final String JAR_DIR = 
"s3://bucketname/affair/";private static final String JAR_NAME = "affair.jar";private static final String INPUT_DIR = "s3://bucketname/affair/input/";private static final String OUTPUT_DIR = "s3://bucketname/affair/output/";private static final String LOG_DIR = "s3://bucketname/affair/log/";private static final String JOB_FLOW_NAME = "this job flow runs a mapreduce affair.";private static final String AWS_ACCESS_KEY = "YOUR_AWS_ACCESS_KEY";private static final String AWS_SECRET_KEY = "YOUR_AWS_SECRET_LEY";public static classAffairMapperextends Mapper{public voidmap(LongWritable key, Text value, Context context)throwsIOException, InterruptedException {

context.write(new Text(value), new Text(""));

}

}public static classAffairReducerextends Reducer{public void reduce(Text key, Iterablevalues, Context context)throwsIOException, InterruptedException {

context.write(key,new Text(""));

}

}public static class AffairJob extends Configured implementsTool {public int run(String[] arg0) throwsException {

Configuration conf=getConf();

conf.set("mapred.reduce.tasks", "" + 1);

Job job= new Job(conf, "Affair MR job");

job.setJarByClass(AffairJob.class);

job.setMapperClass(AffairMapper.class);

job.setReducerClass(AffairReducer.class);

job.setMapOutputKeyClass(Text.class);

job.setMapOutputValueClass(Text.class);

job.setOutputKeyClass(Text.class);

job.setOutputValueClass(Text.class);//job.setNumReduceTasks(1);

FileInputFormat.addInputPath(job,newPath(INPUT_DIR));

FileOutputFormat.setOutputPath(job,newPath(OUTPUT_DIR));

job.waitForCompletion(true);return 0;

}public static void main(String[] args) throwsException {int exitCode = ToolRunner.run(newAffairJob(), args);

System.exit(exitCode);

}

}public static void main(String[] args) throwsParseException {//emr jobflow

try{

String mainClass= AffairJob.class.getName();

String stepName= mainClass + " Affair";

runStep(mainClass, JAR_NAME, stepName);

}catch(Exception e) {

e.printStackTrace();

}

}private static voidrunStep(String mainClass, String jarName, String stepName)throwsInterruptedException {

String jarPath= JAR_DIR +JAR_NAME;

HadoopJarStepConfig hadoopJarStep= newHadoopJarStepConfig(jarPath);

hadoopJarStep.setMainClass(mainClass);

hadoopJarStep.setArgs(null);

StepConfig step= newStepConfig().withName(stepName)

.withActionOnFailure(ActionOnFailure.TERMINATE_JOB_FLOW)

.withHadoopJarStep(hadoopJarStep);

String logUri=LOG_DIR;

JobFlowInstancesConfig instances=createInstances();

List steps = new ArrayList();

steps.add(step);

String jobFlowId=CreateJobFlow(JOB_FLOW_NAME, logUri, instances, steps);

terminateJobFlow(jobFlowId);

}private static voidterminateJobFlow(String jobFlowId) {

TerminateJobFlowsRequest request= newTerminateJobFlowsRequest().withJobFlowIds(jobFlowId);

emr.terminateJobFlows(request);

}private staticString CreateJobFlow(String jobFlowName, String logUri,

JobFlowInstancesConfig instances, Liststeps)throwsInterruptedException {

AWSCredentials credentials= newBasicAWSCredentials(AWS_ACCESS_KEY, AWS_SECRET_KEY);

emr= newAmazonElasticMapReduceClient(credentials);//run job flow

RunJobFlowRequest request = newRunJobFlowRequest().withName(jobFlowName)

.withLogUri(logUri)

.withSteps(steps)

.withInstances(instances);

RunJobFlowResult result=emr.runJobFlow(request);//get job flow details

String jobFlowId =result.getJobFlowId();boolean runing = true;while(runing) {

Thread.sleep(SLEEP_TIME);

List jobFlowIdList = new ArrayList();

jobFlowIdList.add(jobFlowId);

System.out.println(getJobFlowStatus(jobFlowIdList));for(String clusterId : jobFlowIdList) {

DescribeClusterRequest describeClusterRequest= newDescribeClusterRequest().withClusterId(clusterId);

DescribeClusterResult describeClusterResult=emr.describeCluster(describeClusterRequest);

Cluster cluster=describeClusterResult.getCluster();if(cluster.getStatus().getState().contains("FAILED") ||cluster.getStatus().getState().contains("COMPLETED") ||cluster.getStatus().getState().contains("TERMINATED") ||cluster.getStatus().getState().contains("SHUTTING_DOWN") ||cluster.getStatus().getState().contains("WAITING"))

runing= false;break;

}

}returnjobFlowId;

}private static String getJobFlowStatus(ListjobFlowIdList) {

String info= newString();for(String clusterId : jobFlowIdList) {

DescribeClusterRequest describeClusterRequest= newDescribeClusterRequest().withClusterId(clusterId);

DescribeClusterResult describeClusterResult=emr.describeCluster(describeClusterRequest);

Cluster cluster=describeClusterResult.getCluster();

info+= cluster.getId() + "\t" + cluster.getStatus().getState() + "\t" + cluster.getName() + "\n";

ListStepsRequest listStepsRequest= newListStepsRequest().withClusterId(clusterId);

ListStepsResult listStepsResult=emr.listSteps(listStepsRequest);for(StepSummary step : listStepsResult.getSteps()) {

info+= "\t" + step.getStatus().getState() + "\t" + step.getName() + "\n";

}

}returninfo;

}private staticJobFlowInstancesConfig createInstances() {

JobFlowInstancesConfig instances= newJobFlowInstancesConfig()

.withHadoopVersion("1.0.3")

.withInstanceCount(5)

.withKeepJobFlowAliveWhenNoSteps(false)

.withMasterInstanceType(InstanceType.M1Large.toString())

.withSlaveInstanceType(InstanceType.M1Large.toString())

.withPlacement(new PlacementType("us-east-1a"));returninstances;

}

}

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值