import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

import com.amazonaws.auth.AWSCredentials;
import com.amazonaws.auth.BasicAWSCredentials;
import com.amazonaws.services.ec2.model.InstanceType;
import com.amazonaws.services.elasticmapreduce.AmazonElasticMapReduceClient;
import com.amazonaws.services.elasticmapreduce.model.ActionOnFailure;
import com.amazonaws.services.elasticmapreduce.model.Cluster;
import com.amazonaws.services.elasticmapreduce.model.DescribeClusterRequest;
import com.amazonaws.services.elasticmapreduce.model.DescribeClusterResult;
import com.amazonaws.services.elasticmapreduce.model.HadoopJarStepConfig;
import com.amazonaws.services.elasticmapreduce.model.JobFlowInstancesConfig;
import com.amazonaws.services.elasticmapreduce.model.ListStepsRequest;
import com.amazonaws.services.elasticmapreduce.model.ListStepsResult;
import com.amazonaws.services.elasticmapreduce.model.PlacementType;
import com.amazonaws.services.elasticmapreduce.model.RunJobFlowRequest;
import com.amazonaws.services.elasticmapreduce.model.RunJobFlowResult;
import com.amazonaws.services.elasticmapreduce.model.StepConfig;
import com.amazonaws.services.elasticmapreduce.model.StepSummary;
import com.amazonaws.services.elasticmapreduce.model.TerminateJobFlowsRequest;

/**
 * Minimal client that runs a MapReduce job ("affair") on Amazon Elastic
 * MapReduce: it defines the Mapper/Reducer/Tool, wraps the job jar in an
 * EMR step, launches a job flow, polls its status, and terminates it.
 */
public class AffairClient {

    private static AmazonElasticMapReduceClient emr;

    // Polling interval while waiting for the job flow to finish.
    private static final long SLEEP_TIME = 1000 * 30;

    // S3 locations for the job jar, input, output, and logs.
    private static final String JAR_DIR = "s3://bucketname/affair/";
    private static final String JAR_NAME = "affair.jar";
    private static final String INPUT_DIR = "s3://bucketname/affair/input/";
    private static final String OUTPUT_DIR = "s3://bucketname/affair/output/";
    private static final String LOG_DIR = "s3://bucketname/affair/log/";
    private static final String JOB_FLOW_NAME = "this job flow runs a mapreduce affair.";

    // Replace with your own credentials.
    private static final String AWS_ACCESS_KEY = "YOUR_AWS_ACCESS_KEY";
    private static final String AWS_SECRET_KEY = "YOUR_AWS_SECRET_KEY";

    // Identity mapper: emits each input line as the key with an empty value,
    // which makes the shuffle sort and group the lines.
    public static class AffairMapper extends Mapper<LongWritable, Text, Text, Text> {

        @Override
        public void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            context.write(new Text(value), new Text(""));
        }
    }

    // Identity reducer: writes each unique key once with an empty value,
    // so the job's output is the deduplicated, sorted input.
    public static class AffairReducer extends Reducer<Text, Text, Text, Text> {

        @Override
        public void reduce(Text key, Iterable<Text> values, Context context)
                throws IOException, InterruptedException {
            context.write(key, new Text(""));
        }
    }

    // Tool implementation that wires up and runs the MapReduce job itself.
    public static class AffairJob extends Configured implements Tool {

        @Override
        public int run(String[] args) throws Exception {
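            // A single reduce task (set just below) means the job produces one
            // output file containing the unique input lines in sorted order.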
            Configuration conf = getConf();
            conf.set("mapred.reduce.tasks", "1");

            Job job = new Job(conf, "Affair MR job");
            job.setJarByClass(AffairJob.class);
            job.setMapperClass(AffairMapper.class);
            job.setReducerClass(AffairReducer.class);
            job.setMapOutputKeyClass(Text.class);
            job.setMapOutputValueClass(Text.class);
            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(Text.class);
            //job.setNumReduceTasks(1);

            FileInputFormat.addInputPath(job, new Path(INPUT_DIR));
            FileOutputFormat.setOutputPath(job, new Path(OUTPUT_DIR));

            // Propagate the job's success or failure to the caller.
            return job.waitForCompletion(true) ? 0 : 1;
        }

        // Entry point used when the jar runs as an EMR step on the cluster.
        public static void main(String[] args) throws Exception {
            int exitCode = ToolRunner.run(new AffairJob(), args);
            System.exit(exitCode);
        }
    }

    // Entry point used on the local machine: submits the job flow to EMR.
    public static void main(String[] args) {
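        // Overall flow: wrap the job in an EMR step, start a job flow that
        // runs it, poll until the cluster settles, then request termination.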
        try {
            String mainClass = AffairJob.class.getName();
            String stepName = mainClass + " Affair";
            runStep(mainClass, JAR_NAME, stepName);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    // Builds a single EMR step for the jar, launches a job flow with it,
    // waits for it to settle, and finally asks EMR to terminate it.
    private static void runStep(String mainClass, String jarName, String stepName)
            throws InterruptedException {
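        // The step points EMR at a jar that is assumed to have been uploaded
        // to JAR_DIR beforehand; EMR fetches it from S3 onto the cluster.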
        String jarPath = JAR_DIR + jarName;

        // The step EMR will execute: run the given main class from the jar.
        HadoopJarStepConfig hadoopJarStep = new HadoopJarStepConfig(jarPath);
        hadoopJarStep.setMainClass(mainClass);
        hadoopJarStep.setArgs(null); // no command-line arguments for the main class

        StepConfig step = new StepConfig().withName(stepName)
                .withActionOnFailure(ActionOnFailure.TERMINATE_JOB_FLOW)
                .withHadoopJarStep(hadoopJarStep);

        String logUri = LOG_DIR;
        JobFlowInstancesConfig instances = createInstances();

        List<StepConfig> steps = new ArrayList<StepConfig>();
        steps.add(step);

        String jobFlowId = createJobFlow(JOB_FLOW_NAME, logUri, instances, steps);
        terminateJobFlow(jobFlowId);
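        // Note: createInstances() sets keepJobFlowAliveWhenNoSteps(false), so
        // EMR normally starts shutting the cluster down on its own once the
        // step finishes; the explicit terminate call above is defensive.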
    }

    // Sends a TerminateJobFlows request for the given job flow id.
    private static void terminateJobFlow(String jobFlowId) {
        TerminateJobFlowsRequest request = new TerminateJobFlowsRequest()
                .withJobFlowIds(jobFlowId);
        emr.terminateJobFlows(request);
    }

    // Creates the EMR job flow, then polls every SLEEP_TIME ms until it
    // reaches a terminal (or waiting) state.
    private static String createJobFlow(String jobFlowName, String logUri,
            JobFlowInstancesConfig instances, List<StepConfig> steps)
            throws InterruptedException {
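        // The access keys are hard-coded constants above for brevity; in real
        // use they would typically come from the environment or a config file.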
        // The client is kept in the static field so the other helpers can reuse it.
        AWSCredentials credentials = new BasicAWSCredentials(AWS_ACCESS_KEY, AWS_SECRET_KEY);
        emr = new AmazonElasticMapReduceClient(credentials);

        // Run the job flow.
        RunJobFlowRequest request = new RunJobFlowRequest()
                .withName(jobFlowName)
                .withLogUri(logUri)
                .withSteps(steps)
                .withInstances(instances);
        RunJobFlowResult result = emr.runJobFlow(request);

        // Poll the cluster until it leaves its transient states.
        String jobFlowId = result.getJobFlowId();
        boolean running = true;
        while (running) {
            Thread.sleep(SLEEP_TIME);
            List<String> jobFlowIdList = new ArrayList<String>();
            jobFlowIdList.add(jobFlowId);
            System.out.println(getJobFlowStatus(jobFlowIdList));

            for (String clusterId : jobFlowIdList) {
                DescribeClusterRequest describeClusterRequest =
                        new DescribeClusterRequest().withClusterId(clusterId);
                DescribeClusterResult describeClusterResult =
                        emr.describeCluster(describeClusterRequest);
                Cluster cluster = describeClusterResult.getCluster();
                String state = cluster.getStatus().getState();
                if (state.contains("FAILED") || state.contains("COMPLETED")
                        || state.contains("TERMINATED") || state.contains("SHUTTING_DOWN")
                        || state.contains("WAITING")) {
                    running = false;
                    break;
                }
            }
        }
        return jobFlowId;
    }

    // Builds a human-readable status report for each cluster id:
    // one line for the cluster itself, then one line per step.
    private static String getJobFlowStatus(List<String> jobFlowIdList) {
        StringBuilder info = new StringBuilder();
        for (String clusterId : jobFlowIdList) {
            DescribeClusterRequest describeClusterRequest =
                    new DescribeClusterRequest().withClusterId(clusterId);
            DescribeClusterResult describeClusterResult =
                    emr.describeCluster(describeClusterRequest);
            Cluster cluster = describeClusterResult.getCluster();
            info.append(cluster.getId()).append("\t")
                .append(cluster.getStatus().getState()).append("\t")
                .append(cluster.getName()).append("\n");

            ListStepsRequest listStepsRequest = new ListStepsRequest().withClusterId(clusterId);
            ListStepsResult listStepsResult = emr.listSteps(listStepsRequest);
            for (StepSummary step : listStepsResult.getSteps()) {
                info.append("\t").append(step.getStatus().getState())
                    .append("\t").append(step.getName()).append("\n");
            }
        }
        return info.toString();
    }

    // Cluster shape: Hadoop 1.0.3, five m1.large instances (one master,
    // four slaves) in us-east-1a, terminating once all steps finish.
    private static JobFlowInstancesConfig createInstances() {
        JobFlowInstancesConfig instances = new JobFlowInstancesConfig()
                .withHadoopVersion("1.0.3")
                .withInstanceCount(5)
                .withKeepJobFlowAliveWhenNoSteps(false)
                .withMasterInstanceType(InstanceType.M1Large.toString())
                .withSlaveInstanceType(InstanceType.M1Large.toString())
                .withPlacement(new PlacementType("us-east-1a"));
        return instances;
    }
}
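// Usage sketch (an assumption, not part of the original source): the job jar
// must already exist at JAR_DIR + JAR_NAME before running this client, and
// the input files must sit under INPUT_DIR. For example, the jar could be
// built and uploaded with something like:
//
//   jar cvf affair.jar -C classes/ .
//   aws s3 cp affair.jar s3://bucketname/affair/affair.jar
//
// The exact build and upload tooling is up to you; any method that puts the
// jar at that S3 path works.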