1. Install Hadoop YARN 2.2.0.
2. Run the YARN application with the command: yarn jar *.jar [Main-class]
3. Build the YARN client code that sets up and submits the YARN application, as follows:
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Vector;
import org.apache.commons.cli.ParseException;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.yarn.api.ApplicationConstants;
import org.apache.hadoop.yarn.api.ApplicationConstants.Environment;
import org.apache.hadoop.yarn.api.protocolrecords.GetNewApplicationResponse;
import org.apache.hadoop.yarn.api.records.ApplicationId;
import org.apache.hadoop.yarn.api.records.ApplicationReport;
import org.apache.hadoop.yarn.api.records.ApplicationSubmissionContext;
import org.apache.hadoop.yarn.api.records.ContainerLaunchContext;
import org.apache.hadoop.yarn.api.records.FinalApplicationStatus;
import org.apache.hadoop.yarn.api.records.LocalResource;
import org.apache.hadoop.yarn.api.records.LocalResourceType;
import org.apache.hadoop.yarn.api.records.LocalResourceVisibility;
import org.apache.hadoop.yarn.api.records.NodeReport;
import org.apache.hadoop.yarn.api.records.NodeState;
import org.apache.hadoop.yarn.api.records.Priority;
import org.apache.hadoop.yarn.api.records.Resource;
import org.apache.hadoop.yarn.api.records.YarnApplicationState;
import org.apache.hadoop.yarn.api.records.YarnClusterMetrics;
import org.apache.hadoop.yarn.client.api.YarnClient;
import org.apache.hadoop.yarn.client.api.YarnClientApplication;
import org.apache.hadoop.yarn.conf.YarnConfiguration;
import org.apache.hadoop.yarn.exceptions.YarnException;
import org.apache.hadoop.yarn.util.ConverterUtils;
import org.apache.hadoop.yarn.util.Records;
/**
 * YARN client: uploads the application jar to HDFS, builds the
 * ApplicationMaster (AM) launch context, submits the application to the
 * ResourceManager, and then monitors it until completion or timeout.
 */
public class Client {
    private static final Log LOG = LogFactory.getLog(Client.class);

    /** YARN configuration; also used for HDFS access when staging the jar. */
    private Configuration conf;
    /** Client-side handle to the ResourceManager. */
    private YarnClient yarnClient;
    /** Application name shown in the RM UI; set in {@link #init(String[])}. */
    private String appName = "";
    private final long clientStartTime = System.currentTimeMillis();
    /** Monitoring timeout in milliseconds; the app is killed after this. */
    private long clientTimeout = 600000;

    public Client() {
        yarnClient = YarnClient.createYarnClient();
        this.conf = new YarnConfiguration();
        yarnClient.init(conf);
    }

    /**
     * Initializes client state from command-line args.
     * (Args are currently unused; the app name is hard-coded.)
     */
    public void init(String[] args) throws ParseException {
        appName = "myYarnAPP";
    }

    /**
     * Stages the AM jar to HDFS, builds the AM container launch context
     * (local resources, CLASSPATH, java command), submits the application,
     * and blocks monitoring it.
     *
     * @throws YarnException on RM communication errors
     * @throws IOException   on HDFS errors while staging the jar
     */
    public void run() throws YarnException, IOException {
        LOG.info("Running Client");
        yarnClient.start();

        // Log cluster metrics and running nodes for diagnostics.
        YarnClusterMetrics clusterMetrics = yarnClient.getYarnClusterMetrics();
        LOG.info("Got Cluster metric info from ASM" + ", numNodeManagers="
                + clusterMetrics.getNumNodeManagers());
        List<NodeReport> clusterNodeReports = yarnClient
                .getNodeReports(NodeState.RUNNING);
        LOG.info("Got Cluster node info from ASM");
        for (NodeReport node : clusterNodeReports) {
            LOG.info("Got node report from ASM for" + ", nodeId_host="
                    + node.getNodeId().getHost() + ", nodeAddress="
                    + node.getHttpAddress() + ", nodeRackName="
                    + node.getRackName() + ", nodeNumContainers="
                    + node.getNumContainers());
        }

        // Get a new application id from the RM.
        YarnClientApplication app = yarnClient.createApplication();
        GetNewApplicationResponse appResponse = app.getNewApplicationResponse();
        int maxMem = appResponse.getMaximumResourceCapability().getMemory();
        LOG.info("Max mem capabililty of resources in this cluster " + maxMem);

        ApplicationSubmissionContext appContext = app
                .getApplicationSubmissionContext();
        ApplicationId appId = appContext.getApplicationId();
        appContext.setApplicationName(appName);

        ContainerLaunchContext amContainer = Records
                .newRecord(ContainerLaunchContext.class);
        Map<String, LocalResource> localResources = new HashMap<String, LocalResource>();
        LOG.info("Copy App Master jar from local filesystem and add to local environment");
        FileSystem fs = FileSystem.get(conf);
        String appMasterJar = JobConf.findContainingJar(Client.class);
        LOG.info("Application Master's jar is " + appMasterJar);

        // NOTE(review): a literal "*.jar" Path is unusual — it appears to rely
        // on the local jar being the only match in the working directory.
        // Confirm this resolves to exactly one file; appMasterJar (found above)
        // would be a more robust source path.
        Path jarPath = new Path("*.jar");
        String pathSuffix = appName + "/" + appId.getId() + "/AppMaster.jar";
        Path dst = new Path(fs.getHomeDirectory(), pathSuffix);
        LOG.info("dst path:===="+dst.toString());
        // Passed to the AM as its first argument so containers can localize
        // the same jar.
        String jarPathParam = dst.toString();
        fs.copyFromLocalFile(false, true, jarPath, dst);

        // Register the staged jar as a LocalResource so YARN downloads it
        // into the AM's working directory as "AppMaster.jar".
        FileStatus jarStatus = fs.getFileStatus(dst);
        LocalResource amJarRsrc = Records.newRecord(LocalResource.class);
        amJarRsrc.setType(LocalResourceType.FILE);
        amJarRsrc.setVisibility(LocalResourceVisibility.APPLICATION);
        LOG.info("URL:===="+ConverterUtils.getYarnUrlFromPath(dst));
        amJarRsrc.setResource(ConverterUtils.getYarnUrlFromPath(dst));
        // Timestamp and size must match the HDFS copy exactly, or
        // localization on the NodeManager will fail.
        amJarRsrc.setTimestamp(jarStatus.getModificationTime());
        amJarRsrc.setSize(jarStatus.getLen());
        localResources.put("AppMaster.jar", amJarRsrc);
        amContainer.setLocalResources(localResources);

        // Build the AM's CLASSPATH from the cluster-configured YARN classpath.
        StringBuilder classPathEnv = new StringBuilder(
                Environment.CLASSPATH.$()).append(File.pathSeparatorChar)
                .append("./*");
        for (String c : conf.getStrings(
                YarnConfiguration.YARN_APPLICATION_CLASSPATH,
                YarnConfiguration.DEFAULT_YARN_APPLICATION_CLASSPATH)) {
            classPathEnv.append(File.pathSeparatorChar);
            classPathEnv.append(c.trim());
        }
        if (conf.getBoolean(YarnConfiguration.IS_MINI_YARN_CLUSTER, false)) {
            // MiniYARNCluster (tests) needs the client JVM's own classpath.
            classPathEnv.append(':');
            classPathEnv.append(System.getProperty("java.class.path"));
        }
        LOG.info("Set the environment for the application master");
        Map<String, String> env = new HashMap<String, String>();
        env.put("CLASSPATH", classPathEnv.toString());
        amContainer.setEnvironment(env);

        // Assemble the AM launch command:
        //   $JAVA_HOME/bin/java org.yarn.myapp2.AM <hdfsJarPath> 1>... 2>...
        Vector<CharSequence> vargs = new Vector<CharSequence>(30);
        LOG.info("Setting up app master command");
        vargs.add(Environment.JAVA_HOME.$() + "/bin/java");
        vargs.add("org.yarn.myapp2.AM");
        vargs.add(jarPathParam);
        vargs.add("1>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR
                + "/AppMaster.stdout");
        vargs.add("2>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR
                + "/AppMaster.stderr");
        StringBuilder command = new StringBuilder();
        for (CharSequence str : vargs) {
            command.append(str).append(" ");
        }
        LOG.info("Completed setting up app master command "
                + command.toString());
        List<String> commands = new ArrayList<String>();
        commands.add(command.toString());
        amContainer.setCommands(commands);

        // AM resource ask: 100 MB, priority 0, default queue.
        Resource capability = Records.newRecord(Resource.class);
        capability.setMemory(100);
        appContext.setResource(capability);
        appContext.setAMContainerSpec(amContainer);
        Priority pri = Records.newRecord(Priority.class);
        pri.setPriority(0);
        appContext.setPriority(pri);
        appContext.setQueue("default");

        LOG.info("Submitting application to ASM");
        yarnClient.submitApplication(appContext);

        // Monitor the application until it terminates or times out.
        monitorApplication(appId);
    }

    /**
     * Polls the RM once per second for the application report, logging state,
     * until the application finishes, fails, is killed, or the client timeout
     * elapses (in which case the application is force-killed).
     */
    private void monitorApplication(ApplicationId appId) throws YarnException,
            IOException {
        while (true) {
            try {
                Thread.sleep(1000);
            } catch (InterruptedException e) {
                // Restore the interrupt flag so callers can observe it
                // (the original swallowed the interruption entirely).
                Thread.currentThread().interrupt();
                LOG.debug("Thread sleep in monitoring loop interrupted");
            }
            ApplicationReport report = yarnClient.getApplicationReport(appId);
            LOG.info("Got application report from ASM for" + ", appId="
                    + appId.getId() + ", clientToAMToken="
                    + report.getClientToAMToken() + ", appDiagnostics="
                    + report.getDiagnostics() + ", appMasterHost="
                    + report.getHost() + ", appQueue=" + report.getQueue()
                    + ", appMasterRpcPort=" + report.getRpcPort()
                    + ", appStartTime=" + report.getStartTime()
                    + ", yarnAppState="
                    + report.getYarnApplicationState().toString()
                    + ", distributedFinalState="
                    + report.getFinalApplicationStatus().toString()
                    + ", appTrackingUrl=" + report.getTrackingUrl()
                    + ", appUser=" + report.getUser());
            YarnApplicationState state = report.getYarnApplicationState();
            FinalApplicationStatus dsStatus = report
                    .getFinalApplicationStatus();
            if (YarnApplicationState.FINISHED == state) {
                if (FinalApplicationStatus.SUCCEEDED == dsStatus) {
                    // Bug fix: the original force-killed the application here
                    // even though it had already completed successfully.
                    LOG.info("Application has completed successfully. Breaking monitoring loop");
                    return;
                } else {
                    LOG.info("Application did finished unsuccessfully."
                            + " YarnState=" + state.toString()
                            + ", DSFinalStatus=" + dsStatus.toString()
                            + ". Breaking monitoring loop");
                    return;
                }
            } else if (YarnApplicationState.KILLED == state
                    || YarnApplicationState.FAILED == state) {
                LOG.info("Application did not finish." + " YarnState="
                        + state.toString() + ", DSFinalStatus="
                        + dsStatus.toString() + ". Breaking monitoring loop");
                return;
            }
            // Kill only when the app is still running past the deadline.
            if (System.currentTimeMillis() > (clientStartTime + clientTimeout)) {
                LOG.info("Reached client specified timeout for application. Killing application");
                forceKillApplication(appId);
                return;
            }
        }
    }

    /** Asks the RM to kill the application (used on client timeout). */
    private void forceKillApplication(ApplicationId appId)
            throws YarnException, IOException {
        yarnClient.killApplication(appId);
    }

    public static void main(String[] args) throws ParseException,
            YarnException, IOException {
        Client client = new Client();
        client.init(args);
        client.run();
    }
}
2. Build the AM (ApplicationMaster), which sets up the container tasks on different NodeManager nodes:
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Vector;
import java.util.concurrent.atomic.AtomicInteger;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.yarn.api.ApplicationConstants;
import org.apache.hadoop.yarn.api.ApplicationConstants.Environment;
import org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse;
import org.apache.hadoop.yarn.api.protocolrecords.RegisterApplicationMasterResponse;
import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
import org.apache.hadoop.yarn.api.records.Container;
import org.apache.hadoop.yarn.api.records.ContainerId;
import org.apache.hadoop.yarn.api.records.ContainerLaunchContext;
import org.apache.hadoop.yarn.api.records.ContainerStatus;
import org.apache.hadoop.yarn.api.records.FinalApplicationStatus;
import org.apache.hadoop.yarn.api.records.LocalResource;
import org.apache.hadoop.yarn.api.records.LocalResourceType;
import org.apache.hadoop.yarn.api.records.LocalResourceVisibility;
import org.apache.hadoop.yarn.api.records.Priority;
import org.apache.hadoop.yarn.api.records.Resource;
import org.apache.hadoop.yarn.client.api.AMRMClient;
import org.apache.hadoop.yarn.client.api.NMClient;
import org.apache.hadoop.yarn.client.api.AMRMClient.ContainerRequest;
import org.apache.hadoop.yarn.conf.YarnConfiguration;
import org.apache.hadoop.yarn.exceptions.YarnException;
import org.apache.hadoop.yarn.util.ConverterUtils;
import org.apache.hadoop.yarn.util.Records;
public class AM {
private static final Log LOG = LogFactory.getLog(AM.class);
private Configuration conf;
private AMRMClient amRMClient;
private NMClient nmClient;
private ApplicationAttemptId appAttemptID;
private AtomicInteger numCompletedContainers = new AtomicInteger();
private AtomicInteger numAllocatedContainers = new AtomicInteger();
private AtomicInteger numFailedContainers = new AtomicInteger();
private AtomicInteger numRequestedContainers = new AtomicInteger();
private int numTotalContainers = 4;
private int containerMemory = 100;
private int requestPriority = 0;
private volatile boolean done;
private List<Thread> launchThreads = new ArrayList<Thread>();
private volatile static List nodeList = new ArrayList();
private String jarPathParam;
/**
* @param args
* @throws IOException
* @throws YarnException
* @throws InterruptedException
*/
public static void main(String[] args) throws YarnException, IOException,
InterruptedException {
AM appMaster = new AM();
LOG.info("Initializing ApplicationMaster");
appMaster.init(args);
appMaster.run();
}
public void init(String[] args) {
jarPathParam=args[0];
nodeList.add("host1");
nodeList.add("host2");
nodeList.add("host3");
nodeList.add("host4");
conf = new YarnConfiguration();
Map<String, String> envs = System.getenv();
ContainerId containerId = ConverterUtils.toContainerId(envs
.get(Environment.CONTAINER_ID.name()));
appAttemptID = containerId.getApplicationAttemptId();
LOG.info("Application master for app" + ", appId="
+ appAttemptID.getApplicationId().getId()
+ ", clustertimestamp="
+ appAttemptID.getApplicationId().getClusterTimestamp()
+ ", attemptId=" + appAttemptID.getAttemptId());
}
public void run() throws YarnException, IOException, InterruptedException {
LOG.info("--------Starting ApplicationMaster---------");
amRMClient = AMRMClient.createAMRMClient();
amRMClient.init(conf);
amRMClient.start();
nmClient = NMClient.createNMClient();
nmClient.init(conf);
nmClient.start();
RegisterApplicationMasterResponse response1 = amRMClient
.registerApplicationMaster("host1", 0, "");
int maxMem = response1.getMaximumResourceCapability().getMemory();
LOG.info("Max mem capabililty of resources in this cluster " + maxMem);
for (int i = 0; i < numTotalContainers; ++i) {
ContainerRequest containerAsk = setupContainerAskForRM();
amRMClient.addContainerRequest(containerAsk);
}
//allocated the application
int allocatedContainers = 0;
while (allocatedContainers < numTotalContainers) {
AllocateResponse response = amRMClient.allocate(0);
for (Container container : response.getAllocatedContainers()) {
LOG.info("Container host name:==="
+ container.getNodeId().getHost());
++allocatedContainers;
ContainerLaunchContext ctx = Records
.newRecord(ContainerLaunchContext.class);
Map<String, LocalResource> localResources = new HashMap<String, LocalResource>();
FileSystem fs = FileSystem.get(conf);
Path dst = new Path(jarPathParam);
FileStatus jarStatus = fs.getFileStatus(dst);
LocalResource amJarRsrc = Records.newRecord(LocalResource.class);
amJarRsrc.setType(LocalResourceType.FILE);
amJarRsrc.setVisibility(LocalResourceVisibility.APPLICATION);
amJarRsrc.setResource(ConverterUtils.getYarnUrlFromPath(dst));
amJarRsrc.setTimestamp(jarStatus.getModificationTime());
amJarRsrc.setSize(jarStatus.getLen());
localResources.put("AppMaster.jar", amJarRsrc);
ctx.setLocalResources(localResources);
StringBuilder classPathEnv = new StringBuilder(
Environment.CLASSPATH.$()).append(File.pathSeparatorChar)
.append("./*");
for (String c : conf.getStrings(
YarnConfiguration.YARN_APPLICATION_CLASSPATH,
YarnConfiguration.DEFAULT_YARN_APPLICATION_CLASSPATH)) {
classPathEnv.append(File.pathSeparatorChar);
classPathEnv.append(c.trim());
}
if (conf.getBoolean(YarnConfiguration.IS_MINI_YARN_CLUSTER, false)) {
classPathEnv.append(':');
classPathEnv.append(System.getProperty("java.class.path"));
}
LOG.info("Set the environment for the application master");
Map<String, String> env = new HashMap<String, String>();
env.put("CLASSPATH", classPathEnv.toString());
ctx.setEnvironment(env);
Vector<CharSequence> vargs = new Vector<CharSequence>(5);
vargs.add(Environment.JAVA_HOME.$() + "/bin/java");
vargs.add("org.yarn.myapp2.NAnalysis");
vargs.add(container.getNodeId().getHost());
vargs.add("1>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR
+ "/stdout");
vargs.add("2>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR
+ "/stderr");
StringBuilder command = new StringBuilder();
for (CharSequence str : vargs) {
command.append(str).append(" ");
}
List<String> commands = new ArrayList<String>();
commands.add(command.toString());
ctx.setCommands(commands);
System.out
.println("Launching container " + allocatedContainers);
nmClient.startContainer(container, ctx);
}
Thread.sleep(100);
}
// Now wait for containers to complete
int completedContainers = 0;
while (completedContainers < numTotalContainers) {
AllocateResponse response = amRMClient.allocate(completedContainers
/ numTotalContainers);
for (ContainerStatus status : response
.getCompletedContainersStatuses()) {
++completedContainers;
System.out
.println("Completed container " + completedContainers);
}
Thread.sleep(100);
}
System.out.println("application to be Finish.............");
finish();
}
private void finish() throws YarnException, IOException {
LOG.info("Application completed. Stopping running containers");
nmClient.stop();
LOG.info("Application completed. finish to RM");
FinalApplicationStatus appStatus;
String appMessage = "finished.....................";
appStatus = FinalApplicationStatus.SUCCEEDED;
amRMClient.unregisterApplicationMaster(appStatus, appMessage, null);
amRMClient.stop();
}
private ContainerRequest setupContainerAskForRM() {
Priority pri = Records.newRecord(Priority.class);
pri.setPriority(requestPriority);
Resource capability = Records.newRecord(Resource.class);
capability.setMemory(containerMemory);
String[] nodes = null;
if (!nodeList.isEmpty()) {
nodes = new String[1];
nodes[0] = (String) nodeList.get(0);
nodeList.remove(0);
}
ContainerRequest request = new ContainerRequest(capability, nodes,
null, pri, false);
LOG.info("Requested container ask: " + request.toString());
return request;
}
}
3. A test main class that runs inside each container on the NodeManager nodes:
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.Writer;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.yarn.conf.YarnConfiguration;
/**
 * Container task: writes a small marker file to HDFS named after the host
 * (passed as args[0]) to prove each container ran.
 */
public class NAnalysis {
    /**
     * @param args args[0] is the identifier (the container's host name,
     *             supplied by the AM) used as the HDFS file name
     * @throws IOException on HDFS errors
     */
    public static void main(String[] args) throws IOException {
        String containerId = args[0];
        Configuration conf = new YarnConfiguration();
        FileSystem fs = FileSystem.get(conf);
        // NOTE(review): NameNode address is hard-coded ("ip:9000") —
        // parameterize or rely on fs.defaultFS for real deployments.
        Path createf = new Path("hdfs://ip:9000/yarntest/" + containerId);
        // try-with-resources closes the writer (which flushes and closes the
        // underlying stream) even if write() throws — the original leaked
        // both handles on failure.
        try (FSDataOutputStream os = fs.create(createf);
                Writer hdfsOut = new OutputStreamWriter(os, "utf-8")) {
            hdfsOut.write(containerId + ":===hello word parallel test .............");
        }
    }
}
4. You can find the YARN application's running logs at: http://managernode:8088/
Q&A: to specify the node on which a container should run, use the following API:
ContainerRequest request = new ContainerRequest(capability, nodes,
null, pri, false);