A demo of parallel computation based on the Hadoop YARN API.

1. install hadoop-yarn 2.2.0

2. Run the YARN application with the command: yarn jar *.jar [Main-class]

3. Build the YARN Client code that sets up and submits the YARN application, as follows:

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Vector;

import org.apache.commons.cli.ParseException;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.yarn.api.ApplicationConstants;
import org.apache.hadoop.yarn.api.ApplicationConstants.Environment;
import org.apache.hadoop.yarn.api.protocolrecords.GetNewApplicationResponse;
import org.apache.hadoop.yarn.api.records.ApplicationId;
import org.apache.hadoop.yarn.api.records.ApplicationReport;
import org.apache.hadoop.yarn.api.records.ApplicationSubmissionContext;
import org.apache.hadoop.yarn.api.records.ContainerLaunchContext;
import org.apache.hadoop.yarn.api.records.FinalApplicationStatus;
import org.apache.hadoop.yarn.api.records.LocalResource;
import org.apache.hadoop.yarn.api.records.LocalResourceType;
import org.apache.hadoop.yarn.api.records.LocalResourceVisibility;
import org.apache.hadoop.yarn.api.records.NodeReport;
import org.apache.hadoop.yarn.api.records.NodeState;
import org.apache.hadoop.yarn.api.records.Priority;
import org.apache.hadoop.yarn.api.records.Resource;
import org.apache.hadoop.yarn.api.records.YarnApplicationState;
import org.apache.hadoop.yarn.api.records.YarnClusterMetrics;
import org.apache.hadoop.yarn.client.api.YarnClient;
import org.apache.hadoop.yarn.client.api.YarnClientApplication;
import org.apache.hadoop.yarn.conf.YarnConfiguration;
import org.apache.hadoop.yarn.exceptions.YarnException;
import org.apache.hadoop.yarn.util.ConverterUtils;
import org.apache.hadoop.yarn.util.Records;

public class Client {
	private static final Log LOG = LogFactory.getLog(Client.class);

	// YARN/Hadoop configuration shared by the RM client and HDFS access.
	private final Configuration conf;
	// Client handle for talking to the ResourceManager.
	private final YarnClient yarnClient;
	// Application name shown in the ResourceManager UI.
	private String appName = "";
	// Wall-clock start, used to enforce the monitoring timeout below.
	private final long clientStartTime = System.currentTimeMillis();
	// Give up monitoring (and kill the application) after this many ms.
	private final long clientTimeout = 600000;

	public Client() {
		yarnClient = YarnClient.createYarnClient();
		this.conf = new YarnConfiguration();
		yarnClient.init(conf);
	}

	/**
	 * Parses command-line arguments. Currently ignores {@code args} and
	 * just fixes the application name.
	 */
	public void init(String[] args) throws ParseException {
		appName = "myYarnAPP";
	}

	/**
	 * Stages the AM jar to HDFS, builds the AM container launch context,
	 * submits the application to the ResourceManager, and monitors it
	 * until completion.
	 *
	 * @throws YarnException on ResourceManager communication failures
	 * @throws IOException   on HDFS failures while staging the jar
	 */
	public void run() throws YarnException, IOException {
		LOG.info("Running Client");
		yarnClient.start();

		YarnClusterMetrics clusterMetrics = yarnClient.getYarnClusterMetrics();
		LOG.info("Got Cluster metric info from ASM" + ", numNodeManagers="
				+ clusterMetrics.getNumNodeManagers());

		List<NodeReport> clusterNodeReports = yarnClient
				.getNodeReports(NodeState.RUNNING);
		LOG.info("Got Cluster node info from ASM");
		for (NodeReport node : clusterNodeReports) {
			LOG.info("Got node report from ASM for" + ", nodeId_host="
					+ node.getNodeId().getHost() + ", nodeAddress="
					+ node.getHttpAddress() + ", nodeRackName="
					+ node.getRackName() + ", nodeNumContainers="
					+ node.getNumContainers());
		}

		// Get a new application id
		YarnClientApplication app = yarnClient.createApplication();
		GetNewApplicationResponse appResponse = app.getNewApplicationResponse();
		int maxMem = appResponse.getMaximumResourceCapability().getMemory();
		LOG.info("Max mem capabililty of resources in this cluster " + maxMem);

		ApplicationSubmissionContext appContext = app
				.getApplicationSubmissionContext();
		ApplicationId appId = appContext.getApplicationId();
		appContext.setApplicationName(appName);

		ContainerLaunchContext amContainer = Records
				.newRecord(ContainerLaunchContext.class);

		Map<String, LocalResource> localResources = new HashMap<String, LocalResource>();
		LOG.info("Copy App Master jar from local filesystem and add to local environment");
		FileSystem fs = FileSystem.get(conf);
		String appMasterJar = JobConf.findContainingJar(Client.class);
		LOG.info("Application Master's jar is " + appMasterJar);
		// BUGFIX: upload the jar that actually contains this class. The
		// original passed the literal path "*.jar", which copyFromLocalFile
		// does not glob-expand, and left appMasterJar unused.
		Path jarPath = new Path(appMasterJar);
		String pathSuffix = appName + "/" + appId.getId() + "/AppMaster.jar";
		Path dst = new Path(fs.getHomeDirectory(), pathSuffix);
		LOG.info("dst path:====" + dst.toString());
		// The AM receives this HDFS path as its single argument so that it
		// can localize the same jar into its worker containers.
		String jarPathParm = dst.toString();
		fs.copyFromLocalFile(false, true, jarPath, dst);
		FileStatus jarStatus = fs.getFileStatus(dst);
		LocalResource amJarRsrc = Records.newRecord(LocalResource.class);
		amJarRsrc.setType(LocalResourceType.FILE);
		amJarRsrc.setVisibility(LocalResourceVisibility.APPLICATION);
		LOG.info("URL:====" + ConverterUtils.getYarnUrlFromPath(dst));
		amJarRsrc.setResource(ConverterUtils.getYarnUrlFromPath(dst));
		// Timestamp and size must match the staged file or localization fails.
		amJarRsrc.setTimestamp(jarStatus.getModificationTime());
		amJarRsrc.setSize(jarStatus.getLen());
		localResources.put("AppMaster.jar", amJarRsrc);
		amContainer.setLocalResources(localResources);

		// CLASSPATH = existing container classpath + cwd jars + the
		// cluster-configured YARN application classpath.
		StringBuilder classPathEnv = new StringBuilder(
				Environment.CLASSPATH.$()).append(File.pathSeparatorChar)
				.append("./*");
		for (String c : conf.getStrings(
				YarnConfiguration.YARN_APPLICATION_CLASSPATH,
				YarnConfiguration.DEFAULT_YARN_APPLICATION_CLASSPATH)) {
			classPathEnv.append(File.pathSeparatorChar);
			classPathEnv.append(c.trim());
		}
		if (conf.getBoolean(YarnConfiguration.IS_MINI_YARN_CLUSTER, false)) {
			classPathEnv.append(':');
			classPathEnv.append(System.getProperty("java.class.path"));
		}

		LOG.info("Set the environment for the application master");
		Map<String, String> env = new HashMap<String, String>();
		env.put("CLASSPATH", classPathEnv.toString());
		amContainer.setEnvironment(env);

		// AM command line: java <AM main class> <hdfs jar path>, with
		// stdout/stderr redirected into the YARN container log dir.
		Vector<CharSequence> vargs = new Vector<CharSequence>(30);
		LOG.info("Setting up app master command");
		vargs.add(Environment.JAVA_HOME.$() + "/bin/java");
		vargs.add("org.yarn.myapp2.AM");
		vargs.add(jarPathParm);
		vargs.add("1>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR
				+ "/AppMaster.stdout");
		vargs.add("2>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR
				+ "/AppMaster.stderr");
		StringBuilder command = new StringBuilder();
		for (CharSequence str : vargs) {
			command.append(str).append(" ");
		}
		LOG.info("Completed setting up app master command "
				+ command.toString());
		List<String> commands = new ArrayList<String>();
		commands.add(command.toString());
		amContainer.setCommands(commands);
		// AM container resources: 100 MB, default queue, priority 0.
		Resource capability = Records.newRecord(Resource.class);
		capability.setMemory(100);
		appContext.setResource(capability);
		appContext.setAMContainerSpec(amContainer);
		Priority pri = Records.newRecord(Priority.class);
		pri.setPriority(0);
		appContext.setPriority(pri);
		appContext.setQueue("default");

		LOG.info("Submitting application to ASM");
		yarnClient.submitApplication(appContext);

		// Monitor the application until it terminates or times out.
		try {
			monitorApplication(appId);
		} finally {
			// BUGFIX: release the RM connection when monitoring ends.
			yarnClient.stop();
		}
	}

	/**
	 * Polls the ResourceManager once a second for the application report
	 * and returns when the application reaches a terminal state or the
	 * client timeout elapses (in which case the application is killed).
	 */
	private void monitorApplication(ApplicationId appId) throws YarnException,
			IOException {

		while (true) {
			try {
				Thread.sleep(1000);
			} catch (InterruptedException e) {
				LOG.debug("Thread sleep in monitoring loop interrupted");
				// BUGFIX: restore the interrupt flag instead of swallowing it.
				Thread.currentThread().interrupt();
			}

			ApplicationReport report = yarnClient.getApplicationReport(appId);
			LOG.info("Got application report from ASM for" + ", appId="
					+ appId.getId() + ", clientToAMToken="
					+ report.getClientToAMToken() + ", appDiagnostics="
					+ report.getDiagnostics() + ", appMasterHost="
					+ report.getHost() + ", appQueue=" + report.getQueue()
					+ ", appMasterRpcPort=" + report.getRpcPort()
					+ ", appStartTime=" + report.getStartTime()
					+ ", yarnAppState="
					+ report.getYarnApplicationState().toString()
					+ ", distributedFinalState="
					+ report.getFinalApplicationStatus().toString()
					+ ", appTrackingUrl=" + report.getTrackingUrl()
					+ ", appUser=" + report.getUser());

			YarnApplicationState state = report.getYarnApplicationState();
			FinalApplicationStatus dsStatus = report
					.getFinalApplicationStatus();
			if (YarnApplicationState.FINISHED == state) {
				if (FinalApplicationStatus.SUCCEEDED == dsStatus) {
					// BUGFIX: the original force-killed an application that
					// had already finished successfully; just stop monitoring.
					LOG.info("Application has completed successfully. Breaking monitoring loop");
					return;
				} else {
					LOG.info("Application finished unsuccessfully."
							+ " YarnState=" + state.toString()
							+ ", DSFinalStatus=" + dsStatus.toString()
							+ ". Breaking monitoring loop");
					return;
				}
			} else if (YarnApplicationState.KILLED == state
					|| YarnApplicationState.FAILED == state) {
				LOG.info("Application did not finish." + " YarnState="
						+ state.toString() + ", DSFinalStatus="
						+ dsStatus.toString() + ". Breaking monitoring loop");
				return;
			}

			if (System.currentTimeMillis() > (clientStartTime + clientTimeout)) {
				LOG.info("Reached client specified timeout for application. Killing application");
				forceKillApplication(appId);
				return;
			}
		}

	}

	/** Asks the ResourceManager to kill the given application. */
	private void forceKillApplication(ApplicationId appId)
			throws YarnException, IOException {
		yarnClient.killApplication(appId);
	}

	public static void main(String[] args) throws ParseException,
			YarnException, IOException {
		Client client = new Client();
		client.init(args);
		client.run();
	}

}

4. Build the AM (ApplicationMaster), which sets up the container tasks on the different NodeManager nodes.

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Vector;
import java.util.concurrent.atomic.AtomicInteger;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.yarn.api.ApplicationConstants;
import org.apache.hadoop.yarn.api.ApplicationConstants.Environment;
import org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse;
import org.apache.hadoop.yarn.api.protocolrecords.RegisterApplicationMasterResponse;
import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
import org.apache.hadoop.yarn.api.records.Container;
import org.apache.hadoop.yarn.api.records.ContainerId;
import org.apache.hadoop.yarn.api.records.ContainerLaunchContext;
import org.apache.hadoop.yarn.api.records.ContainerStatus;
import org.apache.hadoop.yarn.api.records.FinalApplicationStatus;
import org.apache.hadoop.yarn.api.records.LocalResource;
import org.apache.hadoop.yarn.api.records.LocalResourceType;
import org.apache.hadoop.yarn.api.records.LocalResourceVisibility;
import org.apache.hadoop.yarn.api.records.Priority;
import org.apache.hadoop.yarn.api.records.Resource;
import org.apache.hadoop.yarn.client.api.AMRMClient;
import org.apache.hadoop.yarn.client.api.NMClient;
import org.apache.hadoop.yarn.client.api.AMRMClient.ContainerRequest;
import org.apache.hadoop.yarn.conf.YarnConfiguration;
import org.apache.hadoop.yarn.exceptions.YarnException;
import org.apache.hadoop.yarn.util.ConverterUtils;
import org.apache.hadoop.yarn.util.Records;


public class AM {
	private static final Log LOG = LogFactory.getLog(AM.class);
	private Configuration conf;

	// Typed clients for the ResourceManager and the NodeManagers.
	// BUGFIX: the original used the raw type AMRMClient.
	private AMRMClient<ContainerRequest> amRMClient;
	private NMClient nmClient;

	private ApplicationAttemptId appAttemptID;

	// Total number of worker containers to request.
	private int numTotalContainers = 4;
	// Memory (MB) requested per worker container.
	private int containerMemory = 100;
	// Priority used for all container requests.
	private int requestPriority = 0;

	// Hosts to pin one container request each to, consumed FIFO.
	// BUGFIX: the original used a raw, static List.
	private final List<String> nodeList = new ArrayList<String>();
	// HDFS path of the application jar, passed by the Client as args[0].
	private String jarPathParam;

	/**
	 * AM entry point, launched by YARN inside the AM container.
	 *
	 * @param args args[0] is the HDFS path of the application jar
	 * @throws IOException
	 * @throws YarnException
	 * @throws InterruptedException
	 */
	public static void main(String[] args) throws YarnException, IOException,
			InterruptedException {
		AM appMaster = new AM();
		LOG.info("Initializing ApplicationMaster");
		appMaster.init(args);
		appMaster.run();

	}

	/**
	 * Reads the jar path argument, fixes the target host list, and derives
	 * this attempt's id from the CONTAINER_ID environment variable that
	 * YARN sets in the AM container.
	 */
	public void init(String[] args) {
		jarPathParam = args[0];
		nodeList.add("host1");
		nodeList.add("host2");
		nodeList.add("host3");
		nodeList.add("host4");

		conf = new YarnConfiguration();
		Map<String, String> envs = System.getenv();
		ContainerId containerId = ConverterUtils.toContainerId(envs
				.get(Environment.CONTAINER_ID.name()));
		appAttemptID = containerId.getApplicationAttemptId();

		LOG.info("Application master for app" + ", appId="
				+ appAttemptID.getApplicationId().getId()
				+ ", clustertimestamp="
				+ appAttemptID.getApplicationId().getClusterTimestamp()
				+ ", attemptId=" + appAttemptID.getAttemptId());
	}

	/**
	 * Registers with the RM, requests the worker containers, launches the
	 * NAnalysis task in each allocated container, waits for all of them to
	 * complete, then unregisters.
	 */
	public void run() throws YarnException, IOException, InterruptedException {
		LOG.info("--------Starting ApplicationMaster---------");
		amRMClient = AMRMClient.createAMRMClient();
		amRMClient.init(conf);
		amRMClient.start();

		nmClient = NMClient.createNMClient();
		nmClient.init(conf);
		nmClient.start();

		// NOTE(review): the AM host/port are hard-coded; these should
		// normally reflect where the AM is actually running -- confirm.
		RegisterApplicationMasterResponse response1 = amRMClient
				.registerApplicationMaster("host1", 0, "");
		int maxMem = response1.getMaximumResourceCapability().getMemory();
		LOG.info("Max mem capabililty of resources in this cluster " + maxMem);

		for (int i = 0; i < numTotalContainers; ++i) {
			ContainerRequest containerAsk = setupContainerAskForRM();
			amRMClient.addContainerRequest(containerAsk);
		}

		// The jar resource and CLASSPATH are identical for every container,
		// so build them once up front. (The original rebuilt both inside the
		// allocation loop, including one HDFS getFileStatus RPC per
		// container.)
		FileSystem fs = FileSystem.get(conf);
		Path dst = new Path(jarPathParam);
		FileStatus jarStatus = fs.getFileStatus(dst);
		LocalResource amJarRsrc = Records.newRecord(LocalResource.class);
		amJarRsrc.setType(LocalResourceType.FILE);
		amJarRsrc.setVisibility(LocalResourceVisibility.APPLICATION);
		amJarRsrc.setResource(ConverterUtils.getYarnUrlFromPath(dst));
		amJarRsrc.setTimestamp(jarStatus.getModificationTime());
		amJarRsrc.setSize(jarStatus.getLen());
		Map<String, LocalResource> localResources = new HashMap<String, LocalResource>();
		localResources.put("AppMaster.jar", amJarRsrc);

		StringBuilder classPathEnv = new StringBuilder(
				Environment.CLASSPATH.$()).append(File.pathSeparatorChar)
				.append("./*");
		for (String c : conf.getStrings(
				YarnConfiguration.YARN_APPLICATION_CLASSPATH,
				YarnConfiguration.DEFAULT_YARN_APPLICATION_CLASSPATH)) {
			classPathEnv.append(File.pathSeparatorChar);
			classPathEnv.append(c.trim());
		}
		if (conf.getBoolean(YarnConfiguration.IS_MINI_YARN_CLUSTER, false)) {
			classPathEnv.append(':');
			classPathEnv.append(System.getProperty("java.class.path"));
		}
		LOG.info("Set the environment for the application master");
		Map<String, String> env = new HashMap<String, String>();
		env.put("CLASSPATH", classPathEnv.toString());

		// Poll the RM until all requested containers have been allocated,
		// starting a worker in each container as it arrives.
		int allocatedContainers = 0;
		while (allocatedContainers < numTotalContainers) {
			AllocateResponse response = amRMClient.allocate(0);
			for (Container container : response.getAllocatedContainers()) {
				LOG.info("Container host name:==="
						+ container.getNodeId().getHost());
				++allocatedContainers;
				ContainerLaunchContext ctx = Records
						.newRecord(ContainerLaunchContext.class);
				ctx.setLocalResources(localResources);
				ctx.setEnvironment(env);

				// Worker command: run NAnalysis with this container's host
				// name as its argument, logs redirected to the container
				// log dir.
				Vector<CharSequence> vargs = new Vector<CharSequence>(5);
				vargs.add(Environment.JAVA_HOME.$() + "/bin/java");
				vargs.add("org.yarn.myapp2.NAnalysis");
				vargs.add(container.getNodeId().getHost());
				vargs.add("1>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR
						+ "/stdout");
				vargs.add("2>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR
						+ "/stderr");
				StringBuilder command = new StringBuilder();
				for (CharSequence str : vargs) {
					command.append(str).append(" ");
				}

				List<String> commands = new ArrayList<String>();
				commands.add(command.toString());
				ctx.setCommands(commands);
				System.out
						.println("Launching container " + allocatedContainers);
				nmClient.startContainer(container, ctx);
			}
			Thread.sleep(100);
		}

		// Now wait for containers to complete.
		int completedContainers = 0;
		while (completedContainers < numTotalContainers) {
			// BUGFIX: cast before dividing -- the original's int division
			// always reported 0 progress until the very end.
			AllocateResponse response = amRMClient
					.allocate((float) completedContainers / numTotalContainers);
			for (ContainerStatus status : response
					.getCompletedContainersStatuses()) {
				++completedContainers;
				System.out
						.println("Completed container " + completedContainers);
			}
			Thread.sleep(100);
		}
		System.out.println("application to be Finish.............");
		finish();
	}

	/** Unregisters from the RM with SUCCEEDED and stops both clients. */
	private void finish() throws YarnException, IOException {
		LOG.info("Application completed. Stopping running containers");
		nmClient.stop();
		LOG.info("Application completed. finish to RM");
		FinalApplicationStatus appStatus;
		String appMessage = "finished.....................";
		appStatus = FinalApplicationStatus.SUCCEEDED;
		amRMClient.unregisterApplicationMaster(appStatus, appMessage, null);
		amRMClient.stop();
	}

	/**
	 * Builds one container request, pinned to the next host from nodeList
	 * (relaxLocality=false) if any hosts remain, otherwise unconstrained.
	 */
	private ContainerRequest setupContainerAskForRM() {

		Priority pri = Records.newRecord(Priority.class);
		pri.setPriority(requestPriority);
		Resource capability = Records.newRecord(Resource.class);
		capability.setMemory(containerMemory);

		String[] nodes = null;
		if (!nodeList.isEmpty()) {
			// remove(0) both fetches and consumes the next pinned host.
			nodes = new String[] { nodeList.remove(0) };
		}

		ContainerRequest request = new ContainerRequest(capability, nodes,
				null, pri, false);

		LOG.info("Requested container ask: " + request.toString());
		return request;
	}

}

5. A test main class that runs inside each container on the NodeManager nodes:

import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.Writer;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.yarn.conf.YarnConfiguration;

public class NAnalysis {
	/**
	 * Container task entry point: writes one marker file to HDFS, named
	 * after args[0] (the NodeManager host name passed by the AM), so each
	 * container's run is observable.
	 *
	 * @param args args[0] identifies this container/host
	 * @throws IOException on HDFS failures
	 */
	public static void main(String[] args) throws IOException {
		String containerId = args[0];
		Configuration conf = new YarnConfiguration();
		FileSystem fs = FileSystem.get(conf);
		// NOTE(review): "ip" is a placeholder -- replace with the NameNode
		// address, or drop the scheme/authority to use fs.defaultFS.
		Path createf = new Path("hdfs://ip:9000/yarntest/" + containerId);
		FSDataOutputStream os = fs.create(createf);
		Writer hdfsOut = new OutputStreamWriter(os, "utf-8");
		// BUGFIX: close in finally so the stream is not leaked if write
		// throws. Closing the writer flushes and closes the underlying
		// FSDataOutputStream, so the original's extra os.close() is dropped.
		try {
			hdfsOut.write(containerId + ":===hello word parallel test .............");
		} finally {
			hdfsOut.close();
		}
	}
}

6. You can find the YARN application's run logs at: http://managernode:8088/

Q&A: To run a container on a specific node, use the following API (pass the target hosts and set relaxLocality to false):

ContainerRequest request = new ContainerRequest(capability, nodes,
				null, pri, false);





  • 1
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值