思考
我们知道在大数据领域,yarn作为资源调度框架,可以运行MR,SPARK,Flink,那它是不是 也可以运行其它任务,YARN作为资源调度框架和任务的具体的类型没有关系,只管任务的执行,那只要用API,是不是也可以自己尝试往Yarn上提交任务
流程图
- client向resourcemanager,申请一个应用
- client得到一个container,启动appmaster
- appmaster向resourcemanager报告状态,比如register
- appmaster请求container资源
- appmaser得到container,通知nodemanager启动container
- container报告执行状态到appmaster
- 报告状态给客户端
- appmaster向resourcemanager报告状态,unregister应用
需求
执行一个shell文件
#!/bin/bash
echo "hello world" > /opt/soft/new.log
实际代码
- 客户端代码
import java.io.File;
import java.io.IOException;
import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.yarn.api.ApplicationConstants;
import org.apache.hadoop.yarn.api.ApplicationConstants.Environment;
import org.apache.hadoop.yarn.api.records.ApplicationId;
import org.apache.hadoop.yarn.api.records.ApplicationReport;
import org.apache.hadoop.yarn.api.records.ApplicationSubmissionContext;
import org.apache.hadoop.yarn.api.records.ContainerLaunchContext;
import org.apache.hadoop.yarn.api.records.LocalResource;
import org.apache.hadoop.yarn.api.records.LocalResourceType;
import org.apache.hadoop.yarn.api.records.LocalResourceVisibility;
import org.apache.hadoop.yarn.api.records.Resource;
import org.apache.hadoop.yarn.api.records.YarnApplicationState;
import org.apache.hadoop.yarn.client.api.YarnClient;
import org.apache.hadoop.yarn.client.api.YarnClientApplication;
import org.apache.hadoop.yarn.conf.YarnConfiguration;
import org.apache.hadoop.yarn.util.Apps;
import org.apache.hadoop.yarn.util.ConverterUtils;
import org.apache.hadoop.yarn.util.Records;
public class Client {
Configuration conf = new YarnConfiguration();
public void run(String[] args) throws Exception {
final String command = args[0];
final int n = Integer.valueOf(args[1]);
final Path jarPath = new Path(args[2]);
conf.set("fs.hdfs.impl", "org.apache.hadoop.hdfs.DistributedFileSystem");
// Create yarnClient
YarnConfiguration conf = new YarnConfiguration();
YarnClient yarnClient = YarnClient.createYarnClient();
yarnClient.init(conf);
yarnClient.start();
// Create application via yarnClient
YarnClientApplication app = yarnClient.createApplication();
// Set up the container launch context for the application master
ContainerLaunchContext amContainer =
Records.newRecord(ContainerLaunchContext.class);
//设置appmaster的启动命令
amContainer.setCommands(
Collections.singletonList(
"$JAVA_HOME/bin/java" +
" -Xmx256M" +
" com.xiaomi.simple.ApplicationMaster" +
" " + command +
" " + String.valueOf(n) +
" 1>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/stdout" +
" 2>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/stderr"
)
);
// Setup jar for ApplicationMaster
LocalResource appMasterJar = Records.newRecord(LocalResource.class);
setupAppMasterJar(jarPath, appMasterJar);
amContainer.setLocalResources(
Collections.singletonMap("simpleapp.jar", appMasterJar));
// Setup CLASSPATH for ApplicationMaster
Map<String, String> appMasterEnv = new HashMap<String, String>();
setupAppMasterEnv(appMasterEnv);
amContainer.setEnvironment(appMasterEnv);
// Set up resource type requirements for ApplicationMaster
Resource capability = Records.newRecord(Resource.class);
capability.setMemory(256);
capability.setVirtualCores(1);
// Finally, set-up ApplicationSubmissionContext for the application
ApplicationSubmissionContext appContext =
app.getApplicationSubmissionContext();
appContext.setApplicationName("simple-yarn-app"); // application name
appContext.setAMContainerSpec(amContainer);
appContext.setResource(capability);
appContext.setQueue("default"); // queue
// Submit application
ApplicationId appId = appContext.getApplicationId();
System.out.println("Submitting application " + appId);
yarnClient.submitApplication(appContext);
ApplicationReport appReport = yarnClient.getApplicationReport(appId);
YarnApplicationState appState = appReport.getYarnApplicationState();
while (appState != YarnApplicationState.FINISHED &&
appState != YarnApplicationState.KILLED &&
appState != YarnApplicationState.FAILED) {
Thread.sleep(100);
appReport = yarnClient.getApplicationReport(appId);
appState = appReport.getYarnApplicationState();
}
System.out.println(
"Application " + appId + " finished with" +
" state " + appState +
" at " + appReport.getFinishTime());
}
private void setupAppMasterJar(Path jarPath, LocalResource appMasterJar) throws IOException {
FileStatus jarStat = FileSystem.get(conf).getFileStatus(jarPath);
appMasterJar.setResource(ConverterUtils.getYarnUrlFromPath(jarPath));
appMasterJar.setSize(jarStat.getLen());
appMasterJar.setTimestamp(jarStat.getModificationTime());
appMasterJar.setType(LocalResourceType.FILE);
appMasterJar.setVisibility(LocalResourceVisibility.PUBLIC);
}
private void setupAppMasterEnv(Map<String, String> appMasterEnv) {
for (String c : conf.getStrings(
YarnConfiguration.YARN_APPLICATION_CLASSPATH,
YarnConfiguration.DEFAULT_YARN_APPLICATION_CLASSPATH)) {
Apps.addToEnvironment(appMasterEnv, Environment.CLASSPATH.name(),
c.trim());
}
Apps.addToEnvironment(appMasterEnv,
Environment.CLASSPATH.name(),
Environment.PWD.$() + File.separator + "*");
}
public static void main(String[] args) throws Exception {
Client c = new Client();
c.run(args);
}
}
- appmaster代码,打印出启动appmaster的脚本内容,方便理解
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.yarn.api.ApplicationConstants;
import org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse;
import org.apache.hadoop.yarn.api.records.*;
import org.apache.hadoop.yarn.client.api.AMRMClient;
import org.apache.hadoop.yarn.client.api.AMRMClient.ContainerRequest;
import org.apache.hadoop.yarn.client.api.NMClient;
import org.apache.hadoop.yarn.conf.YarnConfiguration;
import org.apache.hadoop.yarn.util.Records;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.util.Collections;
/**
*/
public class ApplicationMaster {
public static void main(String[] args) throws Exception {
File file = new File(System.getProperty("user.dir"));
if (file.isDirectory()) {
File[] files = file.listFiles();
readFile(files);
}
final String command = args[0];
final int n = Integer.valueOf(args[1]);
// Initialize clients to ResourceManager and NodeManagers
Configuration conf = new YarnConfiguration();
AMRMClient<ContainerRequest> rmClient = AMRMClient.createAMRMClient();
rmClient.init(conf);
rmClient.start();
NMClient nmClient = NMClient.createNMClient();
nmClient.init(conf);
nmClient.start();
// Register with ResourceManager
System.out.println("registerApplicationMaster 0");
rmClient.registerApplicationMaster("", 0, "");
System.out.println("registerApplicationMaster 1");
// Priority for worker containers - priorities are intra-application
Priority priority = Records.newRecord(Priority.class);
priority.setPriority(0);
// Resource requirements for worker containers
Resource capability = Records.newRecord(Resource.class);
capability.setMemory(128);
capability.setVirtualCores(1);
// Make container requests to ResourceManager
for (int i = 0; i < n; ++i) {
ContainerRequest containerAsk = new ContainerRequest(capability, null, null, priority);
System.out.println("Making res-req " + i);
rmClient.addContainerRequest(containerAsk);
}
// Obtain allocated containers, launch and check for responses
int responseId = 0;
int completedContainers = 0;
while (completedContainers < n) {
AllocateResponse response = rmClient.allocate(responseId++);
for (Container container : response.getAllocatedContainers()) {
// Launch container by create ContainerLaunchContext
ContainerLaunchContext ctx =
Records.newRecord(ContainerLaunchContext.class);
ctx.setCommands(
Collections.singletonList(
command +
" 1>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/stdout" +
" 2>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/stderr"
));
System.out.println("Launching container " + container.getId());
nmClient.startContainer(container, ctx);
}
for (ContainerStatus status : response.getCompletedContainersStatuses()) {
++completedContainers;
System.out.println("Completed container " + status.getContainerId());
}
Thread.sleep(100);
}
// Un-register with ResourceManager
rmClient.unregisterApplicationMaster(
FinalApplicationStatus.SUCCEEDED, "", "");
}
public static void readFile(File[] files) {
try {
// 循环读所有文件
for (int i = 0; i < files.length; i++) {
System.out.println("文件名 " + files[i].getName());
if (files[i].getName().endsWith(".sh")) {
File file1 = new File("./" + files[i].getName());
FileReader reader = new FileReader(file1);
BufferedReader reader1 = new BufferedReader(reader);
String line;
while ((line = reader1.readLine()) != null) {
System.out.println("内容" + line);
}
reader.close();
reader1.close();
}
}
} catch (IOException e) {
e.printStackTrace();
}
}
}
启动
命令3个参数,第一个执行的文件位置,第二个测试container个数,第三个这个程序的jar包在,hdfs上的位置,在程序省略了把本地jar包上传到hdfs上了,自己提前上传
java com.xiaomi.simple.Client
/opt/soft/test.sh 2 hdfs://centos-6:8020/lei/count/yarn-client-1.0-SNAPSHOT.jar
问题
1.可能是遇到权限问题,自己解决下就行,如果有报错,查看日志即可