本地连接hadoop yarn环境进行本地开发非常方便, 免除了开发一段代码, 打包部署到Linux开发环境去调试这一段的麻烦. 本文将作者实验通过的几种方式做一下记录.
方法一:
整体目录结构:
A). pom.xml 清单
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
  <modelVersion>4.0.0</modelVersion>
  <groupId>com.david</groupId>
  <artifactId>yarnstatusgetter</artifactId>
  <version>1.0-SNAPSHOT</version>
  <name>yarnstatusgetter</name>
  <properties>
    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
    <maven.compiler.source>1.8</maven.compiler.source>
    <maven.compiler.target>1.8</maven.compiler.target>
  </properties>
  <dependencies>
    <dependency>
      <groupId>junit</groupId>
      <artifactId>junit</artifactId>
      <version>3.8.1</version>
      <scope>test</scope>
    </dependency>
    <!-- Hadoop 2.6.0 client stack: keep all artifacts on the SAME version
         so transitive classes stay consistent. -->
    <dependency>
      <groupId>org.apache.hadoop</groupId>
      <artifactId>hadoop-mapreduce-client-core</artifactId>
      <version>2.6.0</version>
    </dependency>
    <dependency>
      <groupId>org.apache.hadoop</groupId>
      <artifactId>hadoop-hdfs</artifactId>
      <version>2.6.0</version>
    </dependency>
    <dependency>
      <groupId>org.apache.hadoop</groupId>
      <artifactId>hadoop-common</artifactId>
      <version>2.6.0</version>
    </dependency>
    <dependency>
      <groupId>org.apache.hadoop</groupId>
      <artifactId>hadoop-yarn-common</artifactId>
      <version>2.6.0</version>
    </dependency>
    <dependency>
      <groupId>org.apache.hadoop</groupId>
      <artifactId>hadoop-yarn-client</artifactId>
      <version>2.6.0</version>
    </dependency>
    <dependency>
      <groupId>org.apache.hadoop</groupId>
      <artifactId>hadoop-yarn-server-resourcemanager</artifactId>
      <version>2.6.0</version>
    </dependency>
  </dependencies>
  <build>
    <pluginManagement><!-- lock down plugins versions to avoid using Maven defaults (may be moved to parent pom) -->
      <plugins>
        <plugin>
          <groupId>org.apache.maven.plugins</groupId>
          <artifactId>maven-surefire-plugin</artifactId>
          <version>2.13</version>
          <configuration>
            <useFile>false</useFile>
            <disableXmlReport>true</disableXmlReport>
            <!-- If you have classpath issue like NoDefClassError,... -->
            <!-- useManifestOnlyJar>false</useManifestOnlyJar -->
            <includes>
              <include>**/*Test.*</include>
              <include>**/*Suite.*</include>
            </includes>
          </configuration>
        </plugin>
        <!-- Bundle all dependencies into a single executable fat jar. -->
        <plugin>
          <groupId>org.apache.maven.plugins</groupId>
          <artifactId>maven-assembly-plugin</artifactId>
          <version>2.4.1</version>
          <configuration>
            <!-- get all project dependencies -->
            <descriptorRefs>
              <descriptorRef>jar-with-dependencies</descriptorRef>
            </descriptorRefs>
            <!-- MainClass in mainfest make a executable jar -->
          </configuration>
          <executions>
            <execution>
              <id>make-assembly</id>
              <!-- bind to the packaging phase -->
              <phase>package</phase>
              <goals>
                <goal>single</goal>
              </goals>
            </execution>
          </executions>
        </plugin>
      </plugins>
    </pluginManagement>
    <plugins>
      <!-- FIX: executions declared only inside <pluginManagement> are NOT
           bound to the lifecycle; without this activation a plain
           `mvn package` never produced the jar-with-dependencies artifact
           that the run instructions below depend on. Version and
           configuration are inherited from pluginManagement above. -->
      <plugin>
        <groupId>org.apache.maven.plugins</groupId>
        <artifactId>maven-assembly-plugin</artifactId>
      </plugin>
    </plugins>
  </build>
</project>
B) 加载 YARN / HDFS配置文件
拷贝YARN / HDFS相关配置文件到maven项目的resources目录下:
- core-site.xml
- hdfs-site.xml
- mapred-site.xml
- yarn-site.xml
CDH集群环境下,可以登录到ClouderaManager中, 分别进入HDFS/YARN配置页面, 依次点击"设置","下载客户端配置"后获取.
C) Yarn测试代码
测试案例功能说明:
检查当前Yarn下正在运行的app列表:
package com.david;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.yarn.api.records.ApplicationId;
import org.apache.hadoop.yarn.api.records.ApplicationReport;
import org.apache.hadoop.yarn.api.records.YarnApplicationState;
import org.apache.hadoop.yarn.api.records.impl.pb.ApplicationReportPBImpl;
import org.apache.hadoop.yarn.client.api.YarnClient;
import org.apache.hadoop.yarn.client.cli.ApplicationCLI;
import org.apache.hadoop.yarn.exceptions.ApplicationNotFoundException;
import org.apache.hadoop.yarn.exceptions.YarnException;
import org.apache.hadoop.yarn.util.ConverterUtils;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.PrintStream;
import java.io.PrintWriter;
import java.nio.charset.Charset;
import java.text.DecimalFormat;
import java.util.EnumSet;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
/**
 * Minimal YARN admin helper for local development against a remote cluster.
 *
 * <p>Connection settings are read from the Hadoop client configuration files
 * (core-site.xml / hdfs-site.xml / mapred-site.xml / yarn-site.xml) placed on
 * the classpath, as described in section B of this article.
 *
 * <p>Supported operations: list applications in SUBMITTED / ACCEPTED / RUNNING
 * state, and kill an application by its id.
 */
public class YarnApplicationManager {

    private YarnClient client;

    /** Target stream for report output; a field so it could be redirected. */
    protected PrintStream sout = System.out;

    /**
     * printf template: ">>> " followed by nine right-aligned 15-char columns
     * separated by tabs, terminated by the platform line separator
     * (Windows: \r\n, Linux/macOS: \n).
     */
    private static final String PRINT_FORMAT =
            ">>> %15s\t%15s\t%15s\t%15s\t%15s\t%15s\t%15s\t%15s\t%15s\t"
                    + System.getProperty("line.separator");

    public static void main(String[] args) {
        YarnApplicationManager app = new YarnApplicationManager();
        try {
            app.initYarnClient();
            // 1. List YARN applications and their current states.
            app.getAppsState();
            // 2. Kill a YARN application by its id:
            // app.killYARNAppByID("application_1584696615034_0134");
        } catch (Exception ex) {
            ex.printStackTrace();
        } finally {
            // FIX: the client was previously created but never closed.
            app.releaseYarnClient();
        }
    }

    /** Creates and starts a {@link YarnClient} from the classpath configuration. */
    private void initYarnClient() {
        Configuration conf = new Configuration();
        client = YarnClient.createYarnClient();
        client.init(conf);
        client.start();
    }

    /**
     * Prints one report line per application currently in SUBMITTED, ACCEPTED
     * or RUNNING state, then reports whether a "Spark shell" application is
     * among them.
     */
    private void getAppsState() {
        // FIX: was EnumSet.noneOf(...) followed by a tautological isEmpty() check.
        EnumSet<YarnApplicationState> appStates = EnumSet.of(
                YarnApplicationState.RUNNING,
                YarnApplicationState.ACCEPTED,
                YarnApplicationState.SUBMITTED);
        List<ApplicationReport> appsReport;
        try {
            appsReport = client.getApplications(appStates);
        } catch (YarnException | IOException e) {
            // FIX: previously the exception was swallowed and the still-null
            // appsReport caused a NullPointerException in the loop below.
            e.printStackTrace();
            return;
        }
        PrintWriter writer =
                new PrintWriter(new OutputStreamWriter(sout, Charset.forName("UTF-8")));
        // Formats the progress value as a percentage; hoisted out of the
        // loop — one instance is enough.
        DecimalFormat progressFormat = new DecimalFormat("###.##%");
        Set<String> appNameSet = new HashSet<>();
        for (ApplicationReport appReport : appsReport) {
            String progress = progressFormat.format(appReport.getProgress());
            writer.printf(PRINT_FORMAT, appReport.getApplicationId(), appReport.getName(),
                    appReport.getApplicationType(), appReport.getUser(), appReport.getQueue(),
                    appReport.getYarnApplicationState(), appReport.getFinalApplicationStatus(),
                    progress, appReport.getOriginalTrackingUrl());
            // FIX: removed the pointless cast to ApplicationReportPBImpl —
            // getName() is declared on the ApplicationReport interface.
            appNameSet.add(appReport.getName());
        }
        writer.flush();
        // NOTE(review): a second loop over appsReport that filtered on
        // getApplicationType() but had an empty body (dead code) was removed.
        boolean isSparkStreamingProcessLives = judgeSparkStreamingStatus(appNameSet);
        System.out.println("isSparkStreamingProcessLives = " + isSparkStreamingProcessLives);
    }

    /** @return true when an application named "Spark shell" is in the set */
    private boolean judgeSparkStreamingStatus(Set<String> appNameSet) {
        return appNameSet.contains("Spark shell");
    }

    /**
     * Kills the application with the given id unless it has already reached a
     * terminal state (FINISHED / KILLED / FAILED).
     *
     * @param applicationId id string such as "application_1584696615034_0134"
     * @throws ApplicationNotFoundException if the ResourceManager does not know the id
     */
    private void killYARNAppByID(String applicationId) throws YarnException, IOException {
        ApplicationId appId = ConverterUtils.toApplicationId(applicationId);
        ApplicationReport appReport;
        try {
            appReport = client.getApplicationReport(appId);
        } catch (ApplicationNotFoundException e) {
            sout.println("Application with id '" + applicationId +
                    "' doesn't exist in RM.");
            throw e;
        }
        YarnApplicationState state = appReport.getYarnApplicationState();
        if (state == YarnApplicationState.FINISHED
                || state == YarnApplicationState.KILLED
                || state == YarnApplicationState.FAILED) {
            sout.println("Application " + applicationId + " has already finished ");
        } else {
            sout.println("Killing application " + applicationId);
            client.killApplication(appId);
        }
    }

    /** Alternative listing: delegates to the stock "yarn application -list" CLI. */
    private void getAppState() throws Exception {
        String[] args = {"-list"};
        ApplicationCLI.main(args);
    }

    /** Closes the YarnClient; safe to call when the client was never created. */
    private void releaseYarnClient() {
        if (null != client) {
            try {
                client.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }
}
D). 执行maven打包
打包命令:
mvn assembly:assembly
E). 上传到服务器运行
按以下语法执行:
java -cp .:/PATH1/*:/PATH2/* PackageName.MainClassName
(注意: java 命令的 classpath 通配符写作 "*", 它会匹配目录下所有 jar 包; 写成 "*.jar" 是无法被 java 命令展开识别的.)
如本例中:
java -cp yarnstatusgetter-1.0-SNAPSHOT-jar-with-dependencies.jar com.david.YarnApplicationManager
方法二
如果方法一不成功, 尝试使用方法二解决.
A). Hadoop官网下载与所在集群版本适配的安装包
如:
B). 将winutils.exe 放在下载解压的hadoop压缩包的bin目录下
如:
winutils.exe下载地址:
目前在github上维护着一个名称为winutils的项目,地址为:
https://github.com/SirMin/winutils
找到指定版本的hadoop版本的bin目录下, 下载该文件即可,如下:
C). 重启PC
D). 再次在IDEA中尝试运行连接YARN环境的代码.