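当NM的服务初始化完成后,就会进入启动阶段。下面第一段是NodeManager的serviceStart():先执行安全登录,再调用父类的serviceStart();第二段是父类CompositeService的serviceStart():逐个启动init阶段添加的子服务。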
/**
 * Starts the NodeManager service.
 *
 * <p>The secure login is performed first; an {@link IOException} raised by
 * the login is rethrown wrapped in a {@code YarnRuntimeException}. Once the
 * login succeeds, start-up is delegated to the parent class.
 *
 * @throws Exception if the parent class fails to start
 */
@Override
protected void serviceStart() throws Exception {
  try {
    doSecureLogin();
  } catch (IOException loginFailure) {
    // Keep the original exception as the cause so the reason is not lost.
    throw new YarnRuntimeException("Failed NodeManager login", loginFailure);
  }
  super.serviceStart();
}
/**
 * Starts every child service in list order, then delegates to the parent
 * class.
 *
 * <p>A failure in any child's {@code start()} propagates to the caller, so
 * the remaining children are not started.
 *
 * @throws Exception if a child service or the parent class fails to start
 */
protected void serviceStart() throws Exception {
  // The list of all services that were added during init.
  final List<Service> children = getServices();
  if (LOG.isDebugEnabled()) {
    LOG.debug(getName() + ": starting services, size=" + children.size());
  }
  // Start the services one by one.
  for (Service child : children) {
    // start the service. If this fails that service
    // will be stopped and an exception raised
    child.start();
  }
  super.serviceStart();
}
相关service列表有:
DeletionService 文件清理服务
NodeHealthCheckerService 节点健康检查服务
NodeResourceMonitor 节点资源监控服务
ContainerManager 容器管理服务
WebServer web服务
AsyncDispatcher 事件分发器
NodeStatusUpdater 节点状态更新服务
DeletionService
没有覆盖父类AbstractService.java中的serviceStart() 方法
NodeHealthCheckerService
没有覆盖父类CompositeService中的serviceStart()方法
子服务NodeHealthScriptRunner
/**
 * Starts the node health monitoring.
 *
 * <p>When {@code shouldRun(conf)} is false (the health script path is not
 * configured) nothing is scheduled and the parent {@code serviceStart()} is
 * not invoked; only an informational message is logged.
 *
 * @throws Exception if the parent class fails to start
 */
@Override
protected void serviceStart() throws Exception {
  if (shouldRun(conf)) {
    // Create the timer; the second constructor argument marks its thread
    // as a daemon.
    final Timer scheduler = new Timer("NodeHealthMonitor-Timer", true);
    nodeHealthScriptScheduler = scheduler;
    // Run the timer task immediately (delay 0) and then periodically every
    // intervalTime; the task is what executes the health shell script.
    scheduler.scheduleAtFixedRate(timer, 0, intervalTime);
    super.serviceStart();
  } else {
    // if health script path is not configured don't start the thread.
    LOG.info("Not starting node health monitor");
  }
}
子服务LocalDirsHandlerService
/**
 * Starts the disk health monitoring when it is enabled, then delegates to
 * the parent class (which happens whether or not monitoring is enabled).
 *
 * @throws Exception if the parent class fails to start
 */
@Override
protected void serviceStart() throws Exception {
  if (isDiskHealthCheckerEnabled) {
    // Create the timer; the second constructor argument marks its thread
    // as a daemon.
    final Timer scheduler = new Timer("DiskHealthMonitor-Timer", true);
    dirsHandlerScheduler = scheduler;
    // Schedule the monitoring task periodically; the first run is delayed
    // by one full check interval.
    scheduler.scheduleAtFixedRate(
        monitoringTimerTask, diskHealthCheckInterval, diskHealthCheckInterval);
  }
  super.serviceStart();
}
检测函数如下,因为是对文件系统的检测,无非是读写执行这些权限的检查
/**
 * Task entry point: each invocation performs one pass of
 * {@code checkDirs()}.
 */
@Override
public void run() {
checkDirs();
}
/**
 * Runs one health-check pass over the local dirs and the log dirs.
 *
 * <p>The failed-dir sets are captured before and after the check so that
 * newly failed and newly recovered dirs can be reported through
 * {@code logDiskStatus}. If either check reports a change,
 * {@code updateDirsAfterTest()} is invoked. The time of the check is
 * recorded in {@code lastDisksCheckTime} at the end.
 */
private void checkDirs() {
  // Failed dirs as known before this pass.
  final Set<String> localFailedBefore =
      new HashSet<String>(localDirs.getFailedDirs());
  final Set<String> logFailedBefore =
      new HashSet<String>(logDirs.getFailedDirs());

  // Both checks must always run, so evaluate them separately rather than
  // in a single short-circuiting expression.
  final boolean localDirsChanged = localDirs.checkDirs();
  final boolean logDirsChanged = logDirs.checkDirs();
  final boolean anythingChanged = localDirsChanged || logDirsChanged;

  // Failed dirs as known after this pass.
  final Set<String> localFailedAfter =
      new HashSet<String>(localDirs.getFailedDirs());
  final Set<String> logFailedAfter =
      new HashSet<String>(logDirs.getFailedDirs());

  boolean someTurnedBad = disksTurnedBad(localFailedBefore, localFailedAfter);
  boolean someTurnedGood =
      disksTurnedGood(localFailedBefore, localFailedAfter);

  // skip check if we have new failed or good local dirs since we're going to
  // log anyway
  someTurnedBad =
      someTurnedBad || disksTurnedBad(logFailedBefore, logFailedAfter);
  someTurnedGood =
      someTurnedGood || disksTurnedGood(logFailedBefore, logFailedAfter);

  logDiskStatus(someTurnedBad, someTurnedGood);

  if (anythingChanged) {
    updateDirsAfterTest();
  }

  lastDisksCheckTime = System.currentTimeMillis();
}
NodeResourceMonitor
NodeResourceMonitorImpl没有覆盖父类CompositeService的serviceStart方法
ContainerManager
/**
 * Starts the container manager: creates the RPC server for
 * ContainerManagementProtocol, derives and publishes the node ID, starts the
 * remaining services via the parent class, and opens the RPC server.
 *
 * <p>When the NM state store can recover, the RPC server is only started
 * after the parent start-up and after waiting for recovered containers;
 * otherwise it is started before the node ID is built.
 *
 * @throws IllegalArgumentException if recovery is possible but the
 *         configured port is 0 (ephemeral)
 * @throws IOException if, in the delayed-start case, the node ID derived
 *         from the started server differs from the one set earlier
 * @throws Exception if the parent class fails to start
 */
@Override
protected void serviceStart() throws Exception {
// NOTE(review): the original comment here read "Enqueue user dirs in
// deletion context", which does not describe the statements that follow.
Configuration conf = getConfig();
// Resolve the address to bind from the bind-host and address settings,
// falling back to the default address and port.
final InetSocketAddress initialAddress = conf.getSocketAddr(
YarnConfiguration.NM_BIND_HOST,
YarnConfiguration.NM_ADDRESS,
YarnConfiguration.DEFAULT_NM_ADDRESS,
YarnConfiguration.DEFAULT_NM_PORT);
// Port 0 is treated as an ephemeral port, which is rejected when the
// state store can recover.
boolean usingEphemeralPort = (initialAddress.getPort() == 0);
if (context.getNMStateStore().canRecover() && usingEphemeralPort) {
throw new IllegalArgumentException("Cannot support recovery with an "
+ "ephemeral server port. Check the setting of "
+ YarnConfiguration.NM_ADDRESS);
}
// If recovering then delay opening the RPC service until the recovery
// of resources and containers have completed, otherwise requests from
// clients during recovery can interfere with the recovery process.
final boolean delayedRpcServerStart =
context.getNMStateStore().canRecover();
// Work on a copy so the forced authentication setting below does not
// modify the service's own configuration object.
Configuration serverConf = new Configuration(conf);
// always enforce it to be token-based.
serverConf.set(
CommonConfigurationKeysPublic.HADOOP_SECURITY_AUTHENTICATION,
SaslRpcServer.AuthMethod.TOKEN.toString());
// Instantiate the YarnRPC class. Per the original note, the class is
// configured by yarn.ipc.rpc.class and defaults to
// org.apache.hadoop.yarn.ipc.HadoopYarnProtoRPC.
YarnRPC rpc = YarnRPC.create(conf);
// Create the RPC server that receives control requests from the AM; see
// ContainerManagementProtocol for the protocol itself.
server =
rpc.getServer(ContainerManagementProtocol.class, this, initialAddress,
serverConf, this.context.getNMTokenSecretManager(),
conf.getInt(YarnConfiguration.NM_CONTAINER_MGR_THREAD_COUNT,
YarnConfiguration.DEFAULT_NM_CONTAINER_MGR_THREAD_COUNT));
// Enable service authorization?
if (conf.getBoolean(
CommonConfigurationKeysPublic.HADOOP_SECURITY_AUTHORIZATION,
false)) {
refreshServiceAcls(conf, new NMPolicyProvider());
}
// While the container manager RPC server is still starting, client
// control requests are not accepted.
LOG.info("Blocking new container-requests as container manager rpc" +
" server is still starting.");
this.setBlockNewContainerRequests(true);
String bindHost = conf.get(YarnConfiguration.NM_BIND_HOST);
String nmAddress = conf.getTrimmed(YarnConfiguration.NM_ADDRESS);
// Stays null unless both a bind host and an NM address are configured.
String hostOverride = null;
if (bindHost != null && !bindHost.isEmpty()
&& nmAddress != null && !nmAddress.isEmpty()) {
//a bind-host case with an address, to support overriding the first
//hostname found when querying for our hostname with the specified
//address, combine the specified address with the actual port listened
//on by the server
hostOverride = nmAddress.split(":")[0];
}
// setup node ID
InetSocketAddress connectAddress;
if (delayedRpcServerStart) {
// Server not started yet: derive the connect address from the
// configured address instead of from the running server.
connectAddress = NetUtils.getConnectAddress(initialAddress);
} else {
server.start();
connectAddress = NetUtils.getConnectAddress(server);
}
NodeId nodeId = buildNodeId(connectAddress, hostOverride);
// Publish the node ID to the NM context and to both secret managers.
((NodeManager.NMContext)context).setNodeId(nodeId);
this.context.getNMTokenSecretManager().setNodeId(nodeId);
this.context.getContainerTokenSecretManager().setNodeId(nodeId);
// start remaining services
super.serviceStart();
if (delayedRpcServerStart) {
// Only open the RPC server once the recovered containers are done.
waitForRecoveredContainers();
server.start();
// check that the node ID is as previously advertised
connectAddress = NetUtils.getConnectAddress(server);
NodeId serverNode = buildNodeId(connectAddress, hostOverride);
if (!serverNode.equals(nodeId)) {
throw new IOException("Node mismatch after server started, expected '"
+ nodeId + "' but found '" + serverNode + "'");
}
}
LOG.info("ContainerManager started at " + connectAddress);
LOG.info("ContainerManager bound to " + initialAddress);
}
相关参数:
yarn.nodemanager.bind-host NM进程绑定IP,默认0.0.0.0
yarn.nodemanager.address NM进程绑定地址,一般配置为0.0.0.0:45454
yarn.ipc.rpc.class rpc类,默认是org.apache.hadoop.yarn.ipc.HadoopYarnProtoRPC
yarn.nodemanager.container-manager.thread-count rpc处理handler的线程数,默认20
ContainerManager子服务之ResourceLocalizationService
/**
 * Starts the resource localization service.
 *
 * <p>Schedules the cache clean-up task (initial delay and period are both
 * {@code cacheCleanupPeriod} milliseconds), creates and starts the
 * localizer RPC server, records the resulting connect address in
 * {@code localizationServerAddress}, and finally delegates to the parent
 * class.
 *
 * @throws Exception if the parent class fails to start
 */
@Override
public void serviceStart() throws Exception {
  final CacheCleanup cleanupTask = new CacheCleanup(dispatcher);
  cacheCleanup.scheduleWithFixedDelay(
      cleanupTask, cacheCleanupPeriod, cacheCleanupPeriod,
      TimeUnit.MILLISECONDS);

  // Create the resource localization server, then start it.
  server = createServer();
  server.start();

  // Record the connect address based on the address the server is
  // actually listening on.
  final Configuration serviceConf = getConfig();
  localizationServerAddress = serviceConf.updateConnectAddr(
      YarnConfiguration.NM_BIND_HOST,
      YarnConfiguration.NM_LOCALIZER_ADDRESS,
      YarnConfiguration.DEFAULT_NM_LOCALIZER_ADDRESS,
      server.getListenerAddress());

  LOG.info("Localizer started on port " + server.getPort());
  super.serviceStart();
}
/**
 * Builds (but does not start) the RPC server for LocalizationProtocol.
 *
 * <p>A new {@code LocalizerTokenSecretManager} is installed in
 * {@code secretManager} only when security is enabled. When service
 * authorization is switched on in the configuration, the server's service
 * ACL is refreshed with an {@code NMPolicyProvider}.
 *
 * @return the newly created server
 */
Server createServer() {
  final Configuration config = getConfig();
  final YarnRPC yarnRpc = YarnRPC.create(config);

  if (UserGroupInformation.isSecurityEnabled()) {
    secretManager = new LocalizerTokenSecretManager();
  }

  // Number of handler threads for the localizer RPC server.
  final int handlerCount = config.getInt(
      YarnConfiguration.NM_LOCALIZER_CLIENT_THREAD_COUNT,
      YarnConfiguration.DEFAULT_NM_LOCALIZER_CLIENT_THREAD_COUNT);

  // See the LocalizationProtocol protocol.
  final Server rpcServer = yarnRpc.getServer(
      LocalizationProtocol.class, this, localizationServerAddress, config,
      secretManager, handlerCount);

  // Enable service authorization?
  final boolean authorizationEnabled = config.getBoolean(
      CommonConfigurationKeysPublic.HADOOP_SECURITY_AUTHORIZATION, false);
  if (authorizationEnabled) {
    rpcServer.refreshServiceAcl(config, new NMPolicyProvider());
  }

  return rpcServer;
}
相关参数:
yarn.nodemanager.localizer.client.thread-count 处理本地化请求的RPC线程数,默认5
ContainerManager子服务之ContainersLauncher
无serviceStart()
子服务之AuxServices
/**
 * Starts every auxiliary service in {@code serviceMap}, registers this
 * object as a listener on each, and stores any non-null metadata the
 * service exposes in {@code serviceMetaData} under the service's name.
 * Finally delegates to the parent class.
 *
 * @throws Exception if the parent class fails to start
 */
@Override
public void serviceStart() throws Exception {
  // TODO fork(?) services running as configured user
  //      monitor for health, shutdown/restart(?) if any should die
  for (Map.Entry<String, AuxiliaryService> registered
      : serviceMap.entrySet()) {
    final String auxName = registered.getKey();
    final AuxiliaryService auxService = registered.getValue();

    auxService.start();
    auxService.registerServiceListener(this);

    final ByteBuffer metaData = auxService.getMetaData();
    if (metaData == null) {
      // Nothing to publish for this service.
      continue;
    }
    serviceMetaData.put(auxName, metaData);
  }
  super.serviceStart();
}
子服务之ContainerMonitor
/**
 * Starts the monitoring thread (whose body is {@code run()}) when
 * monitoring is enabled, then delegates to the parent class.
 *
 * @throws Exception if the parent class fails to start
 */
@Override
protected void serviceStart() throws Exception {
  final boolean monitoringEnabled = isEnabled();
  if (monitoringEnabled) {
    // Launch the monitor thread.
    monitoringThread.start();
  }
  super.serviceStart();
}
/**
 * Monitoring loop. Each iteration:
 * <ol>
 *   <li>moves newly registered containers into {@code trackingContainers};</li>
 *   <li>drops containers that were reported as finished;</li>
 *   <li>for every tracked container, refreshes its process tree, logs and
 *       records its memory/CPU usage, and dispatches a
 *       {@code ContainerKillEvent} when the virtual or physical memory
 *       check reports it over its limit;</li>
 *   <li>sleeps for {@code monitoringInterval}.</li>
 * </ol>
 * The loop ends only when the sleep is interrupted.
 */
@Override
public void run() {
while (true) {
// Print the processTrees for debugging.
if (LOG.isDebugEnabled()) {
StringBuilder tmp = new StringBuilder("[ ");
for (ProcessTreeInfo p : trackingContainers.values()) {
tmp.append(p.getPID());
tmp.append(" ");
}
LOG.debug("Current ProcessTree list : "
+ tmp.substring(0, tmp.length()) + "]");
}
// Add new containers
synchronized (containersToBeAdded) {
for (Entry<ContainerId, ProcessTreeInfo> entry : containersToBeAdded
.entrySet()) {
ContainerId containerId = entry.getKey();
ProcessTreeInfo processTreeInfo = entry.getValue();
LOG.info("Starting resource-monitoring for " + containerId);
trackingContainers.put(containerId, processTreeInfo);
}
containersToBeAdded.clear();
}
// Remove finished containers
synchronized (containersToBeRemoved) {
for (ContainerId containerId : containersToBeRemoved) {
if (containerMetricsEnabled) {
ContainerMetrics.forContainer(
containerId, containerMetricsPeriodMs).finished();
}
trackingContainers.remove(containerId);
LOG.info("Stopping resource-monitoring for " + containerId);
}
containersToBeRemoved.clear();
}
// Now do the monitoring for the trackingContainers
// Check memory usage and kill any overflowing containers
// (i.e. inspect every tracked container and kill it if its resource
// usage exceeds the limit).
// NOTE(review): the two totals below are accumulated but never read
// within this method.
long vmemStillInUsage = 0;
long pmemStillInUsage = 0;
// An explicit iterator is used so that killed containers can be removed
// from the tracking map while iterating.
for (Iterator<Map.Entry<ContainerId, ProcessTreeInfo>> it =
trackingContainers.entrySet().iterator(); it.hasNext();) {
Map.Entry<ContainerId, ProcessTreeInfo> entry = it.next();
ContainerId containerId = entry.getKey();
ProcessTreeInfo ptInfo = entry.getValue();
try {
String pId = ptInfo.getPID();
// Initialize any uninitialized processTrees
// (a null PID means the process tree still has to be initialized).
if (pId == null) {
// get pid from ContainerId
pId = containerExecutor.getProcessId(ptInfo.getContainerId());
// pId will be null, either if the container is not spawned yet
// or if the container's pid is removed from ContainerExecutor
if (pId != null) {
LOG.debug("Tracking ProcessTree " + pId
+ " for the first time");
ResourceCalculatorProcessTree pt =
ResourceCalculatorProcessTree.getResourceCalculatorProcessTree(pId, processTreeClass, conf);
ptInfo.setPid(pId);
ptInfo.setProcessTree(pt);
if (containerMetricsEnabled) {
ContainerMetrics usageMetrics = ContainerMetrics
.forContainer(containerId, containerMetricsPeriodMs);
int cpuVcores = ptInfo.getCpuVcores();
// ">> 20" divides by 2^20; presumably bytes to MB -- TODO confirm
// the unit of the limits.
final int vmemLimit = (int) (ptInfo.getVmemLimit() >> 20);
final int pmemLimit = (int) (ptInfo.getPmemLimit() >> 20);
usageMetrics.recordResourceLimit(
vmemLimit, pmemLimit, cpuVcores);
usageMetrics.recordProcessId(pId);
}
}
}
// End of initializing any uninitialized processTrees
// If the PID is still null after the initialization attempt, skip this
// container for the current iteration.
if (pId == null) {
continue; // processTree cannot be tracked
}
LOG.debug("Constructing ProcessTree for : PID = " + pId
+ " ContainerId = " + containerId);
ResourceCalculatorProcessTree pTree = ptInfo.getProcessTree();
pTree.updateProcessTree(); // update process-tree
long currentVmemUsage = pTree.getVirtualMemorySize();
long currentPmemUsage = pTree.getRssMemorySize();
// if machine has 6 cores and 3 are used,
// cpuUsagePercentPerCore should be 300% and
// cpuUsageTotalCoresPercentage should be 50%
float cpuUsagePercentPerCore = pTree.getCpuUsagePercent();
float cpuUsageTotalCoresPercentage = cpuUsagePercentPerCore /
resourceCalculatorPlugin.getNumProcessors();
// Multiply by 1000 to avoid losing data when converting to int
int milliVcoresUsed = (int) (cpuUsageTotalCoresPercentage * 1000
* maxVCoresAllottedForContainers /nodeCpuPercentageForYARN);
// as processes begin with an age 1, we want to see if there
// are processes more than 1 iteration old.
long curMemUsageOfAgedProcesses = pTree.getVirtualMemorySize(1);
long curRssMemUsageOfAgedProcesses = pTree.getRssMemorySize(1);
long vmemLimit = ptInfo.getVmemLimit();
long pmemLimit = ptInfo.getPmemLimit();
LOG.info(String.format(
"Memory usage of ProcessTree %s for container-id %s: ",
pId, containerId.toString()) +
formatUsageString(
currentVmemUsage, vmemLimit, currentPmemUsage, pmemLimit));
// Add usage to container metrics
if (containerMetricsEnabled) {
ContainerMetrics.forContainer(
containerId, containerMetricsPeriodMs).recordMemoryUsage(
(int) (currentPmemUsage >> 20));
ContainerMetrics.forContainer(
containerId, containerMetricsPeriodMs).recordCpuUsage
((int)cpuUsagePercentPerCore, milliVcoresUsed);
}
boolean isMemoryOverLimit = false;
String msg = "";
int containerExitStatus = ContainerExitStatus.INVALID;
// The virtual-memory check takes precedence; the physical-memory check
// is only evaluated when the virtual one did not trigger.
if (isVmemCheckEnabled()
&& isProcessTreeOverLimit(containerId.toString(),
currentVmemUsage, curMemUsageOfAgedProcesses, vmemLimit)) {
// Container (the root process) is still alive and overflowing
// memory.
// Dump the process-tree and then clean it up.
msg = formatErrorMessage("virtual",
currentVmemUsage, vmemLimit,
currentPmemUsage, pmemLimit,
pId, containerId, pTree);
isMemoryOverLimit = true;
containerExitStatus = ContainerExitStatus.KILLED_EXCEEDED_VMEM;
} else if (isPmemCheckEnabled()
&& isProcessTreeOverLimit(containerId.toString(),
currentPmemUsage, curRssMemUsageOfAgedProcesses,
pmemLimit)) {
// Container (the root process) is still alive and overflowing
// memory.
// Dump the process-tree and then clean it up.
msg = formatErrorMessage("physical",
currentVmemUsage, vmemLimit,
currentPmemUsage, pmemLimit,
pId, containerId, pTree);
isMemoryOverLimit = true;
containerExitStatus = ContainerExitStatus.KILLED_EXCEEDED_PMEM;
}
if (isMemoryOverLimit) {
// Virtual or physical memory over limit. Fail the container and
// remove
// the corresponding process tree
LOG.warn(msg);
// warn if not a leader
if (!pTree.checkPidPgrpidForMatch()) {
LOG.error("Killed container process with PID " + pId
+ " but it is not a process group leader.");
}
// kill the container
eventDispatcher.getEventHandler().handle(
new ContainerKillEvent(containerId,
containerExitStatus, msg));
// Stop tracking the container that was just asked to be killed.
it.remove();
LOG.info("Removed ProcessTree with root " + pId);
} else {
// Accounting the total memory in usage for all containers that
// are still
// alive and within limits.
vmemStillInUsage += currentVmemUsage;
pmemStillInUsage += currentPmemUsage;
}
} catch (Exception e) {
// Log the exception and proceed to the next container.
LOG.warn("Uncaught exception in ContainerMemoryManager "
+ "while managing memory of " + containerId, e);
}
}
// Wait one monitoring interval before the next pass; an interrupt ends
// the loop (and therefore this method).
try {
Thread.sleep(monitoringInterval);
} catch (InterruptedException e) {
LOG.warn(ContainersMonitorImpl.class.getName()
+ " is interrupted. Exiting.");
break;
}
}
}
/**
 * Builds the diagnostic message used when a container exceeds a memory
 * limit: a headline naming the pid, container and exceeded memory type, the
 * current usage summary, and a dump of the container's process tree.
 *
 * @param memTypeExceeded label of the exceeded memory type, inserted
 *        verbatim into the message
 * @param currentVmemUsage current virtual memory usage
 * @param vmemLimit virtual memory limit
 * @param currentPmemUsage current physical memory usage
 * @param pmemLimit physical memory limit
 * @param pId pid of the container's root process
 * @param containerId id of the container
 * @param pTree process tree whose dump is appended
 * @return the formatted message
 */
private String formatErrorMessage(String memTypeExceeded,
    long currentVmemUsage, long vmemLimit,
    long currentPmemUsage, long pmemLimit,
    String pId, ContainerId containerId, ResourceCalculatorProcessTree pTree) {
  final StringBuilder report = new StringBuilder();
  report.append(String.format(
      "Container [pid=%s,containerID=%s] is running beyond %s memory limits. ",
      pId, containerId, memTypeExceeded));
  report.append("Current usage: ");
  report.append(formatUsageString(
      currentVmemUsage, vmemLimit, currentPmemUsage, pmemLimit));
  report.append(". Killing container.\n");
  report.append("Dump of the process-tree for ");
  report.append(containerId);
  report.append(" :\n");
  report.append(pTree.getProcessTreeDump());
  return report.toString();
}
/**
 * Formats a one-line usage summary, physical memory first and virtual
 * memory second, with each value rendered by
 * {@code TraditionalBinaryPrefix.long2String(value, "", 1)}.
 *
 * @param currentVmemUsage current virtual memory usage
 * @param vmemLimit virtual memory limit
 * @param currentPmemUsage current physical memory usage
 * @param pmemLimit physical memory limit
 * @return the formatted summary
 */
private String formatUsageString(long currentVmemUsage, long vmemLimit,
    long currentPmemUsage, long pmemLimit) {
  // Note the output order differs from the parameter order: physical
  // figures come before virtual ones.
  final String pmemUsedText =
      TraditionalBinaryPrefix.long2String(currentPmemUsage, "", 1);
  final String pmemLimitText =
      TraditionalBinaryPrefix.long2String(pmemLimit, "", 1);
  final String vmemUsedText =
      TraditionalBinaryPrefix.long2String(currentVmemUsage, "", 1);
  final String vmemLimitText =
      TraditionalBinaryPrefix.long2String(vmemLimit, "", 1);
  return String.format(
      "%sB of %sB physical memory used; " + "%sB of %sB virtual memory used",
      pmemUsedText, pmemLimitText, vmemUsedText, vmemLimitText);
}
}