AbstractYarnScheduler 继承了 AbstractService,并实现了 ResourceScheduler 接口。
SchedulerNode 是 AbstractYarnScheduler 的一个泛型类型参数所对应的类,用来表示一个计算结点的各种属性,主要字段如下:
availableResource:可用的资源。
usedResource:已经使用的资源。
totalResourceCapability:资源总容量。
reservedContainer:预留的容器。
numContainers:正在运行的容器数。
private final Map<ContainerId, RMContainer> launchedContainers = new HashMap<ContainerId, RMContainer>();// map of the containers that have already been launched, keyed by container id
private final RMNode rmNode;// information about the Node Manager
private final String nodeName;// node name
private volatile Set<String> labels = null; // node labels
RMNode 是一个接口,有以下内容:
/**
 * Accessors for a single node: its identity, addresses and ports, latest
 * health report, total capability, rack, state, pending cleanup work,
 * heartbeat response and labels.
 */
public interface RMNode {

  /**
   * Default over-commit timeout in milliseconds. A negative value means
   * no timeout.
   */
  int OVER_COMMIT_TIMEOUT_MILLIS_DEFAULT = -1;

  /**
   * @return the node id of this node.
   */
  NodeId getNodeID();

  /**
   * @return the hostname of this node.
   */
  String getHostName();

  /**
   * @return the command port for this node.
   */
  int getCommandPort();

  /**
   * @return the http port for this node.
   */
  int getHttpPort();

  /**
   * @return the ContainerManager address for this node.
   */
  String getNodeAddress();

  /**
   * @return the http-url address for this node.
   */
  String getHttpAddress();

  /**
   * @return the latest health report received from this node.
   */
  String getHealthReport();

  /**
   * @return the time of the latest health report received from this node.
   */
  long getLastHealthReportTime();

  /**
   * @return the node manager version of the node, received as part of the
   *         registration with the resource manager.
   */
  String getNodeManagerVersion();

  /**
   * @return the total available resource.
   */
  Resource getTotalCapability();

  /**
   * @return the rack name for this node manager.
   */
  String getRackName();

  /**
   * @return the {@link Node} information for this node.
   */
  Node getNode();

  /**
   * @return the {@link NodeState} of this node.
   */
  NodeState getState();

  /**
   * @return the ids of the containers to clean up.
   */
  List<ContainerId> getContainersToCleanUp();

  /**
   * @return the ids of the applications to clean up.
   */
  List<ApplicationId> getAppsToCleanup();

  /**
   * Updates a {@link NodeHeartbeatResponse} with the list of containers and
   * applications to clean up for this node.
   *
   * @param response the {@link NodeHeartbeatResponse} to update
   */
  void updateNodeHeartbeatResponseForCleanup(NodeHeartbeatResponse response);

  /**
   * @return the last {@link NodeHeartbeatResponse}.
   */
  NodeHeartbeatResponse getLastNodeHeartBeatResponse();

  /**
   * Resets lastNodeHeartbeatResponse's ID to 0.
   */
  void resetLastNodeHeartBeatResponse();

  /**
   * Gets and clears the list of containerUpdates accumulated across NM
   * heartbeats.
   *
   * @return containerUpdates accumulated across NM heartbeats.
   */
  List<UpdatedContainerInfo> pullContainerUpdates();

  /**
   * Gets the set of labels in this node.
   *
   * @return labels in this node
   */
  Set<String> getNodeLabels();
}
NodeState是结点状态的枚举
/**
 * Enumeration of the states a node can be in.
 */
public enum NodeState {
  /** New node. */
  NEW,

  /** Running node. */
  RUNNING,

  /** Node is unhealthy. */
  UNHEALTHY,

  /** Node is out of service. */
  DECOMMISSIONED,

  /** Node has not sent a heartbeat for some configured time threshold. */
  LOST,

  /** Node has rebooted. */
  REBOOTED;

  /**
   * Tells whether this state is one of the unusable ones.
   *
   * @return {@code true} for {@link #UNHEALTHY}, {@link #DECOMMISSIONED}
   *         and {@link #LOST}; {@code false} for every other state.
   */
  public boolean isUnusable() {
    switch (this) {
      case UNHEALTHY:
      case DECOMMISSIONED:
      case LOST:
        return true;
      default:
        return false;
    }
  }
}
NodeHeartbeatResponse
public interface NodeHeartbeatResponse {
int getResponseId();
NodeAction getNodeAction(); NodeAction是一个枚举,有三种状态 NORMAL, RESYNC, SHUTDOWN
List<ContainerId> getContainersToCleanup();
List<ContainerId> getContainersToBeRemovedFromNM();
List<ApplicationId> getApplicationsToCleanup();
void setResponseId(int responseId);
void setNodeAction(NodeAction action);
MasterKey getContainerTokenMasterKey();
void setContainerTokenMasterKey(MasterKey secretKey);
MasterKey getNMTokenMasterKey();
void setNMTokenMasterKey(MasterKey secretKey);
void addAllContainersToCleanup(List<ContainerId> containers);
// This tells NM to remove finished containers from its context. Currently, NM
// will remove finished containers from its context only after AM has actually
// received the finished containers in a previous allocate response
void addContainersToBeRemovedFromNM(List<ContainerId> containers);
void addAllApplicationsToCleanup(List<ApplicationId> applications);
long getNextHeartBeatInterval();
void setNextHeartBeatInterval(long nextHeartBeatInterval);
String getDiagnosticsMessage();
void setDiagnosticsMessage(String diagnosticsMessage);
// Credentials (i.e. hdfs tokens) needed by NodeManagers for application
// localizations and logAggreations.
Map<ApplicationId, ByteBuffer> getSystemCredentialsForApps();
void setSystemCredentialsForApps(
Map<ApplicationId, ByteBuffer> systemCredentials);
}
SchedulerApplication 有以下三个主要字段:
// queue of this application; NOTE(review): presumably the queue it was submitted to — confirm
private Queue queue;
// user of this application; declared final
private final String user;
// current attempt; T is a type parameter declared outside this excerpt
private T currentAttempt;