概述
NM端startContainer会经历一系列event:
- request_resource_localization类型的LocalizerEvent
LocalizerTracker是LocalizerEvent的事件处理器。LocalizerTracker根据资源的可见性(visibility)进行资源本地化。资源有3种类型:public、private和application。public资源的本地化由PublicLocalizer完成,它通过线程池异步下载资源;private和application资源的本地化则由LocalizerRunner线程启动独立的ContainerLocalizer进程来执行下载。
- localized类型的ResourceEvent
- resource_localized类型的ContainerEvent
- schedule_container类型的ContainerSchedulerEvent
- LaunchContainer类型的ContainersLauncherEvent
LocalizerEvent的处理器 —— LocalizerTracker
处理request_resource_localization类型逻辑如下:
private final PublicLocalizer publicLocalizer;

/**
 * Routes a resource-localization request to the localizer that serves its
 * visibility: PUBLIC requests go to {@code publicLocalizer}, PRIVATE and
 * APPLICATION requests go to the per-localizer-id {@code LocalizerRunner}.
 *
 * @param event the localizer event to process
 */
@Override
public void handle(LocalizerEvent event) {
  final String localizerId = event.getLocalizerId();
  switch (event.getType()) {
  case REQUEST_RESOURCE_LOCALIZATION:
    final LocalizerResourceRequestEvent request =
        (LocalizerResourceRequestEvent) event;
    switch (request.getVisibility()) {
    case PUBLIC:
      // Hand the request to the public localizer, which downloads it
      // asynchronously on its thread pool.
      publicLocalizer.addResource(request);
      break;
    case PRIVATE:
    case APPLICATION:
      submitToPrivateLocalizer(localizerId, request);
      break;
    }
    break;
  }
}

/**
 * Finds the running private localizer for {@code localizerId}, replacing it
 * with a freshly started one if it is missing or already stopped, and then
 * passes the request on to it. Runs entirely under the
 * {@code privLocalizers} monitor.
 */
private void submitToPrivateLocalizer(String localizerId,
    LocalizerResourceRequestEvent request) {
  synchronized (privLocalizers) {
    LocalizerRunner runner = privLocalizers.get(localizerId);
    final boolean stopped =
        runner != null && runner.killContainerLocalizer.get();
    if (stopped) {
      // The previous localizer thread has been stopped: drop it so that a
      // replacement is created below.
      LOG.info("New " + request.getType() + " localize request for "
          + localizerId + ", remove old private localizer.");
      cleanupPrivLocalizers(localizerId);
      runner = null;
    }
    if (runner == null) {
      LOG.info("Created localizer for " + localizerId);
      runner = new LocalizerRunner(request.getContext(), localizerId);
      privLocalizers.put(localizerId, runner);
      // Start the LocalizerRunner thread; per the accompanying notes it
      // launches the ContainerLocalizer process that performs the download.
      runner.start();
    }
    // Propagate the request to the (possibly new) runner.
    runner.addResource(request);
  }
}
假设req是public的,调用PublicLocalizer#addResource()方法。
PublicLocalizer介绍
在addResource()方法中,将LocalizerResourceRequestEvent放入线程池中,执行异步下载资源,代表异步执行的Future放入ExecutorCompletionService中存储,该future与LocalizerResourceRequestEvent的对应关系放入pending中存储;
在run()方法中,循环从ExecutorCompletionService中取出一个future,调用future#get()方法阻塞等待该future完成,然后创建ResourceLocalizedEvent.
/**
 * Thread that downloads public resources. {@link #addResource} submits an
 * FSDownload task per request to a completion service backed by
 * {@code threadPool}; {@link #run()} takes finished futures off that
 * service and reports success or failure to {@code publicRsrc}.
 */
class PublicLocalizer extends Thread {
final FileContext lfs;
final Configuration conf;
final ExecutorService threadPool;
final CompletionService<Path> queue;
// Shared between the public localizer thread and the dispatcher thread.
// Maps each submitted download future to the request event that caused it.
final Map<Future<Path>,LocalizerResourceRequestEvent> pending;
/**
 * Builds the localizer: obtains the local file context, creates the
 * synchronized pending map, the executor and the completion service that
 * wraps it.
 *
 * @param conf configuration used for the file context, the executor and
 *             later passed to each FSDownload
 */
PublicLocalizer(Configuration conf) {
super("Public Localizer");
this.lfs = getLocalFileContext(conf);
this.conf = conf;
this.pending = Collections.synchronizedMap(
new HashMap<Future<Path>, LocalizerResourceRequestEvent>());
this.threadPool = createLocalizerExecutor(conf);
this.queue = new ExecutorCompletionService<Path>(threadPool);
}
/**
 * Starts an asynchronous download for the requested resource if this
 * caller wins the resource's non-blocking lock and the resource is in the
 * DOWNLOADING state; otherwise the request is skipped. On IOException,
 * IllegalArgumentException or RejectedExecutionException the lock is
 * released and a ResourceFailedLocalizationEvent is sent to publicRsrc.
 *
 * @param request the localization request to serve
 */
public void addResource(LocalizerResourceRequestEvent request) {
// TODO handle failures, cancellation, requests by other containers
LocalizedResource rsrc = request.getResource();
LocalResourceRequest key = rsrc.getRequest();
LOG.info("Downloading public resource: " + key);
/*
 * Here multiple containers may request the same resource. So we need
 * to start downloading only when
 * 1) ResourceState == DOWNLOADING
 * 2) We are able to acquire non blocking semaphore lock.
 * If not we will skip this resource as either it is getting downloaded
 * or it FAILED / LOCALIZED.
 */
if (rsrc.tryAcquire()) {
if (rsrc.getState() == ResourceState.DOWNLOADING) {
LocalResource resource = request.getResource().getRequest();
try {
// Pick a local directory under the file cache with room for the
// estimated resource size.
Path publicRootPath =
dirsHandler.getLocalPathForWrite("." + Path.SEPARATOR
+ ContainerLocalizer.FILECACHE,
ContainerLocalizer.getEstimatedSize(resource), true);
Path publicDirDestPath =
publicRsrc.getPathForLocalization(key, publicRootPath,
delService);
if (publicDirDestPath == null) {
// NOTE(review): this returns while the lock taken by tryAcquire()
// above is still held; every other exit either calls rsrc.unlock()
// or submits a download whose completion unlocks in run().
// Confirm this is intended.
return;
}
if (!publicDirDestPath.getParent().equals(publicRootPath)) {
// Destination is nested below the root: create the intermediate
// directories and validate the destination path on disk.
createParentDirs(publicDirDestPath, publicRootPath);
if (diskValidator != null) {
diskValidator.checkStatus(
new File(publicDirDestPath.toUri().getPath()));
} else {
throw new DiskChecker.DiskErrorException(
"Disk Validator is null!");
}
}
// explicitly synchronize pending here to avoid future task
// completing and being dequeued before pending updated
synchronized (pending) {
// Submit the download to the thread pool and record which request
// the returned future belongs to.
pending.put(queue.submit(new FSDownload(lfs, null, conf,
publicDirDestPath, resource, request.getContext().getStatCache())),
request);
}
} catch (IOException e) {
rsrc.unlock();
publicRsrc.handle(new ResourceFailedLocalizationEvent(request
.getResource().getRequest(), e.getMessage()));
LOG.error("Local path for public localization is not found. "
+ " May be disks failed.", e);
} catch (IllegalArgumentException ie) {
rsrc.unlock();
publicRsrc.handle(new ResourceFailedLocalizationEvent(request
.getResource().getRequest(), ie.getMessage()));
LOG.error("Local path for public localization is not found. "
+ " Incorrect path. " + request.getResource().getRequest()
.getPath(), ie);
} catch (RejectedExecutionException re) {
rsrc.unlock();
publicRsrc.handle(new ResourceFailedLocalizationEvent(request
.getResource().getRequest(), re.getMessage()));
LOG.error("Failed to submit rsrc " + rsrc + " for download."
+ " Either queue is full or threadpool is shutdown.", re);
}
} else {
// Lock acquired but the resource is not DOWNLOADING: nothing to do,
// so release the lock again.
if (LOG.isDebugEnabled()) {
LOG.debug("Skip downloading resource: " + key + " since it's in"
+ " state: " + rsrc.getState());
}
rsrc.unlock();
}
} else {
// Another thread holds the resource lock; skip this request.
if (LOG.isDebugEnabled()) {
LOG.debug("Skip downloading resource: " + key + " since it is locked"
+ " by other threads");
}
}
}
/**
 * Recursively creates every directory from just below {@code destDirRoot}
 * down to {@code destDirPath}, parents first. Does nothing when
 * {@code destDirPath} is null or equals {@code destDirRoot}.
 *
 * @throws IOException if a directory cannot be created
 */
private void createParentDirs(Path destDirPath, Path destDirRoot)
throws IOException {
if (destDirPath == null || destDirPath.equals(destDirRoot)) {
return;
}
createParentDirs(destDirPath.getParent(), destDirRoot);
createDir(destDirPath, PUBLIC_FILECACHE_FOLDER_PERMS);
}
/**
 * Creates a single directory (without creating parents) and, when the
 * file context's umask would alter {@code perms}, sets the permission
 * explicitly afterwards.
 *
 * @throws IOException if the directory cannot be created or its
 *                     permission cannot be set
 */
private void createDir(Path dirPath, FsPermission perms)
throws IOException {
lfs.mkdir(dirPath, perms, false);
if (!perms.equals(perms.applyUMask(lfs.getUMask()))) {
lfs.setPermission(dirPath, perms);
}
}
/**
 * Main loop: until interrupted, takes the next finished download from the
 * completion service, looks up its request in {@code pending} and reports
 * the outcome. The thread pool is shut down when the loop exits for any
 * reason.
 */
@Override
public void run() {
try {
// TODO shutdown, better error handling esp. DU
while (!Thread.currentThread().isInterrupted()) {
try {
Future<Path> completed = queue.take();
LocalizerResourceRequestEvent assoc = pending.remove(completed);
try {
if (null == assoc) {
LOG.error("Localized unknown resource to " + completed);
// TODO delete
// This return leaves run() entirely, so the finally block below
// shuts the thread pool down.
return;
}
// Fetch the download result; an ExecutionException here means the
// download task itself failed.
Path local = completed.get();
LocalResourceRequest key = assoc.getResource().getRequest();
// Report the local path and the size computed by FileUtil.getDU to
// publicRsrc, then release the resource lock.
publicRsrc.handle(new ResourceLocalizedEvent(key, local, FileUtil
.getDU(new File(local.toUri()))));
assoc.getResource().unlock();
} catch (ExecutionException e) {
// Download failed: send the failure both to the APPLICATION-visibility
// tracker looked up for this user/application and to publicRsrc,
// then release the resource lock.
String user = assoc.getContext().getUser();
ApplicationId applicationId = assoc.getContext().getContainerId().getApplicationAttemptId().getApplicationId();
LocalResourcesTracker tracker =
getLocalResourcesTracker(LocalResourceVisibility.APPLICATION, user, applicationId);
final String diagnostics = "Failed to download resource " +
assoc.getResource() + " " + e.getCause();
tracker.handle(new ResourceFailedLocalizationEvent(
assoc.getResource().getRequest(), diagnostics));
publicRsrc.handle(new ResourceFailedLocalizationEvent(
assoc.getResource().getRequest(), diagnostics));
LOG.error(diagnostics);
assoc.getResource().unlock();
} catch (CancellationException e) {
// ignore; shutting down
}
} catch (InterruptedException e) {
// Interrupted while waiting for a completed download: stop the loop.
return;
}
}
} catch(Throwable t) {
LOG.error("Error: Shutting down", t);
} finally {
LOG.info("Public cache exiting");
threadPool.shutdownNow();
}
}
}
ResourceLocalizedEvent是localized类型的ResourceEvent
/**
 * Creates a {@code LOCALIZED} resource event.
 *
 * @param rsrc     the request that was localized
 * @param location local path recorded for the resource
 * @param size     size value recorded for the resource
 */
public ResourceLocalizedEvent(
    LocalResourceRequest rsrc,
    Path location,
    long size) {
  super(rsrc, ResourceEventType.LOCALIZED);
  this.location = location;
  this.size = size;
}
ResourceEvent的处理器 —— LocalizedResource,以及处理localized类型ResourceEvent的transition —— FetchSuccessTransition
LocalizedResource添加处理localized类型ResourceEvent的transition
.addTransition(ResourceState.DOWNLOADING, ResourceState.LOCALIZED,
ResourceEventType.LOCALIZED, new FetchSuccessTransition())
FetchSuccessTransition处理localized类型ResourceEvent
/**
 * Transition run when a resource has been localized: stores the local path
 * and size on the resource, then sends a
 * {@code ContainerResourceLocalizedEvent} for every container id held in
 * the resource's {@code ref} collection.
 */
@SuppressWarnings("unchecked") // dispatcher not typed
private static class FetchSuccessTransition extends ResourceTransition {
  @Override
  public void transition(LocalizedResource rsrc, ResourceEvent event) {
    final ResourceLocalizedEvent localized = (ResourceLocalizedEvent) event;

    // Keep only the path component of the reported location.
    final Path reported = localized.getLocation();
    rsrc.localPath = Path.getPathWithoutSchemeAndAuthority(reported);
    rsrc.size = localized.getSize();

    // Notify each referencing container.
    for (ContainerId waiting : rsrc.ref) {
      rsrc.dispatcher.getEventHandler().handle(
          new ContainerResourceLocalizedEvent(
              waiting, rsrc.rsrc, rsrc.localPath));
    }
  }
}
ContainerResourceLocalizedEvent是resource_localized类型的ContainerEvent
/**
 * Creates a {@code RESOURCE_LOCALIZED} container event.
 *
 * @param container id of the container the event is addressed to
 * @param rsrc      the resource request that was localized
 * @param loc       local path of the localized resource
 */
public ContainerResourceLocalizedEvent(
    ContainerId container,
    LocalResourceRequest rsrc,
    Path loc) {
  super(container, ContainerEventType.RESOURCE_LOCALIZED, rsrc);
  this.loc = loc;
}
处理ContainerEvent的处理器——ContainerImpl和处理resource_localized类型的ContainerEvent的Transition —— LocalizedTransition
ContainerImpl添加处理resource_localized类型的ContainerEvent的Transition
// From LOCALIZING State
.addTransition(ContainerState.LOCALIZING,
EnumSet.of(ContainerState.LOCALIZING, ContainerState.SCHEDULED),
ContainerEventType.RESOURCE_LOCALIZED, new LocalizedTransition())
LocalizedTransition处理resource_localized类型的ContainerEvent
/**
 * Transition taken each time one of the container's requested resources
 * has been localized. The container stays in {@code LOCALIZING} while
 * resources are still pending and moves to {@code SCHEDULED} once none
 * remain.
 */
static class LocalizedTransition implements
    MultipleArcTransition<ContainerImpl, ContainerEvent, ContainerState> {
  @SuppressWarnings("unchecked")
  @Override
  public ContainerState transition(ContainerImpl container,
      ContainerEvent event) {
    final ContainerResourceLocalizedEvent localizedEvent =
        (ContainerResourceLocalizedEvent) event;
    final LocalResourceRequest request = localizedEvent.getResource();
    final Path localPath = localizedEvent.getLocation();

    final Set<String> links =
        container.resourceSet.resourceLocalized(request, localPath);
    if (links == null) {
      // The resource set returned nothing for this request: log and keep
      // localizing.
      LOG.info("Localized resource " + request +
          " for container " + container.containerId);
      return ContainerState.LOCALIZING;
    }

    // Remember the resource if it should also go to the shared cache.
    if (shouldBeUploadedToSharedCache(container, request)) {
      container.resourceSet.getResourcesToBeUploaded()
          .put(request, localPath);
    }

    final boolean morePending =
        !container.resourceSet.getPendingResources().isEmpty();
    if (morePending) {
      return ContainerState.LOCALIZING;
    }

    // Everything is localized: announce it, then request scheduling.
    container.dispatcher.getEventHandler().handle(
        new ContainerLocalizationEvent(
            LocalizationEventType.CONTAINER_RESOURCES_LOCALIZED, container));
    // Submits a ContainerSchedulerEvent to the dispatcher.
    container.sendScheduleEvent();
    container.metrics.endInitingContainer();

    // A recovered container that already launched (or completed) skips the
    // shared-cache upload so the same resources are not uploaded more than
    // once. The cost is that such a container's resources may never reach
    // the shared cache, which is acceptable because the SCM only
    // acknowledges resources after the node manager has uploaded them.
    final boolean alreadyRan =
        container.recoveredStatus == RecoveredContainerStatus.LAUNCHED
            || container.recoveredStatus == RecoveredContainerStatus.COMPLETED;
    if (!alreadyRan) {
      // Kick off uploads to the shared cache.
      container.dispatcher.getEventHandler().handle(
          new SharedCacheUploadEvent(
              container.resourceSet.getResourcesToBeUploaded(),
              container.getLaunchContext(),
              container.getUser(),
              SharedCacheUploadEventType.UPLOAD));
    }
    return ContainerState.SCHEDULED;
  }
}
ContainerImpl创建并提交schedule_container类型的ContainerSchedulerEvent给AsyncDispatcher
创建schedule_container类型的ContainerSchedulerEvent,并提交给AsyncDispatcher
/**
 * Asks for this container to be scheduled. A container recovered in the
 * PAUSED state is sent to the launcher as
 * {@code RECOVER_PAUSED_CONTAINER}; every other container is sent to the
 * scheduler as {@code SCHEDULE_CONTAINER}.
 */
private void sendScheduleEvent() {
  if (recoveredStatus != RecoveredContainerStatus.PAUSED) {
    dispatcher.getEventHandler().handle(
        new ContainerSchedulerEvent(
            this, ContainerSchedulerEventType.SCHEDULE_CONTAINER));
    return;
  }
  dispatcher.getEventHandler().handle(
      new ContainersLauncherEvent(
          this, ContainersLauncherEventType.RECOVER_PAUSED_CONTAINER));
}
处理各种类型ContainerSchedulerEvent的处理器 —— ContainerScheduler
ContainerScheduler启动container
/**
* Handle ContainerSchedulerEvents.
* @param event ContainerSchedulerEvent.
*/
@Override
public void handle(ContainerSchedulerEvent event) {
switch (event.getType()) {
case SCHEDULE_CONTAINER:
scheduleContainer(event.getContainer());
break;
..........
}
处理schedule_container类型的ContainerSchedulerEvent的方法
/**
 * Queues the given container and starts as many queued containers as
 * possible, using a different order for guaranteed and opportunistic
 * containers.
 *
 * @param container the container to schedule
 */
protected void scheduleContainer(Container container) {
  final boolean guaranteed = container.getContainerTokenIdentifier()
      .getExecutionType() == ExecutionType.GUARANTEED;

  if (!guaranteed) {
    // Opportunistic: first start whatever is already queued (guaranteed
    // before opportunistic, as resources allow), then enqueue this one.
    startPendingContainers(false);
    // Enqueueing can be refused when the opportunistic queue is at its
    // maximum length; a second pass only makes sense if it was accepted.
    if (enqueueContainer(container)) {
      startPendingContainers(false);
    }
    return;
  }

  // Guaranteed: enqueue first, then start queued guaranteed containers
  // followed by queued opportunistic ones.
  enqueueContainer(container);
  // When opportunistic containers are not allowed (max queue length of
  // pending opportunistic containers <= 0), guaranteed containers are
  // started without looking at available resources.
  final boolean skipResourceCheck = (maxOppQueueLength <= 0);
  startPendingContainers(skipResourceCheck);
  // Still queued afterwards: preempt opportunistic containers to make
  // room for it.
  if (queuedGuaranteedContainers.containsKey(container.getContainerId())) {
    reclaimOpportunisticContainerResources(container);
  }
}
/**
 * Starts queued containers: guaranteed ones first, and opportunistic ones
 * only if the guaranteed pass did not run out of resources.
 *
 * @param forceStartGuaranteedContaieners when true, guaranteed containers
 *        are started without checking available resources
 */
private void startPendingContainers(boolean forceStartGuaranteedContaieners) {
  final boolean guaranteedPassSucceeded = startContainers(
      queuedGuaranteedContainers.values(), forceStartGuaranteedContaieners);
  if (!guaranteedPassSucceeded) {
    // Resources ran out during the guaranteed pass; skip opportunistic.
    return;
  }
  startContainers(queuedOpportunisticContainers.values(), false);
}
/**
 * Tries to start the given containers in iteration order, removing each
 * one from the collection as it starts, and stops at the first container
 * that cannot be started.
 *
 * @param containersToBeStarted candidates; started entries are removed
 * @param force passed through to {@code tryStartContainer}
 * @return true if every candidate was started, false if one could not be
 */
private boolean startContainers(
    Collection<Container> containersToBeStarted, boolean force) {
  for (Iterator<Container> candidates = containersToBeStarted.iterator();
      candidates.hasNext();) {
    final Container next = candidates.next();
    if (!tryStartContainer(next, force)) {
      return false;
    }
    candidates.remove();
  }
  return true;
}
/**
 * Starts the container if {@code force} is set or resources are available
 * for it.
 *
 * @param container the container to start
 * @param force when true, the resource check is skipped
 * @return true if the container was started, false otherwise
 */
private boolean tryStartContainer(Container container, boolean force) {
  // With force == true the resource check is never evaluated.
  if (!force && !resourceAvailableToStartContainer(container)) {
    return false;
  }
  startContainer(container);
  return true;
}
发送LaunchContainer类型的ContainersLauncherEvent
/**
 * Registers the container as running (unless it is already registered),
 * updates the opportunistic-container metric when applicable, and asks the
 * container to send its launch event.
 *
 * @param container the container to start
 */
private void startContainer(Container container) {
  LOG.info("Starting container [" + container.getContainerId()+ "]");

  // An already registered container (the recovery case) is neither added
  // again nor counted again by the utilization tracker.
  final boolean alreadyTracked =
      runningContainers.containsKey(container.getContainerId());
  if (!alreadyTracked) {
    runningContainers.put(container.getContainerId(), container);
    this.utilizationTracker.addContainerResources(container);
  }

  final boolean opportunistic =
      container.getContainerTokenIdentifier().getExecutionType()
          == ExecutionType.OPPORTUNISTIC;
  if (opportunistic) {
    this.metrics.startOpportunisticContainer(container.getResource());
  }

  // Send the launch event for this container.
  container.sendLaunchEvent();
}