MapReduce 源码分析之默认的任务调度器——JobQueueTaskScheduler

四.默认的任务调度器是JobQueueTaskScheduler

分配任务:

默认的任务调度器是 JobQueueTaskScheduler,它通过 assignTasks 方法分配任务。这个方法稍有些复杂,接下来逐步分析。

---------------------------------------------------------------------------------------------------------------

@Override

publicsynchronized List<Task> assignTasks(TaskTrackerStatustaskTracker)

throwsIOException {


ClusterStatusclusterStatus = taskTrackerManager.getClusterStatus();

finalint numTaskTrackers = clusterStatus.getTaskTrackers();

finalint clusterMapCapacity = clusterStatus.getMaxMapTasks();

finalint clusterReduceCapacity = clusterStatus.getMaxReduceTasks();


Collection<JobInProgress>jobQueue =

jobQueueJobInProgressListener.getJobQueue();


//

//Get map + reduce counts for the current tracker.

//getMaxMapTasks()方法是Getthe maximum concurrent(并发的,同时的)tasks for //this node.

//countMapTasks()方法是Returnthe current MapTask count

finalint trackerMapCapacity = taskTracker.getMaxMapTasks();

finalint trackerReduceCapacity = taskTracker.getMaxReduceTasks();

finalint trackerRunningMaps = taskTracker.countMapTasks();

finalint trackerRunningReduces = taskTracker.countReduceTasks();


//Assigned tasks

List<Task>assignedTasks = new ArrayList<Task>();


//

//Compute (running + pending) map and reduce task numbers across pool

//计算剩余的mapreduce的工作量

intremainingReduceLoad = 0;

intremainingMapLoad = 0;

synchronized(jobQueue) {

for(JobInProgress job : jobQueue) {

if(job.getStatus().getRunState() == JobStatus.RUNNING) {

remainingMapLoad +=(job.desiredMaps() - job.finishedMaps());

if(job.scheduleReduces()) {

remainingReduceLoad +=

(job.desiredReduces() -job.finishedReduces());

}

}

}

}


//Compute the 'load factor' for maps and reduces

//计算剩余的map任务量占整个集群全部运行map任务能力的百分比

doublemapLoadFactor = 0.0;

if(clusterMapCapacity > 0) {

mapLoadFactor =(double)remainingMapLoad / clusterMapCapacity;

}

//同上,计算reduce任务。。。

doublereduceLoadFactor = 0.0;

if(clusterReduceCapacity > 0) {

reduceLoadFactor =(double)remainingReduceLoad / clusterReduceCapacity;

}

//

//In the below steps, we allocate first map tasks (if appropriate),

//and then reduce tasks if appropriate. We go through all jobs

//in order of job arrival; jobs only get serviced if their

//predecessors(前任,前辈)are serviced, too.

//


//

//We assign tasks to the current taskTracker if the given machine

//has a workload that's less than the maximum load of that kind of

//task.我们分配给的tasktracker,这些tasktracker的工作量(workload)比同种类型

//任务的最大载入量要小

//However, if the cluster is close to getting loaded i.e. we don't

//have enough _padding_ for speculative executions etc., we only

//schedule the "highest priority" task i.e. the task from thejob

//with the highest priority.

//

//计算该tasktrackermap执行能力,就是它能执行多个map任务,Math.ceilx//是比x大的最小数

finalint trackerCurrentMapCapacity =

Math.min((int)Math.ceil(mapLoadFactor* trackerMapCapacity),

trackerMapCapacity);

//还剩下的map执行能力

intavailableMapSlots = trackerCurrentMapCapacity - trackerRunningMaps;

booleanexceededMapPadding = false;

if(availableMapSlots > 0) {

exceededMapPadding =

exceededPadding(true,clusterStatus, trackerMapCapacity);

}

intnumLocalMaps = 0;

intnumNonLocalMaps = 0;

scheduleMaps:

for(int i=0; i < availableMapSlots; ++i) {

synchronized(jobQueue) {

for(JobInProgress job : jobQueue) {

if(job.getStatus().getRunState() != JobStatus.RUNNING) {

continue;

}


Taskt = null;

//Try to schedule a node-local or rack-local Map task

t=

job.obtainNewLocalMapTask(taskTracker,numTaskTrackers,

taskTrackerManager.getNumberOfUniqueHosts());

if(t != null) {

assignedTasks.add(t);

++numLocalMaps;

//Don't assign map tasks to the hilt!

//Leave some free slots in the cluster for future task-failures,

//speculative tasks etc. beyond the highest priority job

if(exceededMapPadding) {

break scheduleMaps;

}

//Try all jobs again for the next Map task

break;

}

//Try to schedule a node-local or rack-local Map task

t=

job.obtainNewNonLocalMapTask(taskTracker,numTaskTrackers,

taskTrackerManager.getNumberOfUniqueHosts());

if(t != null) {

assignedTasks.add(t);

++numNonLocalMaps;

//We assign at most 1 off-switch or speculative task

//This is to prevent TaskTrackers from stealing local-tasks

//from other TaskTrackers.

breakscheduleMaps;

}

}

}

}

intassignedMaps = assignedTasks.size();


//

//Same thing, but for reduce tasks

//However we _never_ assign more than 1 reduce task perheartbeat

//

finalint trackerCurrentReduceCapacity =

Math.min((int)Math.ceil(reduceLoadFactor* trackerReduceCapacity),

trackerReduceCapacity);

finalint availableReduceSlots =

Math.min((trackerCurrentReduceCapacity- trackerRunningReduces), 1);

booleanexceededReducePadding = false;

if(availableReduceSlots > 0) {

exceededReducePadding =exceededPadding(false, clusterStatus,

trackerReduceCapacity);

synchronized(jobQueue) {

for(JobInProgress job : jobQueue) {

if(job.getStatus().getRunState() != JobStatus.RUNNING ||

job.numReduceTasks == 0) {

continue;

}


Taskt =

job.obtainNewReduceTask(taskTracker,numTaskTrackers,

taskTrackerManager.getNumberOfUniqueHosts()

);

if(t != null) {

assignedTasks.add(t);

break;

}

//Don't assign reduce tasks to the hilt!

//Leave some free slots in the cluster for future task-failures,

//speculative tasks etc. beyond the highest priority job

if(exceededReducePadding) {

break;

}

}

}

}

if(LOG.isDebugEnabled()) {

LOG.debug("Taskassignments for " + taskTracker.getTrackerName() + " -->" +

"[" + mapLoadFactor +", " + trackerMapCapacity + ", " +

trackerCurrentMapCapacity + "," + trackerRunningMaps + "] -> [" +

(trackerCurrentMapCapacity -trackerRunningMaps) + ", " +

assignedMaps + " (" +numLocalMaps + ", " + numNonLocalMaps +

")] [" +reduceLoadFactor + ", " + trackerReduceCapacity + ", "+

trackerCurrentReduceCapacity +"," + trackerRunningReduces +

"]-> [" + (trackerCurrentReduceCapacity -trackerRunningReduces) +

"," + (assignedTasks.size()-assignedMaps) + "]");

}


returnassignedTasks;

}


JobInProgress 的 obtainNewNonLocalMapTask 方法用来分配 map task,它主要调用 findNewMapTask 方法,根据 TaskTracker 所在的 Node,在 nonRunningMapCache 中查找可运行的 TaskInProgress。

Reduce同上。

这个调度器还有几个地方(如 exceededPadding 的计算)值得进一步研究,目前理解尚有模糊之处。

hadoop还有公平调度器和容量调度器,在hadoop实战中均有讲述!


评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值