任务领取
每个 jstorm 的工作机器会定时地扫描 zookeeper 上任务分配的目录,看是否有分配给自己的任务;如果有,就把对应的信息写到本地机器的指定目录中。这个工作主要是由 SyncSupervisorEvent 线程中的 run 方法来完成的,我们主要来分析这个函数。在此之前,我们先来看下这个类的成员,便于后面的分析:
// Unique id that identifies this supervisor; because a machine runs only one
// supervisor, the same id is also used to identify the machine.
private String supervisorId;
private EventManager processEventManager;
// Handed to the EventManagerZkPusher created at the start of run().
private EventManager syncSupEventManager;
// State of the storm cluster (interface for operating on the data kept in ZK).
private StormClusterState stormClusterState;
// Interface to the locally persisted state.
private LocalState localState;
run方法:
/**
 * One synchronization pass of the supervisor: reads the assignments from ZK,
 * stores the assignments that belong to this supervisor in the local state,
 * downloads the code of the topologies that need it, removes topologies that
 * are no longer needed, and finally runs syncProcesses.
 *
 * Any exception raised during the pass is logged and swallowed (the rethrow is
 * commented out), in which case the heartbeat trigger is not updated.
 */
@Override
public void run() {
LOG.debug("Synchronizing supervisor, interval seconds:" + TimeUtils.time_delta(lastTime));
lastTime = TimeUtils.current_time_secs();
try {
// Callback passed to the ZK read calls below; built from this event and syncSupEventManager.
RunnableCallback syncCallback = new EventManagerZkPusher(this, syncSupEventManager);
/**
 * Step 1: get all assignments and register /ZK-dir/assignment and every assignment watch
 *
 */
// Fetch every assignment in the cluster from the ZK directory: topology_id --> assignment
Map<String, Assignment> assignments = Cluster.get_all_assignment(stormClusterState, syncCallback);
LOG.debug("Get all assignments " + assignments);
/**
 * Step 2: get topologyIds list from STORM-LOCAL-DIR/supervisor/stormdist/
 */
// From the local directory, list all topologies already present on this machine
List<String> downloadedTopologyIds = StormConfig.get_supervisor_toplogy_list(conf);
LOG.debug("Downloaded storm ids: " + downloadedTopologyIds);
/**
 * Step 3: get <port,LocalAssignments> from ZK local node's assignment
 */
// Using the ZK data, collect all worker processes assigned to this machine (by iterating over
// every worker of every assignment and checking whether its node id equals supervisorId)
Map<Integer, LocalAssignment> zkAssignment = getLocalAssign(stormClusterState, supervisorId, assignments);
Map<Integer, LocalAssignment> localAssignment;
Set<String> updateTopologys;
/**
 * Step 4: write local assignment to LocalState
 */
try {
LOG.debug("Writing local assignment " + zkAssignment);
// Read the previously stored assignment BEFORE overwriting it, so old and new can be compared below
localAssignment = (Map<Integer, LocalAssignment>) localState.get(Common.LS_LOCAL_ASSIGNMENTS);
if (localAssignment == null) {
localAssignment = new HashMap<Integer, LocalAssignment>();
}
// Update the stored state with the assignment read from ZK
localState.put(Common.LS_LOCAL_ASSIGNMENTS, zkAssignment);
// Compare old and new state to find the topologies that need updating (decided by assignment timestamp)
updateTopologys = getUpdateTopologys(localAssignment, zkAssignment, assignments);
Set<String> reDownloadTopologys = getNeedReDownloadTopologys(localAssignment);
// Topologies that need to be re-downloaded are added to the update set as well
if (reDownloadTopologys != null) {
updateTopologys.addAll(reDownloadTopologys);
}
} catch (IOException e) {
LOG.error("put LS_LOCAL_ASSIGNMENTS " + zkAssignment + " of localState failed");
// Rethrown to the outer catch, which logs the exception itself
throw e;
}
/**
 * Step 5: download code from ZK
 */
Map<String, String> topologyCodes = getTopologyCodeLocations(assignments, supervisorId);
// downloadFailedTopologyIds which can't finished download binary from nimbus
Set<String> downloadFailedTopologyIds = new HashSet<String>();
downloadTopology(topologyCodes, downloadedTopologyIds, updateTopologys, assignments, downloadFailedTopologyIds);
/**
 * Step 6: remove any downloaded useless topology
 */
// Remove useless topologies (still present in the local path but no longer in the code locations)
removeUselessTopology(topologyCodes, downloadedTopologyIds);
/**
 * Step 7: push syncProcesses Event
 */
// processEventManager.add(syncProcesses);
// syncProcesses is invoked directly here rather than queued on processEventManager
syncProcesses.run(zkAssignment, downloadFailedTopologyIds);
// If everything is OK, set the trigger to update heartbeat of
// supervisor
heartbeat.updateHbTrigger(true);
} catch (Exception e) {
// Failure is logged and swallowed; the heartbeat trigger above is not reached in this case
LOG.error("Failed to Sync Supervisor", e);
// throw new RuntimeException(e);
}
}
从zookeeper获取所有的任务
先看代码
public static Map<String, Assignment> get_all_assignment(StormClusterState stormClusterState, RunnableCallback callback) throws Exception {
Map<String, Assignment> ret = new HashMap<String, Assignment>();
// get /assignments {topology_id}
//获取zookeeper assignments目录下所有任务
List<String> assignments = stormClusterState.assignments(callback);
if (assignments == null) {
LOG.debug("No assignment of ZK");
return ret;
}
//对于每个任务,获取任务的详细信息
for (String topology_id : assignments) {
Assignment assignment = stormClusterState.assignment_info(topology_id, callback);
if (assignment == null</