文章目录
dolphinscheduler的DAG流程
简要说明
此流程主要看org.apache.dolphinscheduler.server.master.runner.WorkflowExecuteRunnable#buildFlowDag
dolphinscheduler 中有很多以 _log 结尾的表。笔者认为,之所以需要这些表,是因为在生成流程实例(或任务实例)时,必须记录生成那一刻所使用的定义与参数。
如果没有这些 _log 表,那么定义表中只会保留最新的一条记录。
例如:在某一时刻生成了一个流程实例,此时该流程还在等待执行;如果这时对流程定义进行了修改,而又没有 _log 表,那么执行时查出的就是最新参数,而不是生成流程实例时的参数。所以我们需要 _log 表按版本保存历史记录。
buildFlowDag
/**
 * Builds the DAG of the current process instance and stores it in {@code dag}.
 *
 * <p>The process definition is looked up by the definition code and version recorded on the
 * process instance (not by "latest"), and the task relations are looked up by the code and
 * version of that definition. The resulting task nodes are filtered by the start/recovery
 * nodes taken from the command parameter before the graph is built.
 *
 * <p>If {@code generateFlowDag} returns {@code null}, an error is logged and {@code dag} is
 * left untouched.
 *
 * @throws Exception declared by the signature; propagated from the calls made in this method
 */
private void buildFlowDag() throws Exception {
    // Resolve the definition that belongs to this instance (code + version) and attach it.
    processDefinition = processService.findProcessDefinition(
            processInstance.getProcessDefinitionCode(),
            processInstance.getProcessDefinitionVersion());
    processInstance.setProcessDefinition(processDefinition);

    // Task instances to recover, derived from the command parameter.
    // NOTE(review): presumably resolved from the start-node id list inside the command param - confirm.
    List<TaskInstance> tasksToRecover = getRecoverTaskInstanceList(processInstance.getCommandParam());

    // Relations between tasks for exactly this definition code and version.
    List<ProcessTaskRelation> relations =
            processService.findRelationByCode(processDefinition.getCode(), processDefinition.getVersion());

    // Task definition logs referenced by those relations.
    List<TaskDefinitionLog> definitionLogs = processService.getTaskDefineLogListByRelation(relations);

    // Combine relations and definitions into task nodes.
    List<TaskNode> allNodes = processService.transformTask(relations, definitionLogs);

    // Rebuild the forbidden-task index: every node flagged as forbidden, keyed by its code.
    forbiddenTaskMap.clear();
    for (TaskNode candidate : allNodes) {
        if (candidate.isForbidden()) {
            forbiddenTaskMap.put(candidate.getCode(), candidate);
        }
    }

    // Inputs that restrict which part of the workflow is turned into the graph.
    List<String> recoveryCodes = getRecoveryNodeCodeList(tasksToRecover);
    List<String> startNames = parseStartNodeName(processInstance.getCommandParam());

    // Nodes and edges that will make up the graph.
    ProcessDag processDag = generateFlowDag(allNodes, startNames, recoveryCodes,
            processInstance.getTaskDependType());

    if (processDag != null) {
        // Materialise the graph for this process instance.
        dag = DagHelper.buildDagGraph(processDag);
        logger.info("Build dag success, dag: {}", dag);
    } else {
        logger.error("processDag is null");
    }
}
将入参转换成DAG执行所需要的必要元素
//笔者认为,此方法的作用是获取所有需要执行的任务节点,以及这些任务节点之间的关系
/**
 * Assembles the {@code ProcessDag} (nodes plus edges) that is later turned into a graph.
 *
 * @param totalTaskNodeList    all task nodes of the workflow
 * @param startNodeNameList    start nodes passed on to {@code generateFlowNodeListByStartNode}
 * @param recoveryNodeCodeList recovery node codes passed on to {@code generateFlowNodeListByStartNode}
 * @param depNodeType          dependency type passed on to {@code generateFlowNodeListByStartNode}
 * @return a {@code ProcessDag} holding the selected nodes and their relations, or {@code null}
 *         when no node is selected
 * @throws Exception declared by the signature; propagated from the helper calls
 */
public static ProcessDag generateFlowDag(List<TaskNode> totalTaskNodeList,
                                         List<String> startNodeNameList,
                                         List<String> recoveryNodeCodeList,
                                         TaskDependType depNodeType) throws Exception {
    // Select the nodes that take part in this run.
    // NOTE(review): the exact selection rules live in generateFlowNodeListByStartNode,
    // which is not shown here - confirm before relying on them.
    final List<TaskNode> selectedNodes = generateFlowNodeListByStartNode(
            totalTaskNodeList, startNodeNameList, recoveryNodeCodeList, depNodeType);

    if (!selectedNodes.isEmpty()) {
        // Derive the relations (edges) from the selected nodes.
        final List<TaskNodeRelation> relations = generateRelationListByFlowNodes(selectedNodes);

        final ProcessDag result = new ProcessDag();
        result.setEdges(relations);
        result.setNodes(selectedNodes);
        return result;
    }

    // Nothing to run: callers are expected to handle the null result.
    return null;
}
DAG 参数概览
package org.apache.dolphinscheduler.common.graph;
/**
 * Graph container used for DAG analysis.
 *
 * @param <Node>     type identifying a node
 * @param <NodeInfo> type of the descriptive information attached to a node
 * @param <EdgeInfo> type of the descriptive information attached to an edge
 */
public class DAG<Node, NodeInfo, EdgeInfo> {

    private static final Logger logger = LoggerFactory.getLogger(DAG.class);

    private final ReadWriteLock lock = new ReentrantReadWriteLock();

    /**
     * Node map: key is the node, value is the node information.
     * When the graph is produced by {@code DagHelper.buildDagGraph}, the key is the task code
     * as a string and the value is the corresponding {@code TaskNode}.
     */
    private final Map<Node, NodeInfo> nodesMap = new HashMap<>();

    /**
     * Edge map: key is the origin node; value is a map from each destination node to the
     * information of the edge leading to it.
     */
    private final Map<Node, Map<Node, EdgeInfo>> edgesMap = new HashMap<>();

    /**
     * Reversed edge map: key is the destination node; value is a map from each origin node
     * to the information of the edge coming from it.
     */
    private final Map<Node, Map<Node, EdgeInfo>> reverseEdgesMap = new HashMap<>();

    /**
     * Creates an empty graph; all three maps start out empty.
     */
    public DAG() {
    }
}
DAG生成逻辑
/**
 * Turns a {@code ProcessDag} into a {@code DAG} keyed by task code.
 *
 * @param processDag holder of the nodes and edges to insert
 * @return a new graph containing every node and every edge of {@code processDag}
 */
public static DAG<String, TaskNode, TaskNodeRelation> buildDagGraph(ProcessDag processDag) {
    DAG<String, TaskNode, TaskNodeRelation> graph = new DAG<>();

    // Vertices: each task node is registered under its code rendered as a string.
    if (CollectionUtils.isNotEmpty(processDag.getNodes())) {
        processDag.getNodes().forEach(vertex -> graph.addNode(Long.toString(vertex.getCode()), vertex));
    }

    // Edges: one edge per relation, from its start node to its end node.
    if (CollectionUtils.isNotEmpty(processDag.getEdges())) {
        processDag.getEdges().forEach(relation -> graph.addEdge(relation.getStartNode(), relation.getEndNode()));
    }

    return graph;
}
生成DAG中的edgesMap 与reverseEdgesMap
/**
 * Adds the edge {@code fromNode -> toNode} to both the forward and the reversed edge map.
 * The whole operation runs while holding the write lock.
 *
 * @param fromNode   origin node of the edge
 * @param toNode     destination node of the edge
 * @param edge       information attached to the edge
 * @param createNode forwarded to {@code isLegalAddEdge}
 * @return {@code true} if the edge was added, {@code false} if {@code isLegalAddEdge} rejected it
 */
public boolean addEdge(Node fromNode, Node toNode, EdgeInfo edge, boolean createNode) {
    lock.writeLock().lock();
    try {
        // NOTE(review): isLegalAddEdge is not shown here; judging by the log message below it
        // guards against edges that would introduce a cycle - confirm its full rule set.
        final boolean legal = isLegalAddEdge(fromNode, toNode, createNode);
        if (legal) {
            // Make sure both endpoints are present before wiring them together.
            addNodeIfAbsent(fromNode, null);
            addNodeIfAbsent(toNode, null);

            // Record the edge in both directions.
            addEdge(fromNode, toNode, edge, edgesMap);
            addEdge(toNode, fromNode, edge, reverseEdgesMap);
            return true;
        }

        logger.error("serious error: add edge({} -> {}) is invalid, cause cycle!", fromNode, toNode);
        return false;
    } finally {
        lock.writeLock().unlock();
    }
}
至此 buildFlowDag 方法解析结束,生成的结果具体可参考上文 DAG 类中的三个成员:nodesMap、edgesMap 和 reverseEdgesMap。