exec_plan_fragment
fe:
1.fe-core/src/main/java/org/apache/doris/qe/Coordinator.java
public void exec() throws Exception {
sendFragment
execState.execRemoteFragmentAsync( 请求be
如果某一个节点执行失败了,会取消所有正在执行的plan fragment。
if (code != TStatusCode.OK) {
cancelInternal(InternalService.PPlanFragmentCancelReason.INTERNAL_ERROR);
cancelRemoteFragmentsAsync
backendExecState.cancelFragmentInstance
service FrontendService {
718 TGetDbsResult getDbNames(1:TGetDbsParams params)
719 TGetTablesResult getTableNames(1:TGetTablesParams params)
720 TDescribeTableResult describeTable(1:TDescribeTableParams params)
721 TShowVariableResult showVariables(1:TShowVariableRequest params)
// be在plan_fragment_executor.cpp中执行计划的时候,上报执行状态
722 TReportExecStatusResult reportExecStatus(1:TReportExecStatusParams params)
723 // be在完成任务(比如create table语句)后,发送finishTask rpc请求给fe
724 MasterService.TMasterResult finishTask(1:MasterService.TFinishTaskRequest request)
// be每隔一段时间向fe上报自身状态(任务、磁盘、tablet等)
725 MasterService.TMasterResult report(1:MasterService.TReportRequest request)
be:
1.exec_plan_fragment rpc接口,_exec_env->fragment_mgr()->exec_plan_fragment( backend_service.cpp
2.runtime/fragment_mgr.cpp执行exec_plan_fragment
std::shared_ptr<FragmentExecState> exec_state;
exec_state->prepare(params));
FragmentExecState::prepare(
_executor.prepare(params);
PlanFragmentExecutor _executor;
3.runtime/plan_fragment_executor.cpp
ExecNode::create_tree(
finish_task
fe: 将任务从AgentTaskQueue中移除
文件名master/MasterImpl.java
public TMasterResult finishTask(TFinishTaskRequest request) {
finishCreateReplica
AgentTaskQueue.removeTask( 将任务从 AgentTaskQueue移除
be:完成任务发送finishTask rpc请求
文件名be/src/agent/task_worker_pool.cpp
void TaskWorkerPool::_finish_task(const TFinishTaskRequest& finish_task_request) {
submit_tasks
service BackendService {
AgentService.TAgentResult submit_tasks(1:list<AgentService.TAgentTaskRequest> tasks);
be: 将task放入threadpool中处理
be/src/agent/agent_server.cpp task_worker_pool.cpp
void TaskWorkerPool::submit_task(const TAgentTaskRequest& task) {
if (_register_task_info(task_type, signature)) { 添加当前be正在处理的任务
_finish_task(finish_task_request); 发送finishTask rpc给fe
_remove_task_info(agent_task_req.task_type, agent_task_req.signature); 将任务从be中移除
fe:
report
be:每隔interval时间上报be当前正在处理的任务
be/src/agent/agent_server.cpp task_worker_pool.cpp
void TaskWorkerPool::_report_task_worker_thread_callback() {
_handle_report(request, ReportType::TASK);
} while (!_stop_background_threads_latch.wait_for(
MonoDelta::FromSeconds(config::report_task_interval_seconds)));
fe:
rpc 接口:src/main/java/org/apache/doris/master/MasterImpl.java: reportHandler.handleReport(request);
ReportHandler.java: public TMasterResult handleReport(TReportRequest request) throws TException {
ReportTask reportTask = new ReportTask(beId, tasks, disks, tablets, reportVersion);
try {
增加任务
putToQueue(reportTask);
取出任务:
@Override
protected void runOneCycle() {
while (true) {
ReportTask task = null;
try {
task = reportQueue.take();
task.exec();
} catch (InterruptedException e) {
LOG.warn("got interupted exception when executing report", e);
}
}
执行任务
protected void exec() {
if (tasks != null) {
ReportHandler.taskReport(beId, tasks);
}
List<AgentTask> diffTasks = AgentTaskQueue.getDiffTasks(backendId, runningTasks);
diffTasks是fe中存在、而be节点上报的运行任务中不存在的任务
if (task.shouldResend(taskReportTime)) { // 重做任务
batchTask.addTask(task);
}
cancel_plan_fragment:
fe:
be:
runtime/fragment_mgr.cpp
exec_state->cancel(reason, msg);
Status FragmentExecState::cancel(const PPlanFragmentCancelReason& reason, const std::string& msg) {
_executor.cancel(reason, msg); // executor执行cancel
runtime/plan_fragment_executor.cpp
void PlanFragmentExecutor::cancel(const PPlanFragmentCancelReason& reason, const std::string& msg) {
env->stream_mgr()->cancel(id);
env->result_mgr()->cancel(id);
runtime/data_stream_mgr.cpp
void DataStreamMgr::cancel(const TUniqueId& fragment_instance_id) {
for (auto& it : recvrs) {
it->cancel_stream();
}
runtime/data_stream_recvr.cc
void DataStreamRecvr::cancel_stream() {
for (int i = 0; i < _sender_queues.size(); ++i) {
_sender_queues[i]->cancel();
}
}
DataStreamRecvr::SenderQueue::cancel() { _is_cancelled=true; 标记取消状态
Status DataStreamRecvr::SenderQueue::get_batch(RowBatch** next_batch) {
if (_is_cancelled) {
return Status::Cancelled("Cancelled");
}
reportExecStatus:
runtime/fragment_mgr.cpp 上报执行心跳
void FragmentExecState::coordinator_callback(const Status& status, RuntimeProfile* profile,
bool done) {中上报
coord->reportExecStatus(res, params);
初始化FragmentExecState对象会设置callback
FragmentExecState::FragmentExecState(const TUniqueId& query_id,
const TUniqueId& fragment_instance_id, int backend_num,
ExecEnv* exec_env, const TNetworkAddress& coord_addr)
: _query_id(query_id),
_fragment_instance_id(fragment_instance_id),
_backend_num(backend_num),
_exec_env(exec_env),
_coord_addr(coord_addr),
_executor(exec_env, std::bind<void>(std::mem_fn(&FragmentExecState::coordinator_callback),
this, std::placeholders::_1, std::placeholders::_2,
std::placeholders::_3)),
plan_fragment_executor执行的时候会上报
void PlanFragmentExecutor::report_profile() { 上报exec正在执行心跳
send_report(false);