doris计算分析

exec_plan_fragment

fe:

1.fe-core/src/main/java/org/apache/doris/qe/Coordinator.java

public void exec() throws Exception {

sendFragment

execState.execRemoteFragmentAsync( 请求be

如果某一个节点执行失败了,会取消所有正在执行的plan fragment。

 if (code != TStatusCode.OK) {

cancelInternal(InternalService.PPlanFragmentCancelReason.INTERNAL_ERROR);

cancelRemoteFragmentsAsync

backendExecState.cancelFragmentInstance

 service FrontendService {
718     TGetDbsResult getDbNames(1:TGetDbsParams params)
719     TGetTablesResult getTableNames(1:TGetTablesParams params)
720     TDescribeTableResult describeTable(1:TDescribeTableParams params)
721     TShowVariableResult showVariables(1:TShowVariableRequest params)

           // be在plan_fragment_executor.cpp中执行计划时,上报执行状态
722     TReportExecStatusResult reportExecStatus(1:TReportExecStatusParams params)
723     // be在完成任务(比如create table语句)后,发送finishTask rpc通知fe
724     MasterService.TMasterResult finishTask(1:MasterService.TFinishTaskRequest request)

           // be每隔一段时间向fe上报当前正在处理的任务等状态
725     MasterService.TMasterResult report(1:MasterService.TReportRequest request)

be:

1.exec_plan_fragment rpc接口,_exec_env->fragment_mgr()->exec_plan_fragment( backend_service.cpp
2.runtime/fragment_mgr.cpp执行exec_plan_fragment
 std::shared_ptr<FragmentExecState> exec_state;
exec_state->prepare(params));
FragmentExecState::prepare(
_executor.prepare(params);
PlanFragmentExecutor _executor;
3.runtime/plan_fragment_executor.cpp
ExecNode::create_tree(

finish_task

fe: 将任务从AgentTaskQueue中移除

文件名master/MasterImpl.java

public TMasterResult finishTask(TFinishTaskRequest request) {

finishCreateReplica

AgentTaskQueue.removeTask(    将任务从 AgentTaskQueue移除

be:完成任务发送finishTask rpc请求

文件名be/src/agent/task_worker_pool.cpp 

void TaskWorkerPool::_finish_task(const TFinishTaskRequest& finish_task_request) {     

submit_tasks

service BackendService {

AgentService.TAgentResult submit_tasks(1:list<AgentService.TAgentTaskRequest> tasks);

be:  将task放入threadpool中处理

be/src/agent/agent_server.cpp  task_worker_pool.cpp     

void TaskWorkerPool::submit_task(const TAgentTaskRequest& task) {   

if (_register_task_info(task_type, signature)) { 添加当前be正在处理的任务

_finish_task(finish_task_request); 发送finishTask rpc给fe
_remove_task_info(agent_task_req.task_type, agent_task_req.signature); 将任务从be中移除

fe:

report

be:每隔interval时间上报be当前正在处理的任务

be/src/agent/agent_server.cpp   task_worker_pool.cpp

void TaskWorkerPool::_report_task_worker_thread_callback() {   

_handle_report(request, ReportType::TASK);        

} while (!_stop_background_threads_latch.wait_for(
            MonoDelta::FromSeconds(config::report_task_interval_seconds)));   

fe:

rpc 接口:src/main/java/org/apache/doris/master/MasterImpl.java:   reportHandler.handleReport(request);

ReportHandler.java:    public TMasterResult handleReport(TReportRequest request) throws TException {

 ReportTask reportTask = new ReportTask(beId, tasks, disks, tablets, reportVersion);
        try {

             增加任务
            putToQueue(reportTask);

取出任务:

   @Override
    protected void runOneCycle() {
        while (true) {
            ReportTask task = null;
            try {
                task = reportQueue.take();
                task.exec();
            } catch (InterruptedException e) {
                LOG.warn("got interupted exception when executing report", e);
            }
        }

执行任务

protected void exec() {
            if (tasks != null) {
                ReportHandler.taskReport(beId, tasks);
            }

List<AgentTask> diffTasks = AgentTaskQueue.getDiffTasks(backendId, runningTasks);

diffTasks是fe中存在、而be节点中不存在的任务

 if (task.shouldResend(taskReportTime)) { // 重做任务
                batchTask.addTask(task);
            }

cancel_plan_fragment:

fe:

be:

runtime/fragment_mgr.cpp

exec_state->cancel(reason, msg);

Status FragmentExecState::cancel(const PPlanFragmentCancelReason& reason, const std::string& msg) {

_executor.cancel(reason, msg); // executor执行cancel

runtime/plan_fragment_executor.cpp                                                                             

void PlanFragmentExecutor::cancel(const PPlanFragmentCancelReason& reason, const std::string& msg) {

        env->stream_mgr()->cancel(id);
        env->result_mgr()->cancel(id);

runtime/data_stream_mgr.cpp

void DataStreamMgr::cancel(const TUniqueId& fragment_instance_id) {

for (auto& it : recvrs) {
        it->cancel_stream();
    }

runtime/data_stream_recvr.cc

void DataStreamRecvr::cancel_stream() {
    for (int i = 0; i < _sender_queues.size(); ++i) {
        _sender_queues[i]->cancel();
    }
}

DataStreamRecvr::SenderQueue::cancel() { _is_cancelled=true; 标记取消状态

Status DataStreamRecvr::SenderQueue::get_batch(RowBatch** next_batch) {

if (_is_cancelled) {
        return Status::Cancelled("Cancelled");
    }

reportExecStatus:

runtime/fragment_mgr.cpp 上报执行心跳

void FragmentExecState::coordinator_callback(const Status& status, RuntimeProfile* profile,
bool done) { 在该回调函数中向fe上报执行状态

coord->reportExecStatus(res, params);

初始化FragmentExecState对象会设置callback

FragmentExecState::FragmentExecState(const TUniqueId& query_id,
                                     const TUniqueId& fragment_instance_id, int backend_num,
                                     ExecEnv* exec_env, const TNetworkAddress& coord_addr)
        : _query_id(query_id),
          _fragment_instance_id(fragment_instance_id),
          _backend_num(backend_num),
          _exec_env(exec_env),
          _coord_addr(coord_addr),
          _executor(exec_env, std::bind<void>(std::mem_fn(&FragmentExecState::coordinator_callback),
                                              this, std::placeholders::_1, std::placeholders::_2,
                                              std::placeholders::_3)),

plan_fragment_executor执行的时候会上报

void PlanFragmentExecutor::report_profile() { 上报exec正在执行心跳

send_report(false);

  • 1
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 1
    评论
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值