【数据库篇】MySQL源码整体分析

最新推荐文章于 2024-09-19 22:35:09 发布

苒翼

最新推荐文章于 2024-09-19 22:35:09 发布

阅读量3.7k

点赞数 6

本文链接：https://blog.csdn.net/zxz1580977728/article/details/109382948

版权

数据库专栏收录该内容

5 篇文章 4 订阅

订阅专栏

本文主要分析8.0.22Mysql的SELECT执行流程，源码来自：https://github.com/mysql/mysql-server。本文概括性的分析了Mysql通过网络请求获取客户端的SQL，然后将SQL转换成LEX语法树，再转换成具体执行，最后从内存或磁盘中加载PAGE得到最终想要的RECORD。

一、先介绍Mysql入口及网络请求

1.1、mysql的入口

//main.cc
int main(int argc, char **argv) {
    return mysqld_main(argc, argv);
}

1.2、mysql的初始化，my_init应该是初始化参数配置等，mysql_thread_create创建线程处理网络请求

//mysqld.cc
int mysqld_main(int argc, char **argv){
    my_init() // init my_sys library & pthreads
    init_common_variables();
    setup_conn_event_handler_threads();
    ->int error = mysql_thread_create(key_thread_handle_con_sockets, &hThread, &connection_attrib,socket_conn_event_handler, mysqld_socket_acceptor);
}

1.3、线程入口，循环监听并处理网络请求

//mysqld.cc
extern "C" void *socket_conn_event_handler(void *arg) {
  my_thread_init();
  Connection_acceptor<Mysqld_socket_listener> *conn_acceptor = static_cast<Connection_acceptor<Mysqld_socket_listener> *>(arg);
  conn_acceptor->connection_event_loop();
  decrement_handler_count();
  my_thread_end();
  return 0;
}

//connection_handler_manager.cc
void Connection_handler_manager::process_new_connection(
    Channel_info *channel_info) {
  if (connection_events_loop_aborted() ||
      !check_and_incr_conn_count(channel_info->is_admin_connection())) {
    channel_info->send_error_and_close_channel(ER_CON_COUNT_ERROR, 0, true);
    delete channel_info;
    return;
  }

  if (m_connection_handler->add_connection(channel_info)) {
    inc_aborted_connects();
    delete channel_info;
  }
}

1.4、do_command 处理客户端发送的具体请求

//connection_handler_manager.cc
void Connection_handler_manager::process_new_connection(Channel_info *channel_info) {
  if (connection_events_loop_aborted() || !check_and_incr_conn_count(channel_info->is_admin_connection())) {
    channel_info->send_error_and_close_channel(ER_CON_COUNT_ERROR, 0, true);
    delete channel_info;
    return;
  }

  if (m_connection_handler->add_connection(channel_info)) {
    inc_aborted_connects();
    delete channel_info;
  }
}

//connection_handler_one_thread.cc
bool One_thread_connection_handler::add_connection(Channel_info *channel_info) {
    while (thd_connection_alive(thd)) {
      if (do_command(thd)) break;
    }
}

二、将SQL转换成具体的LEX树

2.1、处理命令

//sql_parse.cc
bool do_command(THD *thd) {
    NET *net = nullptr;
    enum enum_server_command command;
    net = thd->get_protocol_classic()->get_net();
    net_new_transaction(net);
    thd->m_server_idle = true;
    rc = thd->get_protocol()->get_command(&com_data, &command);
    thd->m_server_idle = false;
   return_value = dispatch_command(thd, &com_data, command);
   thd->get_protocol_classic()->get_output_packet()->shrink(thd->variables.net_buffer_length);
}

2.2、根据用户请求信息的第一个字段表示这个请求类型

//sql_parse.cc
bool dispatch_command(THD *thd, const COM_DATA *com_data, enum enum_server_command command) {
    switch (command) {
        case COM_INIT_DB: {}
        case COM_STMT_CLOSE: {}
        //如果是查询语句
        case COM_QUERY: { 
            thd->m_digest = &thd->m_digest_state;
            thd->m_digest->reset(thd->m_token_array, max_digest_length);
            //从网络数据包中读取Query并存入thd->query
            alloc_query(thd, com_data->com_query.query,com_data->com_query.length)
            ->char *query = static_cast<char *>(thd->alloc(packet_length + 1));
            ->memcpy(query, packet, packet_length);
            ->thd->set_query(query, packet_length);
            //解析
            mysql_parse(thd, &parser_state);
        }
    }
}

2.3、据网上了解使用bison插件将sql转换成lex语法树，入口是MYSQLparse

//sql_parse.cc
void mysql_parse(THD *thd, Parser_state *parser_state) {
    lex_start(thd);
    //解析sql
    err = parse_sql(thd, parser_state, nullptr);
    ->bool mysql_parse_status = thd->sql_parser();
      //SQL parser function generated by YACC from sql_yacc.yy.
      ->extern int MYSQLparse(class THD * thd, class Parse_tree_root * *root); 
      ->MYSQLparse(this, &root)
      ->lex->make_sql_cmd(root)
        ->m_sql_cmd = parse_tree->make_cmd(thd);
          //忽略了一些逻辑，没有细究
          ->if (thd->lex->sql_command == SQLCOM_SELECT)
            ->return new (thd->mem_root) Sql_cmd_select(thd->lex->result);
          ->else
            ->return new (thd->mem_root) Sql_cmd_do(nullptr);
    //执行
    error = mysql_execute_command(thd, true);
}return new (thd->mem_root) Sql_cmd_do(nullptr);

2.4、lex->m_sql_cmd->execute(thd);开始执行具体查询

//sql_parse.cc
int mysql_execute_command(THD *thd, bool first_level) {
    LEX *const lex = thd->lex;
    SELECT_LEX *const select_lex = lex->select_lex;
    TABLE_LIST *const first_table = select_lex->get_table_list();
    TABLE_LIST *all_tables;
     if (!(lex->sql_command == SQLCOM_UPDATE_MULTI) && !(lex->sql_command == SQLCOM_SET_OPTION) && !(lex->sql_command == SQLCOM_DROP_TABLE && lex->drop_temporary && lex->drop_if_exists) && all_tables_not_ok(thd, all_tables)) {
        binlog_gtid_end_transaction(thd);
        return 0;
    }
    int ret = launch_hook_trans_begin(thd, all_tables);

    switch (lex->sql_command) {
        case SQLCOM_SHOW_TABLE_STATUS:
        case SQLCOM_SHOW_OPEN_TABLES:
        case SQLCOM_SHOW_VARIABLES:
        case SQLCOM_SHOW_STORAGE_ENGINES:
        case SQLCOM_SHOW_PROFILE: {}
        case SQLCOM_PREPARE: {}
        case SQLCOM_INSERT:
        case SQLCOM_REPLACE_SELECT:
        case SQLCOM_INSERT_SELECT:
        case SQLCOM_DELETE:
        case SQLCOM_UPDATE:
        case SQLCOM_LOAD: {
            res = lex->m_sql_cmd->execute(thd);
            break;
        }
        case SQLCOM_SHOW_PROCESSLIST:{
            mysqld_list_processes(thd, (thd->security_context()->check_access(PROCESS_ACL) ? NullS : thd->security_context()->priv_user().str), lex->verbose);
            break;
        }
        case SQLCOM_SHOW_ENGINE_LOGS: {
            res = ha_show_status(thd, lex->create_info->db_type, HA_ENGINE_LOGS);
            break;
        }
        case SQLCOM_COMMIT: {
            trans_commit(thd)
            thd->mdl_context.release_transactional_locks();
            my_ok(thd);
            break;
        }
        case SQLCOM_ROLLBACK: {}
        case SQLCOM_XA_START:
        case SQLCOM_SELECT:
        case SQLCOM_DO:
        case SQLCOM_DROP_SRS: {
            Enable_derived_merge_guard derived_merge_guard(thd, is_show_cmd_using_system_view(thd));
            res = lex->m_sql_cmd->execute(thd);
            break;
        }
    }
    close_thread_tables(thd);
    binlog_gtid_end_transaction(thd);  // finalize GTID life-cycle
    return res || thd->is_error();
}

三、根据LEX树创建具体的执行者并执行

3.1、重点有两个，一个是unit->optimize(thd, nullptr)根据查询词法转换成具体的执行者；另一个就是unit->execute(thd)执行查询

//sql_select.cc
bool Sql_cmd_dml::execute(THD *thd) {
    lex = thd->lex;
    result = lex->result;
    SELECT_LEX_UNIT *const unit = lex->unit;
    prepare(thd)
    res = execute_inner(thd);
    ->SELECT_LEX_UNIT *unit = lex->unit;
    ->unit->optimize(thd, nullptr)
    ->unit->execute(thd)
}

3.2、先简单分析下unit->optimize(thd, nullptr)，比如下面的代码，如果有查询条件中有索引则创建RefIterator查询B+Tree，如果有非索引查询条件通过FilterIterator处理

bool SELECT_LEX_UNIT::optimize(THD *thd, TABLE *materialize_destination) {
    //遍历平级查询
    for (SELECT_LEX *sl = first_select(); sl; sl = sl->next_select()) {
        thd->lex->set_current_select(sl);
        set_limit(thd, sl)
        //优化子查询
        sl->optimize(thd) //bool SELECT_LEX::optimize(THD *thd)
        ->JOIN *const join_local = new (thd->mem_root) JOIN(thd, this);
        //优化表关联
        ->join->optimize()
        //遍历子查询里面的表关联
        ->for (SELECT_LEX_UNIT *unit = first_inner_unit(); unit; unit = unit->next_unit())
          ->unit->optimize(thd, nullptr))  //优化关联
    }
    
    if (item != nullptr) {
       item->create_iterators(thd);  //item是Item_subselect
       ->indexsubquery_engine->create_iterators(thd);  //indexsubquery_engine是subselect_hash_sj_engine
         ->void subselect_hash_sj_engine::create_iterators(THD *thd)
           ->tab->iterator = NewIterator<RefIterator<false>>(thd, tab->table(), &tab->ref(), false, tab, nullptr);
           ->if (cond != nullptr)
             ->tab->iterator = NewIterator<FilterIterator>(thd, move(tab->iterator), cond);
    }
}

3.3、unit->execute(thd)开始执行查询，所有的SQL命令执行都被封装成了RowIterator对象，比如where索引（RefIterator）、where过滤条件（FilterIterator）、排序（SortingIterator）、表关联（NestedLoopiterator），从m_root_iterator->Read()开始一级一级执行得出最终结果

//sql_union.cc
bool SELECT_LEX_UNIT::execute(THD *thd) {
    Change_current_select save_select(thd);
    return ExecuteIteratorQuery(thd);
}

//sql_union.cc
bool SELECT_LEX_UNIT::ExecuteIteratorQuery(THD *thd) {
     List<Item> *fields = get_field_list(); //查询到的结果
     ->return first_select()->join->fields;  //JOIN::JOIN(THD *thd_arg, SELECT_LEX *select) : fields(&select->fields_list)...
     Query_result *query_result = this->query_result();
     ->return m_query_result;   //sql_lex.h
     query_result->start_execution(thd)
     query_result->send_result_set_metadata(thd, *fields, Protocol::SEND_NUM_ROWS | Protocol::SEND_EOF)
     set_executed();
     
     ha_rows *send_records_ptr;
     send_records_ptr = &first_select()->join->send_records; //如果是简单查询
     for (;;) {
         int error = m_root_iterator->Read();
         ++*send_records_ptr;
         query_result->send_data(thd, *fields) //遍历行
     }
     thd->current_found_rows = *send_records_ptr;
     return query_result->send_eof(thd); //发送结束
}

四、具体的数据库引擎执行比如Innodb

4.1、Mysql抽象出数据库引擎接口

class handler {
	TABLE *table;                   /* The current open table */
	int ha_open(TABLE *table, const char *name, int mode, int test_if_locked, const dd::Table *table_def);  //打开表
	int ha_close(void);  //关闭表
	int ha_rnd_next(uchar *buf);  //全表扫描取下一条
	int ha_index_init(uint idx, bool sorted);  //初始化索引
	int ha_index_read_map(uchar *buf, const uchar *key, key_part_map keypart_map, enum ha_rkey_function find_flag);  //索引查找数据
	int ha_index_next(uchar *buf);  //范围索引或全索引扫描时查询下一条
	...
}

4.2、Innodb实现

class ha_innobase : public handler {
    
}

int ha_innobase::index_read(uchar *buf, const uchar *key_ptr, uint key_len, enum ha_rkey_function find_flag) {
	if (!m_prebuilt->table->is_intrinsic()) {
		ret = row_search_mvcc(buf, mode, m_prebuilt, match_mode, 0);   //通过多版本控制协议查询
	}else{
    	ret = row_search_no_mvcc(buf, mode, m_prebuilt, match_mode, 0);
    }
}

4.3、row_search_mvcc查询数据，如果是索引查询则从顶至下遍历B+ Tree至叶节点，如果是全表或全索引扫描则叶节点比如从左往右查询，如果在数据在内存中则直接获取page对象，如果在磁盘中则通过page信息定位并加载page

dberr_t row_search_mvcc(byte *buf, page_cur_mode_t mode, row_prebuilt_t *prebuilt, ulint match_mode, const ulint direction) {
    //根据search_tuple放置cursor,从顶至下遍历B+ Tree,得到比如二级索引的record值
	btr_pcur_open_with_no_init(index, search_tuple, mode, BTR_SEARCH_LEAF, pcur,0, &mtr);
	//通过二级索引的record值获取主键索引的record
	err = row_sel_get_clust_rec_for_mysql(prebuilt, index, rec, thr, &clust_rec, &offsets, &heap, need_vrow ? &vrow : nullptr, &mtr, prebuilt->get_lob_undo());
}