目录
1、MYDUMPER的地位
2、MyDumper怎么实现备份的一致性
3、源码地址
4、直接从main函数开始分析
5、start_dump函数分析
6、create_job_to_dump_table函数
7、new_table_job函数
8、write_table_data_into_file函数
9、write_row_into_file_in_sql_mode函数分析
1、MYDUMPER的地位
MyDumper是一个数据库的逻辑备份工具,和官方的mysqldump,mysqlpump类似。本人多年接触过的MYSQL DBA同事,生产环境首选逻辑备份工具都是MyDumper。
2、MyDumper怎么实现备份的一致性
mysqldump是单线程,备份一致性原理:
FLUSH TABLES WITH READ LOCK
SET SESSION TRANSACTION ISOLATION LEVEL REPEATABLE READ
START TRANSACTION /*!40100 WITH CONSISTENT SNAPSHOT */
SHOW MASTER STATUS
UNLOCK TABLES
MyDumper的多线程是表级别,那它是怎么获取备份一致性的?决定通过源码了解它的执行流程。
3、源码地址
GitHub - mydumper/mydumper: Official MyDumper project
4、直接从main函数开始分析
调用经过初始化、获取命令行参数后,调用start_dump函数
int main(int argc, char *argv[]) {
……
if (daemon_mode) {
run_daemon();
} else {
start_dump();
}
……
}
5、start_dump函数分析
5.1 代码注解
void start_dump() {
MYSQL *conn = create_main_connection();//该连接执行FTWR,对DB造成阻塞,然后获取备份一致性
MYSQL *second_conn = conn; //该连接用来获取数据库的其它信息
……
for (n = 0; n < num_threads; n++) {//num_threads对应命令行参数中的threads
nits[n] = 0;
nitl[n] = NULL;
}
……
//对命令行中的参数updated-since的逻辑处理
if (updated_since > 0) {
u = g_strdup_printf("%s/not_updated_tables", dump_directory);
nufile = g_fopen(u, "w");
if (!nufile) {
g_critical("Couldn't write not_updated_tables file (%d)", errno);
exit(EXIT_FAILURE);
}
get_not_updated(conn, nufile); //获取updated-since之前没更新的表
}
//对命令行中的参数no-locks的处理逻辑
if (!no_locks) {
……
long_query_wait(conn);
}
//发现TIDB对MYDUMPER进行多处改造,为了兼容TIDB
……
}else{
//对命令行中的参数no-locks的逻辑处理
if (!no_locks) {
……
if (lock_all_tables) {
send_lock_all_tables(conn);
} else {
//我们比较关心备份的一致性,可以通过这段代码理解流程。在主线程中执行下面两步
//1、FLUSH TABLES
//2、FLUSH TABLES WITH READ LOCK
……
if (mysql_query(conn, "FLUSH TABLES")) {
……
if (mysql_query(conn, "FLUSH TABLES WITH READ LOCK")) {
……
// tokudb 不支持一致性读操作,进行特殊处理
// Do not start a transaction when lock all tables instead of FTWRL,
// since it can implicitly release read locks we hold
// TODO: this should be deleted as main connection is not being used for export data
// 开启一致性读操作:START TRANSACTION /*!40108 WITH CONSISTENT SNAPSHOT */
if (!lock_all_tables) {
mysql_query(conn, "START TRANSACTION /*!40108 WITH CONSISTENT SNAPSHOT */");
}
……
//对命令行中的参数set-names的逻辑处理
if (detected_server == SERVER_TYPE_MYSQL) {
if (set_names_str)
mysql_query(conn, set_names_str);
write_snapshot_info(conn, mdfile);
}
//对命令行中的参数stream的逻辑处理
if (stream){
initialize_stream();
}
//对命令行中的参数exec的逻辑处理
if (exec_command != NULL){
initialize_exec_command();
stream=TRUE;
}
//重点:开始初始化多线程的参数,less_locking的数据类型是gboolean less_locking = FALSE;
GThread **threads = g_new(GThread *, num_threads * (less_locking + 1));
struct thread_data *td =
g_new(struct thread_data, num_threads * (less_locking + 1));
//对命令行中的参数less-locking的逻辑处理
if (less_locking) {
……
for (n = num_threads; n < num_threads * 2; n++) {
……
threads[n] = g_thread_create((GThreadFunc)working_thread,
&td[n], TRUE, NULL);
……
……
//创建num_threads个working_thread函数
for (n = 0; n < num_threads; n++) {
……
threads[n] =
g_thread_create((GThreadFunc)working_thread, &td[n], TRUE, NULL);
……
}
// IMPORTANT: At this point, all the threads are in sync
// 这段很重要,所有子线程处于sync状态
g_async_queue_unref(conf.ready);
conf.ready=NULL;
//对命令行中的参数trx-consistency-only的逻辑处理,
//获取备份的一致性后,主线程开始UNLOCK TABLES
if (trx_consistency_only) {
……
mysql_query(conn, "UNLOCK TABLES /* trx-only */");
……
//Releasing binlog lock释放binlog锁
g_message("Releasing binlog lock");
release_binlog_function(second_conn);
……
}
//默认开启all-tablespaces,改参数针对NDB引擎。该SQL涉及视图关联、嵌套查询,如果表数据量很多,则查询不出结果,会发生TIMEOUT。需要在备份命令行的参数中设置all-tablespaces=N
if (dump_tablespaces){
create_job_to_dump_tablespaces(conn,&conf);
}
……
//对命令行中的参数--database、--tables_list进行处理,如果指定,则创建对应的JOB。
//如果没设置,则查询出非系统库、表
//子线程跟主线程之间的同步,通过GLib库的异步队列实现
……
if (less_locking) {
……
else {
for (iter = non_innodb_table; iter != NULL; iter = iter->next) {
……
//创建备份表的job
create_job_to_dump_table(conn, dbt, &conf, FALSE);
……
}
……
}
//等待 less locking jobs 完成
if (less_locking) {
g_message("Waiting less locking jobs to complete");
for (n = num_threads; n < num_threads * 2; n++) {
g_thread_join(threads[n]);
}
g_async_queue_unref(conf.queue_less_locking);
conf.queue_less_locking=NULL;
}
if (!no_locks && !trx_consistency_only) {
……
mysql_query(conn, "UNLOCK TABLES /* FTWRL */");
……
}
//Shutdown所有子线程?
g_message("Shutdown jobs enqueued");
for (n = 0; n < num_threads; n++) {
struct job *j = g_new0(struct job, 1);
j->type = JOB_SHUTDOWN;
g_async_queue_push(conf.queue, j);
}
//等待所有子线程完成
g_message("Waiting jobs to complete");
for (n = 0; n < num_threads; n++) {
g_thread_join(threads[n]);
}
……
// close main connection 关闭主线程的连接
mysql_close(conn);
……
// TODO: We need to create jobs for metadata.
……
if (stream) {
g_async_queue_push(stream_queue, g_strdup(""));
if (exec_command!=NULL){
wait_exec_command_to_finish();
}else
wait_stream_to_finish();
if (no_delete == FALSE && output_directory_param == NULL)
if (g_rmdir(output_directory) != 0)
g_critical("Backup directory not removed: %s", output_directory);
}
g_free(td);
g_free(threads);
if (disk_check_thread!=NULL){
disk_limits=NULL;
}
}
5.2 流程图讲解
线程之间通过GLib库的异步队列进行通讯。
现在基本没人用这库,很想替换成C++高版本的线程、信号量。
5.3 审计日志佐证
6、create_job_to_dump_table函数
调用new_table_job创建表备份任务(job),真正的导出由工作线程执行
void create_job_to_dump_table(MYSQL *conn, struct db_table *dbt,
struct configuration *conf, gboolean is_innodb) {
……
if (rows_per_file)
chunks = get_chunks_for_table(conn, dbt->database->name, dbt->table, conf);
if (partitions){
……
} else if (chunks) {
……
for (iter = chunks; iter != NULL; iter = iter->next) {
……
} else {
……
tj = new_table_job(dbt, NULL, NULL, 0, get_primary_key_string(conn, dbt->database->name, dbt->table));
……
}
}
7、new_table_job函数
struct table_job * new_table_job(struct db_table *dbt, char *partition, char *where, guint nchunk, char *order_by){
struct table_job *tj = g_new0(struct table_job, 1);
// begin Refactoring: We should review this, as dbt->database should not be free, so it might be no need to g_strdup.
// from the ref table?? TODO
tj->database=dbt->database->name;
//调用g_strdup进行备份
tj->table=g_strdup(dbt->table);
// end
tj->partition=partition;
tj->where=where;
tj->order_by=order_by;
tj->nchunk=nchunk;
// tj->filename = build_data_filename(dbt->database->filename, dbt->table_filename, tj->nchunk, 0);
tj->dbt=dbt;
return tj;
}
8、write_table_data_into_file函数
/* Do actual data chunk reading/writing magic */
guint64 write_table_data_into_file(MYSQL *conn, struct table_job * tj){
guint64 num_rows = 0;
// guint64 num_rows_st = 0;
MYSQL_RES *result = NULL;
char *query = NULL;
/* Ghm, not sure if this should be statement_size - but default isn't too big
* for now */
/* Poor man's database code */
query = g_strdup_printf(
"SELECT %s %s FROM `%s`.`%s` %s %s %s %s %s %s %s",
(detected_server == SERVER_TYPE_MYSQL) ? "/*!40001 SQL_NO_CACHE */" : "",
tj->dbt->select_fields->str, tj->database, tj->table, tj->partition?tj->partition:"", (tj->where || where_option ) ? "WHERE" : "",
tj->where ? tj->where : "", (tj->where && where_option ) ? "AND" : "", where_option ? where_option : "", tj->order_by ? "ORDER BY" : "",
tj->order_by ? tj->order_by : "");
if (mysql_query(conn, query) || !(result = mysql_use_result(conn))) {
// ERROR 1146
if (success_on_1146 && mysql_errno(conn) == 1146) {
g_warning("Error dumping table (%s.%s) data: %s ", tj->database, tj->table,
mysql_error(conn));
} else {
g_critical("Error dumping table (%s.%s) data: %s ", tj->database, tj->table,
mysql_error(conn));
errors++;
}
goto cleanup;
}
/* Poor man's data dump code */
if (load_data)
num_rows = write_row_into_file_in_load_data_mode(conn, result, tj->dbt, tj->nchunk);
else
num_rows=write_row_into_file_in_sql_mode(conn, result, tj->dbt, tj->nchunk, tj->where==NULL?1:2);
if (mysql_errno(conn)) {
g_critical("Could not read data from %s.%s: %s", tj->database, tj->table,
mysql_error(conn));
errors++;
}
cleanup:
g_free(query);
if (result) {
mysql_free_result(result);
}
return num_rows;
}
通过下面的SQL查询出结果。
select /*!40001 SQL_NO_CACHE */ xx,xx,xx,……,xx from 表 WHERE xx and xx order by xx
然后调用函数write_row_into_file_in_sql_mode逐行写入文件
9、write_row_into_file_in_sql_mode函数分析
guint64 write_row_into_file_in_sql_mode(MYSQL *conn, MYSQL_RES *result, struct db_table * dbt, guint nchunk, guint sections){
……
while ((row = mysql_fetch_row(result))) {
lengths = mysql_fetch_lengths(result);
num_rows++;
……
//逐行写入文件
write_row_into_string(conn, dbt, row, fields, lengths, num_fields, escaped, statement_row);
if (statement->len + statement_row->len + 1 > statement_size) {
……
filesize+=statement->len+1;
st_in_file++;
//判断是否需要切换文件
if (chunk_filesize &&
(guint)ceil((float)filesize / 1024 / 1024) >
chunk_filesize) {
……
}
g_string_set_size(statement, 0);
} else {
……
}
}
……
m_close(sql_file);
g_mutex_lock(dbt->rows_lock);
dbt->rows+=num_rows;
g_mutex_unlock(dbt->rows_lock);
return num_rows;
}