目录
1、MYDUMPER的地位
2、MyDumper怎么实现备份的一致性
3、源码地址
4、直接从main函数开始分析
5、start_dump函数分析
6、create_job_to_dump_table函数
7、new_table_job函数
8、write_table_data_into_file函数
9、write_row_into_file_in_sql_mode函数分析
1、MYDUMPER的地位
MyDumper是一个数据库的逻辑备份工具,和官方的mysqldump,mysqlpump类似。本人多年接触过的MYSQL DBA同事,生产环境首选逻辑备份工具都是MyDumper。
2、MyDumper怎么实现备份的一致性
mysqldump是单线程,备份一致性原理:
FLUSH TABLES WITH READ LOCK
SET SESSION TRANSACTION ISOLATION LEVEL REPEATABLE READ
START TRANSACTION /*!40100 WITH CONSISTENT SNAPSHOT */
SHOW MASTER STATUS
UNLOCK TABLES
MyDumper的多线程是表级别,那它是怎么获取备份一致性的?决定通过源码了解它的执行流程。
3、源码地址
GitHub - mydumper/mydumper: Official MyDumper project
4、直接从main函数开始分析
调用经过初始化、获取命令行参数后,调用start_dump函数
int main(int argc, char *argv[]) {
……
if (daemon_mode) {
run_daemon();
} else {
start_dump();
}
……
}
5、start_dump函数分析
5.1 代码注解
void start_dump() {
MYSQL *conn = create_main_connection();//该连接执行FTWR,对DB造成阻塞,然后获取备份一致性
MYSQL *second_conn = conn; //该连接用来获取数据库的其它信息
……
for (n = 0; n < num_threads; n++) {//num_threads对应命令行参数中的threads
nits[n] = 0;
nitl[n] = NULL;
}
……
//对命令行中的参数updated-since的逻辑处理
if (updated_since > 0) {
u = g_strdup_printf("%s/not_updated_tables", dump_directory);
nufile = g_fopen(u, "w");
if (!nufile) {
g_critical("Couldn't write not_updated_tables file (%d)", errno);
exit(EXIT_FAILURE);
}
get_not_updated(conn, nufile); //获取updated-since之前没更新的表
}
//对命令行中的参数no-locks的处理逻辑
if (!no_locks) {
……
long_query_wait(conn);
}
//发现TIDB对MYDUMPER进行多处改造,为了兼容TIDB
……
}else{
//对命令行中的参数no-locks的逻辑处理
if (!no_locks) {
……
if (lock_all_tables) {
send_lock_all_tables(conn);
} else {
//我们比较关心备份的一致性,可以通过这段代码理解流程。在主线程中执行下面两步
//1、FLUSH TABLES
//2、FLUSH TABLES WITH READ LOCK
……
if (mysql_query(conn, "FLUSH TABLES")) {
……
if (mysql_query(conn, "FLUSH TABLES WITH READ LOCK")) {
……
// tokudb 不支持一致性读操作,进行特殊处理
// Do not start a transaction when lock all tables instead of FTWRL,
// since it can implicitly release read locks we hold
// TODO: this should be deleted as main connection is not being used for export data
// 开启一致性读操作:START TRANSACTION /*!40108 WITH CONSISTENT SNAPSHOT */
if (!lock_all_tables) {
mysql_query(conn, "START TRANSACTION /*!40108 WITH CONSISTENT SNAPSHOT */");
}
……
//对命令行中的参数set-names的逻辑处理
if (detected_server == SERVER_TYPE_MYSQL) {
if (set_names_str)
mysql_query(conn, set_names_str);
write_snapshot_info(conn, mdfile);
}
//对命令行中的参数stream的逻辑处理
if (stream){
initialize_stream();
}
//对命令行中的参数exec的逻辑处理
if (exec_command != NULL){
initialize_exec_command();
stream=TRUE;
}
//重点:开始初始化多线程的参数,less_locking的数据类型是gboolean less_locking = FALSE;
GThread **threads = g_new(GThread *, num_threads * (less_locking + 1));
struct thread_data *td =
g_new(struct thread_data, num_threads * (less_locking + 1));
//对命令行中的参数less-locking的逻辑处理
if (less_locking) {
……
for (n = num_threads; n < num_threads * 2; n++) {
……
threads[n] = g_thread_create((GThreadFunc)working_thread,
&td[n], TRUE, NULL);
……
……
//创建num_threads个working_thread函数
for (n = 0; n < num_threads; n++) {
……
threads[n] =
g_thread_create((GThreadFunc)working_thread, &td[n], TRUE, NULL);
……
}
// IMPORTANT: At this point, all the threads are in sync
// 这段很重要,所有子线程处于sync状态
g_async_queue_unref(conf.ready);
conf.ready=NULL;
//对命令行中的参数trx-consistency-only的逻辑处理,
//获取备份的一致性后,主线程开始UNLOCK TABLES
if (trx_consistency_only) {
……
mysql_query(conn, "UNLOCK TABLES /* trx-only */");
……
//Releasing binlog lock释放binlog锁
g_message("Releasing binlog lock");
release_binlog_function(second_conn);
……
}
//默认开启all-tablespaces,改参数针对NDB引擎。该SQL涉及视图关联、嵌套查询,如果表数据量很多,则查询不出结果,会发生TIMEOUT。需要在备份命令行的参数中设置all-tablespaces=N
if (dump_tablespaces){
create_job_to_dump_tablespaces(conn,&conf);
}
……
//对命令行中的参数--database、--tables_list进行处理,如果指定,则创建对应的JOB。
//如果没设置,则查询出非系统库、表
//子线程跟主线程之间的同步,通过GLib库的异步队列实现
……
if (less_locking) {
……
else {
for (iter = non_innodb_table; iter != NULL; iter = iter->next) {
……
//创建备份表的job
create_job_to_dump_table(conn, dbt, &conf, FALSE);
……
}
……
}
//等待 less locking jobs 完成
if (less_locking) {
g_message("Waiting less locking jobs to complete");
for (n = num_threads; n < num_threads * 2; n++) {
g_thread_join(threads[n]);
}
g_async_queue_unref(conf.queue_less_locking);
conf.queue_less_locking=NULL;
}
if (!no_locks && !trx_consistency_only) {
……
mysql_query(conn, "UNLOCK TABLES /* FTWRL */");
……
}
//Shutdown所有子线程?
g_message("Shutdown jobs enqueued");
for (n = 0; n < num_threads; n++) {
struct job *j = g_new0(struct job, 1);
j->type = JOB_SHUTDOWN;
g_async_queue_push(conf.queue, j);
}
//等待所有子线程完成
g_message("Waiting jobs to complete");
for (n = 0; n < num_threads; n++) {
g_thread_join(threads[n]);
}
……
// close main connection 关闭主线程的连接
mysql_close(conn);
……
// TODO: We need to create jobs for metadata.
……
if (stream) {
g_async_queue_push(stream_queue, g_strdup(""));
if (exec_command!=NULL){
wait_exec_command_to_finish();
}else
wait_stream_to_finish();
if (no_delete == FALSE && output_directory_param == NULL)
if (g_rmdir(output_directory) != 0)
g_critical("Backup directory not removed: %s", output_directory);
}
g_free(td);
g_free(threads);
if (disk_check_thread!=NULL){
disk_limits=NULL;
}
}
5.2 流程图讲解
线程之间通过GLib库的异步队列进行通讯。
现在基本没人用这库,很想替换成C++高版本的线程、信号量。
5.3 审计日志佐证
6、create_job_to_dump_table函数
调用new_table_job创建表备份任务(job),真正的导出由工作线程执行
void create_job_to_dump_table(MYSQL *conn, struct db_table *dbt,
struct configuration *conf, gboolean is_innodb) {
……
if (rows_per_file)
chunks = get_chunks_for_table(conn, dbt->database->name, dbt->table, conf);
if (partitions){
……
} else if (chunks) {
……
for (iter = chunks; iter != NULL; iter = iter->next) {
……
} else {
……
tj = new_table_job(dbt, NULL, NULL, 0, get_primary_key_string(conn, dbt->database->name, dbt->table));
……
}
}
7、new_table_job函数
struct table_job * new_table_job(struct db_table *dbt, char *partition, char *where, guint nchunk, char *order_by){
struct table_job *tj = g_new0(struct table_job, 1);
// begin Refactoring: We should review this, as dbt->database should not be free, so it might be no need to g_strdup.
// from the ref table?? TODO
tj->database=dbt->database->name;
//调用g_strdup进行备份
tj->table=g_strdup(dbt->table);
// end
tj->partition=partition;
tj->where=where;
tj->order_by=order_by;
tj->nchunk=nchunk;
// tj->filename = build_data_filename(dbt->database->filename, dbt->table_filename, tj->nchunk, 0);
tj->dbt=dbt;
return tj;
}
8、write_table_data_into_file函数
/* Do actual data chunk reading/writing magic */
guint64 write_table_data_into_file(MYSQL *conn, struct table_job * tj){
guint64 num_rows = 0;
// guint64 num_rows_st = 0;
MYSQL_RES *result = NULL;
char *query = NULL;
/* Ghm, not sure if this should be statement_size - but default isn't too big
* for now */
/* Poor man's database code */
query = g_strdup_printf(
"SELECT %s %s FROM `%s`.`%s` %s %s %s %s %s %s %s",
(detected_server == SERVER_TYPE_MYSQL) ? "/*!40001 SQL_NO_CACHE */" : "",
tj->dbt->select_fields->str, tj->database, tj->table, tj->partition?tj->partition:"", (tj->where || where_option ) ? "WHERE" : "",
tj->where ? tj->where : "", (tj->where && where_option ) ? "AND" : "", where_option ? where_option : "", tj->order_by ? "ORDER BY" : "",
tj->order_by ? tj->order_by : "");
if (mysql_query(conn, query) || !(result = mysql_use_result(conn))) {
// ERROR 1146
if (success_on_1146 && mysql_errno(conn) == 1146) {
g_warning("Error dumping table (%s.%s) data: %s ", tj->database, tj->table,
mysql_error(conn));
} else {
g_critical("Error dumping table (%s.%s) data: %s ", tj->database, tj->table,
mysql_error(conn));
errors++;
}
goto cleanup;
}
/* Poor man's data dump code */
if (load_data)
num_rows = write_row_into_file_in_load_data_mode(conn, result, tj->dbt, tj->nchunk);
else
num_rows=write_row_into_file_in_sql_mode(conn, result, tj->dbt, tj->nchunk, tj->where==NULL?1:2);
if (mysql_errno(conn)) {
g_critical("Could not read data from %s.%s: %s", tj->database, tj->table,
mysql_error(conn));
errors++;
}
cleanup:
g_free(query);
if (result) {
mysql_free_result(result);
}
return num_rows;
}
通过下面的SQL查询出结果。
select /*!40001 SQL_NO_CACHE */ xx,xx,xx,……,xx from 表 WHERE xx and xx order by xx
然后调用函数write_row_into_file_in_sql_mode逐行写入文件
9、write_row_into_file_in_sql_mode函数分析
guint64 write_row_into_file_in_sql_mode(MYSQL *conn, MYSQL_RES *result, struct db_table * dbt, guint nchunk, guint sections){
……
while ((row = mysql_fetch_row(result))) {
lengths = mysql_fetch_lengths(result);
num_rows++;
……
//逐行写入文件
write_row_into_string(conn, dbt, row, fields, lengths, num_fields, escaped, statement_row);
if (statement->len + statement_row->len + 1 > statement_size) {
……
filesize+=statement->len+1;
st_in_file++;
//判断是否需要切换文件
if (chunk_filesize &&
(guint)ceil((float)filesize / 1024 / 1024) >
chunk_filesize) {
……
}
g_string_set_size(statement, 0);
} else {
……
}
}
……
m_close(sql_file);
g_mutex_lock(dbt->rows_lock);
dbt->rows+=num_rows;
g_mutex_unlock(dbt->rows_lock);
return num_rows;
}