MySQL 千万级数据迁移

最新推荐文章于 2024-05-14 22:01:15 发布

jerry-89

最新推荐文章于 2024-05-14 22:01:15 发布

阅读量619

点赞数

分类专栏： MYSQL主从文章标签： mysql 数据库 sql

本文链接：https://blog.csdn.net/eagle89/article/details/128550158

版权

MYSQL主从专栏收录该内容

154 篇文章 18 订阅

订阅专栏

背景介绍

项目中表数据存在超过50%的逻辑删除数据，影响查询性能，本次计划将逻辑有效的数据迁移到新表，使得表数据量减少，索引文件减少，提高查询效率。
大体思路

通过存储过程实现表数据迁移，迁移过程中涉及动态删除索引、动态创建索引等操作，导入时通过分页的方式来导入逻辑有效数据。

    计算表数据量大小
    创建备份表（只保留表结构）
    删除备份表索引（保留主键索引）
    导入有效数据到备份表
    给备份表创建索引（索引与原表一致）
        这里使用视图先准备好需要创建的索引数据
        再通过存储过程从视图中读取出数据继续创建
    重命名备份表，重命名新表

步骤详情

Step-1：计算表数据量大小

    CREATE DEFINER=`root`@`localhost` PROCEDURE `Step_01_CountUsefulData`(
       in schemaName varchar(50), -- 输入参数，schema名称
       in tableName varchar(50), -- 输入参数，表名称
       in pageSize INT, -- 输入参数，每页大小
       OUT totalPages INT, -- 输出参数，总页数
       OUT minId INT, -- 输出参数，最新id
       OUT maxId INT -- 输出参数，最大id
       )
    BEGIN
       SET @begin_date = now();
       SET @pageSize = pageSize;

       SET @stmt = CONCAT('SELECT count(1), min(id), max(id) FROM ', tableName, ' where is_deleted = false into @totalRecords, @minId, @maxId');
       prepare stmt from @stmt;
       execute stmt;
       deallocate prepare stmt;

       SET totalPages = CEILING(@totalRecords/pageSize);
       SET minId = @minId;
       SET maxId = @maxId;
       SET @ct = TIMESTAMPDIFF(SECOND, @begin_date, now());
       select '计算表数据量' as '任务名称', tableName as '表名', @totalRecords as '总条数', @pageSize as '每页大小', @ct as '用时/秒';
    END

Step-2：创建备份表（只保留表结构）

    CREATE DEFINER=`root`@`localhost` PROCEDURE `Step_02_CreateBakTable`(
       in schemaName varchar(50), -- 输入参数，schema名称
       in originalTableName varchar(50), -- 输入参数，原表名称
       in newTableName varchar(50) -- 输入参数，新表名称
    )
    BEGIN
        set @stmt = CONCAT('create table ', newTableName, ' like ', originalTableName);
       select '创建备份表' as '任务名称', newTableName as '备份表', originalTableName as '原表', @stmt as '备份SQL';

       prepare stmt from @stmt;
       execute stmt;
       deallocate prepare stmt;
    END

Step-3：删除备份表索引（保留主键索引）

根据传入的schema名称和表名称动态删除索引（保留主键索引不删）

    CREATE DEFINER=`root`@`localhost` PROCEDURE `Step_03_RemoveTableIndex`(
       in tableSchema varchar(50), -- 输入参数
       in newTableName varchar(50) -- 输入参数
    )
    BEGIN
       select '删除表索引开始' as '任务名称', tableSchema as 'TableSchema', newTableName as '表名';
       SET @begin_time = now();

       -- 查询表索引，并拼接成drop语句
       set @stmt_idx = CONCAT("SELECT GROUP_CONCAT(concat('drop index ',INDEX_NAME)) as idx FROM ",
           "(select DISTINCT(INDEX_NAME) FROM INFORMATION_SCHEMA.STATISTICS T WHERE T.TABLE_SCHEMA = '", tableSchema, "'"
           " AND T.TABLE_NAME = '", newTableName, "' AND T.INDEX_NAME <> 'PRIMARY') t1 into @dropsql");

       prepare stmt from @stmt_idx;
       execute stmt;
       deallocate prepare stmt;

       IF @dropsql is not NULL THEN
           -- 拼接删除语句
           set @stmt = CONCAT('ALTER TABLE ', newTableName, ' ', @dropsql, ';');
           select '删除表索引' as '任务名称', @stmt as '删除SQL';

           prepare stmt from @stmt;
           execute stmt;
           deallocate prepare stmt;
       ELSE
           select '删除表索引失败' as '任务名称', newTableName as '该表没有索引，无需删除';
       END IF;

       SET @cost = TIMESTAMPDIFF(SECOND, @begin_time, now());
       select '删除表索引结束' as '任务名称', newTableName as '表名', @cost as '耗时(秒)';
    END

Step-4：导入逻辑有效数据到备份表

    CREATE DEFINER=`root`@`localhost` PROCEDURE `Step_04_ImportTableData`(
       in tableSchema VARCHAR(50),
       in originalTableName VARCHAR(50),
       in newTableName VARCHAR(50),
       in pageSize int,
       in minId int,
       in maxId int
    )
    BEGIN
       DECLARE fromId int;
       SELECT '导入数据开始' as '任务名称', tableSchema 'Schema Name', originalTableName '原表名', newTableName as '新表名', pageSize as '每页数据大小', minId as '最小id', maxId as '最大Id';
       SET @pageSize = pageSize;
       SET @newTableName = newTableName;
       SET @originalTableName = originalTableName;
       SET @minId = minId;
       SET @maxId = maxId;
       set @fromId = @minId;

       SET @query_begin_date = now();
       WHILE @minId <= @maxId DO

           SET @stmt = CONCAT('INSERT INTO ', @newTableName, ' SELECT * FROM ', @originalTableName, ' where is_deleted = false and id BETWEEN ? and ?;');

           SET @minId = @minId;
           SET @nextId = @minId + @pageSize;

           -- select @stmt as '备份SQL', @minId as 'MinId', @maxId as 'MaxId';
           -- 开启事务，执行一批数据后就提交事务
           BEGIN
           START TRANSACTION;
               prepare stmt from @stmt;
               execute stmt USING @minId, @nextId;
               deallocate prepare stmt;
           COMMIT;
           END;

           -- select @stmt as '备份SQL', @minId as 'MinId', @nextId as 'NextId', @ct as '导入数据用时/秒';
           SET @minId = @nextId + 1;

       END WHILE;
       SET @ct = TIMESTAMPDIFF(SECOND, @query_begin_date, now());
       select '导入数据结束' as '任务名称', @ct as '耗时(秒)';

    END

Step-5：给备份表创建索引（索引与原表一致）

包括唯一索引、联合索引的创建

    CREATE DEFINER=`root`@`localhost` PROCEDURE `Step_05_CreateTableIndex`(
       in schemaName VARCHAR(50),
       in originalTableName varchar(50),
       in newTableName varchar(50)
    )
    BEGIN

       select '创建索引开始' as '任务名称', newTableName as '表名';
       SET @begin_time = now();

       -- 根据schemaName, tableName 查询表索引信息，并创建视图（动态游标的处理方案）
       call Step_05_01_CreateTableIndex(schemaName, originalTableName);

       -- 从上一步的视图中读取数据
       call Step_05_02_CreateTableIndex(schemaName, newTableName);

       SET @cost = TIMESTAMPDIFF(SECOND, @begin_time, now());
       select '创建索引结束' as '任务名称', newTableName as '表名', @cost as '耗时(秒)';

    END

    CREATE DEFINER=`root`@`localhost` PROCEDURE `Step_05_01_CreateTableIndex`(
       in tableSchema varchar(50),
       in newTableName varchar(50)
    )
    BEGIN
      DROP VIEW IF EXISTS tmp_table_idx_view;
       SET @sqlstr = "CREATE VIEW tmp_table_idx_view as ";
       SET @sqlstr = CONCAT(@sqlstr , "SELECT non_unique, index_name, seq_in_index, column_name, index_type, index_comment FROM INFORMATION_SCHEMA.STATISTICS t WHERE t.TABLE_SCHEMA = '",
                                   tableSchema, "' AND t.TABLE_NAME ='", newTableName, "' AND t.INDEX_NAME <> 'PRIMARY'");

       PREPARE stmt FROM @sqlstr;
       EXECUTE stmt;
       DEALLOCATE PREPARE stmt;
    END

重点：索引结构动态SQL的拼接如下：

    CREATE DEFINER=`root`@`localhost` PROCEDURE `Step_05_02_CreateTableIndex`(
       in tableSchema varchar(50),
       in tableName varchar(50)
    )
    BEGIN
       DECLARE is_finished int;
       DECLARE non_unique int;        -- 非主键索引 0:是主键索引 1:非主键索引
       DECLARE idxname VARCHAR(50);   -- 索引名称
       DECLARE idxseq int;               -- 索引序号
       DECLARE clmname VARCHAR(50);   -- 索引列名称
       DECLARE idxtype VARCHAR(50);   -- 索引类型
       DECLARE idxcmt VARCHAR(50);       -- 索引备注
       DECLARE pre_sql VARCHAR(500) DEFAULT 'ADD $unique INDEX `$idxname`($idxcln) USING $idxtype $cmt';
       DECLARE result_sql VARCHAR(3500) DEFAULT '';

       DECLARE cursor_query CURSOR FOR SELECT * FROM tmp_table_idx_view; -- 从视图中读取数据
       DECLARE CONTINUE HANDLER FOR NOT FOUND SET is_finished = 1;

       SET @begin_time = now();
       select '创建表索引step2开始' as '任务名称', tableSchema as 'TableSchema', tableName as '表名';

       OPEN cursor_query;
       get_idx: LOOP

           FETCH cursor_query into non_unique, idxname, idxseq, clmname, idxtype, idxcmt;
           IF is_finished=1 THEN
               LEAVE get_idx;
           END IF;

           if idxseq = 1 then
               -- 新创建
               set result_sql = CONCAT(result_sql, ',', pre_sql);
               set result_sql = replace(result_sql,'$clmns', '');
               set result_sql = replace(result_sql, '$idxname', idxname);
               set result_sql = replace(result_sql, '$idxcln', CONCAT('`', clmname, '`', '$clmns'));
               set result_sql = replace(result_sql, '$idxtype', idxtype);

               if non_unique = 0 THEN
                   set result_sql = replace(result_sql, '$unique', 'UNIQUE');
               else
                   set result_sql = replace(result_sql, '$unique', '');
               end if;

               if idxcmt is NULL THEN
                   set result_sql = replace(result_sql, '$cmt', '');
               else
                   set result_sql = replace(result_sql, '$cmt', CONCAT(' COMMENT \'', idxcmt, '\''));
               end if;

           elseif idxseq > 1 then
               -- 替换
               set result_sql = replace(result_sql, '$clmns', CONCAT(',`', clmname, '`', '$clmns'));
           end if;

       END LOOP get_idx;
       CLOSE cursor_query;

       set result_sql = replace(result_sql , '$clmns', '');
       set result_sql = replace(result_sql, '$idxcln', '');
       set result_sql = CONCAT(replace(result_sql ,'$clmns',''), ';');

       set result_sql = right(result_sql, length(result_sql) - 1);
       set result_sql = CONCAT('ALTER TABLE `', tableName,'` ', result_sql);

       select '创建表索引step2中...' as '任务名称', result_sql as 'SQL';

       set @stmt = result_sql;
       PREPARE stmt FROM @stmt;
       EXECUTE stmt;
       DEALLOCATE PREPARE stmt;
       SET @cost = TIMESTAMPDIFF(SECOND, @begin_time, now());
       select '创建表索引step2结束' as '任务名称', result_sql as 'SQL', @cost as '耗时(秒)';

       set @stmt = NULL;
       set result_sql = NULL;
       DROP VIEW IF EXISTS tmp_table_idx_view;

    END

Step-6：重命名备份表，重命名新表

    CREATE DEFINER=`root`@`localhost` PROCEDURE `Step_06_RenameTableName`(
      in schemaName VARCHAR(50),
      in originalTableName VARCHAR(50),
      in newTableName VARCHAR(50)
    )
    BEGIN
      /*
      原表名: t_user
      备份表: bak_t_user_年月日时分秒
      重命名后:
        旧表: t_user --> bak_bak_t_user_年月日时分秒
        新表: bak_t_user_年月日时分秒 --> t_user
      */

      SET @begin_time = now();
      select '重命名表名开始' as '任务名称', schemaName as 'SchemaName', newTableName as '表名称', originalTableName as '原表名称';

      set @stmt = CONCAT('RENAME TABLE ', originalTableName, ' to bak_', newTableName, ', ', newTableName, ' to ', originalTableName);

      prepare stmt from @stmt;
      execute stmt;
      deallocate prepare stmt;

      SET @cost = TIMESTAMPDIFF(SECOND, @begin_time, now());
      select '重命名表名结束' as '任务名称', @stmt as '重命名表名SQL语句', @cost as '耗时(秒)';
    END