背景介绍
项目中有一张请求日志表,由于前期没有做好分表策略,数据量增长到了 500w,直到查询超时才发现问题,现准备采用 MySQL 分表处理。
需求分析
- 请求日志表有统计报表需求
- 请求日志月增量大概在50w左右
- 原表结构
-- Original (pre-split) request-log table, reproduced here for reference only;
-- it is renamed to old_request_log below and replaced by a MERGE facade.
-- NOTE(review): CHARSET=utf8 is MySQL's 3-byte subset (no full Unicode) and
-- ENGINE=Innodb casing is non-standard -- as deployed, not meant to be re-run.
CREATE TABLE `request_log` (
`id` int(11) NOT NULL AUTO_INCREMENT COMMENT 'id',
`req_id` int(11) NOT NULL COMMENT '请求方id',
`res_id` int(11) NOT NULL COMMENT '资源id',
`time_spend` bigint(20) NOT NULL COMMENT '响应时长(ms)',
`req_status` bit(1) NOT NULL COMMENT '请求结果',
`req_time` datetime NOT NULL COMMENT '请求时间',
PRIMARY KEY (`id`) USING BTREE,
-- Single-column secondary indexes on the two lookup keys plus the time axis.
KEY `idx1` (`req_id`) USING BTREE,
KEY `idx2` (`res_id`) USING BTREE,
KEY `idx3` (`req_time`) USING BTREE
) ENGINE=Innodb DEFAULT CHARSET=utf8 COMMENT='请求日志表';
思路
- 考虑到报表需求,需要对每个分表做统计,设计一张统计表汇总每个月的请求情况
- 考虑到月增量不大,采用按月分表的形式进行分表
- 为了不大量修改项目代码, 采用mysql 的merge table方式进行分表的数据合并
设计
分表
按照原表的 req_time,采用按月分表的方式,将原表切分成多个子表,并从原表中将对应月份的数据导入到子表中,SQL脚本如下
由于本项目中的子表不多,这里采用人工分表进行,也可以编写一个存储过程批量生成
-- Monthly child table for 2021-01. MyISAM is mandatory: the MERGE engine can
-- only union MyISAM children whose columns and indexes match exactly.
CREATE TABLE IF NOT EXISTS `request_log_202101` (
`id` int(11) NOT NULL AUTO_INCREMENT COMMENT 'id',
`req_id` int(11) NOT NULL COMMENT '请求方id',
`res_id` int(11) NOT NULL COMMENT '资源id',
`time_spend` bigint(20) NOT NULL COMMENT '响应时长(ms)',
`req_status` bit(1) NOT NULL COMMENT '请求结果',
`req_time` datetime NOT NULL COMMENT '请求时间',
PRIMARY KEY (`id`) USING BTREE,
KEY `idx1` (`req_id`) USING BTREE,
KEY `idx2` (`res_id`) USING BTREE,
KEY `idx3` (`req_time`) USING BTREE
) ENGINE=MyISAM DEFAULT CHARSET=utf8 COMMENT='请求日志表_202101';
-- Backfill January rows. Half-open range [2021-01-01, 2021-02-01) replaces
-- the closed BETWEEN ... '23:59:59', which silently drops any row whose
-- req_time falls inside the last second's sub-second precision and is the
-- classic timestamp-BETWEEN boundary bug. Explicit column lists on both
-- sides protect against future schema drift between the two tables.
INSERT INTO request_log_202101 (id, req_id, res_id, time_spend, req_status, req_time)
SELECT id, req_id, res_id, time_spend, req_status, req_time
FROM request_log
WHERE req_time >= '2021-01-01 00:00:00'
  AND req_time <  '2021-02-01 00:00:00';
重命名原表
-- Free up the name `request_log` for the MERGE facade below; the original
-- InnoDB table is kept (renamed) as a backup until the migration is verified.
rename table request_log to old_request_log;
合并表
这里暂时只 UNION 了 request_log_202101 这一张子表,后续新的子表会通过存储过程追加到合并表的 UNION 列表中。
-- MERGE facade table: reuses the original `request_log` name so application
-- SQL keeps working unchanged. INSERT_METHOD=LAST routes writes to the last
-- table in the UNION list (the current month's child).
-- NOTE(review): each child runs its own AUTO_INCREMENT sequence, so `id` is
-- NOT globally unique across months -- hence a plain (non-unique) INDEX on
-- `id` instead of a PRIMARY KEY. Code must not assume id uniqueness here.
CREATE TABLE IF NOT EXISTS `request_log` (
`id` int(11) NOT NULL AUTO_INCREMENT COMMENT 'id',
`req_id` int(11) NOT NULL COMMENT '请求方id',
`res_id` int(11) NOT NULL COMMENT '资源id',
`time_spend` bigint(20) NOT NULL COMMENT '响应时长(ms)',
`req_status` bit(1) NOT NULL COMMENT '请求结果',
`req_time` datetime NOT NULL COMMENT '请求时间',
INDEX (`id`),
KEY `idx1` (`req_id`) USING BTREE,
KEY `idx2` (`res_id`) USING BTREE,
KEY `idx3` (`req_time`) USING BTREE
-- COMMENT fixed: it was copy-pasted from the 202101 child table and
-- mislabeled the merge table itself.
) ENGINE=MERGE DEFAULT CHARSET=utf8 UNION=(request_log_202101) INSERT_METHOD=LAST COMMENT='请求日志合并表' AUTO_INCREMENT=1;
创建分表存储过程
存储过程按当前月份创建分表,并更新 request_log 的 union 属性,将所有分表逻辑关联起来
-- IF EXISTS makes the script re-runnable: the bare DROP errored out on the
-- very first execution, before the procedure existed.
DROP PROCEDURE IF EXISTS `req_log_split`;
CREATE PROCEDURE `req_log_split`()
BEGIN
    -- Create the current month's child table (idempotent via IF NOT EXISTS)
    -- and rebuild the MERGE table's UNION list from every request_log_20*
    -- child found in this schema.
    DECLARE dt CHAR(6);
    DECLARE tableName CHAR(27);
    DECLARE merges VARCHAR(4000);
    SET dt = DATE_FORMAT(NOW(), '%Y%m');
    SET tableName = CONCAT('request_log_', dt);
    -- Child DDL must stay byte-compatible with request_log_202101 above,
    -- otherwise the MERGE table refuses to open.
    SET @createDdl = CONCAT('CREATE TABLE IF NOT EXISTS ', tableName, '(
    `id` int(11) NOT NULL AUTO_INCREMENT COMMENT \'id\',
    `req_id` int(11) NOT NULL COMMENT \'请求方id\',
    `res_id` int(11) NOT NULL COMMENT \'资源id\',
    `time_spend` bigint(20) NOT NULL COMMENT \'响应时长(ms)\',
    `req_status` bit(1) NOT NULL COMMENT \'请求结果\',
    `req_time` datetime NOT NULL COMMENT \'请求时间\',
    PRIMARY KEY (`id`) USING BTREE,
    KEY `idx1` (`req_id`) USING BTREE,
    KEY `idx2` (`res_id`) USING BTREE,
    KEY `idx3` (`req_time`) USING BTREE
    ) ENGINE=MyISAM DEFAULT CHARSET=utf8 COMMENT=\'请求日志_', dt, '\';');
    PREPARE createStmt FROM @createDdl;
    EXECUTE createStmt;
    DEALLOCATE PREPARE createStmt;  -- release the prepared statement handle
    -- GROUP_CONCAT truncates at group_concat_max_len (default 1024 bytes);
    -- raise it to the buffer's size so the UNION list is never silently cut
    -- short as child tables accumulate over the years.
    SET SESSION group_concat_max_len = 4000;
    -- BUG FIX: the ORDER BY must live INSIDE GROUP_CONCAT. An outer ORDER BY
    -- on a single aggregated row does not order the concatenated list, so the
    -- UNION was assembled in arbitrary order. The `_` is escaped because it
    -- is a single-character wildcard in LIKE patterns.
    SELECT GROUP_CONCAT(table_name ORDER BY table_name)
      INTO merges
      FROM information_schema.`TABLES`
     WHERE table_type = 'BASE TABLE'
       AND table_schema = DATABASE()
       AND table_name LIKE 'request_log\_20%';
    SET @alterSql = CONCAT('ALTER TABLE request_log UNION = (', merges, ')');
    PREPARE alterStmt FROM @alterSql;
    EXECUTE alterStmt;
    DEALLOCATE PREPARE alterStmt;
    -- No explicit COMMIT: CREATE TABLE / ALTER TABLE auto-commit in MySQL.
END;
执行存储过程
更新合并表的union
-- Run once immediately: creates this month's child table and points the
-- MERGE table's UNION list at all existing children.
call req_log_split();
创建定时任务
每天凌晨1点更新合并表的 union
-- Daily at 01:00: a no-op on most days, but creates the new child table and
-- refreshes the UNION list on month boundaries. IF NOT EXISTS keeps the
-- script safe to re-run (consistent with the CREATE TABLE statements above).
CREATE EVENT IF NOT EXISTS `req_log_split_event`
    ON SCHEDULE EVERY 1 DAY STARTS '2021-05-18 01:00:00'
    ON COMPLETION PRESERVE ENABLE
    DO call req_log_split();
请求日志统计
后续只需要将所有的统计信息汇总到 request_log_cnt,项目中修改涉及统计的部分 SQL 即可。
-- Pre-aggregated monthly statistics, upserted nightly on the composite
-- primary key (req_id, res_id, dt).
CREATE TABLE IF NOT EXISTS `request_log_cnt` (
`req_id` int(11) NOT NULL COMMENT '请求方id',
`res_id` int(11) NOT NULL COMMENT '资源id',
`dt` char(6) NOT NULL COMMENT '分表标识',
`total_cnt` int(11) NOT NULL DEFAULT 0 COMMENT '总请求次数',
`success_cnt` int(11) NOT NULL DEFAULT 0 COMMENT '请求成功次数',
`failure_cnt` int(11) NOT NULL DEFAULT 0 COMMENT '请求失败次数',
-- NOTE(review): AVG() yields a DECIMAL that is rounded when stored into
-- this INT column -- acceptable for millisecond averages, but deliberate.
`avg_spend` int(11) NOT NULL DEFAULT 0 COMMENT '平均响应时长(ms)',
PRIMARY KEY (`req_id`,`res_id`,`dt`) USING BTREE,
-- The former KEY idx(req_id) was removed: req_id is the leftmost column of
-- the primary key, so that index was fully redundant and only cost writes.
KEY `idx2` (`res_id`) USING BTREE
) ENGINE=INNODB DEFAULT CHARSET=utf8 COMMENT='请求日志统计表';
-- One-shot initial load of the stats table from the merged log table.
-- Explicit column list guards against column-order drift, and the statement
-- is now properly terminated (the original was missing its semicolon).
INSERT INTO request_log_cnt (req_id, res_id, dt, total_cnt, success_cnt, failure_cnt, avg_spend)
SELECT
    req_id,
    res_id,
    DATE_FORMAT(req_time, '%Y%m') AS dt,
    COUNT(1) AS total_cnt,
    SUM(CASE WHEN req_status = 1 THEN 1 ELSE 0 END) AS success_cnt,
    SUM(CASE WHEN req_status != 1 THEN 1 ELSE 0 END) AS failure_cnt,
    AVG(time_spend) AS avg_spend  -- rounded into the INT column on insert
FROM
    request_log
GROUP BY
    req_id,
    res_id,
    DATE_FORMAT(req_time, '%Y%m');
请求统计表定时更新
-- IF EXISTS makes the script re-runnable: the bare DROP errored out on the
-- very first execution, before the procedure existed.
DROP PROCEDURE IF EXISTS `refresh_req_log_cnt`;
CREATE PROCEDURE `refresh_req_log_cnt`()
BEGIN
    -- Rebuild the per-month statistics. REPLACE INTO upserts on the
    -- (req_id, res_id, dt) primary key, so repeated runs are idempotent.
    -- NOTE(review): this rescans the entire MERGE table every night; if it
    -- becomes slow, limit the scan to the current month's req_time range.
    REPLACE INTO request_log_cnt (req_id, res_id, dt, total_cnt, success_cnt, failure_cnt, avg_spend)
    SELECT
        req_id,
        res_id,
        DATE_FORMAT(req_time, '%Y%m') AS dt,
        COUNT(1) AS total_cnt,
        SUM(CASE WHEN req_status = 1 THEN 1 ELSE 0 END) AS success_cnt,
        SUM(CASE WHEN req_status != 1 THEN 1 ELSE 0 END) AS failure_cnt,
        AVG(time_spend) AS avg_spend
    FROM
        request_log
    GROUP BY
        req_id,
        res_id,
        DATE_FORMAT(req_time, '%Y%m');
    -- No explicit COMMIT: with autocommit on, REPLACE commits on its own.
END;
-- Daily at 01:10 (after the 01:00 split event) refresh the stats table.
-- IF NOT EXISTS keeps the script safe to re-run.
CREATE EVENT IF NOT EXISTS `refresh_req_log_cnt_event`
    ON SCHEDULE EVERY 1 DAY STARTS '2021-05-18 01:10:00'
    ON COMPLETION PRESERVE ENABLE
    DO call refresh_req_log_cnt();