mysql 分区全自动维护(维护天和月)
一、新建数据库和存储过程
1.1 新建分区记录表
-- auto-generated definition
create table t_partition_maintenance_logs
(
id int unsigned auto_increment
primary key,
schema_name varchar(20) default '' null comment '数据库名',
table_name varchar(50) default '' null comment '表名',
partition_name varchar(20) default '' null comment '分区名',
partition_time int unsigned default 0 null comment '分区时间',
sql_text varchar(512) default '' null comment 'sql脚本',
status tinyint default 0 null comment 'sql脚本执行状态,0失败1成功',
create_at int unsigned default 0 null comment 'sql脚本执行时间'
)
comment '分区表统一维护日志表' charset = utf8mb4;
1.2 函数主要分割 xxxxx,zzzzz 取 xxxxx 和 zzzzz
create
definer = root@localhost function Func_split_val(f_string text, f_delimiter varchar(5), f_order int) returns varchar(255)
deterministic
BEGIN
declare result varchar(255) default '';
set result = reverse(substring_index(reverse(substring_index(f_string,f_delimiter,f_order)),f_delimiter,1));
return result;
END;
1.3 自动维护存储过程
-
如果是天,维护15个分区,如果是月的维护三个分区
-
天的分区名字:p20230102
-
月的分区名字:m202307
create
definer = root@`%` procedure Proc_Partition_maintenance()
BEGIN
-- 按天分区表最少空余15个分区
DECLARE v_day_least_par_num INT DEFAULT 15;
-- 按月分区表最少空余3个分区
DECLARE v_month_least_par_num INT DEFAULT 3;
-- 分区表个数
DECLARE v_partition_table_num INT DEFAULT 0;
DECLARE v_i INT DEFAULT 1;
DECLARE v_schema_name VARCHAR(20) DEFAULT '';
DECLARE v_table_name VARCHAR(50) DEFAULT '';
DECLARE v_column VARCHAR(20) DEFAULT '';
DECLARE v_time VARCHAR(30) DEFAULT '';
DECLARE v_time_first INT DEFAULT 0;
DECLARE v_time_second INT DEFAULT 0;
DECLARE v_type VARCHAR(10) DEFAULT '';
DECLARE v_diff_days INT DEFAULT 0;
DECLARE v_diff_partitions INT DEFAULT 0;
DECLARE v_partition_name VARCHAR(50) DEFAULT '';
DECLARE v_partition_time INT DEFAULT 0;
DECLARE v_sql_status TINYINT DEFAULT 0;
-- 查询所有库中RANGE分区的表,存入临时表
DROP TEMPORARY TABLE IF EXISTS tmp_partition_table;
CREATE TEMPORARY TABLE tmp_partition_table(id INT NOT NULL AUTO_INCREMENT, schema_name VARCHAR(20), table_name VARCHAR(50), PRIMARY KEY (id));
# 插入查询分区表的SQL
INSERT INTO tmp_partition_table(schema_name, table_name)
SELECT DISTINCT TABLE_SCHEMA, TABLE_NAME FROM information_schema.PARTITIONS
WHERE PARTITION_NAME IS NOT NULL AND PARTITION_ORDINAL_POSITION > 0 AND PARTITION_METHOD = 'RANGE';
SELECT * FROM tmp_partition_table;
SELECT COUNT(1) INTO v_partition_table_num FROM tmp_partition_table;
WHILE(v_i <= v_partition_table_num) DO
SELECT schema_name, table_name INTO v_schema_name, v_table_name FROM tmp_partition_table WHERE id = v_i;
SELECT M.PARTITION_EXPRESSION, GROUP_CONCAT(M.PARTITION_DESCRIPTION) INTO v_column, v_time FROM (
SELECT PARTITION_EXPRESSION, PARTITION_DESCRIPTION FROM information_schema.`PARTITIONS`
WHERE TABLE_SCHEMA = v_schema_name AND TABLE_NAME = v_table_name AND PARTITION_METHOD = 'RANGE'
AND PARTITION_NAME IS NOT NULL AND PARTITION_ORDINAL_POSITION > 0 ORDER BY PARTITION_ORDINAL_POSITION DESC LIMIT 2
) M GROUP BY M.PARTITION_EXPRESSION;
SELECT crawler.Func_split_val(v_time,',',1) INTO v_time_first;
SELECT crawler.Func_split_val(v_time,',',2) INTO v_time_second;
IF(v_time_first > 0 AND v_time_second > 0) THEN
SET v_diff_days = DATEDIFF(FROM_UNIXTIME(v_time_first,'%Y-%m-%d'),FROM_UNIXTIME(v_time_second,'%Y-%m-%d'));
IF(v_diff_days = 1) THEN
SELECT COUNT(1) INTO v_diff_partitions FROM information_schema.`PARTITIONS`
WHERE TABLE_SCHEMA = v_schema_name AND TABLE_NAME = v_table_name AND PARTITION_METHOD = 'RANGE'
AND PARTITION_DESCRIPTION >= UNIX_TIMESTAMP(CURRENT_DATE())
AND PARTITION_NAME IS NOT NULL AND PARTITION_ORDINAL_POSITION > 0;
IF(v_diff_partitions < v_day_least_par_num) THEN
SET v_partition_time = UNIX_TIMESTAMP(DATE_ADD(FROM_UNIXTIME(v_time_first,'%Y-%m-%d'),INTERVAL 1 DAY));
SET v_partition_name = CONCAT('p',DATE_FORMAT(DATE_ADD(FROM_UNIXTIME(v_partition_time),INTERVAL -1 DAY),'%Y%m%d'));
IF EXISTS(SELECT 1 FROM information_schema.`PARTITIONS` WHERE TABLE_SCHEMA = v_schema_name AND TABLE_NAME = v_table_name AND PARTITION_NAME = v_partition_name) THEN
SET v_partition_name = CONCAT('d',DATE_FORMAT(DATE_ADD(FROM_UNIXTIME(v_partition_time),INTERVAL -1 MONTH),'%Y%m'));
END IF;
SET @v_sql=CONCAT('ALTER TABLE ',v_schema_name,'.',v_table_name,' ADD PARTITION (PARTITION ',v_partition_name,' VALUES LESS THAN (',v_partition_time,') ENGINE = InnoDB);');
-- SELECT CAST(@v_sql AS CHAR(10000) CHARACTER SET utf8);
PREPARE stmt FROM @v_sql;
EXECUTE stmt;
DEALLOCATE PREPARE stmt; -- 释放掉
IF EXISTS(SELECT 1 FROM information_schema.`PARTITIONS` WHERE TABLE_SCHEMA = v_schema_name AND TABLE_NAME = v_table_name AND PARTITION_NAME = v_partition_name) THEN
SET v_sql_status = 1;
END IF;
INSERT INTO t_partition_maintenance_logs(schema_name, table_name, partition_name, partition_time, sql_text, `status`, create_at)
VALUES(v_schema_name, v_table_name, v_partition_name, v_partition_time, @v_sql, v_sql_status, UNIX_TIMESTAMP());
END IF;
-- 按天分区
ELSEIF(v_diff_days >= 28 AND v_diff_days <= 31) THEN
-- 按月分区
SELECT COUNT(1) INTO v_diff_partitions FROM information_schema.`PARTITIONS`
WHERE TABLE_SCHEMA = v_schema_name AND TABLE_NAME = v_table_name AND PARTITION_METHOD = 'RANGE'
AND PARTITION_DESCRIPTION >= UNIX_TIMESTAMP(DATE_ADD(LAST_DAY(NOW()),INTERVAL 1 DAY))
AND PARTITION_NAME IS NOT NULL AND PARTITION_ORDINAL_POSITION > 0;
IF(v_diff_partitions < v_month_least_par_num) THEN
SET v_partition_time = UNIX_TIMESTAMP(DATE_ADD(FROM_UNIXTIME(v_time_first,'%Y-%m-01'),INTERVAL 1 MONTH));
SET v_partition_name = CONCAT('p',DATE_FORMAT(DATE_ADD(FROM_UNIXTIME(v_partition_time),INTERVAL -1 MONTH),'%Y%m'));
IF EXISTS(SELECT 1 FROM information_schema.`PARTITIONS` WHERE TABLE_SCHEMA = v_schema_name AND TABLE_NAME = v_table_name AND PARTITION_NAME = v_partition_name) THEN
SET v_partition_name = CONCAT('m',DATE_FORMAT(DATE_ADD(FROM_UNIXTIME(v_partition_time),INTERVAL -1 MONTH),'%Y%m'));
END IF;
SET @v_sql=CONCAT('ALTER TABLE ',v_schema_name,'.',v_table_name,' ADD PARTITION (PARTITION ',v_partition_name,' VALUES LESS THAN (',v_partition_time,') ENGINE = InnoDB);');
-- SELECT CAST(@v_sql AS CHAR(10000) CHARACTER SET utf8);
PREPARE stmt FROM @v_sql;
EXECUTE stmt;
DEALLOCATE PREPARE stmt; -- 释放掉
IF EXISTS(SELECT 1 FROM information_schema.`PARTITIONS` WHERE TABLE_SCHEMA = v_schema_name AND TABLE_NAME = v_table_name AND PARTITION_NAME = v_partition_name) THEN
SET v_sql_status = 1;
END IF;
INSERT INTO t_partition_maintenance_logs(schema_name, table_name, partition_name, partition_time, sql_text, `status`, create_at)
VALUES(v_schema_name, v_table_name, v_partition_name, v_partition_time, @v_sql, v_sql_status, UNIX_TIMESTAMP());
END IF;
END IF;
END IF;
SET v_i = v_i + 1;
END WHILE;
DROP TEMPORARY TABLE IF EXISTS tmp_partition_table;
END;
二、表一开始需要新建二个分区
2.1 修改表建立分区
例如下面我是新建2个月分区
ALTER TABLE t_crawler_article
DROP PRIMARY KEY,
DROP INDEX article_md5,
ADD PRIMARY KEY (id, create_at),
ADD UNIQUE INDEX article_md5 (article_md5, create_at)
PARTITION BY RANGE (create_at) (
PARTITION m202306 VALUES LESS THAN (UNIX_TIMESTAMP('2023-07-01 00:00:00')),
PARTITION m202307 VALUES LESS THAN (UNIX_TIMESTAMP('2023-08-01 00:00:00'))
);
如果有数据情况还可以备份,以免丢失数据
create table t_crawler_article_bak select * from `t_crawler_article`;
2.2 执行维护分区存储,多执行也没关系
call Proc_Partition_maintenance();
call Proc_Partition_maintenance();
call Proc_Partition_maintenance();
可以多执行几次,最多维护3个月分区
2.3 查看分区,最好是空余三个月分区
SELECT
TABLE_NAME,
PARTITION_NAME,
PARTITION_ORDINAL_POSITION,
TABLE_ROWS,
PARTITION_DESCRIPTION
FROM
INFORMATION_SCHEMA.PARTITIONS
WHERE
TABLE_SCHEMA = 'crawler' AND
TABLE_NAME = 't_crawler_article';
三、定时器去定时维护分区
3.1 创建事件
每天凌晨 4:05:06执行,自动维护分区
create event Job_Partition_maintenance on schedule
every '1' DAY
starts '2023-01-01 04:05:06'
enable
do
CALL Proc_Partition_maintenance();
3.2 查看事件是否开启
SHOW VARIABLES LIKE 'event_scheduler';
可以看到下面的结果
mysql> SHOW VARIABLES LIKE 'event_scheduler';
+-----------------+-------+
| Variable_name | Value |
+-----------------+-------+
| event_scheduler | ON |
+-----------------+-------+
1 row in set (0.00 sec)
如果为off 可以设置下面命令开启,再查看
SET GLOBAL event_scheduler = ON; 这种是临时开启
再my.cnf 设置 永久开启:
[mysqld]
event_scheduler=ON
重启mysql systemctl restart mysqld.service
3.3 还可以查看具体事件是否开启
SELECT * FROM information_schema.EVENTS WHERE EVENT_SCHEMA = 'crawler'; 参数为库的名字
可以看到输出结果
mysql> SELECT * FROM information_schema.EVENTS WHERE EVENT_SCHEMA = 'crawler'\G;
*************************** 1. row ***************************
EVENT_CATALOG: def
EVENT_SCHEMA: crawler
EVENT_NAME: Job_Partition_maintenance
DEFINER: root@localhost
TIME_ZONE: SYSTEM
EVENT_BODY: SQL
EVENT_DEFINITION: CALL Proc_Partition_maintenance()
EVENT_TYPE: RECURRING
EXECUTE_AT: NULL
INTERVAL_VALUE: 1
INTERVAL_FIELD: DAY
SQL_MODE: ONLY_FULL_GROUP_BY,STRICT_TRANS_TABLES,NO_ZERO_IN_DATE,NO_ZERO_DATE,ERROR_FOR_DIVISION_BY_ZERO,NO_AUTO_CREATE_USER,NO_ENGINE_SUBSTITUTION
STARTS: 2023-01-01 04:05:06
ENDS: NULL
STATUS: ENABLED # 这个代表开启了
ON_COMPLETION: NOT PRESERVE
CREATED: 2023-07-13 07:39:35
LAST_ALTERED: 2023-07-13 07:39:35
LAST_EXECUTED: NULL
EVENT_COMMENT:
ORIGINATOR: 1
CHARACTER_SET_CLIENT: utf8mb4
COLLATION_CONNECTION: utf8mb4_general_ci
DATABASE_COLLATION: utf8mb4_bin
1 row in set (0.00 sec)
查看事件下次执行时间
SELECT
EVENT_NAME,
LAST_ALTERED,
LAST_EXECUTED,
STATUS,
STARTS
FROM
information_schema.EVENTS
四、查看性能指标
4.1 查看进程数据
SHOW FULL PROCESSLIST;
SHOW PROCESSLIST;
SHOW STATUS;
SHOW VARIABLES;
4.2 查看表统计数据
统计表占用的大小
SELECT
table_schema AS 'Database',
table_name AS 'Table',
round(((data_length + index_length) / 1024 / 1024), 2) 'Size in MB'
FROM information_schema.TABLES
ORDER BY (data_length + index_length) DESC;
4.3 备份数据操作(针对10GB以下数据)
# 查询表大小
SELECT
table_schema AS 'Database',
table_name AS 'Table',
round(((data_length + index_length) / 1024 / 1024), 2) 'Size in MB'
FROM information_schema.TABLES
ORDER BY (data_length + index_length) DESC;
# 查询表分区
SELECT
TABLE_NAME,
PARTITION_NAME,
PARTITION_ORDINAL_POSITION,
TABLE_ROWS,
PARTITION_DESCRIPTION
FROM
INFORMATION_SCHEMA.PARTITIONS
WHERE
TABLE_SCHEMA = 'crawler' AND
TABLE_NAME = 't_crawler_article';
SELECT * FROM t_crawler_article PARTITION (p202308); # 4844 ~ 5080
SELECT count(*) FROM t_crawler_article PARTITION (p202308); # 135044
SELECT * FROM t_crawler_article PARTITION (p202308) limit 10; # 135044
SELECT count(*) FROM t_crawler_article ; #833
SELECT count(*) FROM temp_t_crawler_article ; #833
drop TABLE if exists temp_t_crawler_article;
# 备份分区
CREATE TABLE temp_t_crawler_article LIKE t_crawler_article;
INSERT INTO temp_t_crawler_article SELECT * FROM t_crawler_article PARTITION (p202308);
# 终端备份sql
mysqldump -h 127.0.0.1 -u root -p -P 3306 crawler temp_t_crawler_article > /home/ec2-user/data/mysql_backup/temp_t_crawler_article_m202308.sql
# 删除分区 bin log 同步的dll 模式 ,es 需要同步
# ALTER TABLE t_crawler_article DROP PARTITION p202308;
#or 删除数据
DELETE FROM t_crawler_article WHERE create_at >= unix_timestamp('2023-07-01 00:00:00') AND create_at < unix_timestamp('2023-08-01 00:00:00');
# 删除表
DROP TABLE temp_t_crawler_article;
# 确认分区,然后再删除空分区
ALTER TABLE t_crawler_article DROP PARTITION p202308
# 恢复分区 该可以不执行
ALTER TABLE t_crawler_article REMOVE PARTITIONING;
# 恢复分区 该可以不执行,重新分配分区
ALTER TABLE t_crawler_article
PARTITION BY RANGE (create_at) (
PARTITION m202307 VALUES LESS THAN (UNIX_TIMESTAMP('2023-08-01 00:00:00')),
PARTITION m202308 VALUES LESS THAN (UNIX_TIMESTAMP('2023-09-01 00:00:00')),
PARTITION m202309 VALUES LESS THAN (UNIX_TIMESTAMP('2023-10-01 00:00:00')),
PARTITION m202310 VALUES LESS THAN (UNIX_TIMESTAMP('2023-11-01 00:00:00')),
PARTITION m202312 VALUES LESS THAN (UNIX_TIMESTAMP('2023-12-01 00:00:00')),
PARTITION m202401 VALUES LESS THAN (UNIX_TIMESTAMP('2024-01-01 00:00:00'))
);
drop table if exists temp_t_crawler_article;
# 导入数据
mysql -h 127.0.0.1 -u root -p -P 3306 crawler < /home/ec2-user/data/mysql_backup/temp_t_crawler_article_m202308.sql
# 恢复到原表
INSERT INTO t_crawler_article SELECT * FROM temp_t_crawler_article;
mysql 查询表
SELECT
TABLE_NAME,
TABLE_COMMENT
FROM
information_schema.TABLES
WHERE
TABLE_SCHEMA = 'baobao' AND
TABLE_COMMENT LIKE '%pk%';
# 查询版本
SELECT VERSION();