准备数据
2.1 创建表
-- Demo table used by the pagination benchmarks: an auto-increment integer
-- primary key plus fifteen nullable string columns. The string columns carry
-- no charset clause of their own; they inherit utf8mb4 / utf8mb4_general_ci
-- from the table-level defaults declared at the end of the statement.
CREATE TABLE `user_operation_log` (
    `id`      INT(11)      NOT NULL AUTO_INCREMENT,
    `user_id` VARCHAR(64)  DEFAULT NULL,
    `ip`      VARCHAR(20)  DEFAULT NULL,
    `op_data` VARCHAR(255) DEFAULT NULL,
    `attr1`   VARCHAR(255) DEFAULT NULL,
    `attr2`   VARCHAR(255) DEFAULT NULL,
    `attr3`   VARCHAR(255) DEFAULT NULL,
    `attr4`   VARCHAR(255) DEFAULT NULL,
    `attr5`   VARCHAR(255) DEFAULT NULL,
    `attr6`   VARCHAR(255) DEFAULT NULL,
    `attr7`   VARCHAR(255) DEFAULT NULL,
    `attr8`   VARCHAR(255) DEFAULT NULL,
    `attr9`   VARCHAR(255) DEFAULT NULL,
    `attr10`  VARCHAR(255) DEFAULT NULL,
    `attr11`  VARCHAR(255) DEFAULT NULL,
    `attr12`  VARCHAR(255) DEFAULT NULL,
    PRIMARY KEY (`id`) USING BTREE
)
    ENGINE = InnoDB
    AUTO_INCREMENT = 1
    DEFAULT CHARACTER SET = utf8mb4
    COLLATE = utf8mb4_general_ci
    ROW_FORMAT = DYNAMIC;
2.2 造数据脚本
采用批量插入,效率会快很多;每1000条数据commit一次,因为单次批量插入的数据量太大,同样会导致插入效率变慢
DELIMITER ;;
-- Bulk-loads generated rows into `big_data`.`user_operation_log`.
--
-- Rows are accumulated as text into one multi-row INSERT and executed through
-- a prepared statement once per batch. user_id runs from userId + 1 upwards,
-- ip and op_data are fixed literals, and each of the twelve attr columns is
-- filled by a rand_string(50) call that is evaluated when the INSERT runs.
-- Takes no parameters and returns nothing; it leaves the session variables
-- @execSql, @attr, @rowTail, @execData and @stmtSql set.
CREATE DEFINER=`root`@`%` PROCEDURE `batch_insert_log`()
BEGIN
    -- Total rows to generate, and rows per multi-row INSERT.
    DECLARE total_rows INT DEFAULT 10000000;
    DECLARE batch_size INT DEFAULT 1000;
    DECLARE i INT DEFAULT 1;
    DECLARE userId INT DEFAULT 10000000;

    SET @execSql = 'INSERT INTO `big_data`.`user_operation_log`(`user_id`, `ip`, `op_data`, `attr1`, `attr2`, `attr3`, `attr4`, `attr5`, `attr6`, `attr7`, `attr8`, `attr9`, `attr10`, `attr11`, `attr12`) VALUES';

    -- Everything after the user id is identical for every row, so build it
    -- once instead of re-concatenating it on each iteration. @attr is the SQL
    -- text of the function call, not its result.
    SET @attr = 'rand_string(50)';
    SET @rowTail = CONCAT(
        ', ''110.20.169.111'', ''用户登录操作''',
        REPEAT(CONCAT(',', @attr), 12),
        ')'
    );
    SET @execData = '';

    WHILE i <= total_rows DO
        SET @execData = CONCAT(@execData, '(', userId + i, @rowTail);

        -- Flush on every full batch, and also on the last row so a trailing
        -- partial batch is not silently dropped.
        IF i % batch_size = 0 OR i = total_rows THEN
            SET @stmtSql = CONCAT(@execSql, @execData, ';');
            PREPARE stmt FROM @stmtSql;
            EXECUTE stmt;
            DEALLOCATE PREPARE stmt;
            -- Only has an effect when the session runs with autocommit off.
            COMMIT;
            SET @execData = '';
        ELSE
            SET @execData = CONCAT(@execData, ',');
        END IF;

        SET i = i + 1;
    END WHILE;
END ;;
DELIMITER ;
delimiter $$
-- Returns a string of n characters drawn uniformly at random from the 52
-- ASCII letters a-z and A-Z.
--
-- n: requested length. Values above 255 are capped at 255, the capacity of
--    the return type; zero, negative or NULL n yields an empty string.
--
-- Declared NOT DETERMINISTIC NO SQL: the result depends on rand() and the
-- body touches no tables. An explicit characteristic is required to create
-- the function when binary logging is enabled, unless
-- log_bin_trust_function_creators is set.
create function rand_string(n INT)
returns varchar(255)
not deterministic
no sql
begin
    -- Alphabet to sample from: lower-case then upper-case ASCII letters.
    declare chars_str varchar(100) default
        'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ';
    declare return_str varchar(255) default '';
    declare i int default 0;

    while i < least(n, 255) do
        -- rand() is in [0, 1), so the position falls in 1..char_length(chars_str).
        set return_str = concat(
            return_str,
            substring(chars_str, floor(1 + rand() * char_length(chars_str)), 1)
        );
        set i = i + 1;
    end while;

    return return_str;
end $$
-- Restore the default delimiter so the ';'-terminated statements that follow run.
delimiter ;
2.3 执行存储过程函数
执行 CALL batch_insert_log(); 模拟生成1000W条数据,然后确认数据量:
-- Sanity check after the load: number of rows currently in the table.
SELECT
    count(1)
FROM
    `user_operation_log`;
image-20230331163130669
2.4 普通分页查询
MySQL分页查询语法如下:
SELECT * FROM table LIMIT [offset,] rows | rows OFFSET offset
下面我们开始测试查询结果:
-- Baseline page: skip the first 10000 rows and return the next 10.
SELECT *
FROM `user_operation_log`
LIMIT 10 OFFSET 10000;
查询3次时间分别为:
image-20230331164216737
image-20230331164250745
image-20230331164319760
-- Same offset (10000), page size growing by a factor of ten each time:
-- 10, 100, 1000, 10000, 100000 and 1000000 rows.
SELECT *
FROM `user_operation_log`
LIMIT 10 OFFSET 10000;

SELECT *
FROM `user_operation_log`
LIMIT 100 OFFSET 10000;

SELECT *
FROM `user_operation_log`
LIMIT 1000 OFFSET 10000;

SELECT *
FROM `user_operation_log`
LIMIT 10000 OFFSET 10000;

SELECT *
FROM `user_operation_log`
LIMIT 100000 OFFSET 10000;

SELECT *
FROM `user_operation_log`
LIMIT 1000000 OFFSET 10000;
相同数据量,不同偏移量
-- Same page size (100 rows), offset growing by a factor of ten each time:
-- 100, 1000, 10000, 100000 and 1000000 rows skipped.
SELECT *
FROM `user_operation_log`
LIMIT 100 OFFSET 100;

SELECT *
FROM `user_operation_log`
LIMIT 100 OFFSET 1000;

SELECT *
FROM `user_operation_log`
LIMIT 100 OFFSET 10000;

SELECT *
FROM `user_operation_log`
LIMIT 100 OFFSET 100000;

SELECT *
FROM `user_operation_log`
LIMIT 100 OFFSET 1000000;
从上面结果可以得出结论:偏移量越大,花费时间越长
3 如何优化
3.1 优化数据量大的问题
-- Same window in all three queries (skip 1 row, return up to 1000000 rows);
-- only the select list differs. Each statement is terminated so the three
-- can be run together as a script.

-- All columns via the wildcard.
SELECT *
FROM `user_operation_log`
LIMIT 1, 1000000;

-- Primary key only.
SELECT id
FROM `user_operation_log`
LIMIT 1, 1000000;

-- All columns, listed explicitly.
SELECT
    id,
    user_id,
    ip,
    op_data,
    attr1,
    attr2,
    attr3,
    attr4,
    attr5,
    attr6,
    attr7,
    attr8,
    attr9,
    attr10,
    attr11,
    attr12
FROM `user_operation_log`
LIMIT 1, 1000000;
查询结果如下:
-
3.2 优化偏移量大的问题
3.2.1 采用子查询方式
我们可以先定位偏移位置的 id,然后再查询数据
-- Step 1: find the id sitting at the target offset. ORDER BY id makes
-- "the row at offset 1000000" well defined; without it the row returned
-- depends on whichever index the optimizer happens to scan.
SELECT id
FROM `user_operation_log`
ORDER BY id
LIMIT 1000000, 1;

-- Step 2: seek to that id and read one page of 10 rows from there, in the
-- same id order as the subquery.
SELECT *
FROM `user_operation_log`
WHERE id >= (
    SELECT id
    FROM `user_operation_log`
    ORDER BY id
    LIMIT 1000000, 1
)
ORDER BY id
LIMIT 10;
查询结果如下:
-- Execution plans for the two statements above.
EXPLAIN
SELECT id
FROM `user_operation_log`
ORDER BY id
LIMIT 1000000, 1;

EXPLAIN
SELECT *
FROM `user_operation_log`
WHERE id >= (
    SELECT id
    FROM `user_operation_log`
    ORDER BY id
    LIMIT 1000000, 1
)
ORDER BY id
LIMIT 10;
奇怪,走了索引啊,而且是主键索引,如下
带着十万个为什么和千万个不甘心,尝试给主键再加一层唯一索引
-- Adds a unique BTREE secondary index named idx_id on the id column,
-- which is already the table's primary key.
ALTER TABLE `big_data`.`user_operation_log`
    ADD UNIQUE KEY `idx_id` USING BTREE (`id`);
天啊,这查询效率的差距不止十倍!!!
再次EXPLAIN分析一下:
命中的索引不一样,命中唯一索引的查询,效率高出不止十倍。
结论:
但是上面的方法只适用于**==id是递增==**的情况,如果id不是递增的,比如雪花算法生成的id,得按照下面的方式:
注意:
-
某些 mysql 版本不支持在 in 子句中使用 limit,所以采用了多个嵌套select
-
但这种缺点是分页查询只能放在子查询里面
-- Page of 10 rows at offset 1000000, selected by id first and then joined
-- back to the full rows. The LIMIT sits in a derived table (t) because it is
-- not placed directly inside the IN subquery. ORDER BY id in the derived
-- table fixes which 10 ids form the page; the outer ORDER BY id fixes the
-- order in which they are returned.
SELECT *
FROM `user_operation_log`
WHERE id IN (
    SELECT t.id
    FROM (
        SELECT id
        FROM `user_operation_log`
        ORDER BY id
        LIMIT 1000000, 10
    ) AS t
)
ORDER BY id;
查询所花费时间如下:
image-20230331201356087
EXPLAIN一下
-- Execution plan for the statement above.
EXPLAIN
SELECT *
FROM `user_operation_log`
WHERE id IN (
    SELECT t.id
    FROM (
        SELECT id
        FROM `user_operation_log`
        ORDER BY id
        LIMIT 1000000, 10
    ) AS t
)
ORDER BY id;
image-20230331201459758
3.2.2 采用 id 限定方式
-- Range form: BETWEEN is inclusive on both ends, so the predicate covers 101
-- candidate ids; ORDER BY id makes LIMIT 100 keep the 100 lowest of them
-- rather than an arbitrary subset.
SELECT *
FROM `user_operation_log`
WHERE id BETWEEN 1000000 AND 1000100
ORDER BY id
LIMIT 100;

-- Open-ended form: the first 100 rows, in id order, whose id is at least 1000000.
SELECT *
FROM `user_operation_log`
WHERE id >= 1000000
ORDER BY id
LIMIT 100;
image-20230331202058138
-- Execution plans for the two statements above.
EXPLAIN
SELECT *
FROM `user_operation_log`
WHERE id BETWEEN 1000000 AND 1000100
ORDER BY id
LIMIT 100;

EXPLAIN
SELECT *
FROM `user_operation_log`
WHERE id >= 1000000
ORDER BY id
LIMIT 100;
因此,针对分页查询,偏移量大导致查询慢的问题:
最后说一句(求关注!别白嫖!)
如果这篇文章对您有所帮助,或者有所启发的话,求一键三连:点赞、转发、在看。
关注公众号:woniuxgg,在公众号中回复:笔记 就可以获得蜗牛为你精心准备的java实战语雀笔记,回复面试、开发手册、有超赞的粉丝福利!