创建表:brand_clue线索信息表
DROP TABLE IF EXISTS `brand_clue`; CREATE TABLE `brand_clue` ( `id` bigint(20) NOT NULL AUTO_INCREMENT COMMENT '品牌ID', `brand_title` varchar(100) NULL COMMENT '品牌名称', `city_id` int NOT NULL COMMENT '城市编号', `clue_channel` int NOT NULL COMMENT '线索渠道', `feature_type` varchar(100) COMMENT '品牌特征', `settled_follow_status` int COMMENT '入驻跟进状态', `settled_follow_count` int NOT NULL DEFAULT '0' COMMENT '入驻跟进次数', `settled_follow_bd` int COMMENT '入驻跟进BD', `can_develop` tinyint(4) NOT NULL DEFAULT '1' COMMENT '可以发展 1:无效草稿 0', `settled_id` int NOT NULL DEFAULT '-1' COMMENT '品牌未入驻 -1:入驻后id > 0', `create_time` datetime DEFAULT NOW() COMMENT '创建时间', PRIMARY KEY (`id`) ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COMMENT='品牌线索表' ;
插入200W数据(耗时82s):
## 创建一个插入数据的存储过程 DROP PROCEDURE IF EXISTS insert_procedure; delimiter;; CREATE PROCEDURE insert_procedure () BEGIN # 定义循环值 DECLARE i INT DEFAULT 1; # 开启事务 START TRANSACTION; # 开始循环插入 WHILE ( i <= 2000000 ) DO INSERT INTO `brand_clue`(`id`,`brand_title`, `city_id`, `clue_channel`, `feature_type`, `settled_follow_status`,`settled_follow_count`,`settled_follow_bd`) VALUES (i, CONCAT('品牌',i), CEIL(RAND() * 50), CEIL(RAND() * 100), CEIL(RAND() * 100), CEIL(RAND() * 30), CEIL(RAND() * 10), CEIL(RAND() * 300)); SET i = i + 1; END WHILE; END;; delimiter; # 调用存储过程插入数据 CALL insert_procedure ();
feature_type字段需要构造下数据,包含多个值,用于模拟全文搜索。
update brand_clue set feature_type = '68,69,70' where feature_type ='68'; update brand_clue set feature_type = '35,38' where feature_type = '35'; update brand_clue set feature_type = '7,38,72,95' where feature_type = '7';
can_develop模拟区分度2w条数据;
update brand_clue set can_develop = 0 where MOD(id,99) = 0;
settled_id模拟区分度约等于10w条数据;
update brand_clue set settled_id = CEIL(RAND() * 200000) where MOD(id,19) = 0;
1.低区分度的字段建立索引
-- 200W数据全表扫描 用时4.2s select * from brand_clue; -- 没有任何索引 全表扫描 用时4.1s select * from brand_clue where can_develop = 1 and settled_id < 0; -- 加上索引(200W数据加索引就用了17s) ALTER TABLE brand_clue add INDEX idx_can_develop_settled_id(can_develop,settled_id); -- 没走索引 全表扫描 用时4.0s EXPLAIN FORMAT = json select * from brand_clue where can_develop = 1 and settled_id < 0;
结论:低区分度的字段加索引并没有性能提升,增加IO成本。
2.单个字段多个索引混乱的情况
-- 加上索引(200W数据加索引就用了20s) ALTER TABLE brand_clue add INDEX idx_clue_channel_can_develop_settled_id(clue_channel,can_develop,settled_id); ALTER TABLE brand_clue add INDEX idx_can_develop_settled_id(can_develop,settled_id); ALTER TABLE brand_clue add INDEX idx_(clue_channel);
3.正确的索引设计
select * from brand_clue where FIND_IN_SET('68',feature_type) and clue_channel in (1,12,33,56,46,81,48,84) and can_develop = 1 and settled_id < 0;
即使FIND_IN_SET()函数不会走索引,idx_clue_channel_can_develop_settled_id也会被选中,提升查询性能。
4.全文索引
ALTER TABLE brand_clue ADD FULLTEXT INDEX ft_feature_type (feature_type) WITH PARSER ngram;