前言
平时学习过程中经常会看到千万级数据库索引优化,千万级数据查询优化等,但是我们平时很少会有千万级数据的场景,这篇文章可以用来生成千万级数据用作学习。后续将数据表连接到项目中,可以做进一步学习。
建表
首先给出一个sql脚本,用于建立存储数据的表,我这里的表为用户表,表名t_user_test
这里是建表语句:
SET NAMES utf8mb4;
SET FOREIGN_KEY_CHECKS = 0;
-- ----------------------------
-- Table structure for t_user_test
-- ----------------------------
DROP TABLE IF EXISTS `t_user_test`;
CREATE TABLE `t_user_test` (
`id` int(0) NOT NULL AUTO_INCREMENT COMMENT 'id',
`username` varchar(40) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT NULL COMMENT '用户名',
`password` varchar(40) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT NULL COMMENT '密码',
`sex` tinyint(0) NULL DEFAULT NULL COMMENT '性别0男1女',
`address` varchar(100) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT NULL COMMENT '地址',
`telephone` varchar(16) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT NULL COMMENT '电话号码',
`intro` varchar(100) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT NULL COMMENT '介绍',
`mail` varchar(100) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT NULL COMMENT '邮箱',
`register_time` varchar(40) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT NULL COMMENT '注册时间',
`last_login_time` varchar(40) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT NULL COMMENT '上一次登陆时间',
`login_time` varchar(40) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT NULL COMMENT '登陆时间',
`last_login_ip` varchar(40) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT NULL COMMENT '上次登录ip地址',
`login_ip` varchar(40) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT NULL COMMENT '当前登录ip',
`login_count` varchar(10) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT NULL COMMENT '登录次数',
`update_time` varchar(40) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT NULL COMMENT '更新时间',
PRIMARY KEY (`id`) USING BTREE,
INDEX `user_name_phone_index_01`(`username`, `telephone`, `mail`) USING BTREE COMMENT '用户名手机号索引'
) ENGINE = InnoDB AUTO_INCREMENT = 11206497 CHARACTER SET = utf8mb4 COLLATE = utf8mb4_0900_ai_ci ROW_FORMAT = Dynamic;
SET FOREIGN_KEY_CHECKS = 1;
建表完成之后,我们执行以下sql脚本,用于生成随机的一千万条数据,生成数据后可以新建查询查看数据表占用内存大小,执行:
SELECT table_name AS 'Table',
ROUND(((data_length + index_length) / 1024 / 1024), 2) AS 'Size (MB)'
FROM information_schema.tables
WHERE table_schema = '替换为你的数据库名'
AND table_name = '替换为你的表名';
结果:
生成千万数据
一共占用了我的内存2606MB,运行时间大概在五百秒左右,耐心等待,如果不想等,可以先跑10万条),以下是生成1000万条数据的脚本代码:
INSERT INTO t_user_test (username, password, sex, address, telphone, intro, mail, register_time, last_login_time, login_time, last_login_ip, login_ip, login_count, update_time)
SELECT
CONCAT('user', LPAD(ROW_NUMBER() OVER(), 10, '0')), -- Generate unique usernames
MD5(RAND()), -- Generate random password hashes
CASE WHEN RAND() > 0.5 THEN 1 ELSE 0 END, -- Random sex (0 for Female, 1 for Male)
CONCAT('Address', FLOOR(RAND() * 100000)), -- Random address
CONCAT('12345678', FLOOR(RAND() * 100000)), -- Random telephone
'Intro text', -- Static intro text
CONCAT('user', LPAD(ROW_NUMBER() OVER(), 10, '0'), '@example.com'), -- Generate unique emails
NOW() - INTERVAL FLOOR(RAND() * 3650) DAY, -- Random register_time within the last 10 years
NOW() - INTERVAL FLOOR(RAND() * 3650) DAY, -- Random last_login_time within the last 10 years
NOW() - INTERVAL FLOOR(RAND() * 3650) DAY, -- Random login_time within the last 10 years
CONCAT(FLOOR(RAND() * 256), '.', FLOOR(RAND() * 256), '.', FLOOR(RAND() * 256), '.', FLOOR(RAND() * 256)), -- Random last_login_ip
CONCAT(FLOOR(RAND() * 256), '.', FLOOR(RAND() * 256), '.', FLOOR(RAND() * 256), '.', FLOOR(RAND() * 256)), -- Random login_ip
FLOOR(RAND() * 100), -- Random login_count
NOW() -- Current update_time
FROM
(SELECT 1 UNION ALL SELECT 2 UNION ALL SELECT 3 UNION ALL SELECT 4 UNION ALL SELECT 5 UNION ALL SELECT 6 UNION ALL SELECT 7 UNION ALL SELECT 8 UNION ALL SELECT 9 UNION ALL SELECT 10) AS t1,
(SELECT 1 UNION ALL SELECT 2 UNION ALL SELECT 3 UNION ALL SELECT 4 UNION ALL SELECT 5 UNION ALL SELECT 6 UNION ALL SELECT 7 UNION ALL SELECT 8 UNION ALL SELECT 9 UNION ALL SELECT 10) AS t2,
(SELECT 1 UNION ALL SELECT 2 UNION ALL SELECT 3 UNION ALL SELECT 4 UNION ALL SELECT 5 UNION ALL SELECT 6 UNION ALL SELECT 7 UNION ALL SELECT 8 UNION ALL SELECT 9 UNION ALL SELECT 10) AS t3,
(SELECT 1 UNION ALL SELECT 2 UNION ALL SELECT 3 UNION ALL SELECT 4 UNION ALL SELECT 5 UNION ALL SELECT 6 UNION ALL SELECT 7 UNION ALL SELECT 8 UNION ALL SELECT 9 UNION ALL SELECT 10) AS t4,
(SELECT 1 UNION ALL SELECT 2 UNION ALL SELECT 3 UNION ALL SELECT 4 UNION ALL SELECT 5 UNION ALL SELECT 6 UNION ALL SELECT 7 UNION ALL SELECT 8 UNION ALL SELECT 9 UNION ALL SELECT 10) AS t5,
(SELECT 1 UNION ALL SELECT 2 UNION ALL SELECT 3 UNION ALL SELECT 4 UNION ALL SELECT 5 UNION ALL SELECT 6 UNION ALL SELECT 7 UNION ALL SELECT 8 UNION ALL SELECT 9 UNION ALL SELECT 10) AS t6,
(SELECT 1 UNION ALL SELECT 2 UNION ALL SELECT 3 UNION ALL SELECT 4 UNION ALL SELECT 5 UNION ALL SELECT 6 UNION ALL SELECT 7 UNION ALL SELECT 8 UNION ALL SELECT 9 UNION ALL SELECT 10) AS t7;
ok,到此,你的t_user_test表中有1000万条数据了。现在你可以在这个表中进行查询,看看这么大的数据量查询所需要的时间。