建表
首先在MySQL的example库上创建表user
use example;
create TABLE `user` (
`id` int(11) NOT NULL,
`last_name` varchar(45) DEFAULT NULL,
`first_name` varchar(45) DEFAULT NULL,
`sex` set('M','F') DEFAULT NULL,
`age` tinyint(1) DEFAULT NULL,
`phone` varchar(11) DEFAULT NULL,
`address` varchar(45) DEFAULT NULL,
`password` varchar(45) DEFAULT NULL,
`create_time` datetime DEFAULT NULL,
PRIMARY KEY (`id`),
KEY `idx_last_first_name_age` (`last_name`,`first_name`,`age`) USING BTREE,
KEY `idx_phone` (`phone`) USING BTREE,
KEY `idx_create_time` (`create_time`) USING BTREE
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
- 1
- 2
- 3
- 4
- 5
- 6
- 7
- 8
- 9
- 10
- 11
- 12
- 13
- 14
- 15
- 16
- 1
- 2
- 3
- 4
- 5
- 6
- 7
- 8
- 9
- 10
- 11
- 12
- 13
- 14
- 15
- 16
python脚本源码
用Python的MySQLdb模块连接msyql,每次批量插入100条,循环插入1000000次。本机测试的插入速度为每秒10000条左右。编写generate_user_data.py 脚本如下:
#!/usr/bin/python
# -*- coding: UTF-8 -*-
import MySQLdb
import MySQLdb.cursors
import random
import string
import time
#批量插的次数
loop_count = 1000000
#每次批量查的数据量
batch_size = 100
success_count = 0
fails_count = 0
#数据库的连接
conn = MySQLdb.connect(host="127.0.0.1", user="root", passwd="123456", db="example", port=3306, cursorclass = MySQLdb.cursors.SSCursor)
chars = 'AaBbCcDdEeFfGgHhIiJjKkLlMmNnOoPpQqRrSsTtUuVvWwXxYyZz'
digits = '0123456789'
def random_generate_string(length):
return string.join(random.sample(chars, length), '')
def random_generate_number(length):
if length > len(digits):
digit_list = random.sample(digits, len(digits))
digit_list.append(random.choice(digits))
return string.join(digit_list, '')
return string.join(random.sample(digits, length), '')
def random_generate_data(num):
c = [num]
phone_num_seed = 13100000000
def _random_generate_data():
c[0] += 1
return (
c[0],
"last_name_" + str(random.randrange(100000)),
"first_name_" + str(random.randrange(100000)),
random.choice('MF'),
random.randint(1, 120),
phone_num_seed + c[0],
random_generate_string(20),
random_generate_string(10),
time.strftime("%Y-%m-%d %H:%M:%S")
)
return _random_generate_data
def execute_many(insert_sql, batch_data):
global success_count, fails_count
cursor = conn.cursor()
try:
cursor.executemany(insert_sql, batch_data)
except Exception, e:
conn.rollback()
fails_count = fails_count + len(batch_data)
print e
raise
else:
conn.commit()
success_count = success_count + len(batch_data)
print str(success_count) + " commit"
finally:
cursor.close()
try:
#user表列的数量
column_count = 9
#插入的SQL
insert_sql = "replace into user(id, last_name, first_name, sex, age, phone, address, password, create_time) values (" + ",".join([ "%s" for x in xrange(column_count)]) + ")"
batch_count = 0
begin_time = time.time()
for x in xrange(loop_count):
batch_count = x * batch_size
gen_fun = random_generate_data(batch_count)
batch_data = [gen_fun() for x in xrange(batch_size)]
execute_many(insert_sql, batch_data)
end_time = time.time()
total_sec = end_time - begin_time
qps = success_count / total_sec
print "总共生成数据: " + str(success_count)
print "总共耗时(s): " + str(total_sec)
print "QPS: " + str(qps)
except Exception, e:
print e
raise
else:
pass
finally:
pass
- 1
- 2
- 3
- 4
- 5
- 6
- 7
- 8
- 9
- 10
- 11
- 12
- 13
- 14
- 15
- 16
- 17
- 18
- 19
- 20
- 21
- 22
- 23
- 24
- 25
- 26
- 27
- 28
- 29
- 30
- 31
- 32
- 33
- 34
- 35
- 36
- 37
- 38
- 39
- 40
- 41
- 42
- 43
- 44
- 45
- 46
- 47
- 48
- 49
- 50
- 51
- 52
- 53
- 54
- 55
- 56
- 57
- 58
- 59
- 60
- 61
- 62
- 63
- 64
- 65
- 66
- 67
- 68
- 69
- 70
- 71
- 72
- 73
- 74
- 75
- 76
- 77
- 78
- 79
- 80
- 81
- 82
- 83
- 84
- 1
- 2
- 3
- 4
- 5
- 6
- 7
- 8
- 9
- 10
- 11
- 12
- 13
- 14
- 15
- 16
- 17
- 18
- 19
- 20
- 21
- 22
- 23
- 24
- 25
- 26
- 27
- 28
- 29
- 30
- 31
- 32
- 33
- 34
- 35
- 36
- 37
- 38
- 39
- 40
- 41
- 42
- 43
- 44
- 45
- 46
- 47
- 48
- 49
- 50
- 51
- 52
- 53
- 54
- 55
- 56
- 57
- 58
- 59
- 60
- 61
- 62
- 63
- 64
- 65
- 66
- 67
- 68
- 69
- 70
- 71
- 72
- 73
- 74
- 75
- 76
- 77
- 78
- 79
- 80
- 81
- 82
- 83
- 84
执行python脚本
python generate_user_data.py