单线程
500+w条数据,插入时间:1小时,13分钟。
脚本:
[root@10 hurl]# cat insert-mongo2.py
#!/usr/bin/env python
#coding=utf-8
import time,datetime
import pymongo
from pymongo import MongoClient
# One client for the whole script. The URI carries the user name, password,
# host and port of the server being benchmarked.
client = MongoClient(
    'mongodb://test:test123@172.16.100.26:27019/test')
# Handle to the "test" database; the functions below reach collections through it.
db = client['test']
#from pymongo import Connection
#connection = Connection('127.0.0.1', 27017)
#db = connection['test']
def func_time(func):
    """Timing decorator: print how long each call to ``func`` takes.

    After the wrapped call returns, a line of the form
    ``<function name> run: <elapsed seconds>`` is printed. Nothing is printed
    if ``func`` raises, because the exception propagates before the print.

    Args:
        func: The callable to time.

    Returns:
        A wrapper with the same name and docstring as ``func`` that forwards
        all arguments and returns whatever ``func`` returned.
    """
    # Local import: the script's import lines do not bring in functools.
    import functools

    @functools.wraps(func)
    def _wrapper(*args, **kwargs):
        start = time.time()
        # Keep the result so that decorating a function does not turn its
        # return value into None.
        result = func(*args, **kwargs)
        print("%s run: %s" % (func.__name__, time.time() - start))
        return result
    return _wrapper
@func_time
def insert(num):
    """Insert ``num`` documents into ``db.userinfo``, one ``insert_one`` each.

    Every document uses the string form of its sequence number as ``_id``.
    A progress line is printed after every 100000th insert, starting with the
    very first document (sequence number 0).

    Args:
        num: Number of documents to insert.
    """
    posts = db.userinfo
    # xrange (this is a Python 2 script) yields the sequence numbers lazily;
    # range(num) would first build a list with one entry per document.
    for x in xrange(num):
        seq = str(x)
        # The five description fields all hold the same text.
        description = "this is a very long description for " + seq
        post = {
            "_id": seq,
            "author": seq + "Mike",
            "description1": description,
            "description2": description,
            "description3": description,
            "description4": description,
            "description5": description,
            "text": "My first blog post!",
            "tags": ["mongodb", "python", "pymongo"],
            "date": datetime.datetime.utcnow(),
        }
        posts.insert_one(post)
        if x % 100000 == 0:
            print("100000 ! -- %s" % (datetime.datetime.now()))
if __name__ == "__main__":
    # Size of the run: ten million loop iterations.
    num = 10 * 1000 * 1000
    insert(num)
插入总数:
mongos> db.userinfo.count()
5033914
mongos>
多进程并发:
测试脚本:
[root@10 hurl]# cat insert-mongo3.py
#!/usr/bin/env python
#coding=utf-8
import multiprocessing,random,string
import time,datetime
import pymongo
from pymongo import MongoClient
def gen_load(x, taskid):
    """Insert ``x`` documents into ``test.userinfo2`` from one pool worker.

    Args:
        x: Number of documents this task inserts.
        taskid: Identifier of the task. It is folded into every ``_id``
            ("<taskid>-<sequence>") so that tasks running side by side never
            generate the same key; with a bare sequence number every task
            would produce the identical ``_id`` values and collide.

    Returns:
        The number of documents inserted (``x``).
    """
    # Created inside the function so that every worker process uses its own
    # connection.
    client = MongoClient('mongodb://test:test123@172.16.100.26:27019/test')
    posts = client.test.userinfo2
    for seq in xrange(x):
        post = {
            "_id": "%s-%s" % (taskid, seq),
            "author": str(seq) + "Mike",
            "text": "My first blog post!",
            "tags": ["mongodb", "python", "pymongo"],
            "date": datetime.datetime.utcnow(),
        }
        posts.insert_one(post)
        if seq % 100000 == 0:
            print("100000 ! -- %s" % (datetime.datetime.now()))
    return x


if __name__ == '__main__':
    # 10 tasks x 100000 documents = 1,000,000 documents in total.
    inser_number = 100000
    task_count = 10
    # NOTE(review): the pool holds 100 processes but only task_count tasks
    # are submitted, so at most task_count workers insert at the same time.
    pro_pool = multiprocessing.Pool(processes=100)
    print(time.strftime('%Y-%m-%d:%H-%M-%S', time.localtime(time.time())))
    start_time = time.time()
    results = [
        pro_pool.apply_async(gen_load, args=(inser_number, taskid))
        for taskid in xrange(task_count)
    ]
    pro_pool.close()
    pro_pool.join()
    elapsed = time.time() - start_time
    # An exception raised inside a worker is only re-raised when its result
    # is fetched; without get() a failed task would pass unnoticed and the
    # timing below would look like a successful run.
    for result in results:
        result.get()
    print(elapsed)
    time.sleep(1)
    print("Sub-process(es) done.")
10w条数据插入
100个进程,10w条数据,86秒
[root@10 hurl]# python insert-mongo3.py
2015-12-08:13-54-53
10000 ! -- 2015-12-08 13:54:53.822510
10000 ! -- 2015-12-08 13:55:02.329206
10000 ! -- 2015-12-08 13:55:10.624850
10000 ! -- 2015-12-08 13:55:19.763308
10000 ! -- 2015-12-08 13:55:28.384718
10000 ! -- 2015-12-08 13:55:36.957917
10000 ! -- 2015-12-08 13:55:45.426113
10000 ! -- 2015-12-08 13:55:53.872843
10000 ! -- 2015-12-08 13:56:02.898263
10000 ! -- 2015-12-08 13:56:11.243706
86.4502689838
Sub-process(es) done.
100个进程:
[root@10 hurl]# ps -ef| grep python | wc -l
103
100w条数据:
100个进程,100w条数据,858秒:
[root@10 hurl]# python insert-mongo3.py
2015-12-08:14-04-21
100000 ! -- 2015-12-08 14:04:21.481368
100000 ! -- 2015-12-08 14:05:43.069700
100000 ! -- 2015-12-08 14:07:05.694774
100000 ! -- 2015-12-08 14:08:28.375586
100000 ! -- 2015-12-08 14:09:51.335589
100000 ! -- 2015-12-08 14:11:18.619572
100000 ! -- 2015-12-08 14:12:45.748144
100000 ! -- 2015-12-08 14:14:12.534341
100000 ! -- 2015-12-08 14:15:43.059457
100000 ! -- 2015-12-08 14:17:11.387942
858.129628897
Sub-process(es) done.
[root@10 hurl]#
查询:
在500w条数据中随机查询100w次,用时15秒。
[root@10 hurl]# python find-mongo2.py
find 0 .
find 100000 .
find 200000 .
find 300000 .
find 400000 .
find 500000 .
find 600000 .
find 700000 .
find 800000 .
find 900000 .
mread run: 15.3293960094
[root@10 hurl]#
脚本:
[root@10 hurl]# cat find-mongo2.py
#!/usr/bin/env python
#coding=utf-8
import time,datetime
import random
import pymongo
from pymongo import MongoClient
# One client for the whole script. The URI carries the user name, password,
# host and port of the server being queried.
client = MongoClient(
    'mongodb://test:test123@172.16.100.26:27019/test')
# Handle to the "test" database used by mread().
db = client['test']
def func_time(func):
    """Timing decorator: print how long each call to ``func`` takes.

    After the wrapped call returns, a line of the form
    ``<function name> run: <elapsed seconds>`` is printed. Nothing is printed
    if ``func`` raises, because the exception propagates before the print.

    Args:
        func: The callable to time.

    Returns:
        A wrapper with the same name and docstring as ``func`` that forwards
        all arguments and returns whatever ``func`` returned.
    """
    # Local import: the script's import lines do not bring in functools.
    import functools

    @functools.wraps(func)
    def _wrapper(*args, **kwargs):
        start = time.time()
        # Keep the result so that decorating a function does not turn its
        # return value into None.
        result = func(*args, **kwargs)
        print("%s run: %s" % (func.__name__, time.time() - start))
        return result
    return _wrapper
#@func_time
def randy():
    """Return a random integer from 1 to 5000000, both ends included."""
    return random.randint(1, 5000000)
@func_time
def mread(num):
    """Run ``num`` lookups by ``author`` against ``db.userinfo``.

    Each lookup builds a random author name ("<number>Mike") and fetches the
    matching document. A progress line is printed every 100000 lookups,
    starting with the first.

    NOTE(review): this script creates no index on ``author``; unless one
    already exists, each lookup may have to scan the collection, so expect
    far longer run times than with the query never being executed.

    Args:
        num: Number of lookups to perform.
    """
    find = db.userinfo
    for i in range(num):
        # Query with a random author name.
        # find_one() sends the query and returns the document (or None).
        # A bare find() only builds a lazy Cursor; nothing is sent to the
        # server until the cursor is iterated, so timing it measures no
        # actual query work.
        find.find_one({"author": str(randy()) + "Mike"})
        if i % 100000 == 0:
            print("find %s ." % i)
if __name__ == "__main__":
    # Size of the run: one million lookups.
    num = 1000 * 1000
    mread(num)
无索引Update
在500w数据中,无索引update100条数据,567秒:
[root@10 hurl]# python update-mongo2.py
mread run: 567.210422993
[root@10 hurl]#
脚本:
[root@10 hurl]# cat update-mongo2.py
#!/usr/bin/env python
#coding=utf-8
import time,datetime
import random
import pymongo
from pymongo import MongoClient
# One client for the whole script. The URI carries the user name, password,
# host and port of the server being updated.
client = MongoClient(
    'mongodb://test:test123@172.16.100.26:27019/test')
# Handle to the "test" database used by mread().
db = client['test']
def func_time(func):
    """Timing decorator: print how long each call to ``func`` takes.

    After the wrapped call returns, a line of the form
    ``<function name> run: <elapsed seconds>`` is printed. Nothing is printed
    if ``func`` raises, because the exception propagates before the print.

    Args:
        func: The callable to time.

    Returns:
        A wrapper with the same name and docstring as ``func`` that forwards
        all arguments and returns whatever ``func`` returned.
    """
    # Local import: the script's import lines do not bring in functools.
    import functools

    @functools.wraps(func)
    def _wrapper(*args, **kwargs):
        start = time.time()
        # Keep the result so that decorating a function does not turn its
        # return value into None.
        result = func(*args, **kwargs)
        print("%s run: %s" % (func.__name__, time.time() - start))
        return result
    return _wrapper
#@func_time
def randy():
    """Return a random integer from 1 to 5000000, both ends included."""
    return random.randint(1, 5000000)
@func_time
def mread(num):
    """Issue ``num`` single-document updates against ``db.userinfo``.

    Each round builds a random author name ("<number>Mike"), prints it, and
    asks the server to set ``text`` to "My second blog post!" on one document
    whose ``author`` equals that name.

    Args:
        num: Number of update operations to send.
    """
    update = db.userinfo
    for _ in range(num):
        # Pick the document to touch via a random author name.
        name = "%sMike" % randy()
        print("uodate %s " % name)
        selector = {"author": name}
        change = {"$set": {"text": "My second blog post!"}}
        update.update_one(selector, change)
if __name__ == "__main__":
    # Number of update operations in this run.
    num = 10 * 10
    mread(num)
Create index
500w条数据,创建索引,用时70秒。
[root@10 hurl]# python index-mongo2.py
insert run: 70.6546721458
脚本:
[root@10 hurl]# vi index-mongo2.py
#!/usr/bin/env python
#coding=utf-8
import time,datetime
import pymongo
from pymongo import MongoClient
# One client for the whole script. The URI carries the user name, password,
# host and port of the server holding the collection to index.
client = MongoClient(
    'mongodb://test:test123@172.16.100.26:27019/test')
# Handle to the "test" database used by createInx().
db = client['test']
def func_time(func):
    """Timing decorator: print how long each call to ``func`` takes.

    After the wrapped call returns, a line of the form
    ``<function name> run: <elapsed seconds>`` is printed. Nothing is printed
    if ``func`` raises, because the exception propagates before the print.

    Args:
        func: The callable to time.

    Returns:
        A wrapper with the same name and docstring as ``func`` that forwards
        all arguments and returns whatever ``func`` returned.
    """
    # Local import: the script's import lines do not bring in functools.
    import functools

    @functools.wraps(func)
    def _wrapper(*args, **kwargs):
        start = time.time()
        # Keep the result so that decorating a function does not turn its
        # return value into None.
        result = func(*args, **kwargs)
        print("%s run: %s" % (func.__name__, time.time() - start))
        return result
    return _wrapper
@func_time
def createInx(num):
    """Create a unique index on the ``author`` field of ``db.userinfo``.

    Args:
        num: Accepted but not used by this function.
    """
    db.userinfo.create_index('author', unique=True)
if __name__ == "__main__":
    # createInx takes an argument but ignores it; 1 is passed as a placeholder.
    num = int(True)
    createInx(num)
有索引Update
在500w数据中,有索引update100条数据,1.98秒:
[root@10 hurl]# python update-mongo2.py
mread run: 1.98181605339
[root@10 hurl]#
Delete
500w条数据,删除完成,使用时间:541.97秒。
[root@10 hurl]# python delete-mongo2.py
count before remove: 5033914
count after remove: 0
insert run: 541.965857983
脚本:
[root@linux-00 sh]# python delete-mongo2.py
count before remove: 100000
count after remove: 0
remove run: 0.951205015182
[root@linux-00 sh]# cat delete-mongo2.py
#!/usr/bin/env python
import time,datetime
import pymongo
from pymongo import MongoClient
# One client for the whole script, pointed at a server on localhost:27017.
client = MongoClient(
    'mongodb://localhost:27017/')
# Handle to the "test" database used by remove().
db = client['test']
def func_time(func):
    """Timing decorator: print how long each call to ``func`` takes.

    After the wrapped call returns, a line of the form
    ``<function name> run: <elapsed seconds>`` is printed. Nothing is printed
    if ``func`` raises, because the exception propagates before the print.

    Args:
        func: The callable to time.

    Returns:
        A wrapper with the same name and docstring as ``func`` that forwards
        all arguments and returns whatever ``func`` returned.
    """
    # Local import: the script's import lines do not bring in functools.
    import functools

    @functools.wraps(func)
    def _wrapper(*args, **kwargs):
        start = time.time()
        # Keep the result so that decorating a function does not turn its
        # return value into None.
        result = func(*args, **kwargs)
        print("%s run: %s" % (func.__name__, time.time() - start))
        return result
    return _wrapper
@func_time
def remove():
    """Delete every document in ``db.userinfo``.

    The collection's document count is printed once before and once after
    the delete.
    """
    posts = db.userinfo
    print('count before remove: %s' % posts.count())
    # An empty filter matches all documents.
    posts.delete_many({})
    print('count after remove: %s' % posts.count())
if __name__ == "__main__":
    # Script entry point: empty the collection and report the timing.
    remove()
总结:
| 单进程 | 多进程 | 多进程 | 单进程 |
数据量 | 500w | 10w | 100w | 500w |
操作 | insert | insert | insert | Find 100w次 |
用时 | 1小时13分钟 | 86秒 | 858秒 | 15秒 |
平均(条/秒) | 1141.55 | 1162.79 | 1165.50 | 66666.67 |
注:我自己电脑上的虚拟机插入测试,单线程,10w条数据,用时30秒,平均3333条每秒
数据量 | 500w | 500w | 500w | 500w |
索引 | 无索引 | 有索引 | | |
操作 | Update100条数据 | Update100条数据 | 创建索引 | delete |
用时 | 567秒 | 1.98秒 | 70.65秒 | 541.97秒 |
平均(条/秒) | 0.1764 | 50.51 | 70771.41 | 9225.60 |