一个简单的需求:获取重复次数最多的数据。
@沛公
import random
# Tally how often each value occurs among random integers in [0, 999) and
# print the ten most frequent (value, count) pairs.
from collections import Counter

# Number of random samples to draw.
_SAMPLE_CNT = 1000000

# Key 0 is seeded with a zero count, as in the original dict literal, so it
# is always present and always first in insertion order.
_dict = Counter({0: 0})
# Counter.update() replaces the manual "if key in dict ... else ..." tally.
# range(_SAMPLE_CNT) draws exactly one million samples; the previous
# range(1, 1000000) stopped one short.
_dict.update(random.randrange(0, 999, 1) for _ in range(_SAMPLE_CNT))
print(dict(_dict))

# dict.items() is a view on Python 3 and cannot be sliced; copy it to a list
# to show the first ten pairs in insertion order.
print(list(_dict.items())[0:10])

# most_common() sorts by count, highest first. It replaces the cmp-based
# sorted() call, which Python 3 no longer supports.
items = _dict.most_common()
print(items[0:10])
在 4.0G 虚拟机上测试:100 万条数据耗时约 0.6 秒……速度一般。后续加入 SQLite 版本。
import sqlite3
import random
# Random values are drawn from [0, _MAX_VALUE).
_MAX_VALUE = 100
# Number of random rows to generate.
_DATA_CNT = 1000000

# The database file is opened relative to the current working directory.
# Plain ASCII quotes replace the typographic quotes, which are a syntax error.
conn = sqlite3.connect('test.db')
cur = conn.cursor()
def got_cnt(x, cursor=None):
    """Return how many times the value ``x`` has been counted so far.

    Reads the ``times`` column of the ``sorted`` table for the row whose
    ``value`` equals ``x``.

    Args:
        x: The value to look up.
        cursor: Optional DB-API cursor to run the query on. When omitted,
            the module-level ``cur`` is used.

    Returns:
        The stored ``times`` for ``x``, or 0 when no matching row exists.
    """
    if cursor is None:
        cursor = cur
    # Bind x as a query parameter instead of formatting it into the SQL text.
    cursor.execute('SELECT times FROM sorted WHERE value=?;', (x,))
    # Only the first matching row is needed, so fetchone() is enough.
    row = cursor.fetchone()
    return row[0] if row is not None else 0
# Fill ramdon_data with random values, tally each value's occurrences into
# the sorted table, then print the tally ordered by occurrence count.
try:
    # Start from empty tables so the script can be re-run against an existing
    # test.db; a plain CREATE TABLE raises when the table already exists.
    cur.execute('DROP TABLE IF EXISTS ramdon_data;')
    cur.execute('DROP TABLE IF EXISTS sorted;')
    cur.execute('''CREATE TABLE ramdon_data
    (id INTEGER PRIMARY KEY NOT NULL,value INTEGER);''')
    cur.execute('''CREATE TABLE sorted
    (id INTEGER PRIMARY KEY NOT NULL,value INTEGER, times INTEGER);''')
    print("create ok")

    # range(_DATA_CNT) inserts exactly _DATA_CNT rows; the previous
    # range(1, _DATA_CNT) inserted one fewer. Values are bound as query
    # parameters rather than formatted into the SQL text.
    cur.executemany(
        'INSERT INTO ramdon_data(value) VALUES (?);',
        ((random.randrange(0, _MAX_VALUE, 1),) for _ in range(_DATA_CNT)),
    )
    conn.commit()
    print("data ready")

    cur.execute('SELECT * FROM ramdon_data;')
    # Materialize every row first: got_cnt() and the writes below run on the
    # same cursor, which would discard a result set still being iterated.
    ans = cur.fetchall()
    for d in ans:
        value = d[1]
        times = got_cnt(value) + 1
        # Compare by value; "is 1" tested object identity.
        if times == 1:
            cur.execute(
                'INSERT INTO sorted(value,times) VALUES(?,?);',
                (value, times),
            )
        else:
            cur.execute(
                'UPDATE sorted SET times=? WHERE value=?;',
                (times, value),
            )
    conn.commit()
    print("calc ready")

    # Ascending order: the most frequent value is printed last.
    cur.execute('SELECT * FROM sorted ORDER BY times;')
    ans = cur.fetchall()
    print('\nlen is %s' % len(ans))
    for d in ans:
        print(d)
finally:
    # Close the connection even when one of the steps above fails.
    conn.close()
加了 SQLite 之后慢多了:100 万条数据耗时约 30 秒……当然,这跟我毫无节操地读写数据有关系。无论如何,SQLite、Python 以及 SQL 算是入门了。
原文:http://my.oschina.net/mummy108/blog/476841