先生成 CSV 测试文件,每行包含三列:name,score1,score2
这个用python写比较简单:
gen_random_csv.py
#!/usr/bin/python
import csv
import random
# Alphabet that random names are drawn from: the 26 lowercase ASCII letters.
# (The previous literal repeated 'p' and omitted 't'.)
LETTERS = 'abcdefghijklmnopqrstuvwxyz'
def GenRandomName():
    """Return a random name made of characters picked from ``LETTERS``.

    The length is drawn first, between 5 and 20 inclusive; each character
    is then chosen independently with ``random.choice``.
    """
    length = random.randint(5, 20)
    return ''.join(random.choice(LETTERS) for _ in range(length))
def GenRandomScore():
    """Return a random float score drawn uniformly between 0 and 1000."""
    lowest, highest = 0, 1000
    return random.uniform(lowest, highest)
def GenRandomFile(output_fn, total_line):
    """Write ``total_line`` random ``name,score1,score2`` rows to a CSV file.

    Each row is built from one ``GenRandomName()`` call followed by two
    ``GenRandomScore()`` calls. Progress messages are printed before and
    after the file is written.

    Args:
        output_fn: Path of the CSV file to create; opened for writing, so
            an existing file is truncated.
        total_line: Number of rows to write. Values <= 0 write no rows.
    """
    import sys  # local import: only needed to choose the file mode below

    # Single-argument print(...) with %-formatting emits the same text under
    # both the Python 2 print statement and the Python 3 print function.
    print('Processing: %s' % (output_fn,))

    # The csv module expects a binary file on Python 2, but a text file
    # opened with newline='' on Python 3.
    if sys.version_info[0] >= 3:
        csvfile = open(output_fn, 'w', newline='')
    else:
        csvfile = open(output_fn, 'wb')

    with csvfile:
        writer = csv.writer(csvfile)
        # Count down on a local copy instead of mutating the parameter.
        remaining = total_line
        while remaining > 0:
            name = GenRandomName()
            score1 = GenRandomScore()
            score2 = GenRandomScore()
            writer.writerow([name, score1, score2])
            remaining -= 1

    print('Save to: %s' % (output_fn,))
# Generate the benchmark inputs: one CSV per (file name, row count) pair,
# processed in the order listed.
for csv_name, row_count in (
        ('test_big2.csv', 1000000),
        ('test_big3.csv', 2000000),
        ('test_big4.csv', 4000000),
        ('test_big5.csv', 6000000),
        ('test_big6.csv', 8000000),
):
    GenRandomFile(csv_name, row_count)
python gen_random_csv.py
wc -l *.csv
获得:
1000000 test_big2.csv
2000000 test_big3.csv
4000000 test_big4.csv
6000000 test_big5.csv
8000000 test_big6.csv
Python 版本的排序实现:
#!/usr/bin/python
import csv
import time
def Sort1(reader, writer):
ts1 = time.time()
arr = []
for row in reader:
arr.append(row)
ts2 = time.time()
print ' &#