解题思路
分治法:先把大文件拆分成若干个小文件,分别对每个小文件排序,然后使用小顶堆(升序)或大顶堆(降序)对这些已排序的小文件做多路归并
代码(python3)
import threading
from queue import Queue
import time
import random
def make_test_data(data_file, number, low=1, high=10000):
    """Write ``number`` random integers to ``data_file``, one per line.

    Args:
        data_file: Path of the file to create; an existing file is
            overwritten.
        number: How many integers to generate. Zero or a negative value
            yields an empty file.
        low: Smallest value that may be generated (inclusive).
        high: Largest value that may be generated (inclusive).

    Raises:
        ValueError: If ``low`` is greater than ``high`` and at least one
            number has to be generated (raised by ``random.randint``).
    """
    with open(data_file, 'w') as f:
        # A generator keeps memory flat for large ``number`` while letting
        # the buffered file object batch the many tiny writes.
        f.writelines(
            str(random.randint(low, high)) + '\n' for _ in range(number)
        )
# Reader thread: loads integers from a file and enqueues them in chunks.
class Reader(threading.Thread):
    """Thread that reads one integer per line and queues them in batches.

    Each item put on ``task_queue`` is a list of at most ``split_nums``
    integers, in file order.
    """

    def __init__(self, task_queue, filename, split_nums):
        """Open ``filename`` for reading and remember the chunking settings.

        Args:
            task_queue: Queue that receives the lists of integers.
            filename: Path of the text file holding one integer per line.
            split_nums: Number of integers per chunk.

        Raises:
            OSError: If ``filename`` cannot be opened.
        """
        super().__init__()
        self.task_queue = task_queue
        self.f = open(filename, 'r')
        self.split_nums = split_nums

    def run(self):
        """Read the whole file, enqueue every chunk, then close the file."""
        print('reader start')
        chunk = []
        try:
            for line in self.f:
                text = line.strip()
                if not text:
                    # Tolerate blank lines (e.g. a trailing empty line)
                    # instead of crashing the thread on int('').
                    continue
                chunk.append(int(text))
                if len(chunk) == self.split_nums:
                    self.task_queue.put(chunk)
                    chunk = []
            if chunk:
                # Flush the final partial chunk; without this the tail of
                # the file is lost whenever the number of lines is not a
                # multiple of split_nums.
                self.task_queue.put(chunk)
            print('reader end')
        finally:
            # Release the file handle even if a line fails to parse.
            self.f.close()
# 排序的线程
class Sorter(threading.Thread):
def __init__(self, task_queue, sorted_queue, reader):
super(Sorter, self).__init__()
self.task_queue = task_queue
self.sorted_queue = sorted_queue
self.reader = reader
def run(self):
print('writer start')
while True:
if (self.task_queue.empty() and