import itertools
import math
import multiprocessing
import time
class MyProcess:
    """Search one slice of a text file for a substring in a child process.

    The worker scans lines ``start_index`` (inclusive) to ``end_index``
    (exclusive) of the file, and for every line containing ``key`` it
    increments a shared counter, appends the line (without its trailing
    newline) to a shared list and prints a progress message.

    Args:
        p: Path of the text file to search.
        key: Substring to look for in each line.
        start_index: Zero-based index of the first line to scan; must be
            non-negative.
        end_index: Zero-based index one past the last line to scan.
        counter: Shared match counter; a ``multiprocessing.Value`` created
            with its default lock.  When omitted, the module-level ``count``
            is used.
        results: Shared list collecting matching lines, e.g. a
            ``multiprocessing.Manager().list()``.  When omitted, the
            module-level ``key_list`` is used.
    """

    def __init__(self, p: str, key: str, start_index: int, end_index: int,
                 counter=None, results=None):
        super().__init__()
        self.path = p
        self.key = key
        self.start_index = start_index
        self.end_index = end_index
        # Resolve the shared objects here, in the parent process.  A child
        # started with the "spawn" method re-imports the module and does not
        # see globals created under the ``__main__`` guard, so they must be
        # handed over explicitly as process arguments.
        self.counter = count if counter is None else counter
        self.results = key_list if results is None else results
        # Pass the callable itself (not the result of calling it) so the
        # search runs in the child process rather than inside __init__.
        # A static worker with explicit arguments is used so that this
        # wrapper object (which owns the Process) is not part of the target.
        self.p = multiprocessing.Process(
            target=self._search,
            args=(p, key, start_index, end_index, self.counter, self.results),
        )

    @staticmethod
    def _search(path, key, start_index, end_index, counter, results):
        """Scan lines [start_index, end_index) of ``path`` for ``key``."""
        with open(path, 'r') as f:
            # islice streams only the requested lines instead of loading the
            # entire file into memory in every worker.
            for line in itertools.islice(f, start_index, end_index):
                line = line.strip('\n')
                if key in line:
                    # ``+=`` on a shared Value is a read-modify-write; hold
                    # the lock so concurrent workers do not lose updates.
                    with counter.get_lock():
                        counter.value += 1
                        found = int(counter.value)
                    results.append(line)
                    print(f'\r正在检索,已找到{found}个', end='')

    def run(self):
        """Run the search synchronously in the calling process."""
        self._search(self.path, self.key, self.start_index, self.end_index,
                     self.counter, self.results)

    def start(self):
        """Start the child process."""
        self.p.start()

    def join(self):
        """Block until the child process has finished."""
        self.p.join()
def go(p: str, process_num: int, key: str):
    """Search the file at ``p`` for ``key`` using ``process_num`` workers.

    The file's lines are counted, split into ``process_num`` contiguous
    ranges of equal size (the last one may be shorter), and one ``MyProcess``
    is started per range.  The call returns after every worker has finished.

    Args:
        p: Path of the text file to search.
        process_num: Number of worker processes to start; must be >= 1.
        key: Substring to search for.

    Returns:
        The number of lines in the file.  The same value is also stored in
        the module-level ``file_length``.

    Raises:
        ValueError: If ``process_num`` is smaller than 1.
    """
    global file_length
    if process_num < 1:
        raise ValueError(f'process_num must be at least 1, got {process_num}')

    # Count the lines.  The with-block closes the handle, and summing also
    # yields 0 for an empty file.
    with open(p, 'r') as f:
        file_length = sum(1 for _ in f)

    # Lines per worker; rounding up ensures the ranges cover every line.
    cut_size = math.ceil(file_length / process_num)

    process_list = []
    for i in range(process_num):
        # Give each worker the start (inclusive) and end (exclusive) line
        # index of its range.  Use the ``p`` argument, not a global path.
        process = MyProcess(p, key, cut_size * i, cut_size * (i + 1))
        process.start()
        process_list.append(process)

    # Wait for all workers to finish.
    for pro in process_list:
        pro.join()
    return file_length
if __name__ == '__main__':
    # Shared counter of matches found so far (stored as a C double, 'd').
    count = multiprocessing.Value('d', 0)
    # Number of lines in the file; go() overwrites this module-level name.
    file_length = 0
    # File to search.
    path = r'D:\download\Documents\Code\Python\test1\23\14.txt'
    # Use the manager as a context manager so its server process is shut
    # down when the block exits.
    with multiprocessing.Manager() as manager:
        # List shared between processes, collecting the matching lines.
        key_list = manager.list()
        # perf_counter() is monotonic, so the measured duration is not
        # affected by system clock adjustments.
        start_time = time.perf_counter()
        # Arguments: file path, number of worker processes, search key.
        go(path, 2, '8565')
        print(f'\n检索完毕。共检索{file_length}条数据')
        # Print while the manager is still running; the proxy needs it.
        print(key_list)
        print('耗时:', time.perf_counter() - start_time)
正在检索,已找到101个
检索完毕。共检索1000000条数据
['8565', '8565', '8565', '8565', '8565', '8565', '8565', '8565', '8565', '8565', '8565', '8565', '8565', '8565', '8565', '8565', '8565', '8565', '8565', '8565', '8565', '8565', '8565', '8565', '8565', '8565', '8565', '8565', '8565', '8565', '8565', '8565', '8565', '8565', '8565', '8565', '8565', '8565', '8565', '8565', '8565', '8565', '8565', '8565', '8565', '8565', '8565', '8565', '8565', '8565', '8565', '8565', '8565', '8565', '8565', '8565', '8565', '8565', '8565', '8565', '8565', '8565', '8565', '8565', '8565', '8565', '8565', '8565', '8565', '8565', '8565', '8565', '8565', '8565', '8565', '8565', '8565', '8565', '8565', '8565', '8565', '8565', '8565', '8565', '8565', '8565', '8565', '8565', '8565', '8565', '8565', '8565', '8565', '8565', '8565', '8565', '8565', '8565', '8565', '8565', '8565']
耗时: 19.899441957473755
总结:
- 进程共享变量 count = multiprocessing.Value('d', 0) 是一个对象,其数值需要通过 count.value 读写。
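- 进程相关的全局变量(如共享的 Value、Manager 列表)应放在 if __name__ == '__main__': 代码块内,而不是模块顶层;否则在使用 spawn 启动方式的平台(如 Windows)上,子进程重新导入模块时会再次执行这些语句并报错。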