Effective Python 第 24 条: 以 MapReduce 流程为例.
常规方法构建 MapReduce
import os
from threading import Thread
# InputData 基类
class InputData:
    """Abstract source of input data for the MapReduce example.

    Concrete subclasses override :meth:`read` to supply the data.
    """

    def read(self):
        """Return the input data.

        Raises:
            NotImplementedError: Always; the base class has no data source.
        """
        raise NotImplementedError()
# InputData 具体子类
class PathInputData(InputData):
    """Input data backed by a file on disk."""

    def __init__(self, path):
        """Remember which file to read.

        Args:
            path: Path of the file; passed to ``open`` unchanged.
        """
        super().__init__()
        self.path = path

    def read(self):
        """Return the entire contents of the file.

        Returns:
            The file contents, read in text mode with the default encoding.

        Raises:
            OSError: If the file cannot be opened or read.
        """
        # A context manager closes the handle as soon as the read finishes
        # instead of leaving it open until garbage collection.
        with open(self.path) as handle:
            return handle.read()
# MapReduce 工作线程, 基类
class Worker:
    """Base class for a MapReduce worker bound to one input source.

    Subclasses implement :meth:`map` to compute ``result`` from
    ``input_data`` and :meth:`reduce` to combine another worker's result
    into their own.
    """

    def __init__(self, input_data):
        """Store the input source and start with no result.

        Args:
            input_data: Object providing the data to process.
        """
        # input_data is expected to be a PathInputData instance.
        self.input_data = input_data
        self.result = None

    def map(self):
        """Compute ``self.result`` from ``self.input_data``.

        Raises:
            NotImplementedError: Always; subclasses must override.
        """
        raise NotImplementedError

    def reduce(self, other=None):
        """Combine ``other``'s result into this worker's result.

        Args:
            other: The worker to merge in. Subclasses take it as a required
                argument; the default here exists only so that legacy
                zero-argument calls on the base class still reach
                ``NotImplementedError``.

        Raises:
            NotImplementedError: Always; subclasses must override.
        """
        raise NotImplementedError
# 子类, 换行符计数器
class LineCountWorker(Worker):
    """Worker that counts newline characters in its input."""

    def map(self):
        """Read the input and store its newline count in ``self.result``."""
        text = self.input_data.read()
        newline_total = text.count('\n')
        self.result = newline_total

    def reduce(self, other):
        """Add another worker's count into this worker's ``result``.

        Args:
            other: Worker whose ``result`` is added to this one's.
        """
        self.result += other.result
"""
需要手工写流程协调上面定义的各个组件
并实现 MapReduce
"""
def generate_inputs(data_dir):