# Accepted answer
# Author: fsi288
# Date: 2013.10.05
# Acceptance rate: 45%  Level: 12
# Users helped: 6125
# Step 1: Write the parser for the raw data file (Parser)
# Step 2: Write the automatic per-category data storage (Collections)
import itertools
import os
import re
# Line pattern: a key token, whitespace, then a value token.
# The group names `k` and `ln` are the ones the driver code reads from
# groupdict().
# NOTE(review): the original literal arrived as '(?P)\s+(?P\S+)', which is not
# a valid pattern (the `<name>` parts are missing). The `\S+` body of the
# first group is a reconstruction -- confirm against real input data.
patt = re.compile(r'(?P<k>\S+)\s+(?P<ln>\S+)', re.I | re.U | re.X)
class Parser:
    """Callable that turns the lines of a text file into dicts of named groups.

    Args:
        patt: An object with a ``match(line)`` method returning either a
            falsy value or a match object exposing ``groupdict()`` (for
            example a compiled ``re`` pattern with named groups).
    """

    def __init__(self, patt):
        self.patt = patt

    def __call__(self, srcfile):
        """Yield ``groupdict()`` for every line of *srcfile* that matches.

        Lines for which ``self.patt.match`` returns a falsy value are skipped.

        Args:
            srcfile: Path of the file to read, passed to ``open``.

        Yields:
            dict: The named groups of each matching line, in file order.
        """
        # A plain loop replaces itertools.ifilter/imap, which were removed in
        # Python 3; the `with` block closes the file once iteration ends.
        with open(srcfile) as handle:
            for line in handle:
                m = self.patt.match(line)
                if m:
                    yield m.groupdict()
# Terminator appended to every line written by Collections.flush().
LINEFEED = '\n'


class Collections:
    """Buffer lines per key and append them to one text file per key.

    Lines are held in memory and written to ``<root>/<key>.txt`` when
    ``flush()`` is called, which ``append()`` does automatically once
    ``MAXSIZE`` lines have been buffered.

    Args:
        root: Directory path joined with ``'<key>.txt'`` to build each
            output file name.
    """

    # Number of buffered lines (across all keys) that triggers a flush.
    MAXSIZE = 16384

    def __init__(self, root):
        self.root = root
        # buff maps key -> list of pending lines; size counts pending lines.
        self.buff, self.size = {}, 0

    def flush(self):
        """Append every buffered line to its key's file, then clear the buffer."""
        for k, lns in self.buff.items():
            with open(os.path.join(self.root, '%s.txt' % k), 'at') as handle:
                # Generator expression replaces the Python 2-only
                # itertools.imap(lambda ...) call.
                handle.writelines(ln + LINEFEED for ln in lns)
        self.buff, self.size = {}, 0

    def append(self, k, ln):
        """Buffer line *ln* under key *k*; flush once MAXSIZE lines are pending.

        Args:
            k: Key selecting the output file (``'%s.txt' % k``).
            ln: Line text without a trailing line feed; one is added on write.
        """
        self.buff.setdefault(k, []).append(ln)
        self.size += 1
        if self.size >= self.MAXSIZE:
            self.flush()
# Driver: parse the source file and store each matched line under its key.
# NOTE(review): `storageroot` and `datasourcefile` are not defined anywhere in
# the visible source -- they must be assigned before this code runs.
# NOTE(review): the name `collections` would shadow the stdlib module of the
# same name if that module were ever imported in this file.
parsered = Parser(patt)
collections = Collections(storageroot)
for m in parsered(datasourcefile):
    # Each record is the groupdict() of one matching line.
    collections.append(m['k'], m['ln'])
# Write out whatever is still buffered below the MAXSIZE threshold.
collections.flush()
# 02  Share / Report