逐词遍历文件内容(我的例子用的是古腾堡计划中的《绿野仙踪》),有三种不同的方式:
from __future__ import with_statement
import re
import time

try:
    from cStringIO import StringIO  # Python 2
except ImportError:
    from io import StringIO  # Python 3: the cStringIO module was removed
def word_iter_std(filename):
    """Yield every whitespace-separated word of a file, read line by line.

    The file object is iterated lazily and each line is split with
    ``str.split()``. When the generator is exhausted it prints the
    wall-clock time elapsed since its body started running (i.e. since the
    first ``next()`` call); that figure therefore includes any time the
    consumer spends between items. Nothing is printed if the generator is
    abandoned before exhaustion.

    Args:
        filename: Path of the text file to read.

    Yields:
        Each word of the file, as a string, in file order.
    """
    start = time.time()
    with open(filename) as f:
        for line in f:
            for word in line.split():
                yield word
    # A single parenthesized argument prints identically under Python 2's
    # print statement and Python 3's print function.
    print('iter_std took %0.6f seconds' % (time.time() - start))
def word_iter_re(filename):
    """Yield a regex match for every ``\\w+`` run in a file.

    The whole file is read into memory and scanned with ``re.finditer``.
    When the generator is exhausted it prints the wall-clock time elapsed
    since its body started running (i.e. since the first ``next()`` call).

    NOTE: unlike the sibling ``word_iter_*`` generators, this one yields
    regex match objects, not strings; call ``.group()`` on an item to get
    the word text. Word boundaries also differ: ``\\w+`` splits on
    punctuation, whereas ``str.split()`` splits on whitespace only.

    Args:
        filename: Path of the text file to read.

    Yields:
        One match object per ``\\w+`` run, in file order.
    """
    start = time.time()
    with open(filename) as f:
        txt = f.read()
    # Raw string: '\w' in a plain literal is an invalid escape sequence that
    # newer Python versions warn about; the pattern itself is unchanged.
    for word in re.finditer(r'\w+', txt):
        yield word
    # A single parenthesized argument prints identically under Python 2's
    # print statement and Python 3's print function.
    print('iter_re took %0.6f seconds' % (time.time() - start))
def word_iter_stringio(filename):
    """Yield every whitespace-separated word of a file via an in-memory buffer.

    The whole file is read at once and wrapped in a ``StringIO`` object,
    which is then iterated line by line; each line is split with
    ``str.split()``. When the generator is exhausted it prints the
    wall-clock time elapsed since its body started running (i.e. since the
    first ``next()`` call).

    Args:
        filename: Path of the text file to read.

    Yields:
        Each word of the file, as a string, in file order.
    """
    start = time.time()
    with open(filename) as f:
        # Named ``buf`` rather than ``io`` so the local does not share a name
        # with the standard-library ``io`` module.
        buf = StringIO(f.read())
    for line in buf:
        for word in line.split():
            yield word
    # A single parenthesized argument prints identically under Python 2's
    # print statement and Python 3's print function.
    print('iter_io took %0.6f seconds' % (time.time() - start))
woo = '/tmp/woo.txt'

# Drain each generator in turn. The yielded items are discarded: the point
# is the timing line each generator prints once it has been exhausted.
for word_iter in (word_iter_std, word_iter_re, word_iter_stringio):
    for word in word_iter(woo):
        pass
运行结果如下:
% python /tmp/junk.py
iter_std took 0.016321 seconds
iter_re took 0.028345 seconds
iter_io took 0.016230 seconds