python对于“流”的处理以generator为主,这种方式适合拉数据,不适合推数据。
做个简单的扩展,totrg类似imap的推数据版本(可以一对一、一对多、一对零,完成类似imap、filter等功能),preduce类似reduce,groupto类似groupby的推数据版本。
fromsrc与totrg对应,方便处理输入输出;fromto从输入读出并发送到输出,支持标准输入输出、文件、list、generator;groupkv是对groupby的扩展。
#
#coding:utf8
import sys
from functools import wraps
from itertools import imap,groupby
from operator import add, itemgetter
def coroutine(func):
    """Decorator that primes a generator-based coroutine.

    Calling the decorated function creates the generator and advances it to
    its first ``yield``, so the caller can ``send()`` to it right away
    without calling ``next()`` first.  The primed generator is returned.
    """
    @wraps(func)
    def newfunc(*args, **kwargs):
        r = func(*args, **kwargs)
        # Run up to the first ``yield``; whatever it yields is discarded.
        next(r)
        return r
    return newfunc
def _strip_eol(lines):
    """Yield each line with its trailing CR/LF characters removed."""
    for line in lines:
        yield line.rstrip('\r\n')


def _apply(records, func, multi, fargs):
    """Yield records unchanged, or the result(s) of func(record, *fargs)."""
    for record in records:
        if not func:
            yield record
        elif multi:
            # One-to-many: func returns an iterable, flatten it.
            for item in func(record, *fargs):
                yield item
        else:
            yield func(record, *fargs)


def fromsrc(src, func=None, multi=False, fargs=()):
    """Generate records read from ``src``, optionally transformed by ``func``.

    src:   ``sys.stdin`` or a ``str`` file path -- records are the lines with
           trailing ``\\r``/``\\n`` removed; any other value is iterated
           directly and its items are used as-is.
    func:  optional callable invoked as ``func(record, *fargs)``.
    multi: when true, the return value of ``func`` is iterated and each
           element is yielded separately (an empty result drops the record).
    fargs: extra positional arguments passed to ``func`` after the record.

    A file opened from a path is closed when the generator is exhausted or
    closed.
    """
    if isinstance(src, str):
        with open(src) as handle:
            for item in _apply(_strip_eol(handle), func, multi, fargs):
                yield item
        return
    # Only stdin lines get their line ending stripped; other iterables are
    # passed through untouched.
    records = _strip_eol(src) if src is sys.stdin else src
    for item in _apply(records, func, multi, fargs):
        yield item
@coroutine
def totrg(trg, func=None, multi=False, fargs=()):
    """Push-style sink: process every value sent in and deliver it to ``trg``.

    trg may be:
      * ``sys.stdout`` / ``sys.stderr`` -- each item is printed on its own
        line;
      * a ``str`` -- used as a file path opened in append mode; each item is
        written as ``str(item)`` followed by a newline;
      * a ``list`` -- each item is appended;
      * anything else -- each item is passed to ``trg.send()``.

    func:  optional callable invoked as ``func(data, *fargs)``.
    multi: when true, the return value of ``func`` is iterated and each
           element is delivered separately (an empty result drops the data).
    fargs: extra positional arguments passed to ``func`` after the data.

    Throwing StopIteration into this coroutine (see ``stop``) ends it: a file
    opened here is closed, and when ``trg`` is a downstream coroutine the
    signal is forwarded to it with ``stop(trg)``.
    """
    handle = None
    forward_stop = False
    if trg is sys.stdout or trg is sys.stderr:
        def emit(item):
            print >> trg, item
    elif isinstance(trg, str):
        handle = open(trg, 'a')

        def emit(item):
            handle.write(str(item) + '\n')
    elif isinstance(trg, list):
        def emit(item):
            trg.append(item)
    else:
        # Downstream coroutine: it must be told when the stream ends.
        forward_stop = True

        def emit(item):
            trg.send(item)
    try:
        while True:
            data = yield
            if not func:
                emit(data)
            elif multi:
                for item in func(data, *fargs):
                    emit(item)
            else:
                emit(func(data, *fargs))
    except StopIteration:
        if forward_stop:
            stop(trg)
    finally:
        if handle is not None:
            handle.close()
def stop(outstream):
    """Tell a coroutine that its input has ended.

    StopIteration is thrown into ``outstream`` at the ``yield`` where it is
    waiting.  A falsy ``outstream`` (for example ``None``) is ignored.
    """
    if not outstream:
        return
    try:
        outstream.throw(StopIteration)
    except StopIteration:
        # Comes back out of throw() once the coroutine has finished; this is
        # the normal outcome, not an error.
        pass
def fromto(src, trg, func=None, multi=False, fargs=()):
    """Read every record from ``src`` and push it to ``trg``.

    src:   anything accepted by ``fromsrc`` (``sys.stdin``, a file path, or
           an iterable).
    trg:   anything accepted by ``totrg`` (``sys.stdout``/``sys.stderr``, a
           file path, a list, or a coroutine).
    func:  optional callable invoked as ``func(record, *fargs)`` on each
           record before it is sent.
    multi: when true, the return value of ``func`` is iterated and each
           element is sent separately.
    fargs: extra positional arguments passed to ``func`` after the record.

    When the source is exhausted the sink created here is stopped, which
    closes a file target and forwards the end signal to a coroutine target.
    """
    sink = totrg(trg)
    for item in fromsrc(src, func, multi, fargs):
        sink.send(item)
    # Stop the wrapper coroutine, not the raw target: the raw target may be a
    # path, a list or a stream, none of which has throw().
    stop(sink)
@coroutine
def preduce(redfunc, outstream, init=None, finalfunc=None, initargs=()):
    """Push-style reduce: fold every value sent in with ``redfunc``.

    redfunc:   called as ``redfunc(result, data)``; its return value becomes
               the new accumulated result.
    outstream: receives the final result through ``send()`` when no usable
               ``finalfunc`` is given.
    init:      initial result; when callable it is invoked as
               ``init(*initargs)`` to produce the initial value.
    finalfunc: optional callable invoked with the final result in place of
               sending it to ``outstream``.
    initargs:  positional arguments for a callable ``init``.

    ``None`` marks "no result yet": while the result is ``None`` the next
    value received becomes the result without calling ``redfunc``.  The
    final result is delivered when StopIteration is thrown into the
    coroutine (see ``stop``).
    """
    result = None
    if init is not None:
        result = init(*initargs) if callable(init) else init
    try:
        while True:
            data = (yield)
            # Identity test: values with a custom __eq__ must not be
            # mistaken for the "no result yet" marker.
            if result is None:
                result = data
            else:
                result = redfunc(result, data)
    except StopIteration:
        if finalfunc and callable(finalfunc):
            finalfunc(result)
        else:
            outstream.send(result)
def groupkv(instream, keyfunc=None, valuefunc=None):
    """Like ``itertools.groupby``, with a value extractor for group members.

    Yields ``(key, values)`` pairs for each run of consecutive items that
    share a key.

    keyfunc:   computes the grouping key of an item; passed straight to
               ``groupby`` (``None`` groups by the item itself).
    valuefunc: applied lazily to every item of a group; when ``None`` the
               items themselves are yielded.

    ``values`` is a lazy iterator over the current group and should be
    consumed before moving on to the next pair, as with ``groupby``.
    """
    for key, members in groupby(instream, keyfunc):
        if valuefunc is None:
            # No extractor: hand out the items unchanged rather than
            # wrapping each one in a 1-tuple.
            yield key, members
        else:
            yield key, (valuefunc(member) for member in members)
@coroutine
def groupto(outstream, grpfunc, keyfunc=None, valuefunc=None):
    """Push-style counterpart of ``groupby``.

    Each item sent in is mapped to a key (``keyfunc(item)``, or the item
    itself without a keyfunc) and a value (``valuefunc(item)``, or the item
    itself without a valuefunc).  Consecutive items with equal keys form a
    group.  For every new group a coroutine is created with ``grpfunc(key)``;
    it receives the group's values, and ``(key, coroutine)`` is sent to
    ``outstream`` after the group's first value has been delivered.  The
    previous group's coroutine is stopped when a new group begins, and the
    last one is stopped when StopIteration is thrown into this coroutine.
    """
    current = None
    current_key = None
    try:
        while True:
            item = (yield)
            key = item if not keyfunc else keyfunc(item)
            value = item if not valuefunc else valuefunc(item)
            if not (current and key == current_key):
                # Key changed (or first item): close the old group and open
                # a new one before announcing it downstream.
                stop(current)
                current = grpfunc(key)
                current.send(value)
                outstream.send((key, current))
                current_key = key
                continue
            current.send(value)
    except StopIteration:
        stop(current)
@coroutine
def null():
    """Sink coroutine that accepts any value sent to it and discards it."""
    while True:
        _ = (yield)
@coroutine
def groupsum(outstream, keyindex, valueindex):
    """Example stage: sum values per run of consecutive records with equal keys.

    Every record sent in is indexed with ``keyindex`` to get its group key
    and with ``valueindex`` to get the value to add.  When a group ends, the
    pair ``(key, total)`` is sent to ``outstream``.  Throwing StopIteration
    into this coroutine (see ``stop``) flushes the last group.
    """
    def dealvalues(key):
        def sendresult(result):
            outstream.send((key, result))
        # The total is delivered through sendresult, so preduce's own
        # outstream is never used and can be the discarding sink.
        # operator.add works for any numeric value, not only int.
        return preduce(add, null(), 0, sendresult)

    # New-group notifications from groupto are not needed here.
    grp = groupto(null(), dealvalues,
                  itemgetter(keyindex), itemgetter(valueindex))
    try:
        while True:
            data = yield
            grp.send(data)
    except StopIteration:
        stop(grp)
if __name__ == '__main__':
    # Read a file and write it to the screen.
    fromto('test.txt', sys.stdout)

    # Grouped sum: add up field 1 for each run of records sharing field 0.
    keysums = []
    grps = groupsum(totrg(keysums), 0, 1)
    fromto([("a", 1), ("a", 2), ("b", 3), ("b", 4), ("c", 5)], grps)
    # Result: keysums == [('a', 3), ('b', 7), ('c', 5)]