Python的itertools
- Python的itertools
- itertools.chain(*iterables)
- itertools.combinations(iterable,r)
- itertools.combinations_with_replacement(iterable,r)
- itertools.compress(data,selectors)
- itertools.count(start=0,step=1)
- itertools.cycle(iterable)
- itertools.dropwhile(predicate,iterable)
- itertools.groupby(iterable[,key])
- itertools.ifilter(predicate,iterable)
- itertools.ifilterfalse(predicate,iterable)
- itertools.imap(function,*iterables)
- itertools.islice(iterable,stop)
- itertools.islice(iterable,start,stop[,step])
- itertools.izip(*iterables)
- itertools.izip_longest(*iterables[,fillvalue])
- itertools.permutations(iterable[,r])
- itertools.product(*iterables[,repeat])
- itertools.repeat(object[,times])
- itertools.starmap(function,iterable)
- itertools.takewhile(predicate,iterable)
- 几个排列的例子
最近在写一个爬虫的项目,发现对 list 进行聚类(分组)的时候很麻烦。虽然 spark、pandas、numpy 等都提供了很多工具,但用起来还是觉得麻烦。
于是发现了 itertools 这个工具可以满足我的需求。下面对 itertools 里的工具进行详细的介绍。
为了显示自己高逼格,详情请查看官网的 api:
https://docs.python.org/2/library/itertools.html
itertools.chain(*iterables)
def chain(*iterables):
    """Yield the elements of each argument in turn as one continuous stream.

    Pure-Python equivalent of ``itertools.chain``: the first iterable is
    exhausted, then the second, and so on.
    """
    # chain('ABC', 'DEF') --> A B C D E F
    for it in iterables:
        for element in it:
            yield element
def from_iterable(iterables):
    """Chain the iterables produced by a single (possibly lazy) iterable.

    Pure-Python equivalent of ``itertools.chain.from_iterable``: the outer
    iterable is consumed one item at a time, and each item is itself
    iterated to completion before the next one is fetched.
    """
    # chain.from_iterable(['ABC', 'DEF']) --> A B C D E F
    for it in iterables:
        for element in it:
            yield element
从上面的两段源码可以看出,chain 就是把一组可迭代对象依次串联起来,形成一个更大的迭代器。
itertools.combinations(iterable,r)
返回长度为 r 的子序列,元素来自于 iterable(每个元素按位置只取一次,结果按输入顺序排列)。
def combinations(iterable, r):
    """Yield every r-length combination of elements from *iterable*.

    Pure-Python equivalent of ``itertools.combinations``.  Elements are
    treated as unique by position, and tuples are emitted in the order
    given by their positions in the input.  Nothing is yielded when
    ``r`` exceeds the number of elements.
    """
    # combinations('ABCD', 2) --> AB AC AD BC BD CD
    # combinations(range(4), 3) --> 012 013 023 123
    pool = tuple(iterable)
    n = len(pool)
    if r > n:
        return
    # A real list is required because the positions are updated in place
    # (a Python 3 ``range`` object does not support item assignment).
    indices = list(range(r))
    yield tuple(pool[i] for i in indices)
    while True:
        # Find the rightmost position that has not reached its maximum.
        for i in reversed(range(r)):
            if indices[i] != i + n - r:
                break
        else:
            # Every position is at its maximum: all combinations emitted.
            return
        indices[i] += 1
        # Reset everything to the right to the smallest increasing run.
        for j in range(i + 1, r):
            indices[j] = indices[j - 1] + 1
        yield tuple(pool[i] for i in indices)
上面的代码的核心思想,其实就是针对indices的修改,我是放在程序中跑才看懂了。
def combinations(iterable, r):
    """Yield r-length combinations by filtering the r-length permutations.

    Alternative formulation: a combination is a permutation of positions
    whose indices are already in ascending order, so every permutation
    that is not sorted is skipped.
    """
    pool = tuple(iterable)
    n = len(pool)
    for indices in permutations(range(n), r):
        if sorted(indices) == list(indices):
            yield tuple(pool[i] for i in indices)
combinations 也可以借助 permutations 来实现:先生成所有下标的排列,再只保留下标按升序排列的那些,剩下的就是组合。
这段代码就简洁多了。
itertools.combinations_with_replacement(iterable,r)
r 是子序列的长度。输入 iterable 中的每个元素可以被重复选取不止一次。
def combinations_with_replacement(iterable, r):
    """Yield r-length combinations where an element may be picked repeatedly.

    Pure-Python equivalent of ``itertools.combinations_with_replacement``.
    Nothing is yielded for an empty input with a non-zero ``r``.
    """
    # combinations_with_replacement('ABC', 2) --> AA AB AC BB BC CC
    pool = tuple(iterable)
    n = len(pool)
    if not n and r:
        return
    indices = [0] * r
    yield tuple(pool[i] for i in indices)
    while True:
        # Find the rightmost position that can still be advanced.
        for i in reversed(range(r)):
            if indices[i] != n - 1:
                break
        else:
            return
        # Advance it and set every position to its right to the same
        # value, which keeps the index tuple non-decreasing.
        indices[i:] = [indices[i] + 1] * (r - i)
        yield tuple(pool[i] for i in indices)
def combinations_with_replacement(iterable, r):
    """Yield combinations with replacement by filtering a Cartesian product.

    Alternative formulation: of all r-length index tuples produced by
    ``product``, only those already in non-decreasing order are kept.
    """
    pool = tuple(iterable)
    n = len(pool)
    for indices in product(range(n), repeat=r):
        if sorted(indices) == list(indices):
            yield tuple(pool[i] for i in indices)
itertools.compress(data,selectors)
def compress(data, selectors):
    """Return an iterator over the items of *data* whose selector is true.

    Pure-Python equivalent of ``itertools.compress``.  *data* and
    *selectors* are paired up lazily and pairing stops at the shorter one.
    """
    # compress('ABCDEF', [1,0,1,0,1,1]) --> A C E F
    try:
        from itertools import izip  # Python 2: the lazy variant of zip
    except ImportError:
        izip = zip  # Python 3: the built-in zip is already lazy
    return (d for d, s in izip(data, selectors) if s)
izip 把 data 和 selectors 逐个配对,s 就是判断的条件:只有 s 为真时,对应的 d 才会被保留。
itertools.count(start=0,step=1)
无限循环
def count(start=0, step=1):
    """Yield ``start``, ``start + step``, ``start + 2*step``, ... forever.

    Pure-Python equivalent of ``itertools.count``.  The generator never
    terminates on its own, so the consumer has to stop iterating.
    """
    # count(10) --> 10 11 12 13 14 ...
    # count(2.5, 0.5) -> 2.5 3.0 3.5 ...
    n = start
    while True:
        yield n
        n += step
itertools.cycle(iterable)
def cycle(iterable):
    """Yield the elements of *iterable* over and over again.

    Pure-Python equivalent of ``itertools.cycle``.  Each element is saved
    on the first pass so later passes replay the saved copy; an empty
    input therefore yields nothing instead of looping forever.
    """
    # cycle('ABCD') --> A B C D A B C D A B C D ...
    saved = []
    for element in iterable:
        yield element
        saved.append(element)
    while saved:
        for element in saved:
            yield element
itertools.dropwhile(predicate,iterable)
def dropwhile(predicate, iterable):
    """Skip leading elements while *predicate* is true, then yield the rest.

    Pure-Python equivalent of ``itertools.dropwhile``.  Once the first
    element failing the predicate is seen, every remaining element is
    yielded without being tested again.
    """
    # dropwhile(lambda x: x<5, [1,4,6,4,1]) --> 6 4 1
    iterable = iter(iterable)
    for x in iterable:
        if not predicate(x):
            yield x
            break
    # Both loops share one iterator, so this resumes after the break.
    for x in iterable:
        yield x
itertools.groupby(iterable[,key])
# Typical groupby usage; ``data`` and ``keyfunc`` stand for the caller's
# own sequence and key function.
groups = []
uniquekeys = []
# groupby only merges *consecutive* items with equal keys, so the data is
# sorted with the same key function first.
data = sorted(data, key=keyfunc)
for k, g in groupby(data, keyfunc):
    groups.append(list(g))      # Store group iterator as a list
    uniquekeys.append(k)
class groupby(object):
    """Iterator of ``(key, group)`` pairs over runs of consecutive equal keys.

    Pure-Python equivalent of ``itertools.groupby``.  A new group starts
    every time the key changes, so the input normally has to be sorted by
    the same key function first.  Each ``group`` is a generator sharing the
    underlying iterator with this object; advancing to the next group
    discards whatever is left of the previous one.
    """
    # [k for k, g in groupby('AAAABBBCCDAABBB')] --> A B C D A B
    # [list(g) for k, g in groupby('AAAABBBCCD')] --> AAAA BBB CC D

    def __init__(self, iterable, key=None):
        if key is None:
            key = lambda x: x
        self.keyfunc = key
        self.it = iter(iterable)
        # One fresh sentinel shared by all three attributes: it equals
        # itself (so the first next() call advances) but no real key.
        self.tgtkey = self.currkey = self.currvalue = object()

    def __iter__(self):
        return self

    def next(self):
        # Skip whatever is left of the current group.
        while self.currkey == self.tgtkey:
            self.currvalue = next(self.it)    # Exit on StopIteration
            self.currkey = self.keyfunc(self.currvalue)
        self.tgtkey = self.currkey
        return (self.currkey, self._grouper(self.tgtkey))

    # Python 3 spelling of the iterator protocol method.
    __next__ = next

    def _grouper(self, tgtkey):
        while self.currkey == tgtkey:
            yield self.currvalue
            try:
                self.currvalue = next(self.it)
            except StopIteration:
                # Finish the group explicitly: since PEP 479 a
                # StopIteration escaping a generator body is turned into
                # a RuntimeError.
                return
            self.currkey = self.keyfunc(self.currvalue)
之前遇到一个问题:我自己定义的 class 对象不能进行 list(object)。看过这个源码才知道,是我自己的实体类没有实现迭代,也就是 `__iter__` 还有 `__call__`。之前一直报错,是因为实体类中没有写 `__call__`。
itertools.ifilter(predicate,iterable)
def ifilter(predicate, iterable):
    """Yield the elements of *iterable* for which *predicate* is true.

    Pure-Python equivalent of ``itertools.ifilter``.  When *predicate* is
    ``None`` the truth value of each element itself is used.
    """
    # ifilter(lambda x: x%2, range(10)) --> 1 3 5 7 9
    if predicate is None:
        predicate = bool
    for x in iterable:
        if predicate(x):
            yield x
itertools.ifilterfalse(predicate,iterable)
def ifilterfalse(predicate, iterable):
    """Yield the elements of *iterable* for which *predicate* is false.

    Pure-Python equivalent of ``itertools.ifilterfalse``.  When
    *predicate* is ``None`` the elements that are themselves false are
    yielded.
    """
    # ifilterfalse(lambda x: x%2, range(10)) --> 0 2 4 6 8
    if predicate is None:
        predicate = bool
    for x in iterable:
        if not predicate(x):
            yield x
itertools.imap(function,*iterables)
def imap(function, *iterables):
    """Lazily apply *function* to arguments taken from each iterable in parallel.

    Pure-Python equivalent of ``itertools.imap``.  Iteration stops as soon
    as the shortest iterable is exhausted.  When *function* is ``None``
    the argument tuples themselves are yielded.
    """
    # imap(pow, (2,3,10), (5,2,3)) --> 32 9 1000
    # A list (not a lazy map object) so it can be traversed on every round.
    iterators = [iter(it) for it in iterables]
    while True:
        try:
            args = [next(it) for it in iterators]
        except StopIteration:
            # End the generator explicitly; since PEP 479 a leaked
            # StopIteration would surface as RuntimeError.
            return
        if function is None:
            yield tuple(args)
        else:
            yield function(*args)
itertools.islice(iterable,stop)
itertools.islice(iterable,start,stop[,step])
def islice(iterable, *args):
    """Yield selected elements of *iterable*, like slicing but lazily.

    Pure-Python equivalent of ``itertools.islice``.  Accepts
    ``islice(iterable, stop)`` or ``islice(iterable, start, stop[, step])``;
    ``None`` means "from the beginning", "until exhausted" and "step 1"
    respectively.  The source is advanced to ``max(start, stop)`` (or until
    it runs out) and no further, even when nothing is yielded.

    Raises:
        ValueError: if start, stop or step is negative.  Because this is a
            generator, the error surfaces on the first iteration.
    """
    # islice('ABCDEFG', 2) --> A B
    # islice('ABCDEFG', 2, 4) --> C D
    # islice('ABCDEFG', 2, None) --> C D E F G
    # islice('ABCDEFG', 0, None, 2) --> A C E G
    s = slice(*args)
    start = s.start or 0
    stop = s.stop              # kept as None for "unbounded"; 0 is a real bound
    step = s.step or 1
    if start < 0 or step < 0 or (stop is not None and stop < 0):
        raise ValueError('islice() arguments must be None or non-negative')

    # Number of source items to consume in total (None: no limit).
    limit = None if stop is None else max(start, stop)
    source = iter(iterable)
    wanted = start             # index of the next element to yield
    index = 0
    # The limit is checked *before* pulling, so nothing past it is consumed.
    while limit is None or index < limit:
        try:
            element = next(source)
        except StopIteration:
            return
        if index == wanted:
            yield element
            wanted += step
        index += 1
itertools.izip(*iterables)
def izip(*iterables):
    """Lazily yield tuples pairing up the i-th elements of every iterable.

    Pure-Python equivalent of ``itertools.izip``.  Iteration stops when
    the shortest iterable is exhausted; with no arguments nothing is
    yielded.
    """
    # izip('ABCD', 'xy') --> Ax By
    # A list, so the emptiness test below is meaningful and the iterators
    # can be traversed on every round.
    iterators = [iter(it) for it in iterables]
    while iterators:
        try:
            row = tuple([next(it) for it in iterators])
        except StopIteration:
            # End the generator explicitly; since PEP 479 a leaked
            # StopIteration would surface as RuntimeError.
            return
        yield row
itertools.izip_longest(*iterables[,fillvalue])
class ZipExhausted(Exception):
    """Internal signal: the last still-running input has just run out."""
    pass


def izip_longest(*args, **kwds):
    """Zip iterables together, padding the shorter ones with ``fillvalue``.

    Pure-Python equivalent of ``itertools.izip_longest``.  Iteration
    continues until the longest input is exhausted; missing values are
    replaced by the ``fillvalue`` keyword argument (``None`` by default).
    """
    # izip_longest('ABCD', 'xy', fillvalue='-') --> Ax By C- D-
    fillvalue = kwds.get('fillvalue')
    # How many inputs may still run out before the whole zip must stop.
    # Kept in a list so the nested generator can modify it.
    counter = [len(args) - 1]

    def sentinel():
        # Runs once per input, right after that input is exhausted.
        if not counter[0]:
            raise ZipExhausted
        counter[0] -= 1
        yield fillvalue

    fillers = repeat(fillvalue)
    # Each input is followed by its sentinel and then by endless padding.
    iterators = [chain(it, sentinel(), fillers) for it in args]
    try:
        while iterators:
            yield tuple(map(next, iterators))
    except ZipExhausted:
        pass
itertools.permutations(iterable[,r])
def permutations(iterable, r=None):
    """Yield every r-length ordering of elements from *iterable*.

    Pure-Python equivalent of ``itertools.permutations``.  *r* defaults to
    the full length of the input.  Elements are treated as unique by
    position, and nothing is yielded when ``r`` exceeds the input length.
    """
    # permutations('ABCD', 2) --> AB AC AD BA BC BD CA CB CD DA DB DC
    # permutations(range(3)) --> 012 021 102 120 201 210
    pool = tuple(iterable)
    n = len(pool)
    r = n if r is None else r
    if r > n:
        return
    # Real lists are required: both sequences are modified in place
    # (Python 3 ``range`` objects are immutable).
    indices = list(range(n))
    cycles = list(range(n, n - r, -1))
    yield tuple(pool[i] for i in indices[:r])
    while n:
        for i in reversed(range(r)):
            cycles[i] -= 1
            if cycles[i] == 0:
                # Position i has cycled through all its choices: rotate
                # its element to the end and reset its counter.
                indices[i:] = indices[i + 1:] + indices[i:i + 1]
                cycles[i] = n - i
            else:
                j = cycles[i]
                indices[i], indices[-j] = indices[-j], indices[i]
                yield tuple(pool[i] for i in indices[:r])
                break
        else:
            # Every position wrapped around: all permutations emitted.
            return
全排列
itertools.product(*iterables[,repeat])
def product(*args, **kwds):
    """Yield the Cartesian product of the input iterables as tuples.

    Pure-Python equivalent of ``itertools.product``.  The ``repeat``
    keyword argument repeats the whole argument list that many times.
    Every intermediate result is built in memory before the first tuple
    is yielded.
    """
    # product('ABCD', 'xy') --> Ax Ay Bx By Cx Cy Dx Dy
    # product(range(2), repeat=3) --> 000 001 010 011 100 101 110 111
    # A list is needed so it can be repeated with ``*`` (a Python 3 map
    # object cannot be multiplied).
    pools = [tuple(pool) for pool in args] * kwds.get('repeat', 1)
    result = [[]]
    for pool in pools:
        result = [x + [y] for x in result for y in pool]
    for prod in result:
        yield tuple(prod)
itertools.repeat(object[,times])
def repeat(object, times=None):
    """Yield *object* again and again: forever, or *times* times if given.

    Pure-Python equivalent of ``itertools.repeat``.  A zero or negative
    *times* yields nothing.
    """
    # repeat(10, 3) --> 10 10 10
    if times is None:
        while True:
            yield object
    else:
        # Plain countdown: stays lazy on both Python 2 and Python 3.
        remaining = times
        while remaining > 0:
            yield object
            remaining -= 1
一般都是这样用的
list(imap(pow,xrange(10),repeat(2)))
[0, 1, 4, 9, 16, 25, 36, 49, 64, 81]
itertools.starmap(function,iterable)
def starmap(function, iterable):
    """Call *function* with each item of *iterable* unpacked as its arguments.

    Pure-Python equivalent of ``itertools.starmap``: every item must
    itself be an iterable of positional arguments.
    """
    # starmap(pow, [(2,5), (3,2), (10,3)]) --> 32 9 1000
    for args in iterable:
        yield function(*args)
itertools.takewhile(predicate,iterable)
def takewhile(predicate, iterable):
    """Yield leading elements for as long as *predicate* stays true.

    Pure-Python equivalent of ``itertools.takewhile``.  Iteration stops at
    the first element that fails the predicate; that element has already
    been taken from the source and is discarded.
    """
    # takewhile(lambda x: x<5, [1,4,6,4,1]) --> 1 4
    for x in iterable:
        if predicate(x):
            yield x
        else:
            break
几个排列的例子
>>> import itertools
>>> for i in itertools.product('ABCD', repeat = 2):
... print i,
...
('A', 'A') ('A', 'B') ('A', 'C') ('A', 'D') ('B', 'A') ('B', 'B') ('B', 'C') ('B', 'D') ('C', 'A') ('C', 'B') ('C', 'C') ('C', 'D') ('D', 'A') ('D', 'B') ('D', 'C') ('D', 'D')
>>> for i in itertools.permutations('ABCD', 2):
... print i,
...
('A', 'B') ('A', 'C') ('A', 'D') ('B', 'A') ('B', 'C') ('B', 'D') ('C', 'A') ('C', 'B') ('C', 'D') ('D', 'A') ('D', 'B') ('D', 'C')
>>> for i in itertools.combinations('ABCD', 2):
... print i,
...
('A', 'B') ('A', 'C') ('A', 'D') ('B', 'C') ('B', 'D') ('C', 'D')
>>> for i in itertools.combinations_with_replacement('ABCD', 2):
... print i,
...
('A', 'A') ('A', 'B') ('A', 'C') ('A', 'D') ('B', 'B') ('B', 'C') ('B', 'D') ('C', 'C') ('C', 'D') ('D', 'D')