- itertools 与 collections 模块的使用
- 这两个模块极其重要,极其重要,在科学运算,代数运算中的作用极大
- 这两个模块是非计科专业对抗计科专业最重要的武器之一
- 甚至就是最重要的那个(在内存足够大的情况下)
itertools模块
count(start, step)
- 生成从start开始的无限迭代器,步长为step
from itertools import count, islice

# count() is an infinite iterator, so a bare for-loop over it never ends.
# islice() caps the demo at the first five values: 3, 1, -1, -3, -5.
for num in islice(count(3, -2), 5):
    print(num)
cycle(iterable)
- 对一个可迭代对象进行无限循环
from itertools import cycle

ITER1 = [1, 2, 3, 4, 5]
ITER2 = (1, 2, 3, 4, 5)
cycle1 = cycle(ITER1)
cycle2 = cycle(ITER2)

# cycle() accepts any iterable; the list and the tuple yield the same values.
# Both print calls are assumed to belong to the loop body.
for _ in range(5):
    print(next(cycle1))
    print(next(cycle2))
repeat(element, times)
- 将element重复生成times次;省略times时会无限重复
>>> repeat(10, 3)
repeat(10, 3)
>>> repeat(10, 3).__str__
<method-wrapper '__str__' of itertools.repeat object at 0x000001B79CF94AC0>
>>> print(repeat(10, 3))
repeat(10, 3)
>>> type(repeat(10, 3))
<class 'itertools.repeat'>
from itertools import repeat

# repeat(10, 3) yields the value 10 exactly three times and then stops.
for num in repeat(10, 3):
    print(num)
- 然而我们比较一下
from itertools import count, cycle, repeat

# Print the concrete class of the object returned by each helper.
for iterator in (repeat(10, 3), count(10, 3), cycle([10, ])):
    print(type(iterator))
<class 'itertools.repeat'>
<class 'itertools.count'>
<class 'itertools.cycle'>
chain(*iterables)
- 将多个可迭代对象连接起来,返回一个迭代器。
- 可迭代对象
- 列表
- 字符串
- 元组
- 字典
- 可迭代对象
from itertools import chain

iter1 = [1, 2, 3]
iter2 = "string"
iter3 = (111, 222, 333)
iter4 = {"1111": 1111, "2222": 2222, "3333": 3333}

# chain() walks each iterable in turn. A string contributes its characters
# and a dict contributes only its keys.
combined = chain(iter1, iter2, iter3, iter4)
for item in combined:
    print(item)
compress(data, selectors)
- 根据selectors的真假值对data进行筛选,返回一个迭代器
- 有用,但不多,因为可以用 map/filter(或配合 zip 的生成器表达式)替代
from itertools import compress

data = [1, 2, 3, 4, 5, 6, 7, 8, 9, 0]
selectors = [True, True, True, False, True, True, False, True, False, True]

# compress() keeps data[i] only where selectors[i] is truthy.
filtered = compress(data, selectors)
for item in filtered:
    print(item)
dropwhile(predicate, iterable)
- 从iterable的开头起丢弃predicate为真的元素,直到遇到第一个为假的元素为止;返回从该元素开始的全部剩余元素组成的迭代器(之后的元素不再用predicate判断)
from itertools import dropwhile

data = [1, 3, 5, 7, 9, 2, 4, 6, 8, 10]


def fun(x):
    """Return True when (x + 1) ** x is below 300."""
    return (x + 1) ** x < 300


# dropwhile() skips only the leading run of items for which fun() is true.
# From the first failing item (5) onward everything is yielded, including
# later items such as 2 and 4 that would satisfy fun() again.
droped = dropwhile(fun, data)
for iter_term in droped:
    print(iter_term)
takewhile(predicate, iterable)
- 从iterable中取出predicate为真的元素,直到遇到第一个为假的元素为止,组成一个迭代器
from itertools import takewhile

data = [1, 1.3, 1.5, 7, 2.9, 1.2, 1.4, 3.6, 1.8, 1.0]


def fun(x):
    """Return True when (x + 1) ** x is below 300."""
    return (x + 1) ** x < 300


# takewhile() stops at the first item that fails fun() (here 7); later items
# are never examined, even though several of them would pass.
taked = takewhile(fun, data)
for iter_term in taked:
    print(iter_term)
- 那么我们抛出一个问题,怎么样才能取出全部为真的元素呢?
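- 使用内置的 filter(predicate, iterable)(或生成器表达式)即可;itertools.filterfalse 则取出全部为假的元素。对 dropwhile 的 predicate 取反并不能做到,因为 dropwhile 只处理开头的那一段元素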
permutations(iterable, r)
- 返回iterable中所有长度为r的排列(元组)组成的迭代器
from itertools import permutations

data = [1, 2, 3, 4, 5]

# Ordered pairs without repeating an element: 5 * 4 = 20 tuples.
result = permutations(data, 2)
for iter_term in result:
    print(iter_term)
combinations(iterable, r)
- 返回iterable中所有长度为r的组合(元组)组成的迭代器
from itertools import combinations

data = [1, 2, 3, 4, 5]

# Unordered pairs without repetition: C(5, 2) = 10 tuples.
result = combinations(data, 2)
for iter_term in result:
    print(iter_term)
product(*iterables, repeat=1)
- 返回传入的各个iterable的笛卡尔积(元组)组成的迭代器;repeat=r 表示把传入的iterables重复r次后再求笛卡尔积
from itertools import product

numbers = [1, 2, 3]
capital = ['A', 'B', 'C']
lowercase = ['a', 'b', 'c']

# One element is taken from each input, so this yields 3 * 3 * 3 = 27 tuples.
result = product(numbers, capital, lowercase)
for iter_term in result:
    print(iter_term)
- 只传入单个iterable并指定 repeat=r 时,返回所有长度为r的可重复排列;注意 repeat 必须以关键字参数传入,否则会被当作另一个iterable(见下面的报错)
>>> k = itertools.product
>>> lk = k(["1","2","3","4"],3)
Traceback (most recent call last):
File "<pyshell#15>", line 1, in <module>
lk = k(["1","2","3","4"],3)
TypeError: 'int' object is not iterable
>>> lk = k(["1","2","3","4"],repeat=3)
>>> lk
<itertools.product object at 0x00000215207C8180>
>>> list(lk)
[('1', '1', '1'), ('1', '1', '2'), ('1', '1', '3'), ('1', '1', '4'), ('1', '2', '1'), ('1', '2', '2'), ('1', '2', '3'), ('1', '2', '4'), ('1', '3', '1'), ('1', '3', '2'), ('1', '3', '3'), ('1', '3', '4'), ('1', '4', '1'), ('1', '4', '2'), ('1', '4', '3'), ('1', '4', '4'), ('2', '1', '1'), ('2', '1', '2'), ('2', '1', '3'), ('2', '1', '4'), ('2', '2', '1'), ('2', '2', '2'), ('2', '2', '3'), ('2', '2', '4'), ('2', '3', '1'), ('2', '3', '2'), ('2', '3', '3'), ('2', '3', '4'), ('2', '4', '1'), ('2', '4', '2'), ('2', '4', '3'), ('2', '4', '4'), ('3', '1', '1'), ('3', '1', '2'), ('3', '1', '3'), ('3', '1', '4'), ('3', '2', '1'), ('3', '2', '2'), ('3', '2', '3'), ('3', '2', '4'), ('3', '3', '1'), ('3', '3', '2'), ('3', '3', '3'), ('3', '3', '4'), ('3', '4', '1'), ('3', '4', '2'), ('3', '4', '3'), ('3', '4', '4'), ('4', '1', '1'), ('4', '1', '2'), ('4', '1', '3'), ('4', '1', '4'), ('4', '2', '1'), ('4', '2', '2'), ('4', '2', '3'), ('4', '2', '4'), ('4', '3', '1'), ('4', '3', '2'), ('4', '3', '3'), ('4', '3', '4'), ('4', '4', '1'), ('4', '4', '2'), ('4', '4', '3'), ('4', '4', '4')]
>>>
combinations_with_replacement
>>> f = itertools.combinations_with_replacement
>>> f(["1","2","3","4"],3)
<itertools.combinations_with_replacement object at 0x000002152078BD80>
>>> lf = f(["1","2","3","4"],3)
>>> list(lf)
[('1', '1', '1'), ('1', '1', '2'), ('1', '1', '3'), ('1', '1', '4'), ('1', '2', '2'), ('1', '2', '3'), ('1', '2', '4'), ('1', '3', '3'), ('1', '3', '4'), ('1', '4', '4'), ('2', '2', '2'), ('2', '2', '3'), ('2', '2', '4'), ('2', '3', '3'), ('2', '3', '4'), ('2', '4', '4'), ('3', '3', '3'), ('3', '3', '4'), ('3', '4', '4'), ('4', '4', '4')]
collections 模块
Counter 类
Counter 的 __repr__ 方法(交互式环境直接显示的是 repr,Counter 的 __str__ 输出与之相同)
>>> counter1
Counter({1: 1, 2: 1, 3: 1, 4: 1, 5: 1, 6: 1})
>>> counter2
Counter({6: 3, 5: 2, 2: 1, 7: 1, 8: 1})
Counter 的交并差运算
from collections import Counter

# Building a Counter from a set means every element of counter1 counts once.
set1 = set([1, 2, 3, 4, 4, 5, 6, 6])
list2 = [2, 5, 5, 6, 6, 6, 7, 8]
counter1, counter2 = Counter(set1), Counter(list2)

# & keeps the smaller count per key; | keeps the larger one.
intersection = counter1 & counter2
union = counter1 | counter2
print("Intersection:", intersection.keys())
print("Union:", union.keys())

# The - operator drops keys whose resulting count is not positive, so the
# operand order changes the result.
difference = counter1 - counter2
print("Difference1:", difference.keys())
difference = counter2 - counter1
print("Difference2:", difference.keys())
most_common()
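- 按照元素出现的次数从高到低排序,返回 (元素, 次数) 元组的列表;它是一个方法,必须先调用再取下标,例如 counter2.most_common()[0] 或 counter2.most_common(1),下面的报错就是因为漏掉了括号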
>>> counter2.most_common
<bound method Counter.most_common of Counter({6: 3, 5: 2, 2: 1, 7: 1, 8: 1})>
>>> counter2.most_common[0]
Traceback (most recent call last):
File "<pyshell#4>", line 1, in <module>
counter2.most_common[0]
TypeError: 'method' object is not subscriptable
>>>
subtract()
- 在这里我们顺便再看另一种counter的定义方式
- subtract() 是原地相减:直接修改 counter1 本身,结果中保留计数为 0 和负数的元素
- 区别于 Counter 的 - 运算:- 会返回一个新的 Counter,并丢弃计数小于等于 0 的元素
from collections import Counter

counter1 = Counter(a=3, b=2, c=1)
counter2 = Counter(a=1, b=2, c=3)

# subtract() updates counter1 in place and keeps zero and negative counts.
counter1.subtract(counter2)
print(counter1)

# The - operator returns a new Counter holding only the positive counts.
print(counter1 - counter2)
Counter({'a': 2, 'b': 0, 'c': -2})
Counter({'a': 1})
- 添加一串代码
from collections import Counter

counter1 = Counter(a=3, b=2, c=1)
counter3 = Counter(a=1, d=2, c=3)

# Keys present only in counter3 ("d") appear in counter1 with a negative
# count after the in-place subtract().
counter1.subtract(counter3)
print(counter1)

# The - operator returns a new Counter holding only the positive counts.
print(counter1 - counter3)
- "可哈希"(hashable)是指一个对象可以作为字典的键或集合的元素