2022-5-17 python cookbook(v3.0) 学习笔记(四)

迭代器与生成器

手动遍历迭代器

>>> def manual_iter():
	with open('etc/passwd') as f:
		try:
			while True:
				line = next(f)
				print(line, end='')
		except StopIteration:
			pass

# StopIteration 可以用以下方式对应
>>> with open('etc/passwd') as f:
	while True:
		line = next(f, None)
		if line is None:
			break
		print(line, end='')

代理迭代

# 构建自定义容器对象,里面包含列表,元组等其他可迭代对象,如果想直接在这个自定义容器中执行迭代操作
>>> class Node:
	"""Container holding a value and a list of children; iterating it iterates the children."""
	def __init__(self, value):
		self._value = value
		self._children = []
	def __repr__(self):
		return"Node({!r})".format(self._value)
	def add_child(self, node):
		# Appends whatever object is passed; no type check is performed.
		self._children.append(node)
	def __iter__(self):	# delegate the iteration request to the _children list
		return iter(self._children)
	
>>> root = Node(0)
>>> child1 = Node(1)
>>> child2 = Node(2)
>>> root.add_child(child1)
>>> root.add_child(child2)
>>> child3 = 5
>>> root.add_child(child3)
>>> child4 = [1, 2, 3]
>>> root.add_child(child4)
>>> for ch in root:
	print(ch)
	
Node(1)
Node(2)
5
[1, 2, 3]
>>>

使用生成器创建新的迭代模式

# 一个函数需要有一个yield语句即可将其转换为一个生成器,生成器只能用于迭代操作
# 例:自定义一个类似range的迭代模式
>>> def frange(start, stop, increment):
	"""Generate start, start + increment, ... for as long as the value is < stop.

	Works like range() but also accepts non-integer steps. The loop only
	ends once the running value reaches stop, so with start < stop a zero
	or negative increment never terminates.
	"""
	x = start
	while x < stop:
		yield x
		x += increment	# repeated addition, so float rounding error can accumulate

>>> for n in frange(0, 2, 0.5):
	print(n)

0
0.5
1.0
1.5
>>> list(frange(0, 1, 0.25))
[0, 0.25, 0.5, 0.75]
>>> 

实现迭代器协议

# 构建支持迭代操作的自定义对象,并实现迭代协议
# 例:
>>> class Node:
	"""Node holding a value and a list of children, with a depth-first traversal generator."""
	def __init__(self, value):
		self._value = value
		self._children = []
	def __repr__(self):
		return"Node({!r})".format(self._value)
	def add_child(self, node):
		self._children.append(node)
	def __iter__(self):
		# Iterating a node iterates its direct children.
		return iter(self._children)
	def depth_first(self):	# generator: yields this node first, then each child's subtree in order
		yield self
		for c in self:
			# Every child must itself provide depth_first().
			yield from c.depth_first()

>>> root = Node(0)
>>> root.add_child(Node(1))
>>> root.add_child(Node(2))
>>> root.add_child(Node(3))
>>> root.add_child(Node(4))
>>> for ch in root.depth_first():
	print(ch)

Node(0)
Node(1)
Node(2)
Node(3)
Node(4)

反向迭代

# 反向迭代序列
>>> a = [1, 2, 3, 4]
>>> for x in reversed(a):
	print(x)

4
3
2
1

# 反向迭代需要当大小确定或者对象实现了__reversed__()才可以生效。
# 否则需要先将对象转换为列表
>>> f = open('somefile')
>>> for line in reversed(list(f)):
	print(line, end='')

# 也可以自定义__reversed__()
>>> class Countdown:
	"""Iterable that counts down from start to 1; reversed() on it counts up from 1 to start."""
	def __init__(self, start):
		self.start = start
	def __iter__(self):
		# Forward iteration: start, start - 1, ..., 1
		n = self.start
		while n > 0:
			yield n
			n -= 1
	def __reversed__(self):
		# Used by reversed(); yields 1, 2, ..., start without building a list first.
		n = 1
		while n <= self.start:
			yield n
			n += 1

>>> for rr in reversed(Countdown(3)):
	print(rr)
	
1
2
3
>>> for rr in Countdown(3):
	print(rr)

3
2
1

带有外部状态的生成器函数

# 如果向自定义一个生成器函数,同时还需要暴露其他状态给用户,
# 可以实现一个类,然后把生成器函数放到__iter__()方法中
>>> from collections import deque
>>> class linehistory:
	"""Iterate over lines while remembering the most recent ones in a bounded history."""
	def __init__(self, lines, histlen=3):
		self.lines = lines
		# A deque with maxlen drops its oldest entry automatically once it is full.
		self.history = deque(maxlen=histlen)
	def __iter__(self):
		# Line numbers start at 1; each (lineno, line) pair is recorded before the line is yielded.
		for lineno, line in enumerate(self.lines, 1):
			self.history.append((lineno, line))
			yield line
	def clear(self):
		"""Discard all remembered lines."""
		self.history.clear()

# 示例
>>> with open('somefile.txt') as f:
	lines = linehistory(f)
	for line in lines:
		if 'python' in line:
			for lineno, hline in lines.history:
				print('{}:{}'.format(lineni,hline), end='')

迭代器切片

>>> def count(n):
	"""Endless generator yielding n, n + 1, n + 2, ..."""
	while True:
		yield n
		n += 1

		
>>> c = count(0)
>>> c[10:20]		# 迭代器和生成器不能使用标准切片操作
Traceback (most recent call last):
  File "<pyshell#339>", line 1, in <module>
    c[10:20]
TypeError: 'generator' object is not subscriptable
>>> import itertools
>>> for x in itertools.islice(c, 10, 15):		# 使用islice可以
	print(x)

	
10
11
12
13
14

跳过可迭代对象的开始部分

# 遍历可迭代对象时,跳过开始的某些元素
>>> from itertools import dropwhile
>>> with open('/etc/passwd') as f:
	for line in dropwhile(lambda line: line.startswith('#'), f):	# 跳过开头是#的行
		print(line, end='')

# 如果已经明确知道要跳过多少个元素
>>> from itertools import islice
>>> items = ['a', 'b', 'c', 1, 4, 5,]
>>> for x in islice(items, 3, None):		# 获取[3: ]的元素,
	print(x)
	
1
4
5
>>> for x in islice(items, None, 3):		# 获取[ : 3]的元素
	print(x)

a
b
c

排列组合的迭代

# 遍历集合中元素的所有可能的排列和组合
>>> items = ['a', 'b', 'c']
>>> from itertools import permutations
>>> for p in permutations(items):
	print(p)

('a', 'b', 'c')
('a', 'c', 'b')
('b', 'a', 'c')
('b', 'c', 'a')
('c', 'a', 'b')
('c', 'b', 'a')
>>> for p in permutations(items, 2):		# permutations 可以选择长度
	print(p)

('a', 'b')
('a', 'c')
('b', 'a')
('b', 'c')
('c', 'a')
('c', 'b')
>>> 
>>> from itertools import combinations
>>> for c in combinations(items, 3):		#遍历所有组合且元素不重复
	print(c)

('a', 'b', 'c')
>>> for c in combinations(items, 2):
	print(c)

('a', 'b')
('a', 'c')
('b', 'c')
>>> from itertools import combinations_with_replacement
>>> for c in combinations_with_replacement(items, 3):		#遍历所有组合且元素可以重复
	print(c)

('a', 'a', 'a')
('a', 'a', 'b')
('a', 'a', 'c')
('a', 'b', 'b')
('a', 'b', 'c')
('a', 'c', 'c')
('b', 'b', 'b')
('b', 'b', 'c')
('b', 'c', 'c')
('c', 'c', 'c')
>>> 

序列上索引值迭代

# 迭代序列的同时跟踪正在被处理的元素索引
# 在遍历文件错误时进行定位和跟踪某些值在列表中的位置很有用
>>> mylist = ['a', 'b', 'c']
>>> for idx, val in enumerate(mylist):
	print(idx, val)

0 a
1 b
2 c
>>> for idx, val in enumerate(mylist, 1):		# 可以传递一个index的开始值
	print(idx, val)
	
1 a
2 b
3 c

同时迭代多个序列

# 同时迭代多个序列,每次取一个元素
# 其中一个序列到底,则迭代结束
>>> xpts = [1, 5, 4, 2, 10, 7]
>>> ypts = [101, 78, 37, 15, 62, 99, 135, 22]
>>> for x, y in zip(xpts, ypts):
	print(x, y)
	
1 101
5 78
4 37
2 15
10 62
7 99

# 如果想要迭代长度跟最长额序列长度一致,则需要填充
>>> from itertools import zip_longest
>>> for x, y in zip_longest(xpts, ypts):
	print(x, y)

1 101
5 78
4 37
2 15
10 62
7 99
None 135
None 22
>>> for x, y in zip_longest(xpts, ypts, fillvalue=0):
	print(x, y)

1 101
5 78
4 37
2 15
10 62
7 99
0 135
0 22

不同集合上元素的迭代

# 在多个对象执行相同的操作,避免重复操作
>>> from itertools import chain
>>> a = ['a', 'b', 'c']
>>> b = [1, 2, 3, 4, 5]
>>> for x in chain(a, b):
	print(x)
	
a
b
c
1
2
3
4
5

创建数据处理管道

# 以数据管道的方式迭代处理数据
# 有大量的数据需要处理,但是不能一次性放入内存
# 比如有一个非常大的日志文件目录,可以定义一个有多个执行特定独立任务的简单生成器函数组成
>>> import os
>>> import fnmatch
>>> import gzip
>>> import bz2
>>> import re
>>> 
>>> def gen_find(filepath, top):
	for path, dirlist, filelist in os.walk(top):
		yield os.path.join(path,name)

>>> def gen_open(filenames):
	for filename in filenames:
		if filename.endswith('.gz'):
			f = gzip.open(filename, 'rt')
		else:
			f = open(filename, 'rt')
		yield f
		f.close()

>>> def gen_concatenate(iterators):
	"""Chain several iterators together, yielding every item of each one in turn."""
	for it in iterators:
		yield from it

>>> def gen_grep(pattern, lines):
	"""Yield only the lines in which the regular expression pattern is found."""
	pat = re.compile(pattern)	# compile once, outside the loop
	for line in lines:
		if pat.search(line):
			yield line

>>> def gen_find(filepat, top):
	"""Yield the path of every file under top whose name matches the wildcard pattern filepat."""
	for path, dirlist, filelist in os.walk(top):
		# fnmatch.filter keeps only the names in this directory that match the pattern.
		for name in fnmatch.filter(filelist, filepat):
			yield os.path.join(path,name)

# 将上述函数连起来创建一个处理管道,查找单词‘python’的所有日志行
>>> lognames = gen_find('access-log*', 'www')
>>> files = gen_open(lognames)
>>> lines = gen_concatenate(files)
>>> pylines = gen_grep('(?i)python', lines)
>>> for line in pylines:
	print(line)

展开嵌套的序列

# 将多层嵌套序列展开成一个单层列表
# isinstance() 检查某个元素是否可迭代,如果可迭代,yield from会返回所有子例程的值
# 最终返回一个没有嵌套的序列
>>> from collections import Iterable
>>> def flatten(items, ignore_types=(str, bytes)):	#ignore_types 将str和bytes排除,防止展开成单个字符
	for x in items:
		if isinstance(x, Iterable) and not isinstance(x, ignore_types):
			yield from flatten(x)
		else:
			yield x

>>> items = [1, 2, [3, 4, [5, 6], 7], 8]
>>> for x in flatten(items):
	print(x)

1
2
3
4
5
6
7
8

顺序迭代合并后的排序迭代对象

# 一系列排序序列,想将他们合并后得到一个排序序列并在上面迭代
>>> import heapq
>>> a = [1, 4, 7, 10]
>>> b = [2, 5, 6, 11]
>>> for c in heapq.merge(a, b):
	print(c)

	
1
2
4
5
6
7
10
11

迭代器代替while无限循环

# 常见的操作如下
>>> def reader(s):
	"""Call s.recv(4096) repeatedly until it returns b''."""
	while True:
		data = s.recv(4096)
		# An empty bytes object ends the loop; the received data is not processed here.
		if data == b'':
			break

# 可以用iter() 代替while
>>> def reader2(s):
	"""Call s.recv(4096) repeatedly until it returns b'', using the two-argument form of iter()."""
	# iter(callable, sentinel) calls the lambda again and again and stops once it returns b''.
	for chunk in iter(lambda: s.recv(4096), b''):
		pass
  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值