import re
import reprlib
from collections.abc import Iterable, Iterator, Generator
from fractions import Fraction
import itertools
import operator
# Raw string: '\w' in a plain literal is an invalid escape sequence.
RE_WORD = re.compile(r'\w+')


class Sentence:
    """Iterable over the words of a text (classic iterator-pattern version).

    The words are extracted eagerly at construction time.  Each call to
    ``iter()`` returns a fresh, independent ``SentenceIterator``.
    """

    def __init__(self, text):
        """Store ``text`` and the list of words matched by ``RE_WORD``."""
        self.text = text
        self.words = RE_WORD.findall(text)

    def __iter__(self):
        """Return a new iterator on every call."""
        return SentenceIterator(self.words)

    def __repr__(self):
        """Return ``Sentence(...)`` with the text abbreviated by reprlib."""
        return 'Sentence(%s)' % reprlib.repr(self.text)

    # There is no need to implement __next__ here.
    # To "support multiple traversals" it must be possible to obtain several
    # independent iterators from the same iterable instance, and each
    # iterator must maintain its own internal state.


class SentenceIterator:
    """Iterator that walks a list of words by index."""

    def __init__(self, words):
        """Keep a reference to ``words`` and start at position 0."""
        self.words = words
        self.index = 0

    def __next__(self):
        """Return the next word; raise StopIteration when exhausted."""
        try:
            word = self.words[self.index]
        except IndexError:
            raise StopIteration()
        self.index += 1
        return word

    def __iter__(self):
        """Return the iterator itself."""
        return self
class SentenceGenerator:
    """Iterable over words whose ``__iter__`` is a generator function."""

    def __init__(self, text):
        """Store ``text`` and eagerly extract its words with ``RE_WORD``."""
        self.text = text
        self.words = RE_WORD.findall(text)

    def __iter__(self):
        """Yield the stored words one by one."""
        for word in self.words:
            yield word

    def __repr__(self):
        """Return ``Sentence(...)`` with the text abbreviated by reprlib."""
        return 'Sentence(%s)' % reprlib.repr(self.text)


class SentenceGeneratorInert:
    """Lazy generator implementation: words are matched only on iteration."""

    def __init__(self, text):
        """Store ``text``; no word list is built."""
        self.text = text

    def __iter__(self):
        """Yield each word as ``RE_WORD.finditer`` produces its match."""
        # finditer builds an iterator directly instead of a full list.
        for match in RE_WORD.finditer(self.text):
            yield match.group()

    def __repr__(self):
        """Return ``Sentence(...)`` with the text abbreviated by reprlib."""
        return 'Sentence(%s)' % reprlib.repr(self.text)


class SentenceGeneratorExpr:
    """Lazy implementation whose ``__iter__`` returns a generator expression."""

    def __init__(self, text):
        """Store ``text``; no word list is built."""
        self.text = text

    def __iter__(self):
        """Return a generator expression over the matched words."""
        return (match.group() for match in RE_WORD.finditer(self.text))

    def __repr__(self):
        """Return ``Sentence(...)`` with the text abbreviated by reprlib."""
        return 'Sentence(%s)' % reprlib.repr(self.text)
# Demo: a Sentence can be printed, turned into an iterator and traversed
# more than once.
s = Sentence('"The time has come," the Walrus said,')
print(s)
print(iter(s))
for word in s:
    print(word, end=' ')
print()
print(list(s))

# When the interpreter needs to iterate over an object x, it automatically
# calls iter(x), which:
# (1) Checks whether the object implements __iter__ and, if so, calls it to
#     obtain an iterator.
# (2) If __iter__ is not implemented but __getitem__ is, Python creates an
#     iterator that tries to fetch items in order, starting from index 0.
# (3) If that fails, Python raises TypeError, usually saying
#     "C object is not iterable", where C is the class of the target object.
#
# Iterables, iterators and generators
# Iterable:  an object is iterable if it implements an __iter__ method that
#            returns an iterator.  Sequences are always iterable; so are
#            objects implementing a __getitem__ that takes zero-based indexes.
# Iterator:  has both __iter__ and __next__; __iter__ returns self and
#            __next__ returns the next available item, raising StopIteration
#            when there are no more items.
# Generator: built with the yield keyword.
# Iterating a string with a for loop ...
s = 'ABC'
for char in s:
    print(char, end=' ')
print()

# ... and the same traversal driven by hand with iter()/next().
it = iter(s)
while True:
    try:
        print(next(it), end=' ')
    except StopIteration:
        del it  # release the exhausted iterator
        break
print()


def generator_test():
    """Yield the integers 0 through 4."""
    for x in range(5):
        yield x


g = generator_test()  # function object -> generator object
print(next(g))
for i in g:
    print(i, end=' ')
print()


# Arithmetic progression generator
class ArithmeticProgression:
    """Iterable arithmetic progression ``begin, begin + step, ...``.

    Iteration stops before the first value that is not ``< end``; with
    ``end=None`` the series is infinite.
    """

    def __init__(self, begin, step, end=None):
        """Store the progression parameters unchanged."""
        self.begin = begin
        self.step = step
        self.end = end  # None -> infinite series

    def __iter__(self):
        """Yield the terms, coerced to the type of ``begin + step``."""
        # Coerce the first term so every term has the same numeric type.
        result = type(self.begin + self.step)(self.begin)
        forever = self.end is None  # None produces an infinite series
        index = 0
        while forever or result < self.end:
            yield result
            index += 1
            # Compute from the index rather than by repeated addition to
            # reduce the risk of cumulative floating-point error.
            result = self.begin + self.step * index
def aritprog_gen(begin, step, end=None):
    """Generate the arithmetic progression ``begin, begin + step, ...``.

    Terms are coerced to the type of ``begin + step``.  Generation stops
    before the first term that is not ``< end``; with ``end=None`` the
    generator never stops.
    """
    result = type(begin + step)(begin)
    forever = end is None
    index = 0
    while forever or result < end:
        yield result
        index += 1
        # Derived from the index to avoid accumulating rounding error.
        result = begin + step * index
# Demo of ArithmeticProgression with int and Fraction steps.
ap = ArithmeticProgression(0, 1, 3)
ap2 = ArithmeticProgression(0, Fraction(1, 3), 1)
print(list(ap2))
print(itertools.count(1, 0.5))  # itertools.count(start, step)
gen = itertools.takewhile(lambda n: n < 3, itertools.count(1, 0.5))
print(list(gen))  # takewhile stops as soon as the predicate is False
print()


# Generator functions in the standard library
# Filtering generator functions:
# compress dropwhile filter filterfalse islice takewhile
def vowel(c):
    """Return True if ``c`` is a single vowel character (case-insensitive)."""
    # The length check matters: '' is a substring of every string, so a bare
    # ``c.lower() in 'aeiou'`` would report the empty string as a vowel.
    return len(c) == 1 and c.lower() in 'aeiou'


def filter_gen():
    """Print the results of the filtering generator functions on 'Aardvark'."""
    print(list(filter(vowel, 'Aardvark')))
    # Opposite of filter.
    print(list(itertools.filterfalse(vowel, 'Aardvark')))
    # Skips items while the predicate is truthy, then yields the rest
    # without further checks.
    print(list(itertools.dropwhile(vowel, 'Aardvark')))
    # Yields items while the predicate is truthy, stops at the first falsy.
    print(list(itertools.takewhile(vowel, 'Aardvark')))
    # Yields the items whose corresponding selector is truthy.
    print(list(itertools.compress('Aardvark', (1, 0, 1, 1, 0, 1))))
    print(list(itertools.islice('Aardvark', 4)))  # s[:stop]
    print(list(itertools.islice('Aardvark', 4, 7)))  # s[start:stop]
    print(list(itertools.islice('Aardvark', 1, 7, 2)))  # s[start:stop:step]


# filter_gen()

# Mapping generator functions:
# accumulate enumerate map starmap
sample = [5, 4, 2, 8, 7, 6, 3, 0, 9, 1]


def map_gen():
    """Print the results of the mapping generator functions."""
    # accumulate yields running totals; if func is given, the first two
    # items are passed to it, then the result and the next item, and so on,
    # yielding each intermediate result.
    print(list(itertools.accumulate(sample)))
    print(list(itertools.accumulate(sample, min)))
    print(list(itertools.accumulate(sample, max)))
    print(list(itertools.accumulate(sample, operator.mul)))
    # enumerate(iterable, start=0) yields 2-tuples of the form (index, item).
    print(list(enumerate('albatroz', 1)))
    # map applies func to each item; given N iterables, func must accept
    # N arguments.
    print(list(map(operator.mul, range(11), range(11))))
    # starmap applies func to each item; the input iterable must yield
    # iterables iit, and func is called as func(*iit).
    print(list(itertools.starmap(operator.mul, enumerate('albatroz', 1))))


# map_gen()

# Merging generator functions:
# chain chain.from_iterable product zip zip_longest
def merge_gen():
    """Print the results of the merging generator functions."""
    # Yields all items from it1, then all from it2, and so on, seamlessly.
    print(list(itertools.chain('ABC', range(2))))
    print(list(itertools.chain(enumerate('ABC'))))
    # Yields the items of each iterable produced by the input iterable.
    print(list(itertools.chain.from_iterable(enumerate('ABC'))))
    # Yields N-tuples built from items taken in parallel from the inputs,
    # silently stopping when the first iterable is exhausted.
    print(list(zip('ABC', range(5))))
    print(list(zip('ABC', range(5), [10, 20, 30, 40])))
    # Stops only when the longest iterable is exhausted; missing values
    # are filled with fillvalue (default None).
    print(list(itertools.zip_longest('ABC', range(5))))
    print(list(itertools.zip_longest('ABC', range(5), fillvalue='?')))
    print(list(itertools.product('ABC', range(2))))  # Cartesian product


# merge_gen()

# Expanding generator functions:
# combinations combinations_with_replacement count cycle permutations repeat
def ext_gen():
    """Print the results of the expanding generator functions."""
    # Yields numbers starting at start, incremented by step, indefinitely.
    print(list(itertools.islice(itertools.count(1, 0.3), 3)))
    # Repeats the items in order, indefinitely.
    print(list(itertools.islice(itertools.cycle('ABC'), 7)))
    # Yields the given item repeatedly, or only `times` times if given.
    print(list(itertools.repeat(8, 4)))
    print(list(itertools.combinations('ABC', 2)))  # combinations
    # Combinations, including repeated items.
    print(list(itertools.combinations_with_replacement('ABC', 2)))
    print(list(itertools.permutations('ABC', 2)))  # permutations


# ext_gen()

# Rearranging generator functions:
# groupby reversed tee
animals = ['duck', 'eagle', 'rat', 'giraffe', 'bear',
           'bat', 'dolphin', 'shark', 'lion']
# groupby only groups adjacent items, so sort by the grouping key first.
animals.sort(key=len, reverse=True)


def rearrange_gen():
    """Print the results of the rearranging generator functions."""
    # groupby yields 2-tuples of the form (key, group), where key is the
    # grouping criterion and group is a generator yielding the group's items.
    print(list(itertools.groupby('LLLLAAGGG')))
    for length, group in itertools.groupby(animals, len):
        print(length, '->', list(group), end=' ')
    print()
    # tee yields a tuple of n generators, each independently yielding the
    # items of the input iterable.
    print(list(itertools.tee('ABC')))


# rearrange_gen()

# yield from
# Used when a generator function needs to yield the values produced by
# another generator.
def chain(*iterables):
    """Yield every item of each iterable in turn (nested-loop version)."""
    for it in iterables:
        for i in it:
            yield i
def chain2(*iterables):
    """Yield every item of each iterable in turn, delegating with yield from."""
    for i in iterables:
        yield from i
s = 'ABC'
t = tuple(range(3))
print(list(chain2(s, t)))

# Iterable reducing functions:
# all any max min reduce sum
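# 15. Context managers and else blocks
# for/else, while/else, try/else
# for/else:
#   the else block runs only when the for loop runs to completion
#   (i.e. the loop was not aborted by a break statement).
# while/else:
#   the else block runs only when the while loop exits because its
#   condition became falsy (i.e. the loop was not aborted by break).
# try/else:
#   the else block runs only when no exception is raised in the try block.
# try/except/else
# with simplifies try/finally
# __enter__ __exit__
# Utilities in the contextlib module:
# closing suppress @contextmanager ContextDecorator ExitStack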
# 16. Coroutines
from inspect import getgeneratorstate
from functools import wraps
from collections import namedtuple
# A coroutine can be in one of four states:
# 'GEN_CREATED'   - waiting to start execution.
# 'GEN_RUNNING'   - currently being executed by the interpreter.
# 'GEN_SUSPENDED' - currently suspended at a yield expression.
# 'GEN_CLOSED'    - execution has completed.
def simple_coroutine():
    """Receive up to five values via ``send()``, printing each one.

    Each round prints a start message, suspends at a bare ``yield`` and
    prints the value it is resumed with.
    """
    for _ in range(5):
        print('-> coroutine started')
        x = yield
        print('-> coroutine received:', x)


# my_coro = simple_coroutine()
# next(my_coro)  # "prime" the coroutine
# my_coro.send(42)
# my_coro.send(100)
def simple_coro2(a):
    """Endlessly yield ``a``, then ``a + b`` for each received ``b``.

    After ``a + b`` is yielded, the next value sent is printed as ``c`` and
    the cycle starts over.
    """
    while True:
        print('-> Started: a =', a)
        b = yield a
        print('-> Received: b =', b)
        c = yield a + b
        print('-> Received: c =', c)


# `yield a` and `yield a + b` produce the values returned to the caller;
# send() supplies the values received by the coroutine.
# my_coro2 = simple_coro2(14)
# print(getgeneratorstate(my_coro2))
# next(my_coro2)
# print(getgeneratorstate(my_coro2))
# my_coro2.send(28)
# my_coro2.send(99)
# print(getgeneratorstate(my_coro2))

# Computing a running average
def averager():
    """Coroutine yielding the running average of the values sent to it.

    The first ``next()`` yields ``None``; every ``send(term)`` afterwards
    yields the mean of all terms received so far.
    """
    total = 0.0
    count = 0
    average = None
    while True:
        term = yield average
        total += term
        count += 1
        average = total / count
# coro_avg = averager()
# next(coro_avg)
# print(coro_avg.send(10))
# print(coro_avg.send(30))
# print(coro_avg.send(5))

# Decorator that primes a coroutine,
# so the caller does not need the initial next() call.
def coroutine(func):
    """Decorator: prime `func` by advancing it to its first `yield`."""
    @wraps(func)
    def primer(*args, **kwargs):
        gen = func(*args, **kwargs)
        next(gen)  # run up to the first yield so send() works right away
        return gen
    return primer
# Terminating coroutines and handling exceptions:
# generator.throw(exc_type[, exc_value[, traceback]])
# generator.close()
class DemoException(Exception):
    """An exception type defined for this demonstration."""


def demo_exc_handling():
    """Coroutine that prints received values and survives DemoException.

    A ``DemoException`` thrown in at the ``yield`` is reported and the loop
    continues; any other exception propagates and ends the coroutine.
    """
    print('-> coroutine started')
    while True:
        try:
            x = yield
        except DemoException:
            print('*** DemoException handled. Continuing...')
        else:
            print('-> coroutine received: {!r}'.format(x))
exc_coro = demo_exc_handling()
next(exc_coro)
exc_coro.send(11)
exc_coro.send(22)
# exc_coro.throw(DemoException)
exc_coro.close()

# If some clean-up must happen no matter how the coroutine ends, put the
# relevant part of the coroutine body in a try/finally block.

# Returning a value from a coroutine
Result = namedtuple('Result', 'count average')


def averager2():
    """Coroutine that accumulates sent values and returns a ``Result``.

    Sending ``None`` ends the loop; the ``Result(count, average)`` is then
    delivered as the ``value`` of the resulting ``StopIteration``.  If no
    term was received, ``average`` is ``None``.
    """
    total = 0.0
    count = 0
    average = None
    while True:
        term = yield
        if term is None:  # sentinel: the caller is done sending values
            break
        total += term
        count += 1
        average = total / count
    return Result(count, average)
coro_avg = averager2()
next(coro_avg)
coro_avg.send(10)
coro_avg.send(30)
coro_avg.send(6.5)
result = Result(4, 5)  # default value, replaced by the coroutine's result
try:
    coro_avg.send(None)
except StopIteration as exc:
    result = exc.value
print(result)


# Using yield from
# caller -> delegating generator -> subgenerator
def grouper(results, key):
    """Delegating generator: store each averager2() result in results[key].

    Every time the subgenerator returns, its ``Result`` is bound to
    ``results[key]`` and a fresh ``averager2()`` is started.
    """
    while True:
        results[key] = yield from averager2()


def mainfun(data):
    """Average each list of values in ``data`` and print a report.

    ``data`` maps a key to an iterable of numbers; one ``grouper`` is
    driven per key.
    """
    results = {}
    for key, values in data.items():
        group = grouper(results, key)
        next(group)  # prime the delegating generator
        for value in values:
            group.send(value)
        # Important: None makes the current averager2() return, so that
        # results[key] is actually assigned.
        group.send(None)
    # print(results)  # uncomment to debug
    report(results)


# Print the report
def report(results):
    """Print one formatted line per entry of ``results``, sorted by key.

    Keys must have the form ``'group;unit'``; each value must expose
    ``count`` and ``average`` attributes.
    """
    for key, result in sorted(results.items()):
        group, unit = key.split(';')
        print('{:2} {:5} averaging {:.2f}{}'.format(
            result.count, group, result.average, unit))
data = {
    'girls;kg': [40.9, 38.5, 44.3, 42.2, 45.2, 41.7, 44.5, 38.0, 40.6, 44.5],
    'girls;m': [1.6, 1.51, 1.4, 1.3, 1.41, 1.39, 1.33, 1.46, 1.45, 1.43],
    'boys;kg': [39.0, 40.8, 43.2, 40.8, 43.1, 38.6, 41.4, 40.6, 36.3],
    'boys;m': [1.38, 1.5, 1.32, 1.25, 1.37, 1.48, 1.25, 1.49, 1.46],
}
mainfun(data)
# 17. Concurrency with futures
import os
import time
import sys
import requests
from concurrent import futures
POP20_CC = ('CN IN US ID BR PK NG BD RU JP '
            'MX PH VN ET EG DE IR TR CD FR').split()
BASE_URL = 'http://flupy.org/data/flags'
DEST_DIR = 'downloads/'


def save_flag(img, filename):
    """Write the bytes ``img`` to ``filename`` inside ``DEST_DIR``.

    ``DEST_DIR`` is created if it does not exist, so the first download
    does not fail with FileNotFoundError.
    """
    os.makedirs(DEST_DIR, exist_ok=True)
    path = os.path.join(DEST_DIR, filename)
    with open(path, 'wb') as fp:
        fp.write(img)


def get_flag(cc):
    """Download and return the bytes of the flag image for country code ``cc``.

    Raises:
        requests.HTTPError: if the server answers with an error status;
            without this check an error page would be saved as a ``.gif``.
    """
    url = '{}/{cc}/{cc}.gif'.format(BASE_URL, cc=cc.lower())
    resp = requests.get(url)
    resp.raise_for_status()
    return resp.content
def show(text):
    """Print ``text`` followed by a space instead of a newline."""
    print(text, end=' ')
    # sys.stdout.flush()  # flush the buffer


def download_many(cc_list):
    """Download and save the flags for ``cc_list`` one after another.

    Country codes are processed in sorted order.  Returns ``len(cc_list)``.
    """
    for cc in sorted(cc_list):
        image = get_flag(cc)
        show(cc)
        save_flag(image, cc.lower() + '.gif')
    return len(cc_list)


def mainfun(download_many):
    """Run ``download_many(POP20_CC)`` and print the count and elapsed time.

    ``download_many`` is any callable that takes a list of country codes and
    returns the number of flags downloaded.
    """
    # perf_counter is monotonic, so the measurement is not disturbed by
    # system clock adjustments.
    t0 = time.perf_counter()
    count = download_many(POP20_CC)
    elapsed = time.perf_counter() - t0
    msg = '\n{} flags downloaded in {:.2f}s'
    print(msg.format(count, elapsed))


# mainfun(download_many)

# Concurrent download
MAX_WORKERS = 20


def download_one(cc):
    """Download, announce and save the flag for ``cc``; return ``cc``.

    The file is saved as ``<cc lowercased>_futures.gif``.
    """
    image = get_flag(cc)
    show(cc)
    save_flag(image, cc.lower() + '_futures.gif')
    return cc
def download_many2(cc_list):
    """Download the flags for ``cc_list`` concurrently with a thread pool.

    Uses at most ``MAX_WORKERS`` threads.  Returns the number of results
    produced by ``download_one``; an empty ``cc_list`` returns 0.
    """
    # ThreadPoolExecutor rejects max_workers <= 0 with ValueError, so an
    # empty list must not reach it.
    if not cc_list:
        return 0
    workers = min(MAX_WORKERS, len(cc_list))  # maximum number of threads
    with futures.ThreadPoolExecutor(workers) as executor:
        res = executor.map(download_one, sorted(cc_list))
    # Consuming the results re-raises any exception from a worker.
    return len(list(res))


# mainfun(download_many2)

# Experimenting with futures
def download_many3(cc_list):
    """Download the first five flags of ``cc_list``, printing each future.

    Work is submitted to a three-thread pool; every future is printed when
    it is scheduled and again, with its result, when it completes.  Returns
    the number of results collected.
    """
    cc_list = cc_list[:5]
    with futures.ThreadPoolExecutor(max_workers=3) as executor:
        to_do = []
        for cc in sorted(cc_list):
            future = executor.submit(download_one, cc)
            to_do.append(future)
            msg = 'Scheduled for {}: {}'
            print(msg.format(cc, future))

        results = []
        for future in futures.as_completed(to_do):
            res = future.result()
            msg = '{} result: {!r}'
            print(msg.format(future, res))
            results.append(res)

    return len(results)


# mainfun(download_many3)

# Python interpreters: CPython (the default), PyPy, Psyco, Jython
# Blocking I/O and the GIL
# The CPython interpreter itself is not thread-safe, hence the Global
# Interpreter Lock (GIL).
# Only one thread at a time may execute Python bytecode, so a Python process
# usually cannot use several CPU cores at once.
# Multiple processes can be used to get parallelism and improve throughput.
# 18. Concurrency with asyncio
import threading
import itertools
import time
import sys
class Signal:
    """Mutable flag shared with the spinner thread; ``go=False`` stops it."""

    go = True


def spin(msg, signal):
    """Animate a text spinner followed by ``msg`` until ``signal.go`` is falsy.

    Meant to run in its own thread.  Each frame is written to stdout and
    the cursor is moved back with backspaces; the line is blanked on exit.
    """
    write, flush = sys.stdout.write, sys.stdout.flush
    for char in itertools.cycle('|/-\\'):
        status = char + ' ' + msg
        write(status)
        flush()
        write('\x08' * len(status))  # backspaces: return to line start
        time.sleep(.1)
        if not signal.go:
            break
    # Overwrite the status with spaces and move the cursor back again.
    write(' ' * len(status) + '\x08' * len(status))


def slow_function():
    """Pretend to wait on I/O for a while, then return 42."""
    time.sleep(3)
    return 42


def supervisor():
    """Run ``slow_function`` while a spinner thread animates; return its result."""
    signal = Signal()
    spinner = threading.Thread(target=spin,
                               args=('thinking!', signal))
    print('spinner object:', spinner)
    spinner.start()
    try:
        result = slow_function()
    finally:
        # Always stop the spinner: if slow_function raised, the non-daemon
        # thread would otherwise spin forever and keep the process alive.
        signal.go = False
        spinner.join()
    return result
def mainfun():
    """Run the supervisor and print the answer it returns."""
    result = supervisor()
    print('Answer:', result)


mainfun()