非常感谢译者熊能的无私奉献,书籍可在线观看python3-cookbook
github:https://github.com/yidao620c/python3-cookbook
一、数据结构和算法
-
任何的序列(或者是可迭代对象)可以通过一个简单的赋值操作来分解为单独的变量。 唯一的要求就是变量的总数和结构必须与序列相吻合。
-
解压可迭代对象赋值给多个变量
-
保留最后N个元素
-
查找最大的或最小的N个元素
-
实现一个优先级队列
import heapq


class PriorityQueue:
    """Priority queue that always pops the highest-priority item first.

    Items pushed with equal priority are popped in insertion order.
    """

    def __init__(self):
        self._queue = []  # heap of (-priority, index, item) tuples
        self._index = 0   # insertion counter used as a tie-breaker

    def push(self, item, priority):
        """Add ``item`` to the queue with the given ``priority``."""
        # heapq is a min-heap, so the priority is negated to pop the largest
        # first. The index orders entries of equal priority by insertion and
        # stops the tuple comparison from ever reaching ``item`` itself.
        heapq.heappush(self._queue, (-priority, self._index, item))
        self._index += 1

    def pop(self):
        """Remove and return the item with the highest priority.

        Raises:
            IndexError: if the queue is empty.
        """
        return heapq.heappop(self._queue)[-1]
-
字典中的键映射多个值
from collections import defaultdict

# Multi-valued mapping backed by lists: keeps duplicates and insertion order.
d = defaultdict(list)
d['a'].append(1)
d['a'].append(2)
d['b'].append(4)

# Multi-valued mapping backed by sets: duplicates are discarded.
# Note that this rebinds ``d``, replacing the list-based mapping above.
d = defaultdict(set)
d['a'].add(1)
d['a'].add(2)
d['b'].add(4)
-
字典排序
OrderedDict
-
字典的运算
prices = {
    'ACME': 45.23,
    'AAPL': 612.78,
    'IBM': 205.55,
    'HPQ': 37.20,
    'FB': 10.75,
}

# zip() inverts the mapping into (value, key) pairs so that min()/max()
# compare by price first and still report which key the price belongs to.
min_price = min(zip(prices.values(), prices.keys()))
# min_price is (10.75, 'FB')
max_price = max(zip(prices.values(), prices.keys()))
# max_price is (612.78, 'AAPL')
-
查找两字典的相同点
# NOTE(review): ``a`` and ``b`` are not defined in this snippet; they are
# presumably two dicts created earlier - confirm against the book's example.

# Find keys in common
a.keys() & b.keys()  # { 'x', 'y' }

# Find keys in a that are not in b
a.keys() - b.keys()  # { 'z' }

# Find (key,value) pairs in common
a.items() & b.items()  # { ('y', 2) }
-
删除序列相同元素并保持顺序
def dedupe(items, key=None):
    """Yield the elements of ``items`` in order, skipping duplicates.

    Args:
        items: Any iterable.
        key: Optional one-argument callable that maps an element to the
            hashable value used for duplicate detection. When ``None`` the
            element itself is used, so it must be hashable.

    Yields:
        Each element whose comparison value has not been seen before.
    """
    seen = set()
    for item in items:
        val = item if key is None else key(item)
        if val not in seen:
            yield item
            seen.add(val)
-
命名切片
内置的slice()函数创建了一个切片对象,可以分别调用它的start,stop和step属性
# Column ruler: each digit below sits above the character of ``record``
# with that index (mod 10).
######    0123456789012345678901234567890123456789012345678901234567890'
record = '....................100 .......513.25 ..........'

# Hard-coded indices: hard to read and easy to get wrong.
cost = int(record[20:23]) * float(record[31:37])

# Same computation with named slice objects.
SHARES = slice(20, 23)
PRICE = slice(31, 37)
cost = int(record[SHARES]) * float(record[PRICE])
-
序列中出现次数最多的元素
from collections import Counter

words = [
    'look', 'into', 'my', 'eyes', 'look', 'into', 'my', 'eyes',
    'the', 'eyes', 'the', 'eyes', 'the', 'eyes', 'not', 'around', 'the',
    'eyes', "don't", 'look', 'around', 'the', 'eyes', 'look', 'into',
    'my', 'eyes', "you're", 'under'
]

word_counts = Counter(words)
# The three most frequent words
top_three = word_counts.most_common(3)
print(top_three)
# Outputs [('eyes', 8), ('the', 5), ('look', 4)]
-
通过某个关键字排序一个字典列表
from operator import itemgetter

rows = [
    {'fname': 'Brian', 'lname': 'Jones', 'uid': 1003},
    {'fname': 'David', 'lname': 'Beazley', 'uid': 1002},
    {'fname': 'John', 'lname': 'Cleese', 'uid': 1001},
    {'fname': 'Big', 'lname': 'Jones', 'uid': 1004}
]

# itemgetter('field') builds the key function that pulls that field out of
# each dict; sorted() returns new lists and leaves ``rows`` untouched.
rows_by_fname = sorted(rows, key=itemgetter('fname'))
rows_by_uid = sorted(rows, key=itemgetter('uid'))
-
排序不支持原生比较的对象
from operator import attrgetter

# NOTE(review): ``User`` is not defined in this snippet; presumably a class
# whose instances expose a ``user_id`` attribute - confirm.
users = [User(23), User(3), User(99)]
sorted(users, key=lambda u: u.user_id)
sorted(users, key=attrgetter('user_id'))  # faster
-
通过某个字段将记录分组
from operator import itemgetter
from itertools import groupby

rows = [
    {'address': '5412 N CLARK', 'date': '07/01/2012'},
    {'address': '5148 N CLARK', 'date': '07/04/2012'},
    {'address': '5800 E 58TH', 'date': '07/02/2012'},
    {'address': '2122 N CLARK', 'date': '07/03/2012'},
    {'address': '5645 N RAVENSWOOD', 'date': '07/02/2012'},
    {'address': '1060 W ADDISON', 'date': '07/02/2012'},
    {'address': '4801 N BROADWAY', 'date': '07/01/2012'},
    {'address': '1039 W GRANVILLE', 'date': '07/04/2012'},
]

# Sort by the desired field first: groupby() only merges *consecutive*
# items that share a key, so unsorted input would split groups apart.
rows.sort(key=itemgetter('date'))

# Iterate in groups
for date, items in groupby(rows, key=itemgetter('date')):
    print(date)
    for i in items:
        print(' ', i)

# Output as recorded in the original notes (the key order inside each dict
# may differ depending on the Python version):
# 07/01/2012
#   {'date': '07/01/2012', 'address': '5412 N CLARK'}
#   {'date': '07/01/2012', 'address': '4801 N BROADWAY'}
# 07/02/2012
#   {'date': '07/02/2012', 'address': '5800 E 58TH'}
#   {'date': '07/02/2012', 'address': '5645 N RAVENSWOOD'}
#   {'date': '07/02/2012', 'address': '1060 W ADDISON'}
# 07/03/2012
#   {'date': '07/03/2012', 'address': '2122 N CLARK'}
# 07/04/2012
#   {'date': '07/04/2012', 'address': '5148 N CLARK'}
#   {'date': '07/04/2012', 'address': '1039 W GRANVILLE'}
-
过滤序列元素
from itertools import compress

# 1. List comprehension
mylist = [1, 4, -5, 10, -7, 2, 3, -1]
[n for n in mylist if n > 0]  # [1, 4, 10, 2, 3]

# 2. Generator expression: yields the same values lazily instead of
#    building the whole list in memory.
pos = (n for n in mylist if n > 0)
for x in pos:
    print(x)  # prints 1, 4, 10, 2, 3 on separate lines

# 3. filter() function, for criteria that do not fit in a simple expression
values = ['1', '2', '-3', '-', '4', 'N/A', '5']


def is_int(val):
    """Return True if ``val`` can be converted with int(), else False."""
    try:
        int(val)
    except ValueError:
        return False
    return True


ivals = list(filter(is_int, values))
print(ivals)
# Outputs ['1', '2', '-3', '4', '5']

# 4. itertools.compress(): select items using a parallel sequence of booleans
addresses = [
    '5412 N CLARK',
    '5148 N CLARK',
    '5800 E 58TH',
    '2122 N CLARK',
    '5645 N RAVENSWOOD',
    '1060 W ADDISON',
    '4801 N BROADWAY',
    '1039 W GRANVILLE',
]
counts = [0, 3, 10, 4, 1, 7, 6, 1]

more5 = [n > 5 for n in counts]
# more5 is [False, False, True, False, False, True, True, False]
list(compress(addresses, more5))
# ['5800 E 58TH', '1060 W ADDISON', '4801 N BROADWAY']
-
从字典中提取子集
prices = {
    'ACME': 45.23,
    'AAPL': 612.78,
    'IBM': 205.55,
    'HPQ': 37.20,
    'FB': 10.75,
}

# Make a dictionary of all prices over 200
p1 = {key: value for key, value in prices.items() if value > 200}

# Make a dictionary of tech stocks; names missing from ``prices``
# (here 'MSFT') simply do not appear in the result.
tech_names = {'AAPL', 'IBM', 'HPQ', 'MSFT'}
p2 = {key: value for key, value in prices.items() if key in tech_names}
-
映射名称到序列元素
collections.namedtuple()
-
转换并同时计算数据
nums = [1, 2, 3, 4, 5]
# A generator expression passed as the sole argument transforms and reduces
# in one pass, without building an intermediate list.
s = sum(x * x for x in nums)
-
合并多个字典或映射
from collections import ChainMap

a = {'x': 1, 'z': 3}
b = {'y': 2, 'z': 4}

# ChainMap searches the mappings in the order given, so for a key present
# in both, the value from ``a`` wins.
c = ChainMap(a, b)
print(c['x'])  # Outputs 1 (from a)
print(c['y'])  # Outputs 2 (from b)
print(c['z'])  # Outputs 3 (from a)
五、文件与IO
-
读写文本数据
# Read the entire file as a single string
with open('somefile.txt', 'rt') as f:
    data = f.read()

# Iterate over the lines of the file
with open('somefile.txt', 'rt') as f:
    for line in f:
        # process line
        ...

# NOTE(review): text1, text2, line1 and line2 below are placeholders that
# are not defined in this snippet.

# Write chunks of text data
with open('somefile.txt', 'wt') as f:
    f.write(text1)
    f.write(text2)
    ...

# Redirected print statement
with open('somefile.txt', 'wt') as f:
    print(line1, file=f)
    print(line2, file=f)
    ...
-
打印输出到文件中
-
使用其他分隔符或行终止符打印
# ``sep`` is placed between the arguments and ``end`` replaces the default
# trailing newline.
print('ACME', 50, 91.5, sep=',', end='!!\n')
# Outputs ACME,50,91.5!!
-
读写字节数据
import array

# Read the entire file as a single byte string
with open('somefile.bin', 'rb') as f:
    data = f.read()

# Write binary data to a file
with open('somefile.bin', 'wb') as f:
    f.write(b'Hello World')

# Objects that expose the buffer interface, such as arrays, can be written
# directly without first converting them to bytes.
nums = array.array('i', [1, 2, 3, 4])
with open('data.bin', 'wb') as f:
    f.write(nums)

# readinto() fills an existing buffer in place; here the file holds only
# four ints, so the remaining four slots of ``a`` keep their initial 0.
a = array.array('i', [0, 0, 0, 0, 0, 0, 0, 0])
with open('data.bin', 'rb') as f:
    f.readinto(a)
# a is now array('i', [1, 2, 3, 4, 0, 0, 0, 0])
-
文件不存在才能写入
-
字符串的I/O操作
用 io.StringIO() 和 io.BytesIO() 类来创建类文件对象操作字符串数据,常用于单元测试中模拟普通文件
-
读写压缩文件
使用 gzip 和 bz2 模块
-
固定大小记录的文件迭代
-
固定二进制数据到可变缓冲区中
-
内存映射的二进制文件
-
文件路径名的操作
os.path
-
测试文件是否存在
os.path.exists('') os.path.isfile('') os.path.isdir('') os.path.islink('') os.path.realpath('') os.path.getsize('') os.path.getmtime('')
-
获取文件夹中的文件列表
os.listdir('') pyfiles = [name for name in os.listdir('somedir') if name.endswith('.py')]
文件名的匹配,可以考虑使用 glob 和 fnmatch 模块