2022-4-7 python cookbook(v3.0) 学习笔记(一)

最新推荐文章于 2024-04-21 16:38:22 发布

丨可乐猫丨

最新推荐文章于 2024-04-21 16:38:22 发布

阅读量610

点赞数

文章标签： python 数据结构

本文链接：https://blog.csdn.net/weihua1643/article/details/124006574

版权

数据结构和算法

解压序列赋值给多变量

>>> data = ['ACME', 50, 91.1, (2012, 12, 21)]
>>> name, shares, price, date = data
>>> name
'ACME'
>>> date
(2012, 12, 21)
>>> name, shares, price, (year, mon, day) = data
>>> year
2012
>>> _, shares, price, _ = data		#解压一部分丢弃一部分时可使用任意变量占位
>>> price
91.1
>>> s = 'hello'			#除了列表、元组，也可以用于字符串、文件对象、迭代器和生成器
>>> a, b, c, d, e = s
>>> b
'e'
>>>

解压可迭代对象赋值给多变量

>>> record = ('Dave', 'dave@xxx.com', '0433-xxxx', '04333-yyyy')
>>> name, email, *phone = record
>>> name
'Dave'
>>> phone
['0433-xxxx', '04333-yyyy']		#此方法解压出的phone永远是列表类型
>>>
>>> line = 'nobody:*:-2:-2:Unprivileged User:/var/empty:/usr/bin/false'
>>> uname, *fields, homedir, sh = line.split(':')		#字符串分割
>>> uname
'nobody'
>>> homedir
'/var/empty'
>>> sh
'/usr/bin/false'
>>>  
>>> record = ('ACME', 50, 123.45, (12, 18, 2012))
>>> name, *_, (*_, year) = record
>>> name
'ACME'
>>> year
2012
>>>

保留最后N个元素

>>> from collections import deque
>>> q = deque(maxlen=3)		#新建固定大小的队列
>>> q.append(1)
>>> q.append(2)
>>> q.append(3)
>>> q
deque([1, 2, 3], maxlen=3)
>>> q.append(4)		#新元素加入且队列已满，则移除最先进入队列的元素
>>> q = deque()		#若不设置大小，则新建一个无限大小的队列
>>> q.append(1)
>>> q.append(2)
>>> q.appendleft(3)
>>> q
deque([3, 1, 2])
>>> q.pop()
2
>>> q
deque([3, 1])
>>> q.popleft()
3
>>> q
deque([1])
>>>

查找最大或最小的N个元素

>>> import heapq
>>> nums = [1, 8, 2, 23, 7 , -4, 18, 23, 42, 37, 2]
>>> heapq.nlargest(3, nums)		#最大的三个元素
[42, 37, 23]
>>> heapq.nsmallest(3, nums)		#最小的三个元素
[-4, 1, 2]
>>> portfolio = [{'name': 'xxx', 'price': 10},{'name': 'yyy', 'price': 15}, {'name': 'zzz', 'price': 20}]
>>> cheap = heapq.nsmallest(1, portfolio, key=lambda s: s['price'])
>>> cheap		#根据关键字进行查找最小的元素
[{'name': 'xxx', 'price': 10}]
>>> expensive = heapq.nlargest(1, portfolio, key=lambda s: s['price'])
>>> expensive		#根据关键字进行查找最大的元素
[{'name': 'zzz', 'price': 20}]
>>>

heapq的底层实现是将集合数据按堆的结构存放在列表中（并非完全排序），其特点是heap[0]永远是最小的元素，heappop()每次弹出当前最小的元素

>>> heap = list(nums)
>>> heapq.heapify(heap)
>>> heap
[-4, 2, 1, 23, 7, 2, 18, 23, 42, 37, 8]
>>> heapq.heappop(heap)
-4
>>> heapq.heappop(heap)
1
>>> heapq.heappop(heap)
2
>>>

1.如果要查找的元素个数相对较小时，可以使用nlargest()和nsmallest()
2.如果仅仅想查找唯一最大最小值时，可以使用min()和max()
3.如果要查找元素个数与集合大小接近时，可以先排序再切片，例如sorted(items)[:N]

实现一个优先级队列

class PriorityQueue:
    """Priority queue on top of ``heapq`` that pops the highest priority first.

    Each entry is stored as a ``(-priority, index, item)`` tuple:

    * the priority is negated so that the largest priority sorts first in
      the heap;
    * the insertion index breaks ties, so among equal priorities the item
      pushed earlier is popped earlier (and, being unique, it means the
      items themselves are never compared).
    """

    def __init__(self):
        self._queue = []
        self._index = 0

    def push(self, item, priority):
        """Insert *item* with the given numeric *priority*."""
        entry = (-priority, self._index, item)
        heapq.heappush(self._queue, entry)
        self._index += 1

    def pop(self):
        """Remove and return the item with the highest priority."""
        _, _, item = heapq.heappop(self._queue)
        return item

class Item:
    """Simple named object used as a payload in the priority-queue demo."""

    def __init__(self, name):
        self.name = name

    def __repr__(self):
        # !r keeps the quotes around the name, e.g. Item('foo')
        return f'Item({self.name!r})'

def main():
    """Push four items in mixed priority order and print them as they pop."""
    q = PriorityQueue()
    entries = (('foo1', 1), ('foo3', 3), ('foo4', 4), ('foo2', 2))
    for label, priority in entries:
        q.push(Item(label), priority)
    # Pop once per pushed item; each pop prints the highest remaining priority.
    for _ in range(4):
        print(q.pop())
========================================================
Output:
Item('foo4')
Item('foo3')
Item('foo2')
Item('foo1')

字典中的键映射多个值

>>> from collections import defaultdict
>>> d = defaultdict(list)
>>> d['a'].append(1)
>>> d['a'].append(2)
>>> d['b'].append(4)
>>> d
defaultdict(<class 'list'>, {'a': [1, 2], 'b': [4]})
>>> d = defaultdict(set)
>>> d['a'].add(1)
>>> d['a'].add(2)
>>> d['b'].add(4)
>>> d
defaultdict(<class 'set'>, {'a': {1, 2}, 'b': {4}})
>>>
>>> d = defaultdict(list)		#代码更简洁
>>> for key, value in pairs:
		d[key].append(value)

字典排序

为了能控制一个字典中元素的顺序，可以使用collections中的OrderedDict

>>> from collections import OrderedDict
>>> d = OrderedDict()
>>> d['xxx'] = 1
>>> d['yyy'] = 2
>>> d['zzz'] = 3
>>> d
OrderedDict([('xxx', 1), ('yyy', 2), ('zzz', 3)])

OrderedDict的大小是普通字典的两倍，因为内部需要维护另一个链表，每次当新元素插入时，它会被放到链表的尾部。

字典运算

求最小值、最大值、排序等
使用zip()将键和值进行反转

>>> prices = {
	'xxx': 45.23,
	'yyy': 612.78,
	'zzz': 205.55,
	'mmm': 27.20,
	'nnn': 10.75
	}
>>> min_price = min(zip(prices.values(), prices.keys()))
>>> min_price
(10.75, 'nnn')
>>> max_price = max(zip(prices.values(), prices.keys()))
>>> max_price
(612.78, 'yyy')
>>> prices_sorted = sorted(zip(prices.values(), prices.keys()))
>>> prices_sorted
[(10.75, 'nnn'), (27.2, 'mmm'), (45.23, 'xxx'), (205.55, 'zzz'), (612.78, 'yyy')]
>>>
>>> prices = {'xxx': 45.23, 'zzz': 45.23}		#值相同时，才会比较键
>>> min(zip(prices.values(), prices.keys()))
(45.23, 'xxx')
>>> max(zip(prices.values(), prices.keys()))
(45.23, 'zzz')

查找两字典的相同点

>>> a = {
	'x': 1,
	'y': 2,
	'z': 3
	}
>>> b = {
	'w': 10,
	'x': 11,
	'y': 2
	}
>>> a.keys() & b.keys()
{'x', 'y'}
>>> a.keys() - b.keys()
{'z'}
>>> a.items() & b.items()
{('y', 2)}
>>> c = {key:a[key] for key in a.keys() - {'z', 'w'}}	#用现有字典删除几个指定键构造新字典
>>> c
{'x': 1, 'y': 2}
>>>

删除序列相同元素并保持顺序

def dedupe(items, key=None):
    """Yield the elements of *items* in order, skipping repeats.

    Args:
        items: Iterable of elements to filter.
        key: Optional callable mapping an element to the hashable value used
            to detect duplicates. When omitted, the element itself is used,
            so it must be hashable.

    Yields:
        Each element whose comparison value has not been seen before.
    """
    already_seen = set()
    for element in items:
        marker = key(element) if key is not None else element
        if marker in already_seen:
            continue
        yield element
        already_seen.add(marker)
def main():
    """Print the dict list de-duplicated by (x, y) and then by x alone."""
    a = [{'x': 1, 'y': 2}, {'x': 1, 'y': 3}, {'x': 1, 'y': 2}, {'x': 2, 'y': 4}]
    key_funcs = (
        lambda d: (d['x'], d['y']),  # duplicates must match on both fields
        lambda d: d['x'],            # duplicates match on 'x' only
    )
    for key_func in key_funcs:
        print(list(dedupe(a, key=key_func)))

# Run the demo only when this file is executed as a script, not when imported.
if __name__ == '__main__':
    main()

Output:
[{'x': 1, 'y': 2}, {'x': 1, 'y': 3}, {'x': 2, 'y': 4}]		#消除了重复项 {'x': 1, 'y': 2}
[{'x': 1, 'y': 2}, {'x': 2, 'y': 4}]			#消除了所有'x': 1的重复项

命名切片

>>> record = '0123456789011223344556677889900'
>>> SHARES = slice(10,15)		#将切片命名为SHARES
>>> PRICE = slice(16,20)		#将切片命名为PRICE
>>> SHARES
slice(10, 15, None)
>>> record[SHARES]
'01122'
>>> PRICE
slice(16, 20, None)
>>> record[PRICE]
'3445'
>>> a = slice(5, 50, 2)		#定义切片的开始，停止和步长
>>> a
slice(5, 50, 2)
>>> a.start
5
>>> a.stop
50
>>> a.step
2
>>> s = 'HelloWorld'
>>> a.indices(len(s))		#根据序列长度返回调整后的(start, stop, step)元组，a本身不变：(5, 50, 2) -> (5, 10, 2)
(5, 10, 2)
>>> for i in range(*a.indices(len(s))):
		print(s[i])
W
r
d

序列中出现次数最多的元素

>>> words = [
	'look', 'into', 'my', 'eyes', 'look', 'into','my','eyes',
	'the', 'eyes','the','eyes','the','eyes','not','around','the',
	'eyes','dont','look', 'around','the','eyes','look','into',
	'my','eyes','you','under'
	]
>>> from collections import Counter
>>> word_counts = Counter(words)		#整合每个元素出现的次数
>>> word_counts
Counter({'eyes': 8, 'the': 5, 'look': 4, 'into': 3, 'my': 3, 'around': 2, 'not': 1, 'dont': 1, 'you': 1, 'under': 1})
>>> word_counts.most_common(3)		#出现频率最高的3个元素
[('eyes', 8), ('the', 5), ('look', 4)]
>>> word_counts['look']
4
>>> word_counts['look'] += 1			#可以通过加法手动增加计数
>>> word_counts['look']
5
>>> morewords = ['why', 'are','you','not','looking','in','my','eyes']
>>> word_counts.update(morewords)	#也可以通过update方法追加元素计数
>>> word_counts
Counter({'eyes': 9, 'look': 5, 'the': 5, 'my': 4, 'into': 3, 'not': 2, 'around': 2, 'you': 2, 'dont': 1, 'under': 1, 'why': 1, 'are': 1, 'looking': 1, 'in': 1})
>>> a = Counter(words)
>>> b = Counter(morewords)
>>> a
Counter({'eyes': 8, 'the': 5, 'look': 4, 'into': 3, 'my': 3, 'around': 2, 'not': 1, 'dont': 1, 'you': 1, 'under': 1})
>>> b
Counter({'why': 1, 'are': 1, 'you': 1, 'not': 1, 'looking': 1, 'in': 1, 'my': 1, 'eyes': 1})
>>> c = a + b		#Counter实例可以进行数学运算
>>> c
Counter({'eyes': 9, 'the': 5, 'look': 4, 'my': 4, 'into': 3, 'not': 2, 'around': 2, 'you': 2, 'dont': 1, 'under': 1, 'why': 1, 'are': 1, 'looking': 1, 'in': 1})
>>> d = a - b
>>> d
Counter({'eyes': 7, 'the': 5, 'look': 4, 'into': 3, 'my': 2, 'around': 2, 'dont': 1, 'under': 1})
>>>

通过某个关键字排序字典列表

>>> rows = [
	{'fname':'Brian', 'lname': 'Jones', 'uid': 1003},
	{'fname':'David', 'lname': 'Beazley', 'uid': 1002},
	{'fname':'John', 'lname': 'Cleese', 'uid': 1001},
	{'fname':'Big', 'lname': 'Jones', 'uid': 1004}
	]
>>> from operator import itemgetter
>>> rows_by_fname = sorted(rows, key=itemgetter('fname'))	#使用itemgetter()
>>> rows_by_fname
[{'fname': 'Big', 'lname': 'Jones', 'uid': 1004}, {'fname': 'Brian', 'lname': 'Jones', 'uid': 1003}, {'fname': 'David', 'lname': 'Beazley', 'uid': 1002}, {'fname': 'John', 'lname': 'Cleese', 'uid': 1001}]
>>> rows_by_uid = sorted(rows, key=itemgetter('uid'))
>>> rows_by_uid
[{'fname': 'John', 'lname': 'Cleese', 'uid': 1001}, {'fname': 'David', 'lname': 'Beazley', 'uid': 1002}, {'fname': 'Brian', 'lname': 'Jones', 'uid': 1003}, {'fname': 'Big', 'lname': 'Jones', 'uid': 1004}]
>>> rows_by_lfname = sorted(rows, key=itemgetter('lname', 'fname'))		#itemgetter支持多个keys
>>> rows_by_lfname
[{'fname': 'David', 'lname': 'Beazley', 'uid': 1002}, {'fname': 'John', 'lname': 'Cleese', 'uid': 1001}, {'fname': 'Big', 'lname': 'Jones', 'uid': 1004}, {'fname': 'Brian', 'lname': 'Jones', 'uid': 1003}]
>>> rows_by_fname = sorted(rows, key=lambda r: r['fname'])	#itemgetter可以用lambda表达式代替，但是itemgetter运行快一些
>>> rows_by_fname
[{'fname': 'Big', 'lname': 'Jones', 'uid': 1004}, {'fname': 'Brian', 'lname': 'Jones', 'uid': 1003}, {'fname': 'David', 'lname': 'Beazley', 'uid': 1002}, {'fname': 'John', 'lname': 'Cleese', 'uid': 1001}]
>>> rows_by_lfname = sorted(rows, key=lambda r: (r['lname'],r['fname']))
>>> rows_by_lfname
[{'fname': 'David', 'lname': 'Beazley', 'uid': 1002}, {'fname': 'John', 'lname': 'Cleese', 'uid': 1001}, {'fname': 'Big', 'lname': 'Jones', 'uid': 1004}, {'fname': 'Brian', 'lname': 'Jones', 'uid': 1003}]
>>> min(rows, key=itemgetter('uid'))		#同样适用于min和max
{'fname': 'John', 'lname': 'Cleese', 'uid': 1001}
>>> max(rows, key=itemgetter('uid'))
{'fname': 'Big', 'lname': 'Jones', 'uid': 1004}
>>>

排序不支持原生比较的对象

>>> class User:
		def __init__(self, user_id):
			self.user_id = user_id
		def __repr__(self):
			return 'User({})'.format(self.user_id)
>>> users = [User(23), User(3), User(99)]
>>> sorted(users, key=lambda u: u.user_id)
[User(3), User(23), User(99)]
>>> from operator import attrgetter
>>> sorted(users, key=attrgetter('user_id'))	#attrgetter可以代替lambda，运行更快且可以多字段比较
[User(3), User(23), User(99)]
>>> min(users, key=attrgetter('user_id'))	#同样适用于min和max
User(3)
>>> max(users, key=attrgetter('user_id'))
User(99)

通过某个字段记录分组

>>> rows = [
	{'addr': 'xxx', 'date': 'xx/xx'},
	{'addr': 'yyy', 'date': 'xx/yy'},
	{'addr': 'zzz', 'date': 'xx/zz'},
	{'addr': 'iii', 'date': 'xx/ii'},
	{'addr': 'jjj', 'date': 'xx/zz'},
	{'addr': 'kkk', 'date': 'xx/zz'},
	{'addr': 'mmm', 'date': 'xx/xx'},
	{'addr': 'nnn', 'date': 'xx/yy'},
	]
>>> from operator import itemgetter
>>> from itertools import groupby
>>> rows.sort(key=itemgetter('date'))		#必须先根据指定字段进行排序，才能做后续分组操作
>>> for date, items in groupby(rows, key=itemgetter('date')):
		print(date)
		for i in items:
			print('', i)

xx/ii
 {'addr': 'iii', 'date': 'xx/ii'}
xx/xx
 {'addr': 'xxx', 'date': 'xx/xx'}
 {'addr': 'mmm', 'date': 'xx/xx'}
xx/yy
 {'addr': 'yyy', 'date': 'xx/yy'}
 {'addr': 'nnn', 'date': 'xx/yy'}
xx/zz
 {'addr': 'zzz', 'date': 'xx/zz'}
 {'addr': 'jjj', 'date': 'xx/zz'}
 {'addr': 'kkk', 'date': 'xx/zz'}
>>>

过滤序列元素

列表推导，如果输入非常大会占用大量内存

>>> mylist = [1, 4, -5, 10, -7, 2, 3, -1]
>>> [n for n in mylist if n > 0]
[1, 4, 10, 2, 3]
>>> [n for n in mylist if n < 0]
[-5, -7, -1]

生成器表达式

>>> pos = (n for n in mylist if n > 0)
>>> pos
<generator object <genexpr> at 0x000002CC5F62A7B0>
>>> a = list(pos)
>>> a
[1, 4, 10, 2, 3]

如果过滤时需要处理异常，可以将过滤代码放到函数中再使用filter()函数，filter()返回一个迭代器，需要用list()转换为列表类型

>>> value = ['1', '2', '-3', '-', '4', 'N/A', '5']
>>> def is_int(val):
		try:
			x = int(val)
			return True
		except ValueError:
			return False
>>> ivals = list(filter(is_int, value))
>>> ivals
['1', '2', '-3', '4', '5']

还可以在过滤时转换数据

>>> mylist
[1, 4, -5, 10, -7, 2, 3, -1]
>>> import math
>>> [math.sqrt(n) for n in mylist if n > 0]
[1.0, 2.0, 3.1622776601683795, 1.4142135623730951, 1.7320508075688772]
>>>

还可以在过滤时将不符合条件的值用新值代替

>>> mylist
[1, 4, -5, 10, -7, 2, 3, -1]
>>> clip_neg = [n if n > 0 else 0 for n in mylist]
>>> clip_neg
[1, 4, 0, 10, 0, 2, 3, 0]
>>> clip_neg = [n if n < 0 else 0 for n in mylist]
>>> clip_neg
[0, 0, -5, 0, -7, 0, 0, -1]

itertools.compress()
先创建Boolean序列，指定哪些元素符合条件，然后compress()函数根据这个序列去选择对应位置为True的元素，compress()返回一个迭代器，需要用list()转换为列表类型

>>> addr = [
	'xxx',
	'yyy',
	'zzz',
	'iii',
	'jjj',
	'kkk',
	'mmm',
	'nnn',
	]
>>> counts = [0, 3, 10, 4, 1, 7, 6, 1]
>>> from itertools import compress
>>> more5 = [n > 5 for n in counts]
>>> more5
[False, False, True, False, False, True, True, False]
>>> list (compress(addr, more5))
['zzz', 'kkk', 'mmm']
>>>

从字典中提取子集

字典推导

>>> prices = {
	'xxx': 45.23,
	'yyy': 612.78,
	'zzz': 205.55,
	'mmm': 37.20,
	'nnn': 10.75
	}
>>> p1 = {key: value for key, value in prices.items() if value > 200}
>>> p1
{'yyy': 612.78, 'zzz': 205.55}
>>> tech_names = {'xxx', 'yyy', 'zzz', 'iii'}
>>> p2 = {key: value for key, value in prices.items() if key in tech_names}
>>> p2
{'xxx': 45.23, 'yyy': 612.78, 'zzz': 205.55}
>>>

映射名称到序列元素

>>> from collections import namedtuple
>>> Subscriber = namedtuple('Subscriber', ['addr', 'joined'])
>>> sub = Subscriber('xxxxx', 'yyyyyy')
>>> sub
Subscriber(addr='xxxxx', joined='yyyyyy')
>>> sub.addr
'xxxxx'
>>> sub.joined
'yyyyyy'
>>> len(sub)
2
>>> addr, joined = sub
>>> addr
'xxxxx'
>>> joined
'yyyyyy'
>>>

用命名元组替代下标操作

下标操作：
>>> def compute_cost(records):
		total = 0.0
		for rec in records:
			total += rec[1] * rec[2]
		return total
命名元组操作：
>>> from collections import namedtuple
>>> Stock = namedtuple('Stock', ['name', 'shares', 'price'])
>>> def compute_cost(records):
		total = 0.0
		for rec in records:
			s = Stock(*rec)
			total += s.shares * s.price		#用命名元组使代码表意更清晰
		return total

命名元组可以作为字典的替代，比字典存储节省内存空间且高效，但命名元组不可更改

>>> Stock = namedtuple('Stock', ['name', 'shares', 'price'])
>>> s = Stock('xxx', 100, 123.45)
>>> s
Stock(name='xxx', shares=100, price=123.45)
>>> s.shares = 75
Traceback (most recent call last):
  File "<pyshell#519>", line 1, in <module>
    s.shares = 75
AttributeError: can't set attribute

可以使用_replace()方法：
>>> s = s._replace(shares = 75)
>>> s
Stock(name='xxx', shares=75, price=123.45)
>>>

转换并同时计算数据

>>> nums = [1, 2, 3, 4, 5]
>>> s = sum(x * x for x in nums)
>>> s
55
>>>

合并多个字典或映射

假设从两个字典中查找，先从a中找，如果找不到再从b中找
>>> a = {'x': 1, 'z': 3}
>>> b = {'y': 2, 'z': 4}
>>> from collections import ChainMap
>>> c = ChainMap(a, b)
>>> c['x']		#从a中找到的
1
>>> c['y']		#从b中找到的
2
>>> c['z']		#从a中找到的（a和b都有'z'时，取排在前面的字典a中的值）
3
>>> c['z'] = 10		#对字典的更新或删除操作总是只影响第一个字典
>>> list(c.values())
[2, 10, 1]
>>> a
{'x': 1, 'z': 10}
>>> b
{'y': 2, 'z': 4}
>>>