元祖或者数组赋值
data = [ 'ACME', 50, 91.1, (2012, 12, 21) ]
name , shares , price , (year,month,day) = data
print(name , shares , price ,year,month,day)
# ACME 50 91.1 2012 12 21
复制代码
查找最大或者最小的N个元素
import heapq
nums = [1, 8, 2, 23, 7, -4, 18, 23, 42, 37, 2]
print(heapq.nlargest(3,nums)) # [42, 37, 23]
print(heapq.nsmallest(3,nums)) # [-4, 1, 2]
# 两个函数都能接受一个关键字参数,用于更复杂的数据结构中:
portfolio = [
{'name': 'IBM', 'shares': 100, 'price': 91.1},
{'name': 'AAPL', 'shares': 50, 'price': 543.22},
{'name': 'FB', 'shares': 200, 'price': 21.09},
{'name': 'HPQ', 'shares': 35, 'price': 31.75},
{'name': 'YHOO', 'shares': 45, 'price': 16.35},
{'name': 'ACME', 'shares': 75, 'price': 115.65}
]
cheap = heapq.nsmallest(3, portfolio, key=lambda s: s['price'])
expensive = heapq.nlargest(3, portfolio, key=lambda s: s['price'])
print(cheap)
print(expensive)
# [{'name': 'YHOO', 'shares': 45, 'price': 16.35}, {'name': 'FB', 'shares': 200, 'price': 21.09}, {'name': 'HPQ', 'shares': 35, 'price': 31.75}]
# [{'name': 'AAPL', 'shares': 50, 'price': 543.22}, {'name': 'ACME', 'shares': 75, 'price': 115.65}, {'name': 'IBM', 'shares': 100, 'price': 91.1}]
复制代码
下面的类利用 heapq 模块实现了一个简单的优先级队列
class PriorityQueue:
def __init__(self):
self._queue = []
self._index = 0
def push(self,item,priority):
heapq.heappush(self._queue,(-priority,self._index,item))
self._index += 1
def pop(self):
return heapq.heappop(self._queue)[-1]
def __len__(self):
return len(self._queue)
def pop_last(self,last):
return heapq.heappop(self._queue)[-last]
class Item:
def __init__(self,name):
self.name = name
def __repr__(self):
return 'Item({!r})'.format(self.name)
# 使用
q = PriorityQueue()
q.push(Item('foo'),1)
q.push(Item('bar'),5)
q.push(Item('spam'),4)
q.push(Item('grok'),1)
print(q.pop())
print(q.pop())
print(q.pop())
print(q.pop())
# Item('bar')
# Item('spam')
# Item('foo')
# Item('grok')
复制代码
字典排序
from collections import OrderedDict
d = OrderedDict()
d['a'] = 1
d['b'] = 2
d['c'] = 3
for k in d:
print(k , d[k])
# a 1
# b 2
# c 3
复制代码
字典的运算
通常需要使用 zip() 函数先将键和值反转过来
min_price = min(zip(prices.values(), prices.keys()))
# min_price is (10.75, 'FB')
max_price = max(zip(prices.values(), prices.keys()))
# max_price is (612.78, 'AAPL')
prices_sorted = sorted(zip(prices.values(), prices.keys()))
# prices_sorted is [(10.75, 'FB'), (37.2, 'HPQ'),
# (45.23, 'ACME'), (205.55, 'IBM'),
# (612.78, 'AAPL')]
复制代码
命名切片
如果你的程序包含了大量无法直视的硬编码切片
record = '....................100 .......513.25 ..........'
cost = int(record[20:23]) * float(record[31:37])
SHARES = slice(20, 23)
PRICE = slice(31, 37)
cost = int(record[SHARES]) * float(record[PRICE])
复制代码
序列中出现次数最多的元素
words = [
'look', 'into', 'my', 'eyes', 'look', 'into', 'my', 'eyes',
'the', 'eyes', 'the', 'eyes', 'the', 'eyes', 'not', 'around', 'the',
'eyes', "don't", 'look', 'around', 'the', 'eyes', 'look', 'into',
'my', 'eyes', "you're", 'under'
]
from collections import Counter
word_counts = Counter(words)
# 出现频率最高的3个单词
top_three = word_counts.most_common(3)
print(top_three)
# [('eyes', 8), ('the', 5), ('look', 4)]
复制代码
通过某个关键字排序一个字典列表
from operator import itemgetter
rows = [
{'fname': 'Brian', 'lname': 'Jones', 'uid': 1003},
{'fname': 'David', 'lname': 'Beazley', 'uid': 1002},
{'fname': 'John', 'lname': 'Cleese', 'uid': 1001},
{'fname': 'Big', 'lname': 'Jones', 'uid': 1004}
]
rows_by_fname = sorted(rows, key=itemgetter('fname'))
rows_by_uid = sorted(rows, key=itemgetter('uid'))
print(rows_by_fname)
print(rows_by_uid)
# [{'lname': 'Jones', 'uid': 1004, 'fname': 'Big'}, {'lname': 'Jones', 'uid': 1003, 'fname': 'Brian'}, {'lname': 'Beazley', 'uid': 1002, 'fname': 'David'}, {'lname': 'Cleese', 'uid': 1001, 'fname': 'John'}]
# [{'lname': 'Cleese', 'uid': 1001, 'fname': 'John'}, {'lname': 'Beazley', 'uid': 1002, 'fname': 'David'}, {'lname': 'Jones', 'uid': 1003, 'fname': 'Brian'}, {'lname': 'Jones', 'uid': 1004, 'fname': 'Big'}]
# itemgetter() 函数也支持多个 keys,比如下面的代码
rows_by_lfname = sorted(rows, key=itemgetter('lname','fname'))
print(rows_by_lfname)
# [{'lname': 'Beazley', 'fname': 'David', 'uid': 1002}, {'lname': 'Cleese', 'fname': 'John', 'uid': 1001}, {'lname': 'Jones', 'fname': 'Big', 'uid': 1004}, {'lname': 'Jones', 'fname': 'Brian', 'uid': 1003}]
复制代码
排序不支持原生比较的对象
from operator import attrgetter
class User:
def __init__(self, user_id):
self.user_id = user_id
def __repr__(self):
return 'User({})'.format(self.user_id)
users = [User(23), User(3), User(99)]
sorted(users, key=attrgetter('user_id'))
复制代码
通过某个字段将记录分组
from operator import itemgetter
from itertools import groupby
rows = [
{'address': '5412 N CLARK', 'date': '07/01/2012'},
{'address': '5148 N CLARK', 'date': '07/04/2012'},
{'address': '5800 E 58TH', 'date': '07/02/2012'},
{'address': '2122 N CLARK', 'date': '07/03/2012'},
{'address': '5645 N RAVENSWOOD', 'date': '07/02/2012'},
{'address': '1060 W ADDISON', 'date': '07/02/2012'},
{'address': '4801 N BROADWAY', 'date': '07/01/2012'},
{'address': '1039 W GRANVILLE', 'date': '07/04/2012'},
]
# 现在假设你想在按 date 分组后的数据块上进行迭代。为了这样做,你首先需要按照指定的字段(这里就是 date )排序, 然后调用 itertools.groupby() 函数:
# Sort by the desired field first
rows.sort(key=itemgetter('date'))
for date, items in groupby(rows, key=itemgetter('date')):
print(date)
for i in items:
print(' ', i)
# 07/01/2012
# {'address': '5412 N CLARK', 'date': '07/01/2012'}
# {'address': '4801 N BROADWAY', 'date': '07/01/2012'}
# 07/02/2012
# {'address': '5800 E 58TH', 'date': '07/02/2012'}
# {'address': '5645 N RAVENSWOOD', 'date': '07/02/2012'}
# {'address': '1060 W ADDISON', 'date': '07/02/2012'}
# 07/03/2012
# {'address': '2122 N CLARK', 'date': '07/03/2012'}
# 07/04/2012
# {'address': '5148 N CLARK', 'date': '07/04/2012'}
# {'address': '1039 W GRANVILLE', 'date': '07/04/2012'}
复制代码
从字典中提取子集
prices = {
'ACME': 45.23,
'AAPL': 612.78,
'IBM': 205.55,
'HPQ': 37.20,
'FB': 10.75
}
# Make a dictionary of all prices over 200
p1 = {key: value for key, value in prices.items() if value > 200}
# Make a dictionary of tech stocks
tech_names = {'AAPL', 'IBM', 'HPQ', 'MSFT'}
p2 = {key: value for key, value in prices.items() if key in tech_names}
复制代码
映射名称到序列元素
from collections import namedtuple
Subscriber = namedtuple('Subscriber', ['addr', 'joined'])
sub = Subscriber('jonesy@example.com', '2012-10-19')
print(sub.addr)
print(sub.joined)
复制代码