Python3-cookbook- 笔记1 - 数据结构和算法

最新推荐文章于 2024-11-01 23:55:42 发布

weixin_33814685

最新推荐文章于 2024-11-01 23:55:42 发布

阅读量137

点赞数

文章标签：数据结构与算法 python

原文链接：https://juejin.im/post/5b3de94de51d45191e0cae6c

版权

元组或者列表的解包赋值

# Unpack a record in a single assignment; the nested tuple pattern on the
# left-hand side splits the date into its three components as well.
data = ['ACME', 50, 91.1, (2012, 12, 21)]
name, shares, price, (year, month, day) = data
print(name, shares, price, year, month, day)
# ACME 50 91.1 2012 12 21
复制代码

查找最大或者最小的N个元素

import heapq

nums = [1, 8, 2, 23, 7, -4, 18, 23, 42, 37, 2]
print(heapq.nlargest(3, nums))   # [42, 37, 23]
print(heapq.nsmallest(3, nums))  # [-4, 1, 2]

# Both functions also take a ``key`` argument, which makes them usable with
# more complex records such as the dictionaries below.
portfolio = [
    {'name': 'IBM', 'shares': 100, 'price': 91.1},
    {'name': 'AAPL', 'shares': 50, 'price': 543.22},
    {'name': 'FB', 'shares': 200, 'price': 21.09},
    {'name': 'HPQ', 'shares': 35, 'price': 31.75},
    {'name': 'YHOO', 'shares': 45, 'price': 16.35},
    {'name': 'ACME', 'shares': 75, 'price': 115.65},
]

# Three cheapest and three most expensive holdings, ranked by price.
cheap = heapq.nsmallest(3, portfolio, key=lambda stock: stock['price'])
expensive = heapq.nlargest(3, portfolio, key=lambda stock: stock['price'])

print(cheap)
print(expensive)
# [{'name': 'YHOO', 'shares': 45, 'price': 16.35}, {'name': 'FB', 'shares': 200, 'price': 21.09}, {'name': 'HPQ', 'shares': 35, 'price': 31.75}]
# [{'name': 'AAPL', 'shares': 50, 'price': 543.22}, {'name': 'ACME', 'shares': 75, 'price': 115.65}, {'name': 'IBM', 'shares': 100, 'price': 91.1}]

复制代码

下面的类利用 heapq 模块实现了一个简单的优先级队列

class PriorityQueue:
    """Priority queue built on ``heapq`` that pops the highest priority first.

    Entries are stored as ``(-priority, index, item)`` tuples. Negating the
    priority makes the min-heap yield the largest priority first, and the
    monotonically increasing ``index`` breaks ties so that entries with equal
    priority come out in insertion order without comparing the items.
    """

    def __init__(self):
        self._queue = []  # heap of (-priority, index, item) tuples
        self._index = 0   # insertion counter used as the tie-breaker

    def push(self, item, priority):
        """Add *item* to the queue; a larger *priority* is popped sooner."""
        heapq.heappush(self._queue, (-priority, self._index, item))
        self._index += 1

    def pop(self):
        """Remove the top entry and return its item.

        Raises:
            IndexError: if the queue is empty.
        """
        return heapq.heappop(self._queue)[-1]

    def __len__(self):
        """Return the number of entries currently queued."""
        return len(self._queue)

    def pop_last(self, last):
        """Remove the top entry and return its field at index ``-last``.

        For the stored ``(-priority, index, item)`` tuple, ``last=1`` gives
        the item, ``last=2`` the insertion index and ``last=3`` the negated
        priority.

        Raises:
            IndexError: if the queue is empty or ``-last`` is not a valid
                index into the entry. The queue is left unchanged in both
                cases.
        """
        # The smallest heap entry is always at position 0. Index into it
        # *before* popping so that a bad ``last`` cannot silently drop it.
        entry = self._queue[0]
        field = entry[-last]
        heapq.heappop(self._queue)
        return field


class Item:
    """Minimal named object used to demonstrate the priority queue."""

    def __init__(self, name):
        self.name = name

    def __repr__(self):
        # %r applies repr() to the name, so strings are shown quoted.
        return 'Item(%r)' % (self.name,)
# Usage: items come back highest priority first; 'foo' and 'grok' share
# priority 1 and therefore keep their insertion order.
q = PriorityQueue()
q.push(item=Item('foo'), priority=1)
q.push(item=Item('bar'), priority=5)
q.push(item=Item('spam'), priority=4)
q.push(item=Item('grok'), priority=1)

# Drain the queue; it is falsy once __len__ reports zero entries.
while q:
    print(q.pop())

# Item('bar')
# Item('spam')
# Item('foo')
# Item('grok')
复制代码

让字典保持元素的插入顺序（OrderedDict）

from collections import OrderedDict

# An OrderedDict remembers the order in which keys were first inserted,
# so iteration yields them in exactly that order.
d = OrderedDict([('a', 1), ('b', 2), ('c', 3)])

for k, v in d.items():
    print(k, v)

# a 1
# b 2
# c 3
复制代码

字典的运算

对字典的值执行求最小值、最大值、排序等计算时，通常需要使用 zip() 函数先将键和值反转过来

# Sample data: stock symbol -> price. Defined here so that this snippet runs
# on its own instead of failing with NameError on ``prices``.
prices = {
    'ACME': 45.23,
    'AAPL': 612.78,
    'IBM': 205.55,
    'HPQ': 37.20,
    'FB': 10.75,
}

# zip() pairs each value with its key as (price, symbol), so min(), max()
# and sorted() compare by price first and still report the matching symbol.
min_price = min(zip(prices.values(), prices.keys()))
# min_price is (10.75, 'FB')
max_price = max(zip(prices.values(), prices.keys()))
# max_price is (612.78, 'AAPL')

prices_sorted = sorted(zip(prices.values(), prices.keys()))
# prices_sorted is [(10.75, 'FB'), (37.2, 'HPQ'),
#                   (45.23, 'ACME'), (205.55, 'IBM'),
#                   (612.78, 'AAPL')]
复制代码

命名切片

如果你的程序包含了大量无法直视的硬编码切片下标，可以用内置的 slice() 给切片命名，让代码更清晰可读

record = '....................100 .......513.25 ..........'

# Before: the bare indices give no hint of what each field contains.
cost = int(record[20:23]) * float(record[31:37])

# After: named slice objects describe the same two fields.
SHARES, PRICE = slice(20, 23), slice(31, 37)
cost = int(record[SHARES]) * float(record[PRICE])
复制代码

序列中出现次数最多的元素

from collections import Counter

words = [
    'look', 'into', 'my', 'eyes', 'look', 'into', 'my', 'eyes',
    'the', 'eyes', 'the', 'eyes', 'the', 'eyes', 'not', 'around', 'the',
    'eyes', "don't", 'look', 'around', 'the', 'eyes', 'look', 'into',
    'my', 'eyes', "you're", 'under'
]

# Tally how many times each word occurs.
word_counts = Counter()
word_counts.update(words)

# The three most frequent words together with their counts.
top_three = word_counts.most_common(3)
print(top_three)

# [('eyes', 8), ('the', 5), ('look', 4)]

复制代码

通过某个关键字排序一个字典列表

from operator import itemgetter

rows = [
    {'fname': 'Brian', 'lname': 'Jones', 'uid': 1003},
    {'fname': 'David', 'lname': 'Beazley', 'uid': 1002},
    {'fname': 'John', 'lname': 'Cleese', 'uid': 1001},
    {'fname': 'Big', 'lname': 'Jones', 'uid': 1004}
]

# Sort copies of the list so that ``rows`` itself keeps its original order.
rows_by_fname = list(rows)
rows_by_fname.sort(key=itemgetter('fname'))
rows_by_uid = list(rows)
rows_by_uid.sort(key=itemgetter('uid'))
print(rows_by_fname)
print(rows_by_uid)

# Output (dict keys shown in insertion order, as on Python 3.7+):
# [{'fname': 'Big', 'lname': 'Jones', 'uid': 1004}, {'fname': 'Brian', 'lname': 'Jones', 'uid': 1003}, {'fname': 'David', 'lname': 'Beazley', 'uid': 1002}, {'fname': 'John', 'lname': 'Cleese', 'uid': 1001}]
# [{'fname': 'John', 'lname': 'Cleese', 'uid': 1001}, {'fname': 'David', 'lname': 'Beazley', 'uid': 1002}, {'fname': 'Brian', 'lname': 'Jones', 'uid': 1003}, {'fname': 'Big', 'lname': 'Jones', 'uid': 1004}]

# itemgetter() also accepts several keys; the sort then compares the
# resulting (lname, fname) tuples.
rows_by_lfname = list(rows)
rows_by_lfname.sort(key=itemgetter('lname', 'fname'))
print(rows_by_lfname)
# [{'fname': 'David', 'lname': 'Beazley', 'uid': 1002}, {'fname': 'John', 'lname': 'Cleese', 'uid': 1001}, {'fname': 'Big', 'lname': 'Jones', 'uid': 1004}, {'fname': 'Brian', 'lname': 'Jones', 'uid': 1003}]

复制代码

排序不支持原生比较的对象

from operator import attrgetter

class User:
    """Plain record holding a user id; it defines no comparison methods."""

    def __init__(self, user_id):
        self.user_id = user_id

    def __repr__(self):
        return 'User({uid})'.format(uid=self.user_id)

users = [User(23), User(3), User(99)]
# User objects define no ordering of their own, so sort on the attribute.
# sorted() returns a new list; keep and print it rather than discarding it.
users_by_id = sorted(users, key=attrgetter('user_id'))
print(users_by_id)
# [User(3), User(23), User(99)]
复制代码

通过某个字段将记录分组

from operator import itemgetter
from itertools import groupby

rows = [
    {'address': '5412 N CLARK', 'date': '07/01/2012'},
    {'address': '5148 N CLARK', 'date': '07/04/2012'},
    {'address': '5800 E 58TH', 'date': '07/02/2012'},
    {'address': '2122 N CLARK', 'date': '07/03/2012'},
    {'address': '5645 N RAVENSWOOD', 'date': '07/02/2012'},
    {'address': '1060 W ADDISON', 'date': '07/02/2012'},
    {'address': '4801 N BROADWAY', 'date': '07/01/2012'},
    {'address': '1039 W GRANVILLE', 'date': '07/04/2012'},
]

# To iterate over the records in chunks grouped by date, first sort by that
# field: groupby() only merges *consecutive* items that share a key.
date_of = itemgetter('date')
rows.sort(key=date_of)

for date, items in groupby(rows, key=date_of):
    print(date)
    for entry in items:
        print(' ', entry)
# 07/01/2012
#   {'address': '5412 N CLARK', 'date': '07/01/2012'}
#   {'address': '4801 N BROADWAY', 'date': '07/01/2012'}
# 07/02/2012
#   {'address': '5800 E 58TH', 'date': '07/02/2012'}
#   {'address': '5645 N RAVENSWOOD', 'date': '07/02/2012'}
#   {'address': '1060 W ADDISON', 'date': '07/02/2012'}
# 07/03/2012
#   {'address': '2122 N CLARK', 'date': '07/03/2012'}
# 07/04/2012
#   {'address': '5148 N CLARK', 'date': '07/04/2012'}
#   {'address': '1039 W GRANVILLE', 'date': '07/04/2012'}

复制代码

从字典中提取子集

prices = {
    'ACME': 45.23,
    'AAPL': 612.78,
    'IBM': 205.55,
    'HPQ': 37.20,
    'FB': 10.75
}

# Subset 1: every stock priced above 200.
p1 = {symbol: prices[symbol] for symbol in prices if prices[symbol] > 200}

# Subset 2: only the tech stocks; names missing from ``prices`` are ignored.
tech_names = {'AAPL', 'IBM', 'HPQ', 'MSFT'}
p2 = {symbol: prices[symbol] for symbol in prices if symbol in tech_names}

复制代码

映射名称到序列元素

from collections import namedtuple

# A namedtuple gives each tuple position a readable attribute name.
Subscriber = namedtuple('Subscriber', 'addr joined')
sub = Subscriber(addr='jonesy@example.com', joined='2012-10-19')
print(sub.addr)
print(sub.joined)
复制代码

转载于:https://juejin.im/post/5b3de94de51d45191e0cae6c