找出一个序列中出现次数最多的元素
代码片段
def countTest() -> None:
    """Demonstrate finding the most frequent items in a sequence with Counter.

    Prints, in order: the initial tally, the tally after feeding the same
    words in a second time, the three most common words, and the count
    for ``'eyes'``.
    """
    words = [
        'look', 'into', 'my', 'eyes', 'look', 'into', 'my', 'eyes',
        'the', 'eyes', 'the', 'eyes', 'the', 'eyes', 'not', 'around', 'the',
        'eyes', "don't", 'look', 'around', 'the', 'eyes', 'look', 'into',
        'my', 'eyes', "you're", 'under',
    ]
    # Counter({'eyes': 8, 'the': 5, 'look': 4, 'into': 3, 'my': 3,
    #          'around': 2, 'not': 1, "don't": 1, "you're": 1, 'under': 1})
    print(Counter(words))

    wordsCount = Counter(words)
    # Counter.update() adds to the tally in place and returns None, so it
    # must be called on its own line; printing its return value would only
    # ever show "None". Every count is doubled after this call.
    wordsCount.update(words)
    print("update:", wordsCount)
    print(wordsCount.most_common(3))  # [('eyes', 16), ('the', 10), ('look', 8)]
    print(wordsCount['eyes'])  # 16
对象列表排序
代码片段
def dictListSorted() -> None:
    """Demonstrate sorting a list of dicts by one or more keys.

    Each ordering is printed twice, once using ``operator.itemgetter`` and
    once using an equivalent ``lambda``, to show that both key functions
    give the same result. Finally prints the ``min`` by ``fname`` and the
    ``max`` by ``uid``.
    """
    rows = [
        {'fname': 'Brian', 'lname': 'Jones', 'uid': 1003},
        {'fname': 'David', 'lname': 'Beazley', 'uid': 1002},
        {'fname': 'John', 'lname': 'Cleese', 'uid': 1001},
        {'fname': 'Big', 'lname': 'Jones', 'uid': 1004},
    ]

    # Sorted by first name:
    # [{'fname': 'Big', 'lname': 'Jones', 'uid': 1004},
    #  {'fname': 'Brian', 'lname': 'Jones', 'uid': 1003},
    #  {'fname': 'David', 'lname': 'Beazley', 'uid': 1002},
    #  {'fname': 'John', 'lname': 'Cleese', 'uid': 1001}]
    print(sorted(rows, key=itemgetter('fname')))
    print(sorted(rows, key=lambda r: r['fname']))

    # Sorted by uid:
    # [{'fname': 'John', 'lname': 'Cleese', 'uid': 1001},
    #  {'fname': 'David', 'lname': 'Beazley', 'uid': 1002},
    #  {'fname': 'Brian', 'lname': 'Jones', 'uid': 1003},
    #  {'fname': 'Big', 'lname': 'Jones', 'uid': 1004}]
    print(sorted(rows, key=itemgetter('uid')))
    print(sorted(rows, key=lambda r: r['uid']))

    # Sorted by (fname, uid); itemgetter with several keys returns a tuple:
    # [{'fname': 'Big', 'lname': 'Jones', 'uid': 1004},
    #  {'fname': 'Brian', 'lname': 'Jones', 'uid': 1003},
    #  {'fname': 'David', 'lname': 'Beazley', 'uid': 1002},
    #  {'fname': 'John', 'lname': 'Cleese', 'uid': 1001}]
    print(sorted(rows, key=itemgetter('fname', 'uid')))

    # min: {'fname': 'Big', 'lname': 'Jones', 'uid': 1004}
    print("min:", min(rows, key=itemgetter('fname')))
    # max: {'fname': 'Big', 'lname': 'Jones', 'uid': 1004}
    print("max:", max(rows, key=itemgetter('uid')))
排序不支持比较的对象列表
代码片段
class User:
    """Minimal record type holding a single ``user_id``.

    The class deliberately defines no ordering methods (``__lt__`` etc.),
    so instances cannot be compared with ``<`` directly; sorting them
    requires an explicit ``key`` function.
    """

    def __init__(self, user_id):
        self.user_id = user_id

    def __repr__(self):
        # Readable representation only; this does not affect comparison.
        return 'User({})'.format(self.user_id)
def sortNotCompare() -> None:
    """Demonstrate sorting objects that do not support comparison natively.

    Builds three ``User`` instances and orders them by ``user_id`` through
    a ``key`` function, shown with both a ``lambda`` and
    ``operator.attrgetter``. The same key functions are then used with
    ``min`` and ``max``.
    """
    users = [User(30), User(25), User(18)]
    print(users)  # [User(30), User(25), User(18)]

    # A lambda and attrgetter are interchangeable as key functions.
    print(sorted(users, key=lambda u: u.user_id))  # [User(18), User(25), User(30)]
    print(sorted(users, key=attrgetter('user_id')))  # [User(18), User(25), User(30)]
    print(sorted(users, key=lambda k: k.user_id))  # [User(18), User(25), User(30)]

    print(min(users, key=attrgetter('user_id')))  # User(18)
    print(max(users, key=lambda k: k.user_id))  # User(30)
通过某个字段将记录分组
代码片段
def groupbyTest() -> None:
    """Demonstrate grouping records by a field with ``itertools.groupby``.

    Prints each distinct date followed by the rows for that date, each row
    prefixed with a single space.
    """
    rows = [
        {'address': '5412 N CLARK', 'date': '07/01/2012'},
        {'address': '5148 N CLARK', 'date': '07/04/2012'},
        {'address': '5800 E 58TH', 'date': '07/02/2012'},
        {'address': '2122 N CLARK', 'date': '07/03/2012'},
        {'address': '5645 N RAVENSWOOD', 'date': '07/02/2012'},
        {'address': '1060 W ADDISON', 'date': '07/02/2012'},
        {'address': '4801 N BROADWAY', 'date': '07/01/2012'},
        {'address': '1039 W GRANVILLE', 'date': '07/04/2012'},
    ]
    by_date = itemgetter('date')

    # groupby only merges *consecutive* items with equal keys, so the rows
    # must be sorted by the same key first. list.sort() reorders the
    # original list in place.
    rows.sort(key=by_date)

    for date, group in groupby(rows, key=by_date):
        print(date)
        for row in group:
            print("", row)
可迭代对象筛选
def filterLogic(val):
    """Predicate for ``filter()``: return True when ``val`` is greater than 0."""
    # The comparison already yields the boolean; no if/else needed.
    return val > 0
def filterList() -> None:
    """Demonstrate two ways of keeping only the positive numbers of a list.

    Prints the same filtered list twice: once built with a list
    comprehension and once produced by ``filter()`` with ``filterLogic``
    as the predicate.
    """
    nums = [1, 2, 3, 4, -4, 8, -7, 12, 4, -9]

    # A list comprehension builds the whole result list in memory at once.
    positives = [n for n in nums if n > 0]
    print(positives)  # [1, 2, 3, 4, 8, 12, 4]

    # filter() returns an iterator instead; it is materialised here only
    # so that it can be printed.
    positives_iter = filter(filterLogic, nums)
    print(list(positives_iter))  # [1, 2, 3, 4, 8, 12, 4]
def compressList() -> None:
    """Demonstrate selecting items from one sequence using values in another.

    Prints the addresses whose corresponding entry in ``counts`` is
    greater than 5.
    """
    addresses = [
        '5412 N CLARK',
        '5148 N CLARK',
        '5800 E 58TH',
        '2122 N CLARK',
        '5645 N RAVENSWOOD',
        '1060 W ADDISON',
        '4801 N BROADWAY',
        '1039 W GRANVILLE',
    ]
    counts = [0, 3, 10, 4, 1, 7, 6, 1]

    # The key point is to first build a sequence of booleans marking which
    # positions satisfy the condition; compress() then yields the elements
    # of `addresses` at the positions where the selector is True.
    selectors = [n > 5 for n in counts]
    # ['5800 E 58TH', '1060 W ADDISON', '4801 N BROADWAY']
    print(list(compress(addresses, selectors)))
字典过滤
代码片段
def filterDict() -> None:
    """Demonstrate filtering a dict with dict comprehensions.

    Prints two subsets of ``prices``: entries whose value is above 100,
    and entries whose key appears in a set of technology tickers.
    """
    prices = {
        'ACME': 45.23,
        'AAPL': 612.78,
        'IBM': 205.55,
        'HPQ': 37.20,
        'FB': 10.75,
    }

    # Keep entries by value.
    expensive = {name: price for name, price in prices.items() if price > 100}
    print(expensive)  # {'AAPL': 612.78, 'IBM': 205.55}

    # Keep entries by key; names missing from `prices` (e.g. 'MSFT') are
    # simply absent from the result.
    tech_names = {'AAPL', 'IBM', 'HPQ', 'MSFT'}
    tech_prices = {name: price for name, price in prices.items() if name in tech_names}
    print(tech_prices)  # {'AAPL': 612.78, 'IBM': 205.55, 'HPQ': 37.2}