一、python内置数据结构练习
A、过滤列表中的负数
# filter() version
import random

# Build a random list of 9 integers in [-100, 100]
rand_list = [random.randint(-100, 100) for _ in range(1, 10)]
print(rand_list)
# Keep the non-negative values. filter() returns a lazy iterator in Python 3,
# so materialize it once; otherwise `rs` would be exhausted after printing.
rs = list(filter(lambda x: x >= 0, rand_list))
print(rs)
```
```python
# List-comprehension version
import random

# Build a random list of 9 integers in [-100, 100]
rand_list = [random.randint(-100, 100) for _ in range(1, 10)]
# Shuffle the list in place
random.shuffle(rand_list)
print(rand_list)
# List comprehension: keep only the non-negative values
rs = [x for x in rand_list if x >= 0]
print(rs)
B、使用timeit查看表达式执行时间
Python3中的timeit模块可以用来测试小段代码的运行时间。
stmt:用于传入要测试时间的代码,可以直接接受字符串的表达式,也可以接受单个变量,也可以接受函数。
传入函数时要把函数声明在当前文件中,然后通过stmt = 'func()'执行函数,并使用setup = 'from __main__ import func'导入该函数。
setup:传入stmt的运行环境,比如stmt中使用到的参数、变量,要导入的模块等。
可以写成一行语句,也可以写多行语句,多行语句时要用分号隔开语句。
number:要测试的代码的运行次数,默认1000000次(一百万次)。
ipython环境。
import timeit
timeit.timeit(stmt = 'filter(lambda x: x >= 0, rand_list)', setup='rand_list = [92, 7, -5, 59, -90, 14, -63, 50, -74]')
Out[24]: 0.3463938022929085
timeit.timeit(stmt = '[x for x in rand_list if x >= 0]', setup='rand_list = [92, 7, -5, 59, -90, 14, -63, 50, -74]')
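Out[25]: 0.8586269034875329
注意:Python3中filter()返回的是惰性迭代器,上面第一条语句只测量了创建filter对象的时间,并没有真正执行过滤,因此这两个结果不能直接比较;要公平比较,应把stmt写成'list(filter(lambda x: x >= 0, rand_list))'。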
C、过滤出字典中value大于60的item
# ALL_ENG_NAMES = ['fAaliyah', 'mAaron', 'fAarushi', ....]
import random

from englishname import ALL_ENG_NAMES

end_index = len(ALL_ENG_NAMES) - 1
# Build a random {name: score} dict from 9 draws. A name drawn twice collapses
# into a single key, so the dict may end up with fewer than 9 items.
rand_dict = {
    ALL_ENG_NAMES[random.randint(0, end_index)]: random.randint(0, 100)
    for _ in range(1, 10)
}
print(rand_dict)
# Dict comprehension: keep only the items whose value is greater than 60
rs = {key: value for key, value in rand_dict.items() if value > 60}
print(rs)
D、过滤出集合中能被3整除的元素
import random

# Build a random set from 9 draws in [0, 10]; duplicates collapse automatically
rand_set = {random.randint(0, 10) for _ in range(1, 10)}
print(rand_set)
# Set comprehension: keep only the elements divisible by 3
rs = {x for x in rand_set if x % 3 == 0}
print(rs)
E、元组推导式
Python中并没有真正的"元组推导式":用圆括号写出的推导式是生成器表达式,结果为generator类型,需要转化为list(或tuple)才能得到具体元素,一般也不这样使用。
import random

# Parentheses around a comprehension create a generator expression, not a tuple
rand_tuple = (random.randint(-100, 100) for _ in range(1, 10))
print(rand_tuple)        # <generator object <genexpr> at 0x000000ACAB18B150>
print(type(rand_tuple))  # <class 'generator'>
# list() consumes the generator; afterwards rand_tuple is exhausted
print(list(rand_tuple))  # e.g. [44, -72, 58, -58, -34, 53, 81, -67, -39]
F、有名元组
元组有序,可以使用索引进行元素访问,但是使用数字下标(magic number),程序可读性不好。
可以使用常量(但常量也能被修改),或者使用枚举,做到见名知意。
# Using Enum members as named tuple indexes
from enum import Enum


class Point(Enum):
    """Named positions of the components inside an (x, y) tuple."""

    X = 0
    Y = 1


p1 = (10, 10)
p2 = (20, 20)
# Enum members are not ints, so index with .value
print(p1[Point.X.value], p1[Point.Y.value])
不过collections包中有一个namedtuple,相当于创建一种类型,直接使用.进行成员访问。
# namedtuple from collections
from collections import namedtuple

# The typename appears in repr() and in the class name, so it should match
# the variable it is bound to (the original spelled it 'Ponit').
Point = namedtuple('Point', ['x', 'y'])
p1 = Point(x=10, y=10)
p2 = Point(x=20, y=20)
print(p1.x, p1.y)  # 10 10
G、统计序列中元素出现频度(前N名)
通过map遍历,增加value值。
import random

# 29 random integers in [1, 5]
list_data = [random.randint(1, 5) for _ in range(1, 30)]
# Result dict: one key per distinct value in list_data, every count starting at 0
rs_dict = dict.fromkeys(list_data, 0)
for x in list_data:
    rs_dict[x] += 1
print(rs_dict)
# e.g. {5: 5, 4: 5, 1: 6, 2: 9, 3: 4}
H、通过collections下Counter统计
import random
from collections import Counter

# 29 random integers in [1, 5]
data_list = [random.randint(1, 5) for _ in range(1, 30)]
# Counter maps each distinct element to its number of occurrences
rs = Counter(data_list)
print(rs)
# e.g. Counter({5: 8, 2: 8, 4: 6, 1: 6, 3: 1})
# Take the N most frequent elements as (element, count) pairs
rs = rs.most_common(3)
print(rs)
# e.g. [(5, 9), (3, 8), (4, 6)]  (sample taken from a different run than the one above)
I、统计文件中出现频度最高的10个单词
韭菜大神李笑来,在《托福核心词汇》中用了个办法统计高频词汇,把历年托福真题的单词全部扫出来,统计出现频度,取出常用词,留下高频托福词汇。
用python可以很容易搞定。
import re
from collections import Counter

# Read the whole text; the with-block closes the file even if read() fails
with open('CET.txt', encoding='utf-8') as file:
    data = file.read()
# Split on runs of non-word characters. re.split() yields '' at either end when
# the text starts or ends with a separator, so drop empty tokens. The result is
# named `words` rather than `list` to avoid shadowing the builtin.
words = [word for word in re.split(r'\W+', data) if word]
rs = Counter(words).most_common(10)
print(rs)
# e.g. [('the', 252), ('to', 126), ('of', 101), ('a', 98), ('in', 76), ('and', 63), ('you', 48), ('on', 47), ('s', 44), ('is', 39)]
J、使用字典实现switch
def func1():
    """Handler for case 1."""
    print("func1() called")


def func2():
    """Handler for case 2."""
    print("func2() called")


def func3():
    """Handler for case 3."""
    print("func3() called")


# Dispatch table: maps a case value to the function that handles it
func_dict = {1: func1, 2: func2, 3: func3}
# Look up the handler for case 3 and call it
func = func_dict.get(3)
func()
K、根据key对字典进行排序
在Python 3.7之前dict不保证元素顺序(3.7起按插入顺序保存,但不会自动排序),有时需要根据key/value排序,对前N项进行输出,因此对dict排序也是常用操作。
import random
from collections import OrderedDict

from englishname import ALL_ENG_NAMES

end_index = len(ALL_ENG_NAMES) - 1
# Build a random {name: score} dict (duplicate names collapse into one key)
rand_dict = {
    ALL_ENG_NAMES[random.randint(0, end_index)]: random.randint(0, 100)
    for _ in range(1, 10)
}
print(rand_dict)
# Each result is printed right away; the original overwrote the first two
# results without ever showing them.
# Sorted by key
rs = OrderedDict(sorted(rand_dict.items(), key=lambda item: item[0]))
print(rs)
# Sorted by value
rs = OrderedDict(sorted(rand_dict.items(), key=lambda item: item[1]))
print(rs)
# Sorted by key length
rs = OrderedDict(sorted(rand_dict.items(), key=lambda item: len(item[0])))
print(rs)
L、根据value对字典进行排序
import random
from collections import OrderedDict

from englishname import ALL_ENG_NAMES

end_index = len(ALL_ENG_NAMES) - 1
# Build a random {name: score} dict (duplicate names collapse into one key)
rand_dict = {
    ALL_ENG_NAMES[random.randint(0, end_index)]: random.randint(0, 100)
    for _ in range(1, 10)
}
print(rand_dict)
# e.g. {'mLennon': 21, 'fRyleigh': 57, 'mQuinn': 17, 'mCampbell': 36, 'mEnrique': 97, 'mChris': 56, 'fCoral': 97, 'fMadyson': 39, 'mMarcos': 0}
# zip into (value, key) tuples and sort them; tuples compare by value first,
# and equal values fall back to comparing the key
zip_dict = zip(rand_dict.values(), rand_dict.keys())
rs = sorted(zip_dict)
print(rs)
# e.g. [(0, 'mMarcos'), (17, 'mQuinn'), (21, 'mLennon'), (36, 'mCampbell'), (39, 'fMadyson'), (56, 'mChris'), (57, 'fRyleigh'), (97, 'fCoral'), (97, 'mEnrique')]
# Insert into an OrderedDict in sorted order, swapping back to key -> value
ordered_dict = OrderedDict()
for x in rs:
    ordered_dict[x[1]] = x[0]
print(dict(ordered_dict))
# e.g. {'mMarcos': 0, 'mQuinn': 17, 'mLennon': 21, 'mCampbell': 36, 'fMadyson': 39, 'mChris': 56, 'fRyleigh': 57, 'fCoral': 97, 'mEnrique': 97}
M、快速寻找多个字典中公共键
使用传统的遍历,对a、b、c三个字典进行遍历,找出公共键。
import random
from random import sample

# Three dicts, each keyed by 3 to 6 distinct letters sampled from 'ABCDEFG'
rand_dict_a = {x: random.randint(1, 4) for x in sample('ABCDEFG', random.randint(3, 6))}
rand_dict_b = {x: random.randint(1, 4) for x in sample('ABCDEFG', random.randint(3, 6))}
rand_dict_c = {x: random.randint(1, 4) for x in sample('ABCDEFG', random.randint(3, 6))}
rand_dict_list = [rand_dict_a, rand_dict_b, rand_dict_c]
for rand_dict in rand_dict_list:
    print(rand_dict)
# Walk the keys of the first dict and keep those present in both other dicts
rs = list()
for key in rand_dict_a:
    if key in rand_dict_b and key in rand_dict_c:
        rs.append(key)
print(rs)
# Example output:
# {'C': 3, 'F': 2, 'A': 3, 'E': 4}
# {'G': 2, 'C': 4, 'D': 2, 'E': 1, 'A': 1}
# {'D': 3, 'F': 1, 'A': 3}
# ['A']
使用set的求交集方式过滤。
import random
from random import sample

# Three dicts, each keyed by 3 to 6 distinct letters sampled from 'ABCDEFG'
rand_dict_a = {x: random.randint(1, 4) for x in sample('ABCDEFG', random.randint(3, 6))}
rand_dict_b = {x: random.randint(1, 4) for x in sample('ABCDEFG', random.randint(3, 6))}
rand_dict_c = {x: random.randint(1, 4) for x in sample('ABCDEFG', random.randint(3, 6))}
rand_dict_list = [rand_dict_a, rand_dict_b, rand_dict_c]
for rand_dict in rand_dict_list:
    print(rand_dict)
# dict.keys() returns a set-like view, so `&` yields the keys shared by all
# three dicts
rs = rand_dict_a.keys() & rand_dict_b.keys() & rand_dict_c.keys()
print(rs)
对于更多dict(N),可以使用map-reduce进行过滤。
import random
from functools import reduce
from random import sample

# Three dicts, each keyed by 3 to 6 distinct letters sampled from 'ABCDEFG'
rand_dict_a = {x: random.randint(1, 4) for x in sample('ABCDEFG', random.randint(3, 6))}
rand_dict_b = {x: random.randint(1, 4) for x in sample('ABCDEFG', random.randint(3, 6))}
rand_dict_c = {x: random.randint(1, 4) for x in sample('ABCDEFG', random.randint(3, 6))}
rand_dict_list = [rand_dict_a, rand_dict_b, rand_dict_c]
for rand_dict in rand_dict_list:
    print(rand_dict)
# map: take the key view of every dict; reduce: intersect them pairwise.
# This works for any non-empty list of dicts, not just three.
rs = reduce(lambda common, keys: common & keys, map(lambda d: d.keys(), rand_dict_list))
print(rs)
N、队列
猜数字:随机产生[0, 100]间的数字,用户输入数值进行猜测,程序提示用户该数字大了还是小了,并支持查询历史猜测记录。
import random
from collections import deque

# The secret number to guess, drawn from [0, 100]
NUM = random.randint(0, 100)
# Guess history; maxlen=5 means only the five most recent guesses are kept
queue = deque([], 5)


def guess(guess_num):
    """Compare a guess with NUM and print a hint.

    Args:
        guess_num: The integer guessed by the user.

    Returns:
        True when guess_num equals NUM, otherwise False.
    """
    if guess_num < NUM:
        print("输入数字%s小于答案数字,请重新尝试" % guess_num)
    elif guess_num > NUM:
        print("输入数字%s大于答案数字,请重新尝试" % guess_num)
    else:
        print("恭喜你,答案%s正确" % guess_num)
        return True
    return False


while True:
    # Strip surrounding whitespace so that input such as " 42 " is accepted
    line = input("请输入所猜测数字:").strip()
    # isdecimal() only accepts characters that int() can parse; isdigit() also
    # accepts characters such as '²', for which int() raises ValueError
    if line.isdecimal():
        guess_num = int(line)
        queue.append(guess_num)
        if guess(guess_num):
            break
    elif line == 'history' or line == '-h':
        print(list(queue))
O、使用pickle保存对象
import pickle

from englishname import ALL_ENG_NAMES

# Serialize the object; the with-block flushes and closes the file, so the data
# is fully on disk before it is read back
with open('names.txt', 'wb') as dump_file:
    pickle.dump(ALL_ENG_NAMES, dump_file)
# NOTE: pickle.load() can execute arbitrary code; only unpickle trusted files
with open('names.txt', 'rb') as load_file:
    rs = pickle.load(load_file)
print(rs)
二、python字符串练习
A、拆分字符串
字符串包含进程信息,中间通过空格,tab进行分隔,可以使用string内置的split()进行分隔。
# One record of process information whose fields are separated by whitespace
process_str = 'X250 21812 5620 cons0 14:41:44 /d/java-dev/Python36/Scripts/ipython'
# Called without a separator, split() breaks on any run of whitespace
rs = str.split(process_str)
print(rs)
# ['X250', '21812', '5620', 'cons0', '14:41:44', '/d/java-dev/Python36/Scripts/ipython']
B、拆分含有多种分隔符的字符串
1、连续使用str.split(),每次处理一种分隔符
seps = '@-.'
mails = ['isisiwish-36000-stmp@vip--vip.qq.com']
# Handle one separator per pass: split every current fragment on it and
# flatten the pieces back into a single list
for sep in seps:
    tmp = [piece for fragment in mails for piece in fragment.split(sep)]
    # Consecutive separators (e.g. '--') produce empty strings; drop them
    mails = [x for x in tmp if x]
print(mails)
# ['isisiwish', '36000', 'stmp', 'vip', 'vip', 'qq', 'com']
2、使用re模块的split()函数,一次性处理多种分隔字符
import re

mail = 'isisiwish-36000-stmp@vip--vip.qq.com'
# A character class followed by '+' treats any run of '-', '.' or '@' as one separator
separators = re.compile(r"[-.@]+")
rs = separators.split(mail)
print(rs)
# ['isisiwish', '36000', 'stmp', 'vip', 'vip', 'qq', 'com']
C、判断字符串开头结尾
import os

# Names in the current working directory that end with '.py'
rs_list = list(filter(lambda name: name.endswith('.py'), os.listdir()))
print(rs_list)
参考: