re 是 Python 3 标准库中的一个内置模块,用于处理正则表达式相关的操作。
Python3代码例子
import re
text = "sdfreer s33@qq.com rerersdfer 3435343434"
# E-mail-shaped tokens: word chars, "@", word chars, ".", word chars.
# re.ASCII restricts \w to [a-zA-Z0-9_].
email_pattern = re.compile(r"\w+@\w+\.\w+", re.ASCII)
result = email_pattern.findall(text)
print(result)
# Ten consecutive digits; change the 10 in {10} to fit the length of the
# number being extracted.
ten_digit_pattern = re.compile(r"\d{10}")
result1 = ten_digit_pattern.findall(text)
print(result1)
结果如下:
['s33@qq.com']
['3435343434']
通过一个例子来使用正则表达式:校验邮箱格式
import re
email = input("邮箱: ")
# fullmatch requires the entire input to fit the pattern, so no ^/$ anchors
# are needed (and a trailing newline cannot slip past "$").
isValid = re.fullmatch(r"\w+@\w+\.\w+", email)
# A match object is truthy; None (no match) is falsy.
if isValid:
    print("格式合法")
else:
    print("格式错误")
正则表达式的常用语法
# 固定文本
import re
text = "aaa aaab aaaaac"
# A literal pattern matches that exact character sequence; only the run of
# five a's in "aaaaac" is long enough to contain "aaaa".
literal = re.compile(r"aaaa")
dataLst = literal.findall(text)
print(dataLst)
# 结果
['aaaa']
# 含特定字符
import re
text = "aaa aaab aaaaac"
# [ab] matches exactly one character that is either "a" or "b",
# here following two literal a's.
dataLst = [m.group() for m in re.finditer(r"aa[ab]", text)]
print(dataLst)
# 结果
['aaa', 'aaa', 'aaa']
import re
text = "aaa aaab aaaaac"
# [a-z] is a range: any single lowercase ASCII letter after "aa".
lowercase_after_aa = re.compile(r"aa[a-z]")
dataLst = lowercase_after_aa.findall(text)
print(dataLst)
# 输出
['aaa', 'aaa', 'aaa', 'aac']
import re
text = "aaa1 aaab2 aaa4c"
# [0-9] is a range: any single ASCII digit after "aa".
digit_after_aa = re.compile(r"aa[0-9]")
dataLst = digit_after_aa.findall(text)
print(dataLst)
# 输出
['aa1', 'aa4']
import re
text = "aaa55651 aaab23434 aaa434343ererec"
# \d{3} means exactly three digits, so each match is "a" plus three digits.
dataLst = [m.group() for m in re.finditer(r"a\d{3}", text)]
print(dataLst)
# 输出
['a556', 'a434']
# 动态的提取(位数不固定时,使用 {m,} 或 {m,n} 指定数量范围)
import re
text = "aaa55651 aaab23434 aaa434343ererec"
# \d{3,} means three or more digits; the quantifier is greedy, so it takes
# the whole run of digits.
at_least_three = re.compile(r"a\d{3,}")
dataLst = at_least_three.findall(text)
print(dataLst)
# 输出
['a55651', 'a434343']
import re
text = "aaa55651 aaab23434 aaa434343ererec"
# \d{3,5} means three to five digits; being greedy, it stops after five even
# when more digits follow.
three_to_five = re.compile(r"a\d{3,5}")
dataLst = three_to_five.findall(text)
print(dataLst)
# 输出
['a55651', 'a43434']
import re
text = "a55651 ab23434 a434343ererec"
# "?" makes the preceding item optional: zero or one occurrence.
optional_digit = re.compile(r"a\d?")
dataLst = optional_digit.findall(text)
print(dataLst)
# 输出
['a5', 'a', 'a4']
import re
text = "a55651 ab23434 a434343ererec"
# "+" repeats the preceding item one or more times, so "ab..." does not
# match because no digit follows the "a".
dataLst = [m.group() for m in re.finditer(r"a\d+", text)]
print(dataLst)
# 输出
['a55651', 'a434343']
import re
text = "a55651 ab23434 a434343ererec"
# "*" repeats the preceding item zero or more times, so a bare "a" with no
# digits after it still matches.
zero_or_more_digits = re.compile(r"a\d*")
dataLst = zero_or_more_digits.findall(text)
print(dataLst)
# 输出
['a55651', 'a', 'a434343']
import re
text = "a55651 ab23434 a434343ererec中"
# \w matches letters, digits and the underscore. For str patterns it is
# Unicode-aware by default, which is why the trailing "中" is matched too.
word_run = re.compile(r"a\w*")
dataLst = word_run.findall(text)
print(dataLst)
# 输出
['a55651', 'ab23434', 'a434343ererec中']
import re
text = "a55651 ab23434 a43ererec中"
# "." matches any character except a newline, so .{2} splits the text into
# consecutive two-character pieces (spaces included). The text has 25
# characters, so the final "中" has no partner and is left out.
any_pair = re.compile(r".{2}")
dataLst = any_pair.findall(text)
# Prints: ['a5', '56', '51', ' a', 'b2', '34', '34', ' a', '43', 'er', 'er', 'ec']
print(dataLst)
# 提取字符串中的数字(使用括号分组,只取出匹配结果中需要的部分)
import re
text = "身份证号222333199911009988, 其他人身份证号22255519991100998X"
# Raw strings (r"...") keep "\d" from being parsed as an invalid string
# escape, which newer Python versions warn about.
# With more than one group, findall returns a tuple per match: here the outer
# group (the whole 18-character ID) and the inner 4-digit group.
res1 = re.findall(r"(\d{6}(\d{4})\d{7}[\dX])", text)
print(res1)
# With exactly one group, findall returns only that group's text.
res2 = re.findall(r"\d{6}(\d{4})\d{7}[\dX]", text)
print(res2)
# 输出结果
[('222333199911009988', '1999'), ('22255519991100998X', '1999')]
['1999', '1999']
给个赞呗~