正则表达式
基本用法
import re

text = '123_abc \n@coom'

# \d stands for one decimal digit; search() reports the first place it matches.
first_digit = re.search(r'\d', text)
print(first_digit)
<re.Match object; span=(0, 1), match='1'>
import re

text = '123_abc \n@coom'

# Quantifier examples, each searched against the same text in this order.
quantifier_patterns = (
    r'\d*',   # Kleene star: zero or more digits, greedy
    r'\d*?',  # lazy star: as few digits as possible, so the empty string wins
    r'\d+',   # positive closure: one or more digits
    r'\D+',   # one or more non-digit characters
)
for regex in quantifier_patterns:
    print(re.search(regex, text))
<re.Match object; span=(0, 3), match='123'>
<re.Match object; span=(0, 0), match=''>
<re.Match object; span=(0, 3), match='123'>
<re.Match object; span=(3, 14), match='_abc \n@coom'>
import re

text = '123_abc \n@coom'

# Shorthand character classes, precompiled so each one carries a descriptive name.
word_run = re.compile(r'\w+')        # letters, digits and underscore
non_word_run = re.compile(r'\W+')    # anything that \w does not match
blank_run = re.compile(r'\s+')       # whitespace characters
non_blank_run = re.compile(r'\S+')   # anything that \s does not match

print(word_run.search(text))
print(non_word_run.search(text))
print(blank_run.search(text))
print(non_blank_run.search(text))
<re.Match object; span=(0, 7), match='123_abc'>
<re.Match object; span=(7, 10), match=' \n@'>
<re.Match object; span=(7, 9), match=' \n'>
<re.Match object; span=(0, 7), match='123_abc'>
import re

text = '123_abc \n@coom'

# Wildcard, anchors and bracketed character classes, searched in this order.
anchor_and_class_patterns = (
    r'.*',              # '.' is any character except a newline (\r IS matched), so this stops before \n
    r'^123.*\n@coom$',  # ^ anchors the start, $ anchors the end ($ goes last in the pattern)
    r'[@m456]',         # character class: any single one of the listed characters
    r'[^@m456]',        # negated class: any single character that is not listed
)
for regex in anchor_and_class_patterns:
    print(re.search(regex, text))
<re.Match object; span=(0, 8), match='123_abc '>
<re.Match object; span=(0, 14), match='123_abc \n@coom'>
<re.Match object; span=(9, 10), match='@'>
<re.Match object; span=(0, 1), match='1'>
import re

text = '123_abc \n@coom'

# {2,10} asks for between 2 and 10 consecutive 'o' characters.
print(re.search(r'o{2,10}', text))

# Parentheses capture sub-matches: group 1 is the digit run, group 2 the lowercase run.
s = re.search(r'(\d+).*?([a-z]+).*', text)
# Index 0 is the whole matched string; 1 and 2 are the captured groups.
for group_index in range(3):
    print(s.group(group_index))
print(s.groups())
<re.Match object; span=(11, 13), match='oo'>
123_abc
123
abc
('123', 'abc')
正则表达式前r的作用
import re
text = '123\\n'
# Python first turns the literal '\\n' into the two characters \ and n. The regex engine then
# reads that \n as the newline escape; text contains no newline, so the result is None.
print(re.search('\\n', text)) # '\\n' -> \n -> newline
# The r prefix makes Python keep the literal exactly as written, so the regex engine receives
# \\n: an escaped backslash followed by the letter n, i.e. two literal characters.
print(re.search(r'\\n', text)) # r'\\n' -> \\n -> matches the characters \ and n
# Raw strings are most often useful for things like Windows file paths.
text = 'F:\\python\\regular' # the string value is F:\python\regular
# print(re.search('\\', text)) # raises re.error: the regex would be a lone trailing backslash
print(re.search('\\\\', text)) # without r, four backslashes are needed to match one backslash
print(re.search(r'\\', text))
None
<re.Match object; span=(3, 5), match='\\n'>
<re.Match object; span=(2, 3), match='\\'>
<re.Match object; span=(2, 3), match='\\'>
常用方法
import re

text = '2515160189@qq.com'
# match() only tries at the very start of the string; if the start does not fit it returns None.
for anchored_regex in (r'\d+', r'\D+'):
    print(re.match(anchored_regex, text))

text = 'talk is cheap, show me the code'
# compile() fixes the rule up front; the result behaves the same as passing the pattern
# string directly to search()/match().
pattern = re.compile(r'\w*[ts]\w*')
# Every match in the text: here, each word that contains a 't' or an 's'.
print(pattern.findall(text))
# Replace each of those words with '123'.
print(pattern.sub('123', text))
# The original text is printed again after the substitution.
print(text)
<re.Match object; span=(0, 10), match='2515160189'>
None
['talk', 'is', 'show', 'the']
123 123 cheap, 123 me 123 code
talk is cheap, show me the code