python基础-正则表达式
正则
正则表达式是计算机科学的一个概念,正则表达式通常被用来检索、替换那些符合某个模式(规则)的文本。也就是说使用正则表达式可以在字符串中匹配出你需要的字符或者字符串,甚至可以替换你不需要的字符或者字符串。
'''
import re
#demo1
a=input("输入:")
b=re.search("python",a)#搜寻匹配,匹配是第一个
if bool(b) ==True:
    print("ok")
else:
    print("no")
#demo2
qq=input("输入:")
b=re.search("[0-9]{5,11}",qq) #(规则,对象)
if b:
    print("ok")
else:
    print("no")
'''
1.元字符
. ^ $ * + ? {} [] \ | ()
.
'''
>>> re.search(".","\nasfsdfdsaf") # . 匹配除换行符以外的任意单个字符,所以跳过了开头的\n
<_sre.SRE_Match object; span=(1, 2), match='a'>
'''
\d
'''
>>> re.search("\d","asf2sdfdsaf") #查询第一个数字
<_sre.SRE_Match object; span=(3, 4), match='2'>
>>> re.findall("\d","asf2sdf4dsa3f") #查询所有的
['2', '4', '3']
'''
\s
'''
>>> re.findall("\s","\n \t asss")
['\n', ' ', '\t', ' ']
'''
\w
'''
>>> re.findall("\w","adwo__55+%?")
['a', 'd', 'w', 'o', '_', '_', '5', '5']
'''
\b
'''
>>> re.findall(r"\bapple\b","apple apple") # r 表示原始字符串,避免 \b 被 Python 当作退格符,这样正则才能把它当作单词边界
['apple', 'apple']
'''
\.
'''
re.findall(r"\\b",r"55apple\b") #匹配字面量的反斜杠:模式使用原始字符串 r,并用 \\ 表示一个反斜杠
'''
\D、\S、\W、\B
'''
#\D
>>> re.findall(r"\D","123ASDF")
['A', 'S', 'D', 'F']
#\S
>>> re.findall(r"\S","\t \n aaa")
['a', 'a', 'a']
#\W
>>> re.findall(r"\W","\t \n aaa")
['\t', ' ', '\n', ' ']
'''
^
'''
>>> re.findall(r"^adc","adcefg") #adc开头的
['adc']
'''
$
'''
>>> re.findall(r"efg$","adcefg") #efg结尾的
['efg']
'''
{M,N}
'''
>>> re.findall(r"\d{1,3}","22db44ef555g1666")
['22', '44', '555', '166', '6']
'''
{M,}
'''
>>> re.findall(r"\d{2,}","22db44ef555g1666")
['22', '44', '555', '1666']
'''
{,N}
'''
>>> re.findall(r"\d{,2}","22db44ef555g1666") #注意 {,2} 允许匹配 0 次,所以非数字位置和字符串末尾都会得到空字符串 ''(不是空格)
['22', '', '', '44', '', '', '55', '5', '', '16', '66', '']
'''
{N}
'''
>>> re.findall(r"\d{2}","22db44ef555g1666")
['22', '44', '55', '16', '66']
'''
*
'''
>>> re.findall(r"\d*","22db44ef555g1666")
['22', '', '', '44', '', '', '555', '', '1666', '']
'''
+
'''
>>> re.findall(r"\d+","22db44ef555g1666")
['22', '44', '555', '1666']
'''
?
'''
>>> re.findall(r"\d?","22db44ef555g1666")
['2', '2', '', '', '4', '4', '', '', '5', '5', '5', '', '1', '6', '6', '6', '']
>>> re.findall(r"\d*?","22db44ef555g1666") # 量词后面的 ? 表示非贪婪(尽量少匹配);*? 最少匹配 0 次,所以每个位置都得到空字符串(Python 3.7 起空匹配之后还会紧跟单个数字的匹配,结果会不同)
['', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '']
>>> re.findall(r"\d+?","22db44ef555g1666") # +? 是非贪婪的 +:至少 1 次且尽量少匹配,所以每次只匹配 1 个数字
['2', '2', '4', '4', '5', '5', '5', '1', '6', '6', '6']
>>> re.findall(r"\d{2,5}?","22db44ef555g1666") # {2,5}? 是非贪婪的 {2,5}:尽量少匹配,所以每次只取最小的 2 次
['22', '44', '55', '16', '66']
'''
[ ]
[ . ? * ( ) {} ]
[0-9]
'''
>>> re.findall(r"[0-9]","22db44ef5") #所有数字
['2', '2', '4', '4', '5']
'''
[^\d]
'''
>>> re.findall(r"[^\d]","22db44ef5AB")
['d', 'b', 'e', 'f', 'A', 'B']
>>> re.findall(r"[^\d]{2}","22db44ef5AB") #非数字次数为2的
['db', 'ef', 'AB']
'''
[a-z]
'''
>>> re.findall(r"[a-zA-Z]","22db44ef5AB") #所有字母
['d', 'b', 'e', 'f', 'A', 'B']
'''
[^a-z]
'''
>>> re.findall(r"[^a-z]{2}","22db44ef5AB")
['22', '44', '5A']
'''
|
'''
'''
A | B
'''
第一种方式:
>>> re.findall(r"b|a","22db44ef5ABa")
['b', 'a']
第二种方式:
>>> re.findall(r"[ba]","22db44ef5ABa")
['b', 'a']
'''
()
'''
>>> re.findall(r"(db)","22db44ef5ABa")
['db']
>>> re.findall(r"(db)*","22db44ef5ABadb")
['', '', 'db', '', '', '', '', '', '', '', '', 'db', '']
'''
2.re模块
re.compile()
'''
>>> a=re.compile(r"\d") #编译后得到可重复使用的正则对象
>>> a
re.compile('\\d')
>>> a.findall("123kk444")
['1', '2', '3', '4', '4', '4']
'''
match()
'''
>>> print(re.match(r"\d","b123a")) #匹配开头,相当于脱字符
None
'''
search()
findall()
sub()
'''
>>> re.sub("i","o","pythin") #默认替换所有
'python'
>>> re.sub("i","o","pythin pythin pythin pythin",3) #指定次数
'python python python pythin'
'''
group()
start()
end()
span()
'''
>>> c=re.search(r"\d","b35f3")
>>> c.group()
'3'
>>> c.start()
1
>>> c.end()
2
>>> c.span()
(1, 2)
'''