正则学习笔记2

. 匹配除换行符之外的任意一个字符
但是在DOTALL模式中可以匹配换行符

>>> import re
>>> re.match(r".","abc")
<_sre.SRE_Match object; span=(0, 1), match='a'>
>>> re.match(r"..","abc")
<_sre.SRE_Match object; span=(0, 2), match='ab'>
>>> re.match(r"....","abc")
>>> print(re.match(r"....","abc"))
None
>>> print(re.match(r"..","a\nc"))
None
>>> print(re.match(r"..","a\nc",re.DOTALL))
<_sre.SRE_Match object; span=(0, 2), match='a\n'>
>>>
>>> print(re.match(r"\\","\a\nc",re.DOTALL))
None
>>> print(re.match(r"\\","\\a\nc",re.DOTALL))
<_sre.SRE_Match object; span=(0, 1), match='\\'>
>>>
>>>
>>>
>>> print(re.match(r"[abc]","axxx"))
<_sre.SRE_Match object; span=(0, 1), match='a'>
>>> print(re.match(r"[abc]","bxxx"))
<_sre.SRE_Match object; span=(0, 1), match='b'>
>>> print(re.match(r"[abc]","cxxx"))
<_sre.SRE_Match object; span=(0, 1), match='c'>
>>>
>>> print(re.match(r"[^abc]","cxxx"))
None
>>> print(re.match(r"[^abc]","hxxx"))
<_sre.SRE_Match object; span=(0, 1), match='h'>

re.DOTALL------让 . 可以匹配包括换行符在内的任意字符(注意:这不是多行匹配模式,多行匹配模式是 re.M,即 re.MULTILINE)
[]:匹配方括号中出现的任意单个字符,各字符之间是"或"的关系
[^]:匹配不在方括号中列出的任意单个字符

>>> print(re.search(r"abc","abc"))
<_sre.SRE_Match object; span=(0, 3), match='abc'>
>>> print(re.search(r"^abc","ssssssabc"))
None

^在方括号外面表示从头开始匹配

>>> print(re.match(r"\d","123"))
<_sre.SRE_Match object; span=(0, 1), match='1'>
>>> print(re.match(r"\d+","123"))
<_sre.SRE_Match object; span=(0, 3), match='123'>
>>> print(re.match(r"\d*","123"))
<_sre.SRE_Match object; span=(0, 3), match='123'>
>>> print(re.match(r"\d*","a123"))
<_sre.SRE_Match object; span=(0, 0), match=''>
>>> print(re.match(r"\d*?","a123"))
<_sre.SRE_Match object; span=(0, 0), match=''>
>>> print(re.match(r"\d*?","123"))
<_sre.SRE_Match object; span=(0, 0), match=''>
>>> print(re.match(r"\d+?","123"))
<_sre.SRE_Match object; span=(0, 1), match='1'>
>>> print(re.search(r"\d+","abc123dee"))
<_sre.SRE_Match object; span=(3, 6), match='123'>
>>> re.search(r"\d+","abc123dee").group()
'123'
>>> re.search(r"\d{2}","abc123dee").group()
'12'
>>> re.search(r"\d*","abc123dee").group()
''
>>> re.findall(r"\d+","1a12b123c1234d")
['1', '12', '123', '1234']
>>> max(re.findall(r"\d+","1a12b123c1234d"))
'1234'
>>> max(re.findall(r"\d+","5a12b123c1234d"))
'5'
>>> re.findall(r"\d+","1ab2bc3de4fA")
['1', '2', '3', '4']
>>> re.findall(r"\d+","1ab12bc123de1234fA")
['1', '12', '123', '1234']
>>> re.findall(r"\[a-zA-Z]+","1ab12bc123de1234fA")
[]
>>> re.findall(r"[a-zA-Z]+","1ab12bc123de1234fA")
['ab', 'bc', 'de', 'fA']

\b:匹配一个单词的边界,也就是指单词和空格(或其他非单词字符)间的位置。如 r'st\b' 可以匹配 'a test of' 中的 'st',但不匹配 'tester' 中的 'st'

>>> re.findall(r"[A-Z]+[a-zA-Z]*|[a-z]+","ABBBossssAA abc ABC")
['ABBBossssAA', 'abc', 'ABC']
>>> re.findall(r"\b[A-Z]+[a-zA-Z]*|[a-z]+\b","ABBBossssAA abc ABC")
['ABBBossssAA', 'abc', 'ABC']
>>> re.findall(r"\b[A-Z]+[a-z]*|[a-z]+\b","ABBBossssAA abc ABC")
['ABBBossss', 'abc', 'ABC']
>>> import re
>>> re.search(r"\s","ab cd")
<_sre.SRE_Match object; span=(2, 3), match=' '>
>>> re.search(r"\s","ab cd").group()
' '
>>> re.search(r"\s","ab\n   \r \t cd").group()
'\n'
>>> re.search(r"\s+","ab\n   \r \t cd").group()
'\n   \r \t '
>>>
>>>
>>> re.findall(r"\S+","ab cd\t ef\nhi")
['ab', 'cd', 'ef', 'hi']
>>> "".join(re.findall(r"\S+","ab cd\t ef\nhi"))
'abcdefhi'
>>> re.search(r"\w+","aaZAW0123_")
<_sre.SRE_Match object; span=(0, 10), match='aaZAW0123_'>
>>> re.search(r"\w+","aaZAW0123_-")
<_sre.SRE_Match object; span=(0, 10), match='aaZAW0123_'>
>>> re.search(r"\w+","aaZAW0123_-").group()
'aaZAW0123_'
>>> re.search(r"\W+","aaZAW0123_-").group()
'-'
>>> re.search(r"\d?","a7").group()
''
>>> re.search(r"\d?","7a").group()
'7'
>>> re.search(r"\d?","7").group()
'7'
>>> re.search(r"^abc","dddabc").group()
Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
AttributeError: 'NoneType' object has no attribute 'group'
>>>
>>>
>>> re.search(r"^abc","dddabc")
>>> re.search(r"^abc","abcdddabc")
<_sre.SRE_Match object; span=(0, 3), match='abc'>
>>>

$:匹配字符串的结尾(或字符串末尾换行符之前的位置);在多行模式(re.M)下,还会匹配每一行的结尾

>>> re.search(r"^\d+","133abcdddabc")
<_sre.SRE_Match object; span=(0, 3), match='133'>
>>> re.search(r"\d+$","133abcdddabc555")
<_sre.SRE_Match object; span=(12, 15), match='555'>
>>>
>>> re.search(r"123","123")
<_sre.SRE_Match object; span=(0, 3), match='123'>
>>> re.search(r"^123$","123")
<_sre.SRE_Match object; span=(0, 3), match='123'>
>>> re.search(r"^123$","123sss")
>>> re.search(r"^123$","ss123")
>>> re.search(r"\A123\Z","123")
<_sre.SRE_Match object; span=(0, 3), match='123'>

>>> re.search(r"\d(\D+)\d","1abc3")
<_sre.SRE_Match object; span=(0, 5), match='1abc3'>
>>> re.search(r"\d(\D+)\d","1abc3").group()
'1abc3'
>>> re.search(r"\d(\D+)\d","1abc3").group(0)
'1abc3'
>>> re.search(r"\d(\D+)\d","1abc3").group(1)
'abc'
>>> re.search(r"(\d)(\D+)(\d)","1abc3").group(1)
'1'
>>> re.search(r"(\d)(\D+)(\d)","1abc3").group(2)
'abc'
>>> re.search(r"(\d)(\D+)(\d)","1abc3").group(3)
'3'
>>> pattern = re.compile(r"\d+")
>>> pattern.search("abc123")
<_sre.SRE_Match object; span=(3, 6), match='123'>
>>> pattern.search("abc123dddddd")
<_sre.SRE_Match object; span=(3, 6), match='123'>
>>> result = pattern.search("abc123dddddd")
>>> if result:
...     print("匹配到了")
... else:
...     print("没有匹配到!")
...
匹配到了
>>>
>>> s = input("请输入3个数字和3个字母:")
请输入3个数字和3个字母:123abc
>>>
>>> re.match(r"\d{3}[a-zA-Z]{3}",s)
<_sre.SRE_Match object; span=(0, 6), match='123abc'>
>>> re.match(r"\d{3}|[a-zA-Z]{3}\d{3}","abc123")
<_sre.SRE_Match object; span=(0, 6), match='abc123'>
>>>
>>> re.match(r"\d{3}|[a-zA-Z]{3}|[a-zA-Z]{3}\d{3}","abc123")
<_sre.SRE_Match object; span=(0, 3), match='abc'>
>>> re.match(r"\d{3}[a-zA-Z]{3}|[a-zA-Z]{3}\d{3}","abc123")
<_sre.SRE_Match object; span=(0, 6), match='abc123'>
>>> re.match(r"\d{3}[a-zA-Z]{3}$|[a-zA-Z]{3}\d{3}$","abc123")
<_sre.SRE_Match object; span=(0, 6), match='abc123'>
>>> re.match(r".","\ndb",re.DOTALL)
<_sre.SRE_Match object; span=(0, 1), match='\n'>
>>> re.DOTALL
<RegexFlag.DOTALL: 16>
>>> re.match(r".","\ndb",16)
<_sre.SRE_Match object; span=(0, 1), match='\n'>
>>> pattern = re.compile(r"abc")
>>> pattern.match("123abc")
>>> pattern.match("123abc",3)
<_sre.SRE_Match object; span=(3, 6), match='abc'>
>>> pattern.match("123abc",3,6)
<_sre.SRE_Match object; span=(3, 6), match='abc'>
>>> re.findall(r"\d+","1a2b3c")
['1', '2', '3']
>>> re.findall(r"[a-z](\d+)","1a2b3c")
['2', '3']
>>> re.findall(r"([a-z])(\d+)","1a2b3c")
[('a', '2'), ('b', '3')]
>>> re.findall(r"([a-z])(\d+)([a-z])","a1ab2bc3c")
[('a', '1', 'a'), ('b', '2', 'b'), ('c', '3', 'c')]
>>> re.findall(r"(([a-z])(\d+)([a-z]))","a1ab2bc3c")
[('a1a', 'a', '1', 'a'), ('b2b', 'b', '2', 'b'), ('c3c', 'c', '3', 'c')]
>>> re.findall(r"(([a-z])(\d+)([a-z]))","a1ab2bc3")
[('a1a', 'a', '1', 'a'), ('b2b', 'b', '2', 'b')]
>>> re.I
<RegexFlag.IGNORECASE: 2>
>>> re.DOTALL
<RegexFlag.DOTALL: 16>
>>> re.M
<RegexFlag.MULTILINE: 8>
>>> s = "a1a\nb2b\nc3c\n"
>>> re.search(r"[a-z]$",s)
<_sre.SRE_Match object; span=(10, 11), match='c'>
>>> re.search(r"[a-z]$",s,re.M)
<_sre.SRE_Match object; span=(2, 3), match='a'>
>>> re.findall(r"[a-z]$",s)
['c']
>>> re.findall(r"[a-z]$",s,re.M)
['a', 'b', 'c']

小练习:
s="a 2 b 3 c 5 d "把abcd切出来,用正则。

>>> p = re.compile(r"\s+\d\s+")
>>> result = p.split("a  2    b       3     c    5         d ")
>>> print(result)
['a', 'b', 'c', 'd ']
>>> re.sub(r"\d+","**","aa11bb22cc")
'aa**bb**cc'
>>>
>>> re.sub(r"\s+","","aa    11b    b22  \n   \t    cc")
'aa11bb22cc'
>>> re.sub(r"[ \t\r]+","","aa    11b    b22  \n   \t    cc")
'aa11bb22\ncc'
>>> re.subn(r"[ \t\r]+","","aa    11b    b22  \n   \t    cc")
('aa11bb22\ncc', 4)
>>> re.search(r"(?P<num>\d+) (?P=num)","123 123")
<_sre.SRE_Match object; span=(0, 7), match='123 123'>
>>> re.search(r"(?P<num>\d+) (?P=num)","123 123").group()
'123 123'
>>> re.search(r"(?P<num>\d+) (?P=num)","123 123").group(1)
'123'
>>> re.search(r"(?P<num>\d+) (?P=num)","123 456").group(1)
Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
AttributeError: 'NoneType' object has no attribute 'group'
>>> re.search(r"(\d+) \1","123 456").group(1)
Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
AttributeError: 'NoneType' object has no attribute 'group'
>>> re.search(r"(\d+) \1","123 123").group(1)
'123'
>>> re.search(r"((\d+) (\d+))","123 456").group()
'123 456'
>>> re.search(r"((\d+) (\d+))","123 456").group(1)
'123 456'
>>> re.search(r"((\d+) (\d+))","123 456").group(2)
'123'
>>> re.search(r"((\d+) (\d+))","123 456").group(3)
'456'

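前向肯定断言(英文:positive lookbehind)的语法:
(?<=pattern)
前向肯定断言表示你希望匹配的字符串前面是pattern匹配的内容时,才匹配。
后向肯定断言(英文:positive lookahead)的语法:
(?=pattern)
后向肯定断言表示你希望匹配的字符串的后面是pattern匹配的内容时,才匹配。
注意:这里的"前向/后向"指的是pattern位于待匹配内容的前面还是后面,与英文术语 lookbehind/lookahead 的字面方向相反;断言本身不消耗字符,也不会出现在匹配结果中。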

>>> re.search(r"((?<=abc)\d+)","abc123deb")
<_sre.SRE_Match object; span=(3, 6), match='123'>
>>> re.search(r"((?<=abc)\d+)","abc123deb").group()
'123'
>>> re.search(r"(\d+(?=abc))","xbc123abc").group()
'123'
>>> re.search(r"(?<=xbc)(\d+(?=abc))","xbc123abc").group()
'123'

前向否定断言(英文:negative lookbehind)的语法:
(?<!pattern)
前向否定断言表示你希望匹配的字符串的前面不是pattern匹配的内容时,才匹配。
后向否定断言(英文:negative lookahead)的语法:
(?!pattern)
后向否定断言表示你希望匹配的字符串后面不是pattern匹配的内容时,才匹配。

>>> re.search(r"(?<!xbc)\d+","abc123abc").group()
'123'
>>> re.search(r"(?<!xbc)\d+","xbc123abc").group()
'23'
>>> re.search(r"(?<!xbc)\d+?","xbc123abc").group()
'2'
>>> re.search(r"\d+(?<!xbc)","123abc").group()
'123'
>>> re.search(r"\d+(?!xbc)","123xbc").group()
'12'
  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值