一些正则表达式(regular expression)的tips:
1 非贪婪flag
>>> re.findall(r"a(\d+?)", "a23b")  # 非贪婪模式
['2']
>>> re.findall(r"a(\d+)", "a23b")
['23']
注意比较这种情况:
>>> re.findall(r"a(\d+)b", "a23b")
['23']
>>> re.findall(r"a(\d+?)b", "a23b")  # 如果前后均有限定条件,则非贪婪模式失效(结果与贪婪模式相同)
['23']
2 如果你要多行匹配,那么加上re.S和re.M标志
re.S:点号(.)将会匹配换行符;默认情况下点号(.)不会匹配换行符
>>> re.findall(r"a(\d+)b.+a(\d+)b", "a23b\na34b")
[]
>>> re.findall(r"a(\d+)b.+a(\d+)b", "a23b\na34b", re.S)
[('23', '34')]
re.M:^和$将会匹配每一行的行首和行尾;默认^只会匹配整个字符串的开头(即第一行的行首),默认$只会匹配整个字符串的末尾(即末行的行尾)
>>> re.findall(r"^a(\d+)b", "a23b\na34b")
['23']
>>> re.findall(r"^a(\d+)b", "a23b\na34b", re.M)
['23', '34']
但是,如果没有^标志,
>>> re.findall(r"a(\d+)b", "a23b\na34b")
['23', '34']
可见,没有^时无需re.M也能匹配到每一行
import re

# Two-line sample text; each line begins with a number.
n = '''12 drummers drumming,
11 pipers piping, 10 lords a-leaping'''

# Raw strings keep the regex escape \d from being treated as a (invalid)
# Python string escape.
p = re.compile(r'^\d+')
# With MULTILINE, ^ also matches right after every newline.
p_multi = re.compile(r'^\d+', re.MULTILINE)

# Single-argument print(...) produces the same output on Python 2 and 3.
print(p.findall(n))        # ['12']
print(p_multi.findall(n))  # ['12', '11']
============================
import re

# Greedy vs. lazy quantifiers on a single sample string.
a = 'a23b'

# Single-argument print(...) produces the same output on Python 2 and 3;
# raw strings keep \d as a regex escape.
print(re.findall(r'a(\d+?)', a))   # ['2']  lazy: stops after the first digit
print(re.findall(r'a(\d+)', a))    # ['23'] greedy: takes every digit
print(re.findall(r'a(\d+)b', a))   # ['23']
print(re.findall(r'a(\d+?)b', a))  # ['23'] the trailing 'b' forces the lazy group to take every digit
============================
# Two-line sample string used to show how '.', '^' and '$' treat newlines.
b = 'a23b\na34b'

# '.' matches any single character except a newline, so '.+' cannot bridge
# the two lines here and nothing matches.
print(re.findall(r'a(\d+)b.+a(\d+)b', b))  # []
print(re.findall(r'a(\d+)b', b, re.M))     # ['23', '34']
print(re.findall(r'^a(\d+)b', b, re.M))    # ['23', '34']
print(re.findall(r'a(\d+)b', b))           # ['23', '34'] unanchored patterns match on every line
print(re.findall(r'^a(\d+)b', b))          # ['23'] without re.M, ^ matches only at the start of the string
print(re.findall(r'a(\d+)b$', b))          # ['34'] without re.M, $ matches only at the end of the string
print(re.findall(r'a(\d+)b', b, re.M))     # ['23', '34']
# '.' matches any character except a newline; '?' makes the preceding item
# optional (zero or one occurrence).
print(re.findall(r'a(\d+)b.?', b, re.M))   # ['23', '34']
print(re.findall(r"a(\d+)b", "a23b\na34b"))  # ['23', '34']
============================
注:在Python 3(如Python 3.4)中,print是函数,调用时需要加括号,例如 print(x)