In [2]:import re
In [3]: re.findall(".","\n")
Out[3]:[]
In [4]: re.findall(".","\n",re.DOTALL)
Out[4]:['\n']
In [5]: re.findall(".","\n",re.S)
Out[5]:['\n']
+表示一次或多次
In [14]: re.findall("a[bcd]e","abce")
Out[14]:[]
In [15]: re.findall("a[bcd]+e","abce")
Out[15]:['abce']
In [16]:
Do you really want to exit ([y]/n)? n
In [16]: re.findall("abce|aede|afce","abce")
Out[16]:['abce']
+和*的区别:*表示零次或多次,+表示一次或多次
In [19]: re.findall("abc*","ab")
Out[19]:['ab']
In [20]: re.findall("abc+","ab")
Out[20]:[]
贪婪
尽可能多地往后匹配
(.*)
非贪婪
尽可能少地匹配:一旦后面的模式能够匹配成功就结束本次匹配,然后从该位置继续往后查找下一个匹配
(.*?)
# 这个是贪婪的
In [21]: re.findall("m(.*)m","jkmdfkmjkjkmjkksm")
Out[21]:['dfkmjkjkmjkks']
# 这个是非贪婪的
In [22]: re.findall("m(.*?)m","jkmdfkmjkjkmjkksm")
Out[22]:['dfk','jkks']
re.sub("匹配要替换内容的正则表达式", "替换成的字符串", "待替换的字符串") 的使用
In [23]: a ="WE6LOVE23CHINA"
In [24]: re.sub("\d","_",a)
Out[24]:'WE_LOVE__CHINA'
re.compile()的使用
使用compile方法预先把正则表达式编译成模式对象(Pattern)并保存在内存中
同一个正则需要反复使用时可以避免重复编译,提高正则表达式匹配的效率
In [25]: re.compile("\d")
Out[25]: re.compile(r'\d', re.UNICODE)
In [26]: p = re.compile("\d")
In [27]: p.findall(a)
Out[27]:['6','2','3']
In [28]: p.sub("_",a)
Out[28]:'WE_LOVE__CHINA'
python中原始字符串r的用法
Show me the code
In [37]: a ="a\nb"
In [38]:print(a)
a
b
In [39]:len(a)
Out[39]:3
In [40]: b = r"a\nb"
In [41]:print(b)
a\nb
In [42]:len(b)
Out[42]:4
In [43]: b[1]
Out[43]:'\\'
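注意:r前缀使字符串中的反斜杠不再被Python当作转义字符,r"a\nb"里的\n是反斜杠和n两个字符;但正则引擎自己仍会把模式中的\n解释为换行符,所以用r"a\nb"作模式可以匹配含换行的"a\nb",却匹配不到含字面反斜杠的"a\\nb"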
In [44]: r"a\nb"=="a\nb"
Out[44]:False
In [46]: re.findall("a\nb","a\nb")
Out[46]:['a\nb']
In [47]: re.findall(r"a\nb","a\nb")
Out[47]:['a\nb']
In [48]: r"a\nb"=="a\\nb"
Out[48]:True
In [49]: re.findall(r"a\nb","a\\nb")
Out[49]:[]