Python 正则表达式

1、常见正则表达式符号和特殊字符

这里写图片描述
这里写图片描述

2、常见正则表达式属性

这里写图片描述
这里写图片描述

3、re模块函数的使用

match() 方法匹配字符串

# 需要导入 re 模块
>>> import re
>>> m = re.match('fo', 'foo')
>>> m
<_sre.SRE_Match object; span=(0, 2), match='fo'>
>>> m.group()
'fo'
# 如果匹配失败, match() 返回 None; 此时再调用 m.group() 就会抛出 AttributeError 异常
# 可以用 if m is not None: m.group() 先判断是否为 None
>>> m = re.match('fo', 'ofo ofo ')
>>> m
>>> m.group()
Traceback (most recent call last):
  File "<pyshell#8>", line 1, in <module>
    m.group()
AttributeError: 'NoneType' object has no attribute 'group'
# match() 只从字符串的起始位置开始匹配; 只要开头符合模式, 即使字符串更长, 匹配也能成功
>>> m = re.match('fo', 'fo 0000, lll')
>>> m.group()
'fo'

search() 方法匹配字符串

# search() 会从左向右扫描整个字符串, 返回模式第一次出现位置的匹配对象
>>> m = re.search('fo', 'we foo iiie foo')
>>> m
<_sre.SRE_Match object; span=(3, 5), match='fo'>

匹配多个模式(用 | 择一匹配)

>>> bt = 'bat|bct|bbt'
>>> s = re.search(bt, 'foo bat bct')
>>> s.group()
'bat'

[]字符集

>>> m = re.match('[cr][23][dp][o2]', 'c3po')
>>> m.group()
'c3po'

用 group(n) 获取单个子组, 用 groups() 获得一个包含所有匹配子组的元组

>>> m = re.match(r'\w\w\w-\d\d\d', 'abc-123')
>>> m.group()
'abc-123'
>>> m.group(1)
Traceback (most recent call last):
  File "<pyshell#49>", line 1, in <module>
    m.group(1)
IndexError: no such group
>>> m.group(0)
'abc-123'
>>> m = re.match(r'(\w\w\w)-(\d\d\d)', 'abc-123')
# m.group() 和 m.group(0) 的效果是一样的 
>>> m.group()
'abc-123'
>>> m.group(1)
'abc'
>>> m.group(0)
'abc-123'
>>> m.group(2)
'123'
>>> m = re.match('ab', 'ab')
>>> m.group()
'ab'
>>> m.groups()
()
>>> 
>>> m = re.match('(ab)', 'ab')
>>> m.group()
'ab'
>>> m.group(1)
'ab'
>>> m.group(0)
'ab'
>>> m.group(2)
Traceback (most recent call last):
  File "<pyshell#64>", line 1, in <module>
    m.group(2)
IndexError: no such group
>>> m.groups()
('ab',)
>>> m = re.match('(a)(b)', 'ab')
>>> m.group()
'ab'
>>> m.group(1)
'a'
>>> m.group(0)
'ab'
>>> m.group(2)
'b'
>>> m.groups()
('a', 'b')

# 子组按左括号出现的顺序从左到右编号, 因此嵌套时外层子组的编号排在内层子组之前
>>> m = re.match('(a(b))', 'ab')
>>> m.group()
'ab'
>>> m.group(0)
'ab'
>>> m.group(1)
'ab'
>>> m.group(2)
'b'
>>> m.groups()
('ab', 'b')
>>> m = re.match('(a(b(c)))', 'abc')
>>> m.group()
'abc'
>>> m.group(0)
'abc'
>>> m.group(1)
'abc'
>>> m.group(2)
'bc'
>>> m.group(3)
'c'
>>> m.groups()
('abc', 'bc', 'c')
>>> m = re.match('a(b(c(d)))', 'abcd')
>>> m.group()
'abcd'
>>> m.group(0)
'abcd'
>>> m.group(1)
'bcd'
>>> m.group(2)
'cd'
>>> m.groups()
('bcd', 'cd', 'd')

匹配字符串的起始和结尾以及单词边界

这里写图片描述

这里写图片描述

# ^ 用来匹配以 pattern 作为起始的字符串
>>> m = re.match('^from', 'from')
>>> m.group()
'from'
>>> m = re.match('^from','fromasdads')
>>> m.group()
'from'
>>> m = re.match('^from', 'd from')
>>> m.group()
Traceback (most recent call last):
  File "<pyshell#129>", line 1, in <module>
    m.group()
AttributeError: 'NoneType' object has no attribute 'group'

# $ 用来匹配以 pattern 作为结尾的字符串。注意 match() 只从字符串开头开始匹配,
# 所以只有当字符串从开头到结尾恰好是 pattern 时才能成功, 否则返回 None 并在调用 group() 时报错
>>> m = re.match('/bin/tcsh$', '/bin/tcsh')
>>> m.group()
'/bin/tcsh'
>>> m = re.match('ad$', 'ad adadad')
>>> m.group()
Traceback (most recent call last):
  File "<pyshell#122>", line 1, in <module>
    m.group()
AttributeError: 'NoneType' object has no attribute 'group'
>>> m = re.group('ad$', 'ad ad ad')
Traceback (most recent call last):
  File "<pyshell#123>", line 1, in <module>
    m = re.group('ad$', 'ad ad ad')
AttributeError: module 're' has no attribute 'group'
>>> m = re.match('ad$', 'ad ad ad')
>>> m.group()
Traceback (most recent call last):
  File "<pyshell#125>", line 1, in <module>
    m.group()
AttributeError: 'NoneType' object has no attribute 'group'
>>> m = re.match('bin$', 'aa bin')
>>> m.group()
Traceback (most recent call last):
  File "<pyshell#131>", line 1, in <module>
    m.group()
AttributeError: 'NoneType' object has no attribute 'group'
# 后来才发现要用 search() 函数: 它会扫描整个字符串, 所以能匹配到位于结尾的 pattern
>>> s = re.search('/bin/bac$', 'dada/bin/bac')
>>> s.group()
'/bin/bac'
# ^ 和 $ 组合,pattern 匹配单独构成的字符串
>>> m = re.match('^bin$', 'bin')
>>> m.group()
'bin'
>>> m = re.match('^bin$', 'bin bin')
>>> m.group()
Traceback (most recent call last):
  File "<pyshell#138>", line 1, in <module>
    m.group()
AttributeError: 'NoneType' object has no attribute 'group'
>>> m = re.match('^the$', 'the')
>>> m.group()
'the'

# \b 匹配单词边界(单词的开头或结尾)   \B 匹配不在单词边界上的位置
>>> m = re.match(r'\bthe', 'thr the thr')
>>> m.group()
Traceback (most recent call last):
  File "<pyshell#151>", line 1, in <module>
    m.group()
AttributeError: 'NoneType' object has no attribute 'group'
>>> m = re.match(r'\Bthe', 'thr the thr')
>>> m.group()
Traceback (most recent call last):
  File "<pyshell#153>", line 1, in <module>
    m.group()
AttributeError: 'NoneType' object has no attribute 'group'
>>> m = re.match(r'\Bthe', 'thenthethe')
>>> m.group()
Traceback (most recent call last):
  File "<pyshell#155>", line 1, in <module>
    m.group()
AttributeError: 'NoneType' object has no attribute 'group'
>>> m = re.group(r'\Bthe', 'thithe')
Traceback (most recent call last):
  File "<pyshell#156>", line 1, in <module>
    m = re.group(r'\Bthe', 'thithe')
AttributeError: module 're' has no attribute 'group'
>>> m = re.match(r'\Bthe', 'thithe')
>>> m.group()
Traceback (most recent call last):
  File "<pyshell#158>", line 1, in <module>
    m.group()
AttributeError: 'NoneType' object has no attribute 'group'
>>> m = re.match(r'\Bthe', 'banbinthe')
>>> m.group()
Traceback (most recent call last):
  File "<pyshell#161>", line 1, in <module>
    m.group()
AttributeError: 'NoneType' object has no attribute 'group'
>>> m = re.match(r'\Bthe', 'the theathe')
>>> s = re.search(r'\Bthe', 'the thathe')
>>> s.group()
'the'
>>> s = re.search('/bin/bac$', 'dada/bin/bac')
>>> s.group()
'/bin/bac'
>>> m = re.match(r'\Bthe', 'the theathe')
>>> s = re.search(r'\Bthe', 'the thathe')
>>> s.group()
'the'
>>> s = re.search(r'\bthe', 'thr the tha')
>>> s.group()
'the'

findall() 与 finditer()

# findall()  查询字符串中符合 pattern 的全部非重叠出现情况, 返回列表
>>> re.findall('car', 'carry the barcardi to the car')
['car', 'car', 'car']

# finditer()  返回一个迭代器, 逐个产生匹配对象, 比 findall() 更节省内存
>>> for i in re.finditer(r'\d+', '2 2663 887 322'):
    print(i.group())    
2
2663
887
322

sub() 和 subn()

# subn() 和 sub() 几乎一样, 但 subn() 返回一个元组: (替换后的字符串, 替换的总数)
>>> re.sub('[abc]', '789', 'aadhkahdkankdkdnlanl')
'789789dhk789hdk789nkdkdnl789nl'
>>> re.subn('[abc]', '789', 'adadadaddq3 euduashbfiau')
('789d789d789d789ddq3 eudu789sh789fi789u', 7)

split() 分隔字符串

>>> re.split(':', 'str1:str2:str3')
['str1', 'str2', 'str3']
  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值