python 正则表达式

内容来自 Text Processing in Python

 

re_show.py
import re
def re_show(pat, s):
    """Print `s` with every match of `pat` wrapped in curly braces.

    The pattern is compiled with re.M, so `^` and `$` match at the start
    and end of each line of `s`, not only at the ends of the whole string.
    Trailing whitespace is stripped from `s` before matching.

    Args:
        pat: regular expression pattern (string).
        s: text to search.

    Returns:
        None. The highlighted text is written to standard output, followed
        by a blank line.
    """
    # Group 0 is the entire match; the raw string keeps the backslash intact.
    highlighted = re.compile(pat, re.M).sub(r"{\g<0>}", s.rstrip())
    # A single-argument print call emits the same bytes on Python 2 and 3:
    # the text, a space, a newline, and then print's own newline.
    print(highlighted + " \n")

s = '''Mary had a little lamb
And everywhere that Mary
went, the lamb was sure
to go'''

re_show('a', s)
M{a}ry h{a}d {a} little l{a}mb
And everywhere th{a}t M{a}ry
went, the l{a}mb w{a}s sure
to go

 

re_new.py
import re
def re_new(pat, rep, s):
    """Print `s` with each match of `pat` replaced by `rep` in curly braces.

    `rep` is a re.sub replacement template, so it may contain group
    references; the braces are added around the expanded replacement.

    Args:
        pat: regular expression pattern (string).
        rep: replacement template passed to re.sub.
        s: text to search.

    Returns:
        None. The substituted text is written to standard output.
    """
    # A single-argument print call behaves the same on Python 2 and 3.
    print(re.sub(pat, '{' + rep + '}', s))

 

>>> from re_show import re_show
>>> s = '''Special characters must be escaped.*'''
>>> re_show(r'.*', s)

>>> re_show(r'\.\*', s)

>>> re_show('\\\\', r'Python \ escaped \ pattern')

>>> re_show(r'\\', r'Regex \ escaped \ pattern')

>>> re_show(r'^Mary', s)

>>> re_show(r'Mary$', s)

>>> re_show(r'$','Mary had a little lamb')

>>> re_show(r'.a', s)

>>> re_show(r'(Mary)( )(had)', s)

>>> re_show(r'\(.*\)', 'spam (and eggs)')

>>> re_show(r'[a-z]a', s)

>>> re_show(r'[^a-z]a', s)

>>> s2 = 'The pet store sold cats, dogs, and birds.'
>>> re_show(r'cat|dog|bird', s2)

>>> s3 = '=first first= # =second second= # =first= # =second='
>>> re_show(r'=first|second=', s3)

>>> re_show(r'(=)(first)|(second)(=)', s3)

>>> re_show(r'=(first|second)=', s3)

>>> s = '''Match with zero in the middle: @@
... Subexpression occurs, but...: @=!=ABC@
... Lots of occurrences: @=!==!==!==!==!=@
... Must repeat entire pattern: @=!==!=!==!=@'''
>>> re_show(r'@(=!=)*@', s)

>>> s = '''AAAD ... ABBBBCD ... BBBCD ... ABCCD ... AAABBBC'''
>>> re_show(r'A+B*C?D', s)

>>> s2 = '''aaaaa bbbbb ccccc ... aaa bbb ccc ... aaaaa bbbbbbbbbbbbbb ccccc'''
>>> re_show(r'a{5} b{,6} c{4,8}', s2)

>>> re_show(r'a+ b{3,} c?', s2)

>>> re_show(r'a{5} b{6,} c{4,8}', s2)

>>> s2 = '''jkl abc xyz
... jkl xyz abc
... jkl abc abc
... jkl xyz xyz
... '''
>>> re_show(r'(abc|xyz) \1', s2)

>>> re_show(r'(abc|xyz) (abc|xyz)', s2)

>>> re_show(r'(?P<let3>abc|xyz) (?P=let3)', s2)

>>> s2 = '''-- I want to match the words that start
... -- with 'th' and end with 's'.
... this
... thus
... thistle
... this line matches too much
... '''
>>> re_show(r'th.*s', s2)

>>> s2 = '''-- I want to match the words that start
... -- with 'th' and end with 's'.
... this
... thus
... thistle
... this line matches too much
... '''
>>> re_show(r'th[^s]*.', s2)

>>> re.search(r'(?Li)cat','The Cat in the Hat').start()
>>> re.search(r'cat','The Cat in the Hat',re.L|re.I).start()

 

>>> s = '''-- I want to match the words that start
... -- with 'th' and end with 's'.
... this line matches just right
... this # thus # thistle'''

 

>>> re_show(r'th.*s',s)

 

>>> re_show(r'th.*?s',s)

 

>>> s = 'A-xyz-37 # B:abcd:142 # C-wxy-66 # D-qrs-93'

>>> re_new(r'([A-Z])(?:-[a-z]{3}-)([0-9]*)', r'\1\2', s)

 

>>> re_new(r'([A-Z])(-[a-z]{3}-)([0-9]*)', r'\1\2', s)

 

>>> s = "A-xyz-37 # B:abcd:142 # C-wxy-66 # D-qrs-93"

>>> re_new(r'(?P<prefix>[A-Z])(-[a-z]{3}-)(?P<id>[0-9]*)',  r'\g<prefix>\g<id>',s)

 

>>> s = 'A-xyz37 # B-ab6142 # C-Wxy66 # D-qrs93'

>>> # Assert that three lowercase letters occur after CAP-DASH

>>> re_new(r'([A-Z]-)(?=[a-z]{3})([\w\d]*)', r'\2\1', s)

 

>>> # Assert three lowercase letters do NOT occur after CAP-DASH

>>> re_new(r'([A-Z]-)(?![a-z]{3})([\w\d]*)', r'\2\1', s)

 

>>> re_show('Man', 'Manhandled by The Man')

>>> re_show('(?<!The )Man', 'Manhandled by The Man')

 

 

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值