文章目录
PYPI:https://pypi.org/project/regex/
代码:https://bitbucket.org/mrabarnett/mrab-regex/src/hg/
安装:
(base) $ pip install regex
regex支持Python 2.5+和Python 3.1+
Examples
>>> regex.match(r'(?(?=\d)\d+|\w+)', '123abc')
<regex.Match object; span=(0, 3), match='123'>
>>> regex.match(r'(?(?=\d)\d+|\w+)', 'abc123')
<regex.Match object; span=(0, 6), match='abc123'>
>>> print(regex.match(r'(?:(?=\d)\d+\b|\w+)', '123abc'))
<regex.Match object; span=(0, 6), match='123abc'>
>>> print(regex.match(r'(?(?=\d)\d+\b|\w+)', '123abc'))
None
Added POSIX matching (leftmost longest)
>>> # Normal matching.
>>> regex.search(r'Mr|Mrs', 'Mrs')
<regex.Match object; span=(0, 2), match='Mr'>
>>> regex.search(r'one(self)?(selfsufficient)?', 'oneselfsufficient')
<regex.Match object; span=(0, 7), match='oneself'>
>>> # POSIX matching.
>>> regex.search(r'(?p)Mr|Mrs', 'Mrs')
<regex.Match object; span=(0, 3), match='Mrs'>
>>> regex.search(r'(?p)one(self)?(selfsufficient)?', 'oneselfsufficient')
<regex.Match object; span=(0, 17), match='oneselfsufficient'>
>>> m = regex.search(r'(\w\w\K\w\w\w)', 'abcdef')
>>> m[0]
'cde'
>>> m[1]
'abcde'
>>>
>>> m = regex.search(r'(?r)(\w\w\K\w\w\w)', 'abcdef')
>>> m[0]
'bc'
>>> m[1]
'bcdef'
>>> m = regex.match(r"(\w)+", "abc")
>>> m.expandf("{1}")
'c'
>>> m.expandf("{1[0]} {1[1]} {1[2]}")
'a b c'
>>> m.expandf("{1[-1]} {1[-2]} {1[-3]}")
'c b a'
>>>
>>> m = regex.match(r"(?P<letter>\w)+", "abc")
>>> m.expandf("{letter}")
'c'
>>> m.expandf("{letter[0]} {letter[1]} {letter[2]}")
'a b c'
>>> m.expandf("{letter[-1]} {letter[-2]} {letter[-3]}")
'c b a'
Added partial matches
>>> pattern = regex.compile(r'\d{4}')
>>> # Initially, nothing has been entered:
>>> print(pattern.fullmatch('', partial=True))
<regex.Match object; span=(0, 0), match='', partial=True>
>>> # An empty string is OK, but it's only a partial match.
>>> # The user enters a letter:
>>> print(pattern.fullmatch('a', partial=True))
None
>>> # It'll never match.
>>> # The user deletes that and enters a digit:
>>> print(pattern.fullmatch('1', partial=True))
<regex.Match object; span=(0, 1), match='1', partial=True>
>>> # It matches this far, but it's only a partial match.
>>> # The user enters 2 more digits:
>>> print(pattern.fullmatch('123', partial=True))
<regex.Match object; span=(0, 3), match='123', partial=True>
>>> # It matches this far, but it's only a partial match.
>>> # The user enters another digit:
>>> print(pattern.fullmatch('1234', partial=True))
<regex.Match object; span=(0, 4), match='1234'>
>>> # It's a complete match.
>>> # If the user enters another digit:
>>> print(pattern.fullmatch('12345', partial=True))
None
>>> # It's no longer a match.
>>> # This is a partial match:
>>> pattern.match('123', partial=True).partial
True
>>> # This is a complete match:
>>> pattern.match('1233', partial=True).partial
False
regex.sub
# Python 3.7 and later
>>> regex.sub('.*', 'x', 'test')
'xx'
>>> regex.sub('.*?', '|', 'test')
'|||||||||'
# Python 3.6 and earlier
>>> regex.sub('(?V0).*', 'x', 'test')
'x'
>>> regex.sub('(?V1).*', 'x', 'test')
'xx'
>>> regex.sub('(?V0).*?', '|', 'test')
'|t|e|s|t|'
>>> regex.sub('(?V1).*?', '|', 'test')
'|||||||||'
match
>>> m = regex.match(r"(?:(?P<word>\w+) (?P<digits>\d+)\n)+", "one 1\ntwo 2\nthree 3\n")
>
>>> m.groupdict()
{'word': 'three', 'digits': '3'}
>>> m.captures("word")
['one', 'two', 'three']
>>> m.captures("digits")
['1', '2', '3']
>>> m.capturesdict()
{'word': ['one', 'two', 'three'], 'digits': ['1', '2', '3']}
captures
>>> # With optional groups:
>>>
>>> # Both groups capture, the second capture 'overwriting' the first.
>>> m = regex.match(r"(?P<item>\w+)? or (?P<item>\w+)?", "first or second")
>>> m.group("item")
'second'
>>> m.captures("item")
['first', 'second']
>>> # Only the second group captures.
>>> m = regex.match(r"(?P<item>\w+)? or (?P<item>\w+)?", " or second")
>>> m.group("item")
'second'
>>> m.captures("item")
['second']
>>> # Only the first group captures.
>>> m = regex.match(r"(?P<item>\w+)? or (?P<item>\w+)?", "first or ")
>>> m.group("item")
'first'
>>> m.captures("item")
['first']
>>> # With mandatory groups:
>>> # Both groups capture, the second capture 'overwriting' the first.
>>> m = regex.match(r"(?P<item>\w*) or (?P<item>\w*)?", "first or second")
>>> m.group("item")
'second'
>>> m.captures("item")
['first', 'second']
>>> # Again, both groups capture, the second capture 'overwriting' the first.
>>> m = regex.match(r"(?P<item>\w*) or (?P<item>\w*)", " or second")
>>> m.group("item")
'second'
>>> m.captures("item")
['', 'second']
>>> # And yet again, both groups capture, the second capture 'overwriting' the first.
>>> m = regex.match(r"(?P<item>\w*) or (?P<item>\w*)", "first or ")
>>> m.group("item")
''
>>> m.captures("item")
['first', '']
fullmatch
>>> print(regex.fullmatch(r"abc", "abc").span())
(0, 3)
>>> print(regex.fullmatch(r"abc", "abcx"))
None
>>> print(regex.fullmatch(r"abc", "abcx", endpos=3).span())
(0, 3)
>>> print(regex.fullmatch(r"abc", "xabcy", pos=1, endpos=4).span())
(1, 4)
>>> regex.match(r"a.*?", "abcd").group(0)
'a'
>>> regex.fullmatch(r"a.*?", "abcd").group(0)
'abcd'
subf and subfn
>>> regex.subf(r"(\w+) (\w+)", "{0} => {2} {1}", "foo bar")
'foo bar => bar foo'
>>> regex.subf(r"(?P<word1>\w+) (?P<word2>\w+)", "{word2} {word1}", "foo bar")
'bar foo'
Added expandf to match object
>>> m = regex.match(r"(\w+) (\w+)", "foo bar")
>>> m.expandf("{0} => {2} {1}")
'foo bar => bar foo'
>>>
>>> m = regex.match(r"(?P<word1>\w+) (?P<word2>\w+)", "foo bar")
>>> m.expandf("{word2} {word1}")
'bar foo'
>>> m = regex.search(r"\w+", "Hello world")
>>> print(m.group())
Hello
>>> print(m.string)
Hello world
>>> m.detach_string()
>>> print(m.group())
Hello
>>> print(m.string)
None
>>> regex.match(r"(Tarzan|Jane) loves (?1)", "Tarzan loves Jane").groups()
('Tarzan',)
>>> regex.match(r"(Tarzan|Jane) loves (?1)", "Jane loves Tarzan").groups()
('Jane',)
>>> m = regex.search(r"(\w)(?:(?R)|(\w?))\1", "kayak")
>>> m.group(0, 1, 2)
('kayak', 'k', None)
>>> regex.match(r"(?iV1)strasse", "stra\N{LATIN SMALL LETTER SHARP S}e").span()
(0, 6)
>>> regex.match(r"(?iV1)stra\N{LATIN SMALL LETTER SHARP S}e", "STRASSE").span()
(0, 7)
>>> # A 'raw' fuzzy match:
>>> regex.fullmatch(r"(?:cats|cat){e<=1}", "cat").fuzzy_counts
(0, 0, 1)
>>> # 0 substitutions, 0 insertions, 1 deletion.
>>> # A better match might be possible if the ENHANCEMATCH flag used:
>>> regex.fullmatch(r"(?e)(?:cats|cat){e<=1}", "cat").fuzzy_counts
(0, 0, 0)
>>> # 0 substitutions, 0 insertions, 0 deletions.
>>> m = regex.search('(fuu){i<=2,d<=2,e<=5}', 'anaconda foo bar')
>>> m
<regex.Match object; span=(7, 10), match='a f', fuzzy_counts=(0, 2, 2)>
>>> m.fuzzy_changes
([], [7, 8], [10, 11])
>>> p = regex.compile(r"first|second|third|fourth|fifth")
>>> option_set = ["first", "second", "third", "fourth", "fifth"]
>>> p = regex.compile(r"\L<options>", options=option_set)
>>> print(p.named_lists)
# Python 3
{'options': frozenset({'fifth', 'first', 'fourth', 'second', 'third'})}
# Python 2
{'options': frozenset(['fifth', 'fourth', 'second', 'third', 'first'])}
>>> option_set = ["first", "second", "third", "fourth", "fifth"]
>>> p = regex.compile(r"\L<options>", options=option_set, other_options=[])
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "C:\Python37\lib\site-packages\regex\regex.py", line 348, in compile
return _compile(pattern, flags, ignore_unused, kwargs)
File "C:\Python37\lib\site-packages\regex\regex.py", line 585, in _compile
raise ValueError('unused keyword argument {!a}'.format(any_one))
ValueError: unused keyword argument 'other_options'
>>> p = regex.compile(r"\L<options>", options=option_set, other_options=[], ignore_unused=True)
>>> m = regex.search(r"(\w{3})+", "123456789")
>>> m.group(1)
'789'
>>> m.captures(1)
['123', '456', '789']
>>> m.start(1)
6
>>> m.starts(1)
[0, 3, 6]
>>> m.end(1)
9
>>> m.ends(1)
[3, 6, 9]
>>> m.span(1)
(6, 9)
>>> m.spans(1)
[(0, 3), (3, 6), (6, 9)]
>>> m = regex.search(r"(?P<before>.*?)(?P<num>\d+)(?P<after>.*)", "pqr123stu")
>>> print(m["before"])
pqr
>>> print(len(m))
4
>>> print(m[:])
('pqr123stu', 'pqr', '123', 'stu')
findall
>>> regex.findall(r".", "abc")
['a', 'b', 'c']
>>> regex.findall(r"(?r).", "abc")
['c', 'b', 'a']
>>> regex.findall(r"..", "abcde")
['ab', 'cd']
>>> regex.findall(r"(?r)..", "abcde")
['de', 'bc']
Branch reset
>>> regex.match(r"(?|(first)|(second))", "first").groups()
('first',)
>>> regex.match(r"(?|(first)|(second))", "second").groups()
('second',)
\p{han} 可以匹配汉字, \p{Latin} 可以匹配拉丁字母
参考
- regex
https://www.cnblogs.com/animalize/p/4949219.html - re 模块
https://docs.python.org/3/library/re.html - 熊清亮:Python 正则表达式引擎 Regex vs RE
https://seealso.cn/post/python-regex-vs-re-regex-engine/