Python - regex 模块



PYPI:https://pypi.org/project/regex/

代码:https://bitbucket.org/mrabarnett/mrab-regex/src/hg/


安装:

(base) $ pip install regex

regex支持Python 2.5+和Python 3.1+


Examples

>>> regex.match(r'(?(?=\d)\d+|\w+)', '123abc')
<regex.Match object; span=(0, 3), match='123'>

>>> regex.match(r'(?(?=\d)\d+|\w+)', 'abc123')
<regex.Match object; span=(0, 6), match='abc123'>

>>> print(regex.match(r'(?:(?=\d)\d+\b|\w+)', '123abc'))
<regex.Match object; span=(0, 6), match='123abc'>

>>> print(regex.match(r'(?(?=\d)\d+\b|\w+)', '123abc'))
None


Added POSIX matching (leftmost longest)


>>> # Normal matching.
>>> regex.search(r'Mr|Mrs', 'Mrs')
<regex.Match object; span=(0, 2), match='Mr'>

>>> regex.search(r'one(self)?(selfsufficient)?', 'oneselfsufficient')
<regex.Match object; span=(0, 7), match='oneself'>

>>> # POSIX matching.
>>> regex.search(r'(?p)Mr|Mrs', 'Mrs')
<regex.Match object; span=(0, 3), match='Mrs'>

>>> regex.search(r'(?p)one(self)?(selfsufficient)?', 'oneselfsufficient')
<regex.Match object; span=(0, 17), match='oneselfsufficient'>

>>> m = regex.search(r'(\w\w\K\w\w\w)', 'abcdef')
>>> m[0]
'cde'
>>> m[1]
'abcde'
>>>
>>> m = regex.search(r'(?r)(\w\w\K\w\w\w)', 'abcdef')
>>> m[0]
'bc'
>>> m[1]
'bcdef'

>>> m = regex.match(r"(\w)+", "abc")
>>> m.expandf("{1}")
'c'
>>> m.expandf("{1[0]} {1[1]} {1[2]}")
'a b c'
>>> m.expandf("{1[-1]} {1[-2]} {1[-3]}")
'c b a'
>>>
>>> m = regex.match(r"(?P<letter>\w)+", "abc")
>>> m.expandf("{letter}")
'c'
>>> m.expandf("{letter[0]} {letter[1]} {letter[2]}")
'a b c'
>>> m.expandf("{letter[-1]} {letter[-2]} {letter[-3]}")
'c b a'


Added partial matches


>>> pattern = regex.compile(r'\d{4}')

>>> # Initially, nothing has been entered:
>>> print(pattern.fullmatch('', partial=True))
<regex.Match object; span=(0, 0), match='', partial=True>

>>> # An empty string is OK, but it's only a partial match.
>>> # The user enters a letter:
>>> print(pattern.fullmatch('a', partial=True))
None
>>> # It'll never match.


>>> # The user deletes that and enters a digit:
>>> print(pattern.fullmatch('1', partial=True))
<regex.Match object; span=(0, 1), match='1', partial=True>
>>> # It matches this far, but it's only a partial match.


>>> # The user enters 2 more digits:
>>> print(pattern.fullmatch('123', partial=True))
<regex.Match object; span=(0, 3), match='123', partial=True>
>>> # It matches this far, but it's only a partial match.

>>> # The user enters another digit:
>>> print(pattern.fullmatch('1234', partial=True))
<regex.Match object; span=(0, 4), match='1234'>
>>> # It's a complete match.


>>> # If the user enters another digit:
>>> print(pattern.fullmatch('12345', partial=True))
None
>>> # It's no longer a match.


>>> # This is a partial match:
>>> pattern.match('123', partial=True).partial
True


>>> # This is a complete match:
>>> pattern.match('1233', partial=True).partial
False


regex.sub

# Python 3.7 and later
>>> regex.sub('.*', 'x', 'test')
'xx'

>>> regex.sub('.*?', '|', 'test')
'|||||||||'


# Python 3.6 and earlier
>>> regex.sub('(?V0).*', 'x', 'test')
'x'

>>> regex.sub('(?V1).*', 'x', 'test')
'xx'

>>> regex.sub('(?V0).*?', '|', 'test')
'|t|e|s|t|'

>>> regex.sub('(?V1).*?', '|', 'test')
'|||||||||'


match

>>> m = regex.match(r"(?:(?P<word>\w+) (?P<digits>\d+)\n)+", "one 1\ntwo 2\nthree 3\n")
>>>
>>> m.groupdict()
{'word': 'three', 'digits': '3'}

>>> m.captures("word")
['one', 'two', 'three']

>>> m.captures("digits")
['1', '2', '3']

>>> m.capturesdict()
{'word': ['one', 'two', 'three'], 'digits': ['1', '2', '3']}



captures

>>> # With optional groups:
>>>
>>> # Both groups capture, the second capture 'overwriting' the first.
>>> m = regex.match(r"(?P<item>\w+)? or (?P<item>\w+)?", "first or second")

>>> m.group("item")
'second'


>>> m.captures("item")
['first', 'second']


>>> # Only the second group captures.
>>> m = regex.match(r"(?P<item>\w+)? or (?P<item>\w+)?", " or second")

>>> m.group("item")
'second'


>>> m.captures("item")
['second']


>>> # Only the first group captures.
>>> m = regex.match(r"(?P<item>\w+)? or (?P<item>\w+)?", "first or ")

>>> m.group("item")
'first'


>>> m.captures("item")
['first']


>>> # With mandatory groups:
>>> # Both groups capture, the second capture 'overwriting' the first.
>>> m = regex.match(r"(?P<item>\w*) or (?P<item>\w*)", "first or second")


>>> m.group("item")
'second'


>>> m.captures("item")
['first', 'second']


>>> # Again, both groups capture, the second capture 'overwriting' the first.
>>> m = regex.match(r"(?P<item>\w*) or (?P<item>\w*)", " or second")

>>> m.group("item")
'second'


>>> m.captures("item")
['', 'second']


>>> # And yet again, both groups capture, the second capture 'overwriting' the first.
>>> m = regex.match(r"(?P<item>\w*) or (?P<item>\w*)", "first or ")

>>> m.group("item")
''

>>> m.captures("item")
['first', '']


fullmatch

>>> print(regex.fullmatch(r"abc", "abc").span())
(0, 3)

>>> print(regex.fullmatch(r"abc", "abcx"))
None

>>> print(regex.fullmatch(r"abc", "abcx", endpos=3).span())
(0, 3)

>>> print(regex.fullmatch(r"abc", "xabcy", pos=1, endpos=4).span())
(1, 4)


>>> regex.match(r"a.*?", "abcd").group(0)
'a'


>>> regex.fullmatch(r"a.*?", "abcd").group(0)
'abcd'


subf and subfn

>>> regex.subf(r"(\w+) (\w+)", "{0} => {2} {1}", "foo bar")
'foo bar => bar foo'
>>> regex.subf(r"(?P<word1>\w+) (?P<word2>\w+)", "{word2} {word1}", "foo bar")
'bar foo'

Added expandf to match object

>>> m = regex.match(r"(\w+) (\w+)", "foo bar")
>>> m.expandf("{0} => {2} {1}")
'foo bar => bar foo'
>>>
>>> m = regex.match(r"(?P<word1>\w+) (?P<word2>\w+)", "foo bar")
>>> m.expandf("{word2} {word1}")
'bar foo'

 

>>> m = regex.search(r"\w+", "Hello world")
>>> print(m.group())
Hello

>>> print(m.string)
Hello world

>>> m.detach_string()

>>> print(m.group())
Hello

>>> print(m.string)
None

>>> regex.match(r"(Tarzan|Jane) loves (?1)", "Tarzan loves Jane").groups()
('Tarzan',)

>>> regex.match(r"(Tarzan|Jane) loves (?1)", "Jane loves Tarzan").groups()
('Jane',)

>>> m = regex.search(r"(\w)(?:(?R)|(\w?))\1", "kayak")

>>> m.group(0, 1, 2)
('kayak', 'k', None)


>>> regex.match(r"(?iV1)strasse", "stra\N{LATIN SMALL LETTER SHARP S}e").span()
(0, 6)

>>> regex.match(r"(?iV1)stra\N{LATIN SMALL LETTER SHARP S}e", "STRASSE").span()
(0, 7)



>>> # A 'raw' fuzzy match:
>>> regex.fullmatch(r"(?:cats|cat){e<=1}", "cat").fuzzy_counts
(0, 0, 1)

>>> # 0 substitutions, 0 insertions, 1 deletion.

>>> # A better match might be possible if the ENHANCEMATCH flag is used:
>>> regex.fullmatch(r"(?e)(?:cats|cat){e<=1}", "cat").fuzzy_counts
(0, 0, 0)

>>> # 0 substitutions, 0 insertions, 0 deletions.







>>> m = regex.search('(fuu){i<=2,d<=2,e<=5}', 'anaconda foo bar')

>>> m
<regex.Match object; span=(7, 10), match='a f', fuzzy_counts=(0, 2, 2)>

>>> m.fuzzy_changes
([], [7, 8], [10, 11])
 



>>> p = regex.compile(r"first|second|third|fourth|fifth")


>>> option_set = ["first", "second", "third", "fourth", "fifth"]

>>> p = regex.compile(r"\L<options>", options=option_set)

>>> print(p.named_lists)
# Python 3
{'options': frozenset({'fifth', 'first', 'fourth', 'second', 'third'})}

# Python 2
{'options': frozenset(['fifth', 'fourth', 'second', 'third', 'first'])}

>>> option_set = ["first", "second", "third", "fourth", "fifth"]

>>> p = regex.compile(r"\L<options>", options=option_set, other_options=[])
Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
  File "C:\Python37\lib\site-packages\regex\regex.py", line 348, in compile
    return _compile(pattern, flags, ignore_unused, kwargs)
  File "C:\Python37\lib\site-packages\regex\regex.py", line 585, in _compile
    raise ValueError('unused keyword argument {!a}'.format(any_one))
ValueError: unused keyword argument 'other_options'

>>> p = regex.compile(r"\L<options>", options=option_set, other_options=[], ignore_unused=True)




>>> m = regex.search(r"(\w{3})+", "123456789")
>>> m.group(1)
'789'
>>> m.captures(1)
['123', '456', '789']
>>> m.start(1)
6
>>> m.starts(1)
[0, 3, 6]
>>> m.end(1)
9
>>> m.ends(1)
[3, 6, 9]
>>> m.span(1)
(6, 9)
>>> m.spans(1)
[(0, 3), (3, 6), (6, 9)]
 



>>> m = regex.search(r"(?P<before>.*?)(?P<num>\d+)(?P<after>.*)", "pqr123stu")

>>> print(m["before"])
pqr

>>> print(len(m))
4

>>> print(m[:])
('pqr123stu', 'pqr', '123', 'stu')




findall


>>> regex.findall(r".", "abc")
['a', 'b', 'c']

>>> regex.findall(r"(?r).", "abc")
['c', 'b', 'a']


>>> regex.findall(r"..", "abcde")
['ab', 'cd']

>>> regex.findall(r"(?r)..", "abcde")
['de', 'bc']



Branch reset



>>> regex.match(r"(?|(first)|(second))", "first").groups()
('first',)

>>> regex.match(r"(?|(first)|(second))", "second").groups()
('second',)




\p{han} 可以匹配汉字, \p{Latin} 可以匹配拉丁字母


参考


  • 1
    点赞
  • 2
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
### 回答1:

在安装 FreeCAD 时遇到依赖问题,提示缺少 libboost-regex1.74.0-icu6,解决这个问题需要安装所需的依赖项。

libboost-regex1.74.0-icu6 是 Boost 库的一个子模块,用于提供正则表达式的功能。缺少这个库可能是由于系统中缺少对应版本的 Boost 库所导致的。

解决方法如下:

1. 首先,需要确认操作系统中是否已经安装了 Boost 库。可以使用以下命令进行确认:

   ```
   dpkg -l | grep libboost
   ```

   如果没有安装 Boost 库,请先安装对应版本的 Boost 库。可以使用以下命令进行安装:

   ```
   sudo apt-get install libboost-all-dev
   ```

2. 在安装 libboost-regex1.74.0-icu6 之前,需要添加一个 Ubuntu PPA(个人软件包存档)仓库,该仓库提供了最新的 FreeCAD 软件包及其依赖项。可以使用以下命令添加该仓库:

   ```
   sudo add-apt-repository ppa:freecad-maintainers/freecad-stable
   ```

3. 添加完仓库后,需要更新系统的软件包信息。可以使用以下命令进行更新:

   ```
   sudo apt-get update
   ```

4. 更新完软件包信息后,可以尝试安装 FreeCAD 以及所需的依赖项。可以使用以下命令进行安装:

   ```
   sudo apt-get install freecad
   ```

5. 安装完成后,可以使用以下命令启动 FreeCAD:

   ```
   freecad
   ```

以上是通过添加 PPA 仓库并安装对应的依赖项来解决 FreeCAD 安装中缺少 libboost-regex1.74.0-icu6 的问题。如果问题仍然存在,建议参考官方文档或社区支持寻求更多帮助。

### 回答2:

在安装FreeCAD时遇到依赖项错误 "libfreecad-python3-0.19 : 依赖: libboost-regex1.74.0-icu6" 可能意味着你的系统缺少 libboost-regex1.74.0-icu6 这个库文件。

解决这个问题的一种方法是手动安装缺少的依赖项。你可以按照以下步骤进行操作:

1. 打开终端并输入以下命令以更新系统软件包列表:

   ```
   sudo apt update
   ```

2. 安装 libboost-regex1.74.0-icu6 依赖项:

   ```
   sudo apt install libboost-regex1.74.0-icu6
   ```

3. 继续安装 FreeCAD:

   ```
   sudo apt install freecad
   ```

这样,系统应该能够正确安装 FreeCAD 软件包,并满足所有依赖项。

如果以上步骤仍然无法解决问题,你可以尝试从官方网站下载 FreeCAD 的最新版本,并根据其提供的安装说明进行安装。有时官方网站的版本可能与软件源中的版本不同,可能会解决某些依赖性问题。

总之,安装 FreeCAD 时遇到依赖项错误通常可以通过安装缺少的依赖项来解决。希望以上信息能够对你有所帮助。

### 回答3:

要解决在安装FreeCAD时出现的libfreecad-python3-0.19报错,依赖于libboost-regex1.74.0-icu6的问题,您可以按照以下步骤解决。

1. 首先,检查您的系统上是否已安装libboost-regex1.74.0-icu6。您可以使用以下命令进行检查:

   ```
   dpkg -l libboost-regex1.74.0-icu6
   ```

2. 如果返回结果显示未安装libboost-regex1.74.0-icu6,您可以尝试使用以下命令安装它:

   ```
   sudo apt-get install libboost-regex1.74.0-icu6
   ```

3. 如果上述命令无法安装libboost-regex1.74.0-icu6,可能是因为您的软件包索引过期。您可以通过运行以下命令更新软件包索引:

   ```
   sudo apt-get update
   ```

4. 更新软件包索引后,再次尝试安装libboost-regex1.74.0-icu6:

   ```
   sudo apt-get install libboost-regex1.74.0-icu6
   ```

5. 完成安装后,您可以再次尝试安装FreeCAD,应该不会再出现libfreecad-python3-0.19的依赖报错。

如果在安装过程中遇到其他问题,建议查看相关错误消息,以便进一步确定出错的原因,并尝试在开放源代码社区中寻求帮助或提问。

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值