(?...)
This is an extension notation (a '?' following a '(' is not meaningful otherwise). The first character after the '?' determines what the meaning and further syntax of the construct is. Extensions usually do not create a new group; (?P<name>...) is the only exception to this rule. Following are the currently supported extensions.
(?:...)
import re
def groups(m):
    """Print the whole match and the captured groups of a match object.

    m is the value returned by re.match() / re.search(). It is None when
    the pattern did not match; in that case placeholders are printed
    instead of calling m.groups() on None (which raises AttributeError).
    """
    if m is not None:
        whole = "m.group() == %s" % m.group()
        # str() is required: m.groups() is a tuple, and "%s" % tuple would
        # try to unpack it as format arguments.
        captured = " ,m.groups() == %s" % str(m.groups())
    else:
        whole = "m.group() == None."
        captured = " ,m.groups() == None."
    # One print call with one argument keeps both parts on a single line
    # and behaves the same whether print is a statement or a function.
    print(whole + captured)
#(?...)
#(?iLmsux)
# (?:...): non-capturing group. [abcd] takes part in the match but only
# (color) is reported by m.groups().
# Raw string literal, consistent with the other patterns in this file.
m = re.match(r"(?:[abcd])(color)", "acolor")
groups(m)
>>>
m.group() == acolor ,m.groups() == ('color',)
Similar to regular parentheses, but the substring matched by the group is accessible via the symbolic group name name. Group names must be valid Python identifiers, and each group name must be defined only once within a regular expression. A symbolic group is also a numbered group, just as if the group were not named.
# (?P<name>...): the group named "id" captures the digits before the dot.
m = re.compile(r"(?P<id>\d+)\.\d+").match("324.322")
groups(m)
>>>
m.group() == 324.322 ,m.groups() == ('324',)
# The text captured by group "id" must repeat after the dot. It can be
# referenced by name with (?P=id) ...
m = re.compile(r"(?P<id>\d+)\.(?P=id)").match("324.324")
groups(m)
# ... or by number with \1, because a named group is also a numbered group.
m = re.compile(r"(?P<id>\d+)\.\1").match("324.324")
groups(m)
>>>
m.group() == 324.324 ,m.groups() == ('324',)
m.group() == 324.324 ,m.groups() == ('324',)
(?#...)
A comment; the contents of the parentheses are simply ignored.
# (?#...): an inline comment; its contents are ignored, so only
# \d+\.\d+ takes part in the match and no group is created.
m = re.compile(r"(?#I am invisible)\d+\.\d+").match("324.324")
groups(m)
>>>
m.group() == 324.324 ,m.groups() == ()
(?=...)
# (?=...): lookahead assertion. "999" must follow the dot, but it is not
# consumed, so it is not part of m.group().
m = re.compile(r"\d+\.(?=999)").match("324.999")
groups(m)
>>>
m.group() == 324. ,m.groups() == ()
# (?!...): negative lookahead assertion. The match succeeds only because
# the dot is NOT followed by "999".
m = re.compile(r"\d+\.(?!999)").match("324.324")
groups(m)
>>>
m.group() == 324. ,m.groups() == ()
Matches if the current position in the string is preceded by a match for ... that ends at the current position. This is called a positive lookbehind assertion. (?<=abc)def will find a match in abcdef, since the lookbehind will back up 3 characters and check if the contained pattern matches. The contained pattern must only match strings of some fixed length, meaning that abc or a|b are allowed, but a* and a{3,4} are not. Note that patterns which start with positive lookbehind assertions will not match at the beginning of the string being searched; you will most likely want to use the search() function rather than the match() function:
# (?<=...): positive lookbehind assertion. The dot must be preceded by
# "324"; search() is used because the match starts in the middle of the
# string.
m = re.compile(r"(?<=324)\.\d+").search("324.324")
groups(m)
# Alternatives are allowed inside a lookbehind when they all have the
# same fixed length (here 3 characters).
m = re.compile(r"(?<=324|234)\.\d+").search("234.324")
groups(m)
>>>
m.group() == .324 ,m.groups() == ()
m.group() == .324 ,m.groups() == ()
# (?<!...): negative lookbehind assertion. The dot must NOT be preceded
# by "324"; here it is preceded by "32", so the search succeeds.
m = re.compile(r"(?<!324)\.\d+").search("32.324")
groups(m)
>>>
m.group() == .324 ,m.groups() == ()
(?(id/name)yes-pattern|no-pattern)
Will try to match with yes-pattern if the group with given id or name exists, and with no-pattern if it doesn’t. no-pattern is optional and can be omitted. For example, (<)?(\w+@\w+(?:\.\w+)+)(?(1)>) is a poor email matching pattern, which will match with '<user@host.com>' as well as 'user@host.com', but not with '<user@host.com'.
# (?(id/name)yes-pattern|no-pattern): because group "id" took part in the
# match, the yes-pattern \d must also match at the end.
m = re.compile(r"(?P<id>\d)\w+(?(id)\d)").search("1abc1")
groups(m)
# Same idea with a capturing yes-pattern: the lazy \w+? leaves the
# trailing digits to be captured by (\d+).
m = re.compile(r"(?P<id>\d)\w+?(?(id)(\d+))").search("4abc632")
groups(m)
>>>
m.group() == 1abc1 ,m.groups() == ('1',)
m.group() == 4abc632 ,m.groups() == ('4', '632')
REF:Python 2.7.8 documentation