import re;
# Demo: literal patterns vs. word-boundary patterns on the same sentence.
txt = "this is a good thing, that is a bad thing"

# Plain literals match anywhere in the text, so "a" is also found inside
# longer words such as "that" and "bad".
rule1 = ["this", "that", "a"]
p = [re.compile(pattern) for pattern in rule1]
for index, compiled in enumerate(p):
    print(index, compiled)
    for match in compiled.finditer(txt):
        # span() gives the (start, end) offsets of the match within txt.
        start, end = match.span()
        print(txt[start:end])

# "\\bthis\\b" (escaped backslashes) and r"\bthis\b" (raw string) are two
# spellings of the same pattern: the word "this" delimited by word boundaries.
rule2 = ["\\bthis\\b", r"\bthis\b"]
p = [re.compile(pattern) for pattern in rule2]
for index, compiled in enumerate(p):
    print(index, compiled)
    for match in compiled.finditer(txt):
        start, end = match.span()
        print(txt[start:end])
# Demo: quantifiers and character classes.
txt = "ababababab abab "

# "(ab)* "  - zero or more repetitions of "ab" followed by one space
# "[a-z]"   - any single lowercase ASCII letter
# "(ab){2}" - exactly two consecutive repetitions of "ab"
rule3 = ["(ab)* ", "[a-z]", "(ab){2}"]
p = [re.compile(pattern) for pattern in rule3]
for index, compiled in enumerate(p):
    print(index, compiled)
    # finditer yields non-overlapping matches, scanning left to right.
    for match in compiled.finditer(txt):
        start, end = match.span()
        print(txt[start:end])
# Demo: greedy vs. non-greedy (lazy) quantifiers.
txt = "<H1>title<//H1>"

# "<.*>"  is greedy: ".*" takes as many characters as possible, so the single
#         match is the whole string "<H1>title<//H1>".
# "<.*?>" is non-greedy: ".*?" takes as few characters as possible, so the
#         matches are "<H1>" and "<//H1>".
rule4 = ["<.*>", "<.*?>"]
p = [re.compile(pattern) for pattern in rule4]
for index, compiled in enumerate(p):
    print(index, compiled)
    for match in compiled.finditer(txt):
        start, end = match.span()
        print(txt[start:end])
import re;
# Demo: Pattern.findall and Pattern.sub with a callable replacement.
text = "Goods is a handsome boy, he is cool, good and so on..."

# \w matches a single word character (letter, digit or underscore), so this
# pattern matches every run of word characters that contains "oo".
regex = re.compile(r"\w*oo\w*")

# findall returns all non-overlapping matches as a list of strings.
print(regex.findall(text))

# Signature: Pattern.sub(repl, string, count=0). When repl is a callable it is
# called with each match object and must return the replacement text;
# group(0) is the entire matched substring. Here every word containing "oo"
# is wrapped in square brackets.
print(regex.sub(lambda match: "[" + match.group(0) + "]", text))
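# Scraped page title: "正则表达式入门" (An Introduction to Regular Expressions)
# Scraped page footer: latest recommended article published on 2022-05-11 15:42:53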