正则表达式入门


import re;

# Sample sentence searched by the literal-pattern demonstration below.
txt = "this is a good thing, that is a bad thing"
# Plain literal patterns: each matches wherever that exact text occurs,
# including inside longer words (e.g. the "a" in "that" and in "bad").
rule1 = ["this", "that", "a"]
p = [re.compile(pattern) for pattern in rule1]

for i, r in enumerate(p):
    # Print the pattern's index and compiled form, then each match it finds.
    print(i, r)
    for m in r.finditer(txt):
        # m.group() is the whole matched substring, i.e. txt[start:end] for
        # m.span(); no span unpacking, so the loop index `i` stays intact.
        print(m.group())


# \b is a word boundary, so these patterns match "this" only as a whole word.
# The same regex is written twice: once as an ordinary string with doubled
# backslashes and once as a raw string; both yield the same pattern text.
rule2 = ["\\bthis\\b", r"\bthis\b"]
p = [re.compile(pattern) for pattern in rule2]

for i, r in enumerate(p):
    # Print the pattern's index and compiled form, then each match it finds.
    print(i, r)
    for m in r.finditer(txt):
        # m.group() is the whole matched substring; no span unpacking, so the
        # loop index `i` stays intact.
        print(m.group())


txt = "ababababab abab "
# Quantifier and character-class examples:
#   "(ab)* "  - zero or more repetitions of "ab" followed by one space
#   "[a-z]"   - any single lowercase ASCII letter
#   "(ab){2}" - exactly two consecutive repetitions of "ab"
rule3 = ["(ab)* ", "[a-z]", "(ab){2}"]

p = [re.compile(pattern) for pattern in rule3]
for i, r in enumerate(p):
    # Print the pattern's index and compiled form, then each match it finds.
    print(i, r)
    for m in r.finditer(txt):
        # m.group() is the whole matched substring; no span unpacking, so the
        # loop index `i` stays intact.
        print(m.group())


txt = "<H1>title<//H1>"
# Greedy vs. non-greedy matching:
#   "<.*>"  - `*` is greedy and consumes as many characters as possible, so
#             the single match is the whole string "<H1>title<//H1>".
#   "<.*?>" - `*?` is non-greedy and consumes as few characters as possible,
#             so the matches are "<H1>" and "<//H1>".
rule4 = ["<.*>", "<.*?>"]
p = [re.compile(pattern) for pattern in rule4]

for i, r in enumerate(p):
    # Print the pattern's index and compiled form, then each match it finds.
    print(i, r)
    for m in r.finditer(txt):
        # m.group() is the whole matched substring; no span unpacking, so the
        # loop index `i` stays intact.
        print(m.group())

import re;
text = "Goods is a handsome boy, he is cool, good and so on..."
# \w matches one word character (letter, digit or underscore), so this
# pattern matches every whole word that contains "oo".
regex = re.compile(r'\w*oo\w*')

# findall returns all matched words as a list of strings.
print(regex.findall(text))


def _bracket(match):
    """Return the matched word wrapped in square brackets."""
    # group(0) is the entire text matched by the pattern.
    return '[' + match.group(0) + ']'


# Pattern.sub(repl, string, count=0): repl may be a function that receives
# each match object and returns its replacement text.
print(regex.sub(_bracket, text))

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值