re:正则表达式笔记

 

import re


# span: the (start, end) index range covered by a match
# pattern: the regular expression being matched

# todo: re.match
# todo: matches only at the beginning of the string; returns None when the
#       beginning does not match
print(
    re.match('www', 'www.runoob.com'),         # <re.Match object; span=(0, 3), match='www'>
    re.match('www', 'www.runoob.com').span(),  # (0, 3)
    sep="\n",
)

# 'com' occurs only at the end of the string, so nothing matches at the start
print(re.match('com', 'www.runoob.com'))
# None
# Chaining .span() onto that result fails, because None has no span():
# print(re.match('com', 'www.runoob.com').span())
# error: No span

# todo: matchObj.group
# todo: every parenthesised sub-pattern is captured and exposed through .group()

# Sample sentence to match against
line = "Cats are smarter than dogs!"
# Two capture groups: greedy (.*) before " are ", lazy (.*?) after it
matchObj = re.match(r"(.*) are (.*?) .*", line, flags=re.M | re.I)

# One value per output line; the trailing comment is what gets printed
print(
    type(matchObj),        # <class 're.Match'>
    matchObj,              # <re.Match object; span=(0, 27), match='Cats are smarter than dogs!'>
    matchObj.span(),       # (0, 27)
    matchObj.start(),      # 0
    matchObj.groupdict(),  # {}   (the pattern has no named groups)
    matchObj.group(),      # Cats are smarter than dogs!
    matchObj.groups(),     # ('Cats', 'smarter')
    matchObj.group(1),     # Cats
    matchObj.group(2),     # smarter
    sep="\n",
)

# todo: re.search
# todo: scans the whole string and returns the first successful match

matchObj = re.search(pattern="www", string="www.baidu.com")

# One value per output line; the trailing comment is what gets printed
print(
    matchObj,           # <re.Match object; span=(0, 3), match='www'>
    matchObj.span(),    # (0, 3)
    matchObj.start(),   # 0
    matchObj.group(),   # www
    matchObj.groups(),  # ()   (the pattern has no capture groups)
    sep="\n",
)

# todo: re.sub
# todo: substitution

line = "Cats are smarter than dogs!"
pattern, repl = r"smarter", "stupid"
string = re.sub(pattern, repl, line)
print(string)
# Cats are stupid than dogs!

# An empty replacement turns substitution into deletion:
# here everything from '#' to the end of the string is removed
phone = "123456789 # 这是一个国外电话号码"
pattern, repl = r"#.*$", ""

string = re.sub(pattern, repl, phone)
print(string)
# 123456789   (the space that preceded '#' is kept)

# When repl is a function, re.sub calls it with each match object and uses the
# string it returns as the replacement text
pattern = r"(?P<value>\d+)"


def repl_func(matchObj: re.Match) -> str:
    """Return the matched number doubled, rendered as a string.

    Args:
        matchObj: A match that has a named group ``value`` holding digits.

    Returns:
        The decimal text of twice the integer captured in ``value``.
    """
    # int() discards leading zeros, so a captured "01" yields "2", not "02"
    value = int(matchObj.group("value"))
    return str(value * 2)


string = "X1S22F345DF2BRT01"
string_ = re.sub(pattern, repl_func, string)
print(string)
# X1S22F345DF2BRT01
print(string_)
# X2S44F690DF4BRT2

# todo: re.compile
# todo: builds a re.Pattern object that carries the core re methods itself:
#       sub, split, search, match

# Compile the expression once
matchPattern = re.compile(r"\d+")
print(type(matchPattern))
# <class 're.Pattern'>

# Call methods on the compiled pattern
result = matchPattern.match("xx123")  # does the string start with digits?
print(result)
# None

# Restrict matching to indices 3..4, i.e. start looking at the 4th character
result = matchPattern.match("xxx123", pos=3, endpos=5)
print(
    result,          # <re.Match object; span=(3, 5), match='12'>
    result.group(),  # 12
    result.span(),   # (3, 5)
    result.start(),  # 3
    sep="\n",
)

# todo: pattern.findall
# todo: collects every match in the string and returns them as a list

pattern = re.compile(r"\d+")  # one or more digits
result_list = pattern.findall("1a2d3c34kjDSF325SAF45902DF")
# Print the list returned by findall (not the unrelated `result` variable)
print(result_list)
# ['1', '2', '3', '34', '325', '45902']
print(type(result_list))
# <class 'list'>

# todo: pattern.finditer
# todo: finds the same matches as findall, but returns an iterator that yields
#       Match objects (not an iterator over a list of strings)
result_iter = pattern.finditer("12dkajsbu432kj23r5")
print(result_iter)
# <callable_iterator object at 0x10bd65a58>   (the address differs per run)
print(type(result_iter))
# <class 'callable_iterator'>
print(next(result_iter))
# <re.Match object; span=(0, 2), match='12'>
# The second next() advances to the following match, '432'
print(next(result_iter).span())
# (9, 12)

# todo: pattern.split
# todo: splits the string wherever the pattern matches and returns a list

pattern = re.compile(r"\W+")  # one or more non-word characters
result_list = pattern.split(string="ad qwd + 123 [sf=34-3&435s.?", maxsplit=0)
print(result_list)
# ['ad', 'qwd', '123', 'sf', '34', '3', '435s', '']
# The final '' appears because the input ends with separator characters ('.?')

::

正则表达式中,方括号(字符类)里的范围只能匹配单个字符,不能用来表示多位数字的组合。

如:可以写 [0-9] 匹配一位数字,但不能用 [10-19] 表示 10 到 19(应写成 1[0-9])。

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值