re:正则表达式笔记

最新推荐文章于 2023-02-12 12:56:29 发布

一个做图像的人

最新推荐文章于 2023-02-12 12:56:29 发布

阅读量191

点赞数

本文链接：https://blog.csdn.net/Sau_Hit/article/details/109124228

版权

import re


# span: the (start, end) index pair covered by a match
# pattern: the regular expression being searched for

# ---- re.match ----
# Tries to match only at the beginning of the string: the result is a Match
# object when the string starts with the pattern, otherwise None.
url = 'www.runoob.com'

prefix_hit = re.match('www', url)
print(prefix_hit)
# <re.Match object; span=(0, 3), match='www'>
print(prefix_hit.span())
# (0, 3)

# 'com' only occurs at the end of the string, so nothing matches at index 0.
print(re.match('com', url))
# None
# Chaining .span() onto this call would fail, because the result is None and
# None has no span() method (AttributeError).

# ---- Match.group / Match.groups ----
# Every parenthesised sub-pattern is captured and can be read back through
# group(n) or groups().

# Text to search.
line = "Cats are smarter than dogs!"
# Match from the start of the text; the two groups capture the words on
# either side of "are".
matchObj = re.match(r"(.*) are (.*?) .*", line, re.I | re.M)

# Print each view of the match in turn; the expected output is noted per item.
for shown in (
    # <class 're.Match'>
    type(matchObj),
    # <re.Match object; span=(0, 27), match='Cats are smarter than dogs!'>
    matchObj,
    # (0, 27)
    matchObj.span(),
    # 0
    matchObj.start(),
    # {}  (the pattern has no named groups)
    matchObj.groupdict(),
    # Cats are smarter than dogs!
    matchObj.group(),
    # ('Cats', 'smarter')
    matchObj.groups(),
    # Cats
    matchObj.group(1),
    # smarter
    matchObj.group(2),
):
    print(shown)

# ---- re.search ----
# Scans through the whole string and returns the first successful match.

matchObj = re.search("www", "www.baidu.com")
# One print call with a newline separator emits each value on its own line.
print(
    matchObj,
    matchObj.span(),
    matchObj.start(),
    matchObj.group(),
    matchObj.groups(),
    sep="\n",
)
# <re.Match object; span=(0, 3), match='www'>
# (0, 3)
# 0
# www
# ()  (the pattern contains no capture groups)

# ---- re.sub ----
# Substitutes the text matched by a pattern with a replacement.

line = "Cats are smarter than dogs!"
pattern, repl = r"smarter", "stupid"
string = re.sub(pattern, repl, line)
print(string)
# Cats are stupid than dogs!

# Substituting with an empty string turns re.sub into a delete operation.
# The leading \s* also consumes the whitespace in front of the '#', so the
# result is the bare number with no trailing space left behind.
pattern = r"\s*#.*$"
repl = ""
phone = "123456789 # 这是一个国外电话号码"

string = re.sub(pattern, repl, phone)
print(string)
# 123456789

# repl may also be a function: re.sub calls it with each Match object and
# uses the returned string as the replacement text.
pattern = r"(?P<value>\d+)"


def repl_func(matchObj: re.Match) -> str:
    """Return the matched number doubled, as a string.

    Args:
        matchObj: A match whose named group ``value`` holds a run of
            decimal digits.

    Returns:
        The decimal text of twice the matched integer.
    """
    # The captured digits are text, so convert them before doing arithmetic.
    value = int(matchObj.group("value"))
    return str(value * 2)


string = "X1S22F345DF2BRT01"
string_ = re.sub(pattern, repl_func, string)
print(string)
# X1S22F345DF2BRT01
print(string_)
# X2S44F690DF4BRT2  ("01" is read as the integer 1, hence "2")

# ---- re.compile ----
# Builds a reusable re.Pattern object that offers the basic re operations as
# methods: sub, split, search, match.

# Compile the digit pattern once.
matchPattern = re.compile(r"\d+")
print(type(matchPattern))
# <class 're.Pattern'>

# Pattern.match only succeeds at the starting position, and "xx123" does not
# begin with a digit.
result = matchPattern.match("xx123")
print(result)
# None

# The optional pos / endpos arguments limit matching to the part of the
# string from index 3 up to (not including) index 5, i.e. starting at the
# 4th character.
start_at, stop_at = 3, 5
result = matchPattern.match("xxx123", start_at, stop_at)
print(result, result.group(), result.span(), result.start(), sep="\n")
# <re.Match object; span=(3, 5), match='12'>
# 12
# (3, 5)
# 3

# ---- Pattern.findall ----
# Collects every match of the pattern in the string and returns them as a list.

pattern = re.compile(r"\d+")  # one or more digits
result_list = pattern.findall("1a2d3c34kjDSF325SAF45902DF")
# Print the list returned by findall.
print(result_list)
# ['1', '2', '3', '34', '325', '45902']
print(type(result_list))
# <class 'list'>

# ---- Pattern.finditer ----
# Finds the same matches as findall, but returns an iterator whose items are
# Match objects (not an iterator over a list of strings).
result_iter = pattern.finditer("12dkajsbu432kj23r5")
print(result_iter)
# <callable_iterator object at 0x10bd65a58>  (the address differs per run)
print(type(result_iter))
# <class 'callable_iterator'>

# Each next() call advances to the following match.
first_hit = next(result_iter)
print(first_hit)
# <re.Match object; span=(0, 2), match='12'>
second_hit = next(result_iter)
print(second_hit.span())
# (9, 12)

# ---- Pattern.split ----
# Cuts the string at every match of the pattern and returns the pieces as a list.

non_word_run = r"\W+"  # one or more non-word characters
pattern = re.compile(non_word_run)
result_list = pattern.split("ad qwd + 123 [sf=34-3&435s.?")
print(result_list)
# ['ad', 'qwd', '123', 'sf', '34', '3', '435s', '']
# The final '' is there because the input ends with a separator (".?").

注意：

正则表达式中的字符范围（字符类）只能匹配单个字符，不能用来表示多位数的区间

如：[0-9] 可以匹配任意一位数字，但不能写成 [10-19] 来匹配 10 到 19（应写作 1[0-9]）

一个做图像的人

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
re:正则表达式笔记

import re# span:跨度# pattern:模式# todo: re.match # todo: 返回开头匹配的结果，若开头无匹配项，则返回Noneprint(re.match('www', 'www.runoob.com'))# <re.Match object; span=(0, 3), match='www'>print(re.match('www', 'www.runoob.com').span())# (0, 3)print(re.ma...
复制链接

扫一扫