Python学习心得-正则表达式

最新推荐文章于 2023-06-03 00:07:24 发布

NKUer_there

最新推荐文章于 2023-06-03 00:07:24 发布

阅读量180

点赞数

分类专栏： python 文章标签： python

本文链接：https://blog.csdn.net/weixin_45955424/article/details/119190144

版权

python 专栏收录该内容

11 篇文章 0 订阅

订阅专栏

# 这是一个示例 Python 脚本。

# 按 ⌃R 执行或将其替换为您的代码。
# 按 双击 ⇧ 在所有地方搜索类、文件、工具窗口、操作和设置。
#正则表达式学习心得
import re
#import re是必要的操作
# A regular expression describes a text pattern -- here a phone number.
regex1 = r'\d\d\d-\d\d\d-\d\d\d\d'
# The same pattern written with explicit repetition counts.
regex2 = r'\d{3}-\d{3}-\d{4}'

# re.compile() turns each pattern string into a reusable Regex object.
phoneNumberRegex, phoneNumberRegex_new = (
    re.compile(pattern) for pattern in (regex1, regex2)
)

# search() returns a Match object for the first hit only.
mo = phoneNumberRegex_new.search(
    'my number is 444-444-4444 and 333-3333-3333'
)
# group() without arguments returns the whole matched text.
print(f'phone number is: {mo.group()}')
# Pipeline: re.compile(...) -> regex.search(...) -> match.group() -> str.
# Only a single match was returned; how do we get more of them?

# page130
# Parentheses split the pattern into capture groups, which is where the
# name of the group() method comes from.
phoneNumberRegex = re.compile(r'(\d\d\d)-(\d\d\d-\d\d\d\d)')
mo = phoneNumberRegex.search(
    'my number is 444-444-4444 and 333-3333-3333'
)
print(
    f'phone number is: {mo.group(1)} and there is {mo.group(2)} too.'.title()
)
# group(0), or group() with no argument, returns the whole matched text.
# Several groups can also be fetched in a single call.
mo1, mo2 = mo.group(1, 2)
print(f'phone number is: {mo1} and there is {mo2} too.'.title())

# Parentheses mean "group" inside a pattern, so a literal parenthesis has to
# be escaped with a backslash (the same idea as escaping in C++).
phoneNumberRegex = re.compile(r'(\(\d\d\d\))-(\d\d\d-\d\d\d\d)')
mo = phoneNumberRegex.search(
    'my number is (444)-444-4444 and 333-3333-3333'
)
print(
    f'phone number is: {mo.group(1)} and there is {mo.group(2)} too.'.title()
)
mo1 = mo.group(1)
mo2 = mo.group(2)
print(f'phone number is: {mo1} and there is {mo2} too.'.title())
# Any character with a special meaning in a pattern can be matched
# literally by escaping it with a backslash.

# Pipe: | means "or" -- the pattern matches either alternative.
heroRegex = re.compile(r'Batman|Tina Fey')
# Both names occur in each text.  search() reports whichever one occurs
# first in the text, so the two texts list the names in opposite order to
# make that behaviour visible.
mo1 = heroRegex.search('Batman and Tina Fey.')
mo2 = heroRegex.search('Tina Fey and Batman.')
print(mo1.group())  # Batman
print(mo2.group())  # Tina Fey
# search() stops at the first match and does not look any further.
# Finding every match is covered later on.

# The pipe is handy for a set of words that share a common prefix.
batRegex = re.compile(r'Bat(man|mobile|copter|bat)')  # 1
mo = batRegex.search('Batmobile and Batcopter')
print(mo[0])
print(mo[1])
# group(1) returns only the part matched inside the parentheses added at
# the line marked "# 1", not the shared "Bat" prefix.
# That is simply how the library works -- get used to it.
# Useful when looking for many strings with one common prefix.

# (...)? makes a group optional: the match succeeds with or without it.
# Only the inner (man) is optional here.
batRegex = re.compile(r'Bat((man)?|mobile|copter|bat)')
mo = batRegex.search('Batok Batmobile and Batcopter')
print(mo[0])
# The "strange" output is ('', None): (man)? is allowed to match nothing, so
# the first alternative succeeds right after the first "Bat" with an empty
# outer group, and the inner group never takes part in the match.
print(mo.groups())

# (...)* matches the group zero or more times.
batRegex = re.compile(r'Bat(wo)*man')
mo1 = batRegex.search('The Adventure Batman.')
print(mo1[0])

mo1 = batRegex.search('The Adventure Batwoman.')
print(mo1[0])

mo1 = batRegex.search('The Adventure Batwowowowoman.')
# A repeated group keeps only its last repetition, so this prints "wo".
print(mo1[1])
print(mo1[0])

# (...)+ matches the group one or more times.
batRegex = re.compile(r'Bat(wo)+man')
mo1 = batRegex.search('The Adventure Batman.')
# There is no "wo" at all, so nothing matches and search() returns None.
# None is a singleton, hence the identity test instead of ==.
print(mo1 is None)  # True
mo1 = batRegex.search('The Adventure Batwoman.')
print(mo1.group())

mo1 = batRegex.search('The Adventure Batwowowowoman.')
print(mo1.group())

# {n} repeats the preceding group exactly n times.
# (\d stands for any digit.)
haRegex = re.compile(r'(Ha){3}')
mo1 = haRegex.search('HaHaHa,you are the f**king man.')
print(mo1.group())
# A numbered group returns only the parenthesised part: a single "Ha".
print(mo1.group(1))
mo1 = haRegex.search('HaHa,you are the b**ch.')
# Only two repetitions are available, so there is no match.
print(mo1 is None)  # True
# Braces also accept a range {min,max}; an omitted minimum means 0.
haRegex = re.compile(r'(Ha){,3}')  # 0 to 3 repetitions of "Ha"
mo1 = haRegex.search('HaHaHa,you are the f**king man.')
print(mo1.group())
mo1 = haRegex.search('HaHa,you are the b**ch.')
# A pattern that allows zero repetitions matches (possibly with an empty
# string) at the start of any text, so this check always prints False.
print(mo1 is None)
print(mo1.group())

# Python regular expressions are greedy by default: when a quantifier such
# as (Ha){1,5} could stop at several lengths, the longest possible match wins.
# A question mark directly after the braces, {}?, makes the quantifier
# non-greedy so the shortest possible match is preferred.
haRegex = re.compile(r'(Ha){1,5}?')  # 1 to 5 repetitions, as few as possible
mo1 = haRegex.search('HaHaHa,you are the f**king man.')
print(mo1.group())  # Ha
mo1 = haRegex.search('HaHa,you are the b**ch.')
print(mo1 is None)  # False
print(mo1.group())  # Ha
# The preference is reversed compared with the greedy form.

# Keep the two roles of ? apart: "non-greedy quantifier" vs "optional group".

# findall() replaces search() when every matching fragment is wanted.
# Without groups in the pattern it returns a list of strings;
# with several groups it returns a list of tuples, one item per group.

phoneNumberRegex = re.compile(r'(\d\d\d)-(\d\d\d)-(\d\d\d\d)')
mo = phoneNumberRegex.findall(
    'you get two numbers:444-444-4444,333-333-3333'
)
# Grouped pattern: a list of tuples is printed.
print(mo)

phoneNumberRegex = re.compile(r'\d\d\d-\d\d\d-\d\d\d\d')
mo = phoneNumberRegex.findall(
    'you get two numbers:444-444-4444,333-333-3333'
)
# Ungrouped pattern: a list of strings is printed.
print(mo)
# No surprises in either case.

# Learn the shorthand character classes:
# \d  any digit
# \D  any character that is not a digit
# \w  any letter, digit or underscore
# \W  any character not covered by \w
# \s  space, tab or newline
# \S  any character not covered by \s
# There is no shorthand for "letters only"; write [a-zA-Z] instead.

xmasRegex = re.compile(r'\d+\s\w+')
# Named xmas_matches rather than "all" so the builtin all() is not shadowed.
xmas_matches = xmasRegex.findall(
    '12 drummers,11 pipers,10 lords,9 lidies,8 maids,7'
)
print(xmas_matches)
for x in xmas_matches:
    print(f'here are {x}.')

# The re module also lets you define your own character classes
# by listing the wanted characters inside square brackets.
vowelRegex = re.compile(r'[aeiouAEIOU]')
print(vowelRegex.findall('azazazazazoeoeoeoeIIIIEEEE'))
# Inside square brackets most special regex characters lose their meaning.
# A leading ^ turns the class into a negated class: everything NOT listed.
consonantRegex = re.compile(r'[^aeiouAEIOU]')
print(consonantRegex.findall('aahjkdahfkjhskhfkjsdh'))
# In other words, the complement of the listed set.

# The ^ character has a second role outside square brackets.
# ^ at the start of a pattern: the match must begin at the start of the text.
# $ at the end of a pattern: the match must finish at the end of the text.
# Used together they require the whole text to match.
# The anchors behave the same way for search() and findall().
beginWitHello = re.compile(r'^Hello')
ok = beginWitHello.search('Hello World!')
print(ok.group())
print(ok)
ok = beginWitHello.search('he said Hello.')
# "Hello" is present but not at the start, so there is no match.
print(ok is None)

endWithNumber = re.compile(r'\d$')
ok = endWithNumber.search('your name is 1910303')
print(ok.group())
print(ok)
ok = endWithNumber.search(r'1910303 is you.')
# The text ends with ".", not with a digit, so there is no match.
print(ok is None)

# The wildcard character (a clumsy name, if you ask me): .
# It matches any single character except a newline.
atRegex = re.compile(r'.at')
ok = atRegex.findall(
    'the cat in the hat sat on the flat mat.'
)
for name in ok:
    print(f'here are you {name}')
# The dot stands for exactly one character, so "flat" yields "lat".

# Regular-expression details are easy to forget;
# review them regularly.

# Dot-star (.*) matches any run of characters (newlines excepted).
nameRegex = re.compile(r'First Name:(.*)Last Name:(.*)')
mp = nameRegex.search(
    'First Name:f**k you man Last Name:just f**k you'
)
print(*mp.groups(), sep='\n')
# Dot-star is greedy: it takes as many characters as it can.  Greedy or not,
# satisfying the pattern as a whole always has the highest priority.
# A trailing question mark makes it non-greedy,
# the same trick used with {}? earlier.
# For example:
nongreedyRegex = re.compile(r'<.*?>')
mo = nongreedyRegex.search('<to serve man> for dinner.>')
print(mo[0])
# Non-greedy: stops at the first ">".

# The name is reused below although this pattern is the greedy one.
nongreedyRegex = re.compile(r'<.*>')
mo = nongreedyRegex.search('<to serve man> for dinner.>')
print(mo[0])
# Greedy: runs on to the last ">".

# Making the dot match newlines as well.
noNewRegex = re.compile(r'.*')
ok = noNewRegex.search(
    'hei,you boy,f**king you man.\nif you can f**k him,just do it.'
)
# Without a flag the match stops in front of the newline.
print(ok[0])
# Pass re.DOTALL as the second argument to re.compile().
noNewRegex = re.compile(r'.*', flags=re.DOTALL)
ok = noNewRegex.search(
    'hei,you boy,f**king you man.\nif you can f**k him,just do it.'
)
print(ok[0])
# Now the newline is matched too.

#{n,m}? *? +? 中的?都表示对前面分组的非贪心匹配
#^span   span$
#.   re.DOTALL
#\d \w \s 与 \D \W \S 所包含的字符集是相对的
#[abc]代表选择，其与[^abc]是相对的

#正则表达式默认区分大小写，如何让它不区分呢？
#可以事先对字符串进行lower处理，但更好的做法是给re.compile传入第二个参数re.IGNORECASE（简写为re.I）

NKUer_there

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
Python学习心得-正则表达式

# 这是一个示例 Python 脚本。# 按 ⌃R 执行或将其替换为您的代码。# 按双击 ⇧ 在所有地方搜索类、文件、工具窗口、操作和设置。#正则表达式学习心得import re#import re是必要的操作#正则表达式用以匹配文本regex1=r'\d\d\d-\d\d\d-\d\d\d\d'#或者等效为regex2=r'\d{3}-\d{3}-\d{4}'#利用re模块compile创建regex对象，即所谓的正则表达式phoneNumberRegex=re.compil.
复制链接

扫一扫