Python 正则表达式及练习

最新推荐文章于 2024-08-19 20:41:37 发布

半夏映浮光

最新推荐文章于 2024-08-19 20:41:37 发布

阅读量1.1k

点赞数 1

本文链接：https://blog.csdn.net/HXiao0805/article/details/87714335

版权

以下代码演示了 Python re 模块中正则表达式的基本用法，希望能给和我一样的初学者一些帮助。

# -*- coding: utf-8 -*-
"""
正则表达式练习1：re模块涉及方法

@author: administrator
"""
#导入re模块，用来处理正则表达式
import re
# re.match(): try the pattern only at the very beginning of the string.
# A match object comes back on success, None on failure.
_foo_at_start = re.compile('foo')  # the regular expression to apply
m = _foo_at_start.match('foofoo')

print(m)

if m:  # a match object is always truthy, None is falsy
    # The pattern has no (...) groups, so group() is the whole matched text ...
    print(m.group())
    # ... and groups() is an empty tuple.
    print(m.groups())
    
# re.search(): the match may start at any position in the string.
s = re.search(pattern='foo', string='this is food')
print(s)

if s:
    # No (...) groups in the pattern: whole match plus an empty tuple.
    whole_match, sub_groups = s.group(), s.groups()
    print(whole_match)
    print(sub_groups)
    

# Grouping with (...): three word characters, a dash, three digits.
# The pattern is a raw string because '\w' and '\d' are not valid Python
# string escapes; without the r prefix newer Python versions emit a warning
# for them.
m_2 = re.match(r'(\w\w\w)-(\d\d\d)', 'qwe-123')
print(m_2)

if m_2 is not None:
    print(m_2.group())   # the entire match               -> qwe-123
    print(m_2.group(1))  # text of the first (...) group  -> qwe
    print(m_2.group(2))  # text of the second (...) group -> 123
    print(m_2.groups())  # tuple of all subgroups         -> ('qwe', '123')

# No match here: r'\bthe' needs a word boundary right before the "t" of "the"
# (as in "thedog" or "this the dog"), but in "bitethe" the "t" follows a letter.
s_2 = re.compile(r'\bthe').search('bitethe dog')

# findall(): every match is returned in a list; the list is empty when
# nothing matches.
f_1 = re.compile('car').findall('carcarcar carry')
print(f_1)

# finditer() is similar to findall() but returns an iterator that yields
# match objects one at a time instead of building the whole list up front.
# Raw string so that '\w' reaches the regex engine untouched.
f_2 = re.finditer(r'(a\w+)', 'carcarcar carry')
print(f_2)
# Pull the matches out with a for loop.
for i in f_2:
    print(i.group())
    # The pattern has exactly one (...) group that wraps the whole expression,
    # so group(1) is valid and equals group() here.
    print(i.group(1))
    # i.group(2) would raise IndexError: the pattern has no second group.


# sub(): replace every part of the string that matches the pattern.
# Here each "a" or "e" in "abcdef" becomes "X"; the new string is returned.
s_1 = re.compile('[ae]').sub('X', 'abcdef')
print(s_1)

# subn(): same replacement as sub(), but the result is a 2-tuple of
# (new string, number of substitutions made).
s_2 = re.compile('[ae]').subn('X', 'abcdef')
print(s_2)

# The replacement text may refer to groups as \N, where N is the group number.
# r'\2/\1/\3' writes group 2, then group 1, then group 3, separated by "/",
# which swaps the first two fields of the matched date.
s_3 = re.sub(
    r'(\d{1,2})/(\d{1,2})/(\d{2}|\d{4})',
    r'\2/\1/\3',
    '2/20/91',
)
print(s_3)

# Extension notation: the inline (?iLmsux) options switch on regex flags from
# inside the pattern itself, and several of them can be combined.
# 1. (?i) is the inline form of re.I / re.IGNORECASE: case-insensitive matching.
r_1 = [hit.group() for hit in re.finditer(r'(?i)yes', 'yes? Yes. YES!!')]
print(r_1)
# 2. (?m) is the inline form of re.M / re.MULTILINE: "^" matches at the start
#    of every line instead of only at the start of the whole string.
# The sample text must NOT be indented inside the triple-quoted literal.
# With leading spaces no line actually begins with "th", so "^th" can never
# match and findall() returns []. That is why the example worked when typed
# at the interactive prompt but failed when an editor auto-indented the text;
# the newline characters were never the problem.
r_2 = re.findall(r'(?im)(^th[\w ]+)', """
This line is the first,
another line,
that line, it's the best
""")

print(r_2)
# 3. (?s) is the inline form of re.S / re.DOTALL: the dot also matches "\n"
#    (without the flag it matches every character except "\n").
#    One match therefore runs from the first "Th" to the end of the text.
_dotall_sample = """
               The first line,
               the second line,
               the third line
               """
r_3 = re.compile(r'(?is)th.+').findall(_dotall_sample)
print(r_3)
# 4. (?x) is the inline form of re.X / re.VERBOSE.
# Whitespace in the pattern is ignored (except inside a character class or
# after a backslash), so the expression can be spread over several lines to
# make it easier to read. The literal space is therefore written as "[ ]".
_phone_pattern = r'''(?x)
\((\d{3})\)
[ ]
(\d{3})
-
(\d{4})
'''
_phone_match = re.search(_phone_pattern, '(800) 555-1212')
r_4 = _phone_match.groups()
print(r_4)
# 5. (?:...) groups part of the expression without saving it as a subgroup.
# Use it when the grouped text is only needed for structure (here: the
# repeated "sub." prefixes) and will never be retrieved afterwards.
_com_domain = re.compile(r'http://(?:\w+\.)*(\w+\.com)')
_sample_urls = 'http://google.com http://www.google.com http://code.google.com'

r_5 = _com_domain.findall(_sample_urls)
print(r_5)

r_6 = _com_domain.search(_sample_urls)
print(r_6.group(1))

下面为一些基本练习，有兴趣的可以自己看一看，动手写一写。

# -*- coding: utf-8 -*-
"""
Created on Mon Feb 18 13:37:54 2019

@author: Administrator
正则表达式练习2
"""
import re
# 1-1  Recognize the following strings: "bat", "bit", "but", "hat", "hit"
#      or "hut".
par_1 = r'bat|bit|but|hat|hit|hut'
test_str1 = 'this is bat,and that is bat,here is bit and there is but'
# The pattern has no groups, so each match's full text is collected.
test_1 = [found.group() for found in re.finditer(par_1, test_str1)]
print(test_1)
# prints ['bat', 'hat', 'bat', 'bit', 'but']  ("hat" comes from inside "that")

# 1-2  Match any pair of words separated by a single space, i.e. a first
#      name and a last name.
# NOTE(review): this solution splits the text on every space or comma and
# returns the individual words; it does not match "first last" pairs as such.
par_2 = r' |,'
test_str2 = 'alice chen,bob jhon,kitty han'
test_2 = re.compile(par_2).split(test_str2)
print(test_2)
# prints ['alice', 'chen', 'bob', 'jhon', 'kitty', 'han']

# 1-3  Match any word and single letter separated by one comma and one
#      whitespace character, e.g. a surname and a first initial.
# NOTE(review): this solution only splits the text on ", " and returns the
# pieces; it does not check for the "word, letter" shape the exercise describes.
par_3 = r', '
test_str3 = 'alice chen, bob jhon, kitty han'
test_3 = re.split(pattern=par_3, string=test_str3)
print(test_3)
# prints ['alice chen', 'bob jhon', 'kitty han']

# 1-4  Match the set of all valid Python identifiers.
# An (ASCII) identifier starts with a letter or an underscore and continues
# with letters, digits or underscores. The leading \b stops the pattern from
# picking "test" out of the invalid name "2test", and "_" is allowed as the
# first character.
par_4 = r'\b[A-Za-z_]\w*'
test_str4 = 'python,test_1,2test'
test_4 = re.findall(par_4, test_str4)
print(test_4)
# prints ['python', 'test_1']  ("2test" starts with a digit, so it is skipped)

# 1-5  Match a street address in the local format, general enough to accept
#      any number of street words including the type name. US addresses look
#      like "1180 Bordeaux Drive"; multi-word street names such as
#      "3120 De la Cruz Boulevard" must be supported as well.
# The pattern requires a house number followed by one or more space-separated
# words, so it no longer matches the empty string or arbitrary text.
#   group 1: the whole address
#   group 2: the repeated " word" group, which keeps only its LAST repetition
par_5 = r'(\d+( [A-Za-z]+)+)'
test_str5 = '3120 De la Cruz Boulevarde'
test_5 = re.search(par_5, test_str5)
if test_5 is not None:  # search() returns None when nothing matches
    print(test_5.group())   # 3120 De la Cruz Boulevarde
    print(test_5.group(1))  # 3120 De la Cruz Boulevarde
    print(test_5.group(2))  # " Boulevarde": earlier repetitions are overwritten

# 1-6  Match simple web domain names that start with "www" and end with
#      ".com", e.g. www://www. yahoo.com/.
#      Extra credit: also support other top-level domains such as .edu and
#      .net (e.g. http://www.foothill.edu).
# Pattern, left to right:
#   ^(?:\w+://)?        optional scheme prefix such as "http://"
#   www\.               the host must begin with a literal "www."
#   .*                  the rest of the host name
#   \.(?:com|edu|net)   a literal dot followed by one of the allowed suffixes
#   /?$                 optional trailing slash, then end of string
# The alternatives are wrapped in (?:...) so that both the dot and the
# end-of-string anchor apply to every suffix, not just to one of them.
par_6 = r'^(?:\w+://)?www\..*\.(?:com|edu|net)/?$'
test_str6 = 'www://www. yahoo.com,http://www.foothill.edu,www://www. yahoo.net,http://www. yahoo.net'
test_pre = re.split(',', test_str6)
print(test_pre)  # ['www://www. yahoo.com', 'http://www.foothill.edu', 'www://www. yahoo.net', 'http://www. yahoo.net']
for i in test_pre:
    test_6 = re.search(par_6, i)
    if test_6 is not None:  # skip candidates that are not valid domain names
        print(test_6.group())  # the four iterations print, in order:
                               #   www://www. yahoo.com
                               #   http://www.foothill.edu
                               #   www://www. yahoo.net
                               #   http://www. yahoo.net

喜欢我的文章希望和我一起成长的宝宝们，可以搜索并添加公众号TryTestwonderful ，或者扫描下方二维码添加公众号