正则表达式-基础

最新推荐文章于 2024-01-15 14:30:21 发布
singebogo
最新推荐文章于 2024-01-15 14:30:21 发布
阅读量256
点赞数
分类专栏： Python
Python 专栏收录该内容
48 篇文章 0 订阅
订阅专栏
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#    @version : 0.0.1
#    @File    : reg.py
#    @Time    : 2018/6/14 0014 下午 4:09
#    @Site    : 
#    @Software: PyCharm
#    @Author  : singebogo
#    @Author_email: singbogo@163.com
#    @description: 
'''
   1、import re  导入正则表达式模块
   2、用 re.compile() 创建一个Regex对象
   3、向Regex对象search()方法传入想查找的字符串，它返回Match对象
   4、调用Match对象group()方法，返回实际匹配的文本字符串
'''


import re

# phoneNumReg = re.compile(r"\d\d\d-\d\d\d-\d\d\d\d")
# mo = phoneNumReg.search('my number is 415-555-4242')
# print ('phone number found: ' + mo.group())
#
#
# # 分组
# phoneNumReg = re.compile(r"(\d\d\d)-(\d\d\d-\d\d\d\d)")
# mo = phoneNumReg.search('my number is 415-555-4242')
# print ('phone number found: ' + mo.group(1))
# print ('phone number found: ' + mo.group(2))
#
# print ('phone number found: ' + mo.group())
# code, number = mo.groups()
# print(code)
# print(number)
#
#
# #用管道匹配多个分组
# heroRegEx = re.compile(r"Batman|Tina Fey")
# mo1 =  heroRegEx.search("Batman and Tina Fey")
# print(mo1.group())
#
# mo2 = heroRegEx.search("Tina Fey and Batman")
# print(mo2.group())
#
# # 管道匹配多个分组
# heroRegEx1 = re.compile(r"Bat(man|mobile|copter|bat)")
# mo3 =  heroRegEx1.search("Batman and Tina Fey")
# print(mo3.group())
# print(mo3.group(1))

#用问号实现可选匹配
# batRegex2 = re.compile(r"Bat(wo)?man")
# mo4 =  batRegex2.search("Batman and Tina Fey")
# print(mo4.group())
# print(mo4.group(1))
# mo5 =  batRegex2.search("Batwoman and Tina Fey")
# print(mo5.group())
# print(mo5.group(1))

#用星号匹配零次或者多次  可以不出现
# batRegex2 = re.compile(r"Bat(wo)*man")
# mo4 =  batRegex2.search("Batman and Tina Fey")
# print(mo4.group())
# print(mo4.group(1))
# mo5 =  batRegex2.search("Batwowowowoman and Tina Fey")
# print(mo5.group())
# print(mo5.group(1))
#用+（加）号匹配一次或者多次  至少出现一次
# batRegex2 = re.compile(r"Bat(wo)+man")
# mo4 =  batRegex2.search("Batwoman and Tina Fey")
# print(mo4.group())
# mo5 =  batRegex2.search("Batwowowowoman and Tina Fey")
# print(mo5.group())
# print(mo5.group(1))

#{}花括号匹配特定次数
# batRegex2 = re.compile(r"Bat(wo){1,4}man")
# mo5 =  batRegex2.search("Batwowowoman and Tina Fey")
# print(mo5.group())
# print(mo5.group(1))

# 正则表达式默认是“贪心”的：在有二义性的情况下，尽可能匹配最长的字符串；花括号后面跟着问号，则是“非贪心”，尽可能匹配最短的字符串
# 当花括号之后还有后续模式（如下面的 man）时，仍然是非贪心模式，但为了让整个模式匹配成功，重复次数会按需增加（见下面 Batwowowoman 的结果）
# batRegex2 = re.compile(r"Bat(wo){1,4}?")
# mo5 =  batRegex2.search("Batwowowoman and Tina Fey")
# print(mo5.group())
# batRegex3 = re.compile(r"Bat(wo){1,4}?man")
# mo6 =  batRegex3.search("Batwowowoman and Tina Fey")
# print(mo6.group())
# Batwo
# Batwowowoman

# findall()方法: search()返回一个Match对象，包含被查找字符串中“第一次”匹配的文本，
#               findall()方法：将返回一组字符串，包含被查找的字符串中所有的匹配
#                              如果正则表达式中有分组，则返回元组的列表（每个元组对应一次匹配，元组中的每一项对应一个分组）

# phoneNumReg = re.compile(r"\d\d\d-\d\d\d-\d\d\d\d")
# mo = phoneNumReg.search('my number is 415-555-4242 work：234-456-2322')
# print ('phone number found: ' + mo.group())
#
# phoneNumReg1 = re.compile(r"\d\d\d-\d\d\d-\d\d\d\d")
# mo = phoneNumReg1.findall('my number is 415-555-4242 work：234-456-2322')
# print ('phone number found: ' + str(mo))
#
# # phone number found: 415-555-4242
# # phone number found: ['415-555-4242', '234-456-2322']
#
# phoneNumReg2 = re.compile(r"(\d\d\d)-(\d\d\d)-(\d\d\d\d)")
# mo1 = phoneNumReg2.findall('my number is 415-555-4242 work：234-456-2322')
# print (mo1)
# # [('415', '555', '4242'), ('234', '456', '2322')]

#   字符分类
'''
\d          0-9的任意数字
\D          除0-9的数字以外的任何字符
\w          任何字母、数字、下划线字符（可以认为是匹配“单词”字符）
\W          除了字母，数字，下划线以外的字符
\s          空格、制表符、换行符（空白字符）
\S          除空格、制表符、换行符（空白字符）以外的任何字符

?           匹配零次或者一次前面的分组
*           匹配零次或者多次前面的分组
+           匹配一次或者多次前面的分组
{n}         匹配n次前面的分组
{n,}        匹配n次或者更多前面的分组
{,m}        匹配零次到m次前面的分组
{n,m}       匹配至少n次，最多m次前面的分组
{n,m}?或*?或+?      对前面的分组进行非贪心匹配
^spam       字符串必须以spam开始
spam$       字符串必须以spam结尾
.           匹配所有字符，换行符除外
[abc]       匹配方括号的任意字符
[^abc]      匹配不在方括号内的任意字符

'''

# 建立自己的新字符分类
#                     当你想匹配一组字符，但是缩写的字符分类（\d、\w、\s 等）太宽泛时，可以使用[]定义自己的字符分类
# consonantRegex = re.compile(r"[aeiouAEIOU]")
# print (consonantRegex.findall('RoboCop eats baby food. BABY FOOD U.'))
# consonantRegex = re.compile(r"[^aeiouAEIOU]")    #  匹配不在该字符类中的所有字符
# print (consonantRegex.findall('RoboCop eats baby food. BABY FOOD U.'))
# ['o', 'o', 'o', 'e', 'a', 'a', 'o', 'o', 'A', 'O', 'O', 'U']
# ['R', 'b', 'C', 'p', ' ', 't', 's', ' ', 'b', 'b', 'y', ' ', 'f', 'd', '.', ' ', 'B', 'B', 'Y', ' ', 'F', 'D', ' ', '.']


# 插入字符和美元字符
#                 插入符号：匹配必须发生在被查找文本开始处
#                 美元符号：匹配必须发生在被查找文本的结尾处
# beginwithhello = re.compile(r'^hello')
# mo4= beginwithhello.search('hello world')
# print (mo4.group())
# print (beginwithhello.search('say hello world') == None)
# hello
# True

# endwithNumber= re.compile(r'\d$')
# mo6 = endwithNumber.search('your number is 23')
# print(mo6.group())
# print (endwithNumber.search("42424242test") == None)

# 3
# True

# endwithNumber= re.compile(r'^\d+$')
# mo6 = endwithNumber.search("42424242")
# print(mo6.group())
# print (endwithNumber.search('your number is 23') == None)
# 42424242
# True

# 通配符 句号
# atRegex = re.compile(r'.at')
# mo7 = atRegex.findall('the cat in the hat sat on the flat mat.')
# print(mo7)
# ['cat', 'hat', 'sat', 'lat', 'mat']

#点-星 匹配所有的字符
#                    点-星：默认为贪心模式，尽可能的匹配更多的字符串
#                           非贪心模式： 使用点星和问号，问号告示python为非贪心模式
#                             通过re.DOTALL作为re.compile()第二参数，可以让点字符匹配所有字符，包括换行




# nameRegex = re.compile(r"First Name:(.*)Last Name:(.*)")
# mo = nameRegex.search("First Name: Al Last Name: Sweigrat")
# print(mo.group(1))
# print(mo.group(2))
# Al
# Sweigrat
#
# nogreedRegex = re.compile(r"<.*?>")
# mo = nogreedRegex.search('<To server man> for dinner>')
# print (mo.group())  #<To server man>
# greedRegex = re.compile(r"<.*>")
# mo1 = greedRegex.search('<To server man> for dinner>')
# print(mo1.group())  #<To server man> for dinner>

# noNewLineRegex = re.compile(r'.*')
# mo = noNewLineRegex.search('search the public trust.\n Protected in inconnet \nUpload the law')
# print(mo.group())  #search the public trust.
#
# NewLineRegex = re.compile(r'.*', re.DOTALL)
# mo1 = NewLineRegex.search('search the public trust.\n Protected in inconnet \nUpload the law')
# print(mo1.group())
# search the public trust.
#  Protected in inconnet
# Upload the law


# Case-insensitive matching:
#   pass re.IGNORECASE (or its short form re.I) as the second argument
#   to re.compile() so that letter case is ignored while matching.

robocop = re.compile(r"robocop", flags=re.IGNORECASE)
mo3 = robocop.search("RoboCop is part man, part machine, all cop,")
print(mo3.group(0))  # RoboCop


# Combining flags: re.compile() takes only a single value as its second
# argument, so to use re.VERBOSE (which permits comments and whitespace
# inside the pattern) together with re.IGNORECASE, join the two flags
# with the bitwise OR operator (|).

robocop = re.compile(r"robocop", flags=re.VERBOSE | re.IGNORECASE)
mo3 = robocop.search("RoboCop is part man, part machine, all cop,")
print(mo3.group(0))  # RoboCop