Python 正则表达式学习笔记

#coding=utf-8
#######################################################
#filename: .py
#author:
#date:2019-01-08
#function:
#######################################################

import re


## Matching with regular expressions -- the basic workflow:
# 1. Import the re module.
# 2. Build a Regex (pattern) object with re.compile().
# 3. Call the pattern object's search() method to look for a match; it returns a Match object.
# 4. Call the Match object's group() method to get the matched text.

# 7.2 Matching a phone number
_PHONE_SENTENCE = 'my phone number is 010-521-1234'
phonenumberRegex = re.compile(r'\d\d\d-\d\d\d-\d\d\d\d')
phonenumberMatch = phonenumberRegex.search(_PHONE_SENTENCE)
_matched_phone = phonenumberMatch.group()
print(_matched_phone)

# 7.3
# 7.3.1 Grouping with parentheses
phonenumberRegexGroup = re.compile(r'(\d\d\d)-(\d\d\d-\d\d\d\d)')
phonenumberMatchGroup = phonenumberRegexGroup.search('my phone number is 010-521-1234')
# group(1) and group(2) are the parenthesised sub-matches; group(0) is the whole match.
_group_report = '\n'.join([
    '第一组:' + phonenumberMatchGroup.group(1),
    '第二组:' + phonenumberMatchGroup.group(2),
    '所有组:' + phonenumberMatchGroup.group(0),
])
print(_group_report)

# 7.3.2 Matching one of several alternatives with the pipe character
_FRIENDS_SENTENCE = 'tom and jmmy are good friends'
namelistRegexGroup = re.compile(r'(tom|jmmy)')
namelistMatchGroup = namelistRegexGroup.search(_FRIENDS_SENTENCE)
print(namelistMatchGroup.group())  # search() reports only the first match in the text
_all_names = namelistRegexGroup.findall(_FRIENDS_SENTENCE)  # every match in the text
print(_all_names)

# 7.3.3 Optional matching with the question mark (zero or one occurrence)
phonenumberRegex = re.compile(r'(\d\d\d-)?\d\d\d-\d\d\d\d')
# The same pattern is tried on a number with and without the optional prefix.
for _sentence in ('my phone number is 010-521-1234',
                  'my phone number is 521-1234'):
    phonenumberMatch = phonenumberRegex.search(_sentence)
    print('? :' + phonenumberMatch.group())

# 7.3.4 Matching zero or more occurrences with the star
phonenumberRegex = re.compile(r'(\d\d\d-)*\d\d\d-\d\d\d\d')
_star_samples = [
    'my phone number is 020-010-521-1234',  # prefix group repeated twice
    'my phone number is 521-1234',          # prefix group absent
]
for _sample in _star_samples:
    phonenumberMatch = phonenumberRegex.search(_sample)
    _star_hit = phonenumberMatch.group()
    print('* :' + _star_hit)

# 7.3.5 Matching one or more occurrences with the plus sign
# The prefix group is quantified with "+", so at least one "ddd-" prefix must be
# present; a bare "521-1234" does not match this pattern.
phonenumberRegex = re.compile(r'(\d\d\d-)+\d\d\d-\d\d\d\d')
phonenumberMatch = phonenumberRegex.search('my phone number is 020-010-521-1234')
print('+ :' + phonenumberMatch.group())

# 7.3.6 Matching a specific number of repetitions with curly brackets
for _pattern, _sentence in (
        (r'(ha){3}', 'this is hahaha words'),      # exactly three repetitions
        (r'(ha){3,}', 'this is hahahaha words'),   # three or more repetitions
        (r'(ha){3,5}', 'this is hahahaha words'),  # between three and five repetitions
):
    haRegex = re.compile(_pattern)
    haMatch = haRegex.search(_sentence)
    print('{}:' + haMatch.group())

# 7.4 Greedy and non-greedy matching
_FIVE_HA = 'this is hahahahaha words'

# By default a quantifier is greedy: it takes the longest match it can.
haRegex = re.compile(r'(ha){3,5}')
haMatch = haRegex.search(_FIVE_HA)
print('默认贪心:' + haMatch.group(0))

# A "?" placed right after the quantifier makes it non-greedy (shortest match).
haRegex = re.compile(r'(ha){3,5}?')
haMatch = haRegex.search(_FIVE_HA)
print('非  贪心:' + haMatch.group(0))
## Here the "?" only switches between greedy and non-greedy matching.

# 7.5 The findall() method
_TWO_PHONES = 'my phone number is 010-521-1234 and her phonenumber is 010-332-5426'

# Without groups, findall() returns a list of the matched strings.
phonenumberRegex = re.compile(r'\d\d\d-\d\d\d-\d\d\d\d')
phonenumberFindall = phonenumberRegex.findall(_TWO_PHONES)
print(phonenumberFindall)
print(phonenumberFindall[1])

# With groups, findall() returns a list of tuples, one string per group.
phonenumberRegex = re.compile(r'(\d\d\d)-(\d\d\d)-(\d\d\d\d)')
phonenumberFindall = phonenumberRegex.findall(_TWO_PHONES)
print(phonenumberFindall)
_second_number = phonenumberFindall[1]  # list indices are zero-based, so [1] is the second match
print(_second_number)
print(_second_number[0])  # tuple indices are zero-based too: first group of the second match

# 7.6 Character classes
# The backslashes in the table below are doubled so that the shorthand names are
# printed literally without relying on unrecognised escape sequences such as
# '\d', which Python reports with a warning when it compiles the file.
print(
    '\\d 0到9的任何数字\n'
    '\\D 除0到9的数字以外的任何字符\n'
    '\\w 任何字母、数字、下划线(可以认为是匹配单词)\n'
    '\\W 除字母、数字、下划线以外的任何字符\n'
    '\\s 空格、制表符或换行符(可以认为是匹配空白字符)\n'
    '\\S 除空格、制表符或换行符以外的任何字符\n'
    )
# A range inside square brackets is a shorter way to write (0|1|2|3|4|5).
numberRegex = re.compile(r'[0-5]')
print('缩短表达式')
print(numberRegex.findall('01234567899876543210'))

# 7.7 Making your own character classes
_VOWEL_SENTENCE = 'my name is cody, I am leaning python Regex.'
for _char_class in (
        r'[aeiouAEIOU]',     # any vowel, in either case
        r'[^aeiouAEIOU.]',   # negated class: anything except a vowel or a period
):
    vowelRegex = re.compile(_char_class)
    print('自定义字符分类')
    print(vowelRegex.findall(_VOWEL_SENTENCE))

# 7.8 The caret and dollar sign characters
# "^" anchors the match at the start of the string and "$" at the end, so the
# pattern only matches a string made up entirely of digits.
whileStringisnumber = re.compile(r'^\d+$')
for _candidate in ('123456789', '1234x56789'):
    print(whileStringisnumber.search(_candidate))

# 7.9 The wildcard character
# "." stands for exactly one character, so "flat" contributes only "lat".
atRegex = re.compile(r'.at')
_at_words = atRegex.findall('The hat in the Cat sat on the flat mat')
print(_at_words)

# 7.9.1 Matching everything with dot-star
nameRegex = re.compile(r'Lastname:(.*)Firstname:(.*)')
_name_match = nameRegex.search('Lastname:tom Firstname:caty')
print(_name_match)  # prints the Match object itself rather than the captured groups

# 7.9.2 Matching newlines with the dot character
_THREE_LINES = 'my name is tom.\n and her name is aki.\n we are friends.'

# Without flags "." stops at a newline, so only the first line is matched.
newlineRegex = re.compile(r'.*')
_first_line_only = newlineRegex.search(_THREE_LINES)
print(_first_line_only.group())

# With re.DOTALL "." also matches newlines, so the whole text is matched.
newlineRegex = re.compile(r'.*', flags=re.DOTALL)
_whole_text = newlineRegex.search(_THREE_LINES)
print(_whole_text.group())

# 7.10 Regex review


# 7.11 Case-insensitive matching
_ROBOCOP_TEXT = 'ReBocop Rebocop ReBoCop ReBOcop'
for _flags in (0, re.IGNORECASE):  # first case-sensitive, then ignoring case
    rebocopRegex = re.compile(r'ReBocop', _flags)
    print(rebocopRegex.findall(_ROBOCOP_TEXT))

# 7.12 Substituting strings with the sub() method
_SECRET_SENTENCE = 'Agent Alice gave the secret documents to Agent Bob.'
nameRegex = re.compile(r'Agent \w+')
print(nameRegex.findall(_SECRET_SENTENCE))
# sub() returns a new string in which every match is replaced by the first argument.
_censored = nameRegex.sub('Censord', _SECRET_SENTENCE)
print(_censored)

#7.1.3 管理复杂的正则表达式
phonenumberRegex = re.compile(r'''(
    (\d{3}|\(\d{3}\))?      #区号
    (\s|-|\.)?             #分隔符
    (\d{3})                  #前3位
    (\s|-|\.)            #分隔符
    (\d{4})                  #后4位
    (\s*(ext|x|ext.)\s*(\d{2,5}))? #分机
)''',re.VERBOSE)
print(phonenumberRegex.findall('tom phone number is 010-255-1234 and tim 111-121-2551 ext 4521 .')[1][0])
print(phonenumberRegex.findall('tom phone number is 010-255-1234 and tim 111-121-2551 ext 4521 .'))

# E-mail address pattern, written in verbose mode like the phone pattern.
# It has two groups, so findall() yields (whole address, dot-something) tuples.
_EMAIL_SENTENCE = 'my email is tom@qq.com and hers is 90114%aa@163.com.'
emailRegex = re.compile(
    r'''(
    [a-zA-Z0-9._%+-]+   #username
    @                     #
    [a-zA-Z0-9.-]+      #domain name
    (\.[a-zA-Z]{2,4})   #dot-something
)''',
    flags=re.VERBOSE,
)
_email_hits = emailRegex.findall(_EMAIL_SENTENCE)
print(_email_hits)

# Find every phone number and e-mail address in the clipboard text
import pyperclip  # third-party clipboard helper, only used by this part of the script

text = str(pyperclip.paste())
matches = []
for groups in phonenumberRegex.findall(text):
    # Tuple slots 1, 3 and 5 hold the area code, the first three digits and the
    # last four digits. The area code is optional in the pattern, so empty
    # parts are skipped to avoid a number that starts with a stray '-'.
    phoneNum = '-'.join(part for part in (groups[1], groups[3], groups[5]) if part)
    if groups[8]:  # slot 8 holds the extension digits; '' when there is none
        phoneNum += ' x' + groups[8]
    matches.append(phoneNum)
# The e-mail pattern has groups too, so findall() yields tuples; slot 0 is the
# whole address.
matches.extend(found[0] for found in emailRegex.findall(text))
print(matches)

# Join all matches into one string and copy it to the clipboard
if matches:
    joined = '\n'.join(matches)
    pyperclip.copy(joined)
    print('Copied to clipboard:')
    print(joined)
else:
    print('No phone number or email address found.')

 

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值