#coding=utf-8
#######################################################
#filename: .py
#author:
#date:2019-01-08
#function:
#######################################################
import re
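## Matching with regular expressions
# 1. Import the re module
# 2. Create a Regex object with re.compile()
# 3. Call the Regex object's search() method to find a match; it returns a Match object
# 4. Call the Match object's group() method to get the matched text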
# 7.2 Matching a phone number: compile the pattern, search the sample text,
# then read the matched substring back from the Match object.
phoneText = 'my phone number is 010-521-1234'
phonenumberRegex = re.compile(r'\d\d\d-\d\d\d-\d\d\d\d')
phonenumberMatch = phonenumberRegex.search(phoneText)
matchedNumber = phonenumberMatch.group()
print(matchedNumber)
# 7.3 Grouping
# 7.3.1 Parentheses create groups: group(1) is the first parenthesised part,
# group(2) the second, and group(0) the entire match.
phonenumberRegexGroup = re.compile(r'(\d\d\d)-(\d\d\d-\d\d\d\d)')
phonenumberMatchGroup = phonenumberRegexGroup.search('my phone number is 010-521-1234')
firstGroup, secondGroup = phonenumberMatchGroup.groups()
print('\n'.join([
    '第一组:' + firstGroup,
    '第二组:' + secondGroup,
    '所有组:' + phonenumberMatchGroup.group(0),
]))
# 7.3.2 The pipe character matches any one of several alternatives.
friendsText = 'tom and jmmy are good friends'
namelistRegexGroup = re.compile(r'(tom|jmmy)')
namelistMatchGroup = namelistRegexGroup.search(friendsText)
# search() stops at the first alternative found in the text.
print(namelistMatchGroup.group())
# findall() returns every matched string instead of only the first one.
allNames = namelistRegexGroup.findall(friendsText)
print(allNames)
# 7.3.3 A question mark makes the preceding group optional
# (zero or one occurrence), so the area code may be present or absent.
phonenumberRegex = re.compile(r'(\d\d\d-)?\d\d\d-\d\d\d\d')
for optionalText in (
    'my phone number is 010-521-1234',
    'my phone number is 521-1234',
):
    phonenumberMatch = phonenumberRegex.search(optionalText)
    print('? :' + phonenumberMatch.group())
# 7.3.4 An asterisk matches the preceding group zero, one, or many times,
# so any number of leading "ddd-" prefixes is accepted.
phonenumberRegex = re.compile(r'(\d\d\d-)*\d\d\d-\d\d\d\d')
starSamples = [
    'my phone number is 020-010-521-1234',
    'my phone number is 521-1234',
]
for starText in starSamples:
    phonenumberMatch = phonenumberRegex.search(starText)
    starLine = '* :' + phonenumberMatch.group()
    print(starLine)
# 7.3.5 A plus sign matches the preceding group one or more times.
# Unlike '*', at least one "ddd-" prefix is required, so a bare "521-1234"
# no longer matches this pattern.
phonenumberRegex = re.compile(r'(\d\d\d-)+\d\d\d-\d\d\d\d')
phonenumberMatch = phonenumberRegex.search('my phone number is 020-010-521-1234')
print('+ :' + phonenumberMatch.group())
# 7.3.6 Curly braces match a specific number of repetitions:
# {3} exactly three, {3,} three or more, {3,5} between three and five.
braceCases = (
    (r'(ha){3}', 'this is hahaha words'),
    (r'(ha){3,}', 'this is hahahaha words'),
    (r'(ha){3,5}', 'this is hahahaha words'),
)
for bracePattern, braceText in braceCases:
    haRegex = re.compile(bracePattern)
    haMatch = haRegex.search(braceText)
    print('{}:' + haMatch.group())
# 7.4 Greedy and non-greedy matching
greedyText = 'this is hahahahaha words'
# By default a quantifier is greedy and takes the longest possible match.
haRegex = re.compile(r'(ha){3,5}')
haMatch = haRegex.search(greedyText)
print('默认贪心:' + haMatch.group())
# A '?' placed directly after the quantifier switches it to non-greedy, so the
# shortest possible match is returned. Here the '?' only marks the quantifier
# as non-greedy; it does not mean "optional".
haRegex = re.compile(r'(ha){3,5}?')
haMatch = haRegex.search(greedyText)
print('非 贪心:' + haMatch.group())
# 7.5 The findall() method
twoNumbersText = 'my phone number is 010-521-1234 and her phonenumber is 010-332-5426'
# Without groups in the pattern, findall() returns a list of matched strings.
phonenumberRegex = re.compile(r'\d\d\d-\d\d\d-\d\d\d\d')
phonenumberFindall = phonenumberRegex.findall(twoNumbersText)
print(phonenumberFindall)
print(phonenumberFindall[1])
# With groups in the pattern, findall() returns a list of tuples, one tuple of
# group strings per match.
phonenumberRegex = re.compile(r'(\d\d\d)-(\d\d\d)-(\d\d\d\d)')
phonenumberFindall = phonenumberRegex.findall(twoNumbersText)
print(phonenumberFindall)
# List and tuple indices both start at 0: [1] is the second match and
# [1][0] is the first group of that match.
secondMatch = phonenumberFindall[1]
print(secondMatch)
print(secondMatch[0])
# 7.6 Character classes
# The backslashes are doubled so the shorthand class names print literally.
# A lone backslash before d, D, w, W, s or S in a normal (non-raw) string is
# an invalid escape sequence, which is deprecated and triggers a warning on
# newer Python versions. The printed text is unchanged.
charClassSummary = (
    '\\d 0到9的任何数字\n'
    '\\D 除0到9的数字以外的任何字符\n'
    '\\w 任何字母、数字、下划线(可以认为是匹配单词)\n'
    '\\W 除字母、数字、下划线以外的任何字符\n'
    '\\s 空格、制表符或换行符(可以认为是匹配空白字符)\n'
    '\\S 除空格、制表符或换行符以外的任何字符\n'
)
print(charClassSummary)
# A bracketed range is a shorter way to write a class: [0-5] matches any
# single digit from 0 through 5.
numberRegex = re.compile(r'[0-5]')
print('缩短表达式')
print(numberRegex.findall('01234567899876543210'))
# 7.7 Making your own character classes
# The first class matches any vowel; the second starts with '^', which
# negates it: any character that is neither a vowel nor a period.
vowelText = 'my name is cody, I am leaning python Regex.'
for classPattern in (r'[aeiouAEIOU]', r'[^aeiouAEIOU.]'):
    vowelRegex = re.compile(classPattern)
    print('自定义字符分类')
    print(vowelRegex.findall(vowelText))
# 7.8 The caret and dollar sign characters
# '^' anchors the match at the start of the string and '$' at the end, so
# this pattern only matches a string made up entirely of digits.
whileStringisnumber = re.compile(r'^\d+$')
for digitCandidate in ('123456789', '1234x56789'):
    # Prints the Match object on success and None when there is no match.
    print(whileStringisnumber.search(digitCandidate))
# 7.9 The wildcard character
# A dot matches any single character except a newline, so '.at' matches
# exactly three characters ending in "at".
atRegex = re.compile(r'.at')
atWords = atRegex.findall('The hat in the Cat sat on the flat mat')
print(atWords)
# 7.9.1 Dot-star ('.*') matches any run of characters, here used to capture
# whatever follows each label.
nameRegex = re.compile(r'Lastname:(.*)Firstname:(.*)')
nameMatch = nameRegex.search('Lastname:tom Firstname:caty')
print(nameMatch)
# 7.9.2 Matching newlines with the dot
# Without flags '.*' stops at the first newline; with re.DOTALL the dot also
# matches newline characters, so the whole text is returned.
multiLineText = 'my name is tom.\n and her name is aki.\n we are friends.'
for dotFlags in (0, re.DOTALL):
    newlineRegex = re.compile(r'.*', dotFlags)
    print(newlineRegex.search(multiLineText).group())
# 7.10 Review of regular expression symbols
# 7.11 Case-insensitive matching
# The first pass is case-sensitive and finds only the exact spelling; passing
# re.IGNORECASE makes the pattern match every capitalisation.
rebocopText = 'ReBocop Rebocop ReBoCop ReBOcop'
for caseFlags in (0, re.IGNORECASE):
    rebocopRegex = re.compile(r'ReBocop', caseFlags)
    print(rebocopRegex.findall(rebocopText))
# 7.12 Substituting strings with sub()
# sub() returns a copy of the text with every match replaced by the first
# argument.
agentText = 'Agent Alice gave the secret documents to Agent Bob.'
nameRegex = re.compile(r'Agent \w+')
print(nameRegex.findall(agentText))
censoredText = nameRegex.sub('Censord', agentText)
print(censoredText)
# 7.13 Managing complex regular expressions
# re.VERBOSE lets the pattern span several lines; whitespace and '#' comments
# inside the pattern are ignored by the regex engine.
#
# findall() tuple layout for phonenumberRegex (the clipboard code in this file
# indexes into it):
#   [0] whole number, [1] area code, [2] separator, [3] first 3 digits,
#   [4] separator, [5] last 4 digits, [6] whole extension,
#   [7] extension keyword, [8] extension digits
#
# The extension keyword is written as ext\.?|x. The earlier form ext|x|ext.
# left the dot unescaped, so it matched ANY character after "ext"
# (e.g. "extZ4521" was accepted as an extension).
phonenumberRegex = re.compile(r'''(
(\d{3}|\(\d{3}\))? #区号
(\s|-|\.)? #分隔符
(\d{3}) #前3位
(\s|-|\.) #分隔符
(\d{4}) #后4位
(\s*(ext\.?|x)\s*(\d{2,5}))? #分机
)''',re.VERBOSE)
phoneSampleText = 'tom phone number is 010-255-1234 and tim 111-121-2551 ext 4521 .'
# [1][0] is the full text of the second phone number found.
print(phonenumberRegex.findall(phoneSampleText)[1][0])
print(phonenumberRegex.findall(phoneSampleText))
# findall() tuple layout for emailRegex: [0] whole address, [1] the final
# ".xxx" suffix.
emailRegex = re.compile(r'''(
[a-zA-Z0-9._%+-]+ #username
@ #
[a-zA-Z0-9.-]+ #domain name
(\.[a-zA-Z]{2,4}) #dot-something
)''',re.VERBOSE)
print(emailRegex.findall('my email is tom@qq.com and hers is 90114%aa@163.com.'))
# Find every phone number and email address in the clipboard text.
import pyperclip

text = str(pyperclip.paste())
matches = []
for groups in phonenumberRegex.findall(text):
    # groups[1] (area code) is optional in the pattern and is '' when absent;
    # skip empty parts so such a number is not rendered with a leading '-'.
    phoneNum = '-'.join(
        part for part in (groups[1], groups[3], groups[5]) if part
    )
    # groups[8] holds the extension digits, or '' when there is no extension.
    if groups[8]:
        phoneNum += ' x' + groups[8]
    matches.append(phoneNum)
for groups in emailRegex.findall(text):
    # Index 0 is the outermost group, i.e. the complete email address.
    matches.append(groups[0])
print(matches)

# Join all matches into one newline-separated string and copy it back to the
# clipboard.
if matches:
    joinedMatches = '\n'.join(matches)
    pyperclip.copy(joinedMatches)
    print('Copied to clipboard:')
    print(joinedMatches)
else:
    print('No phone number or email address found.')