import re
# compile #编译成正则
# findall #寻找所有符合条件的字符,返回列表
# match #开头匹配,返回最前面匹配到的内容,通过group调用
# search #全局匹配,返回最前面匹配到的内容,通过group调用
# split #用正则分割字符串,功能更强大,返回列表
# sub #替换,返回替换后的字符
# subn #替换,返回替换后的字符和替换次数,用元组存储返回
#最简单的实例,findall找到所有符合条件的字符串,生成一个列表
# pattern='fool'
# s1='I am a fool'
# s2='You are a idiot'
# s3='fools is more than fool'
# print re.findall(pattern,s1)
# print re.findall(pattern,s2)
# print re.findall(pattern,s3)
#不过更建议写成这样,速度更快,解析更准确
# pattern=re.compile(r'fool')
# pattern=re.compile('fool')
# s3='fools is more than fool'
# print re.findall(pattern,s3)
#字符集合匹配
# pattern1=re.compile('f[abc]')#包含在内
# pattern2=re.compile('f[^abc]')#去除
# pattern3=re.compile('f[a-z]')#包含范围
# pattern4=re.compile('f[a-z]{2}')
# s1='father is fb while mother is dfc and dfv'
# print pattern1.findall(s1)
# print pattern2.findall(s1)
# print pattern3.findall(s1)
# print pattern4.findall(s1)
#开头匹配
# pattern=re.compile('^egg')
# s1='egg belongs to you'
# s2='the egg belongs to you'
# print pattern.findall(s1)
# print pattern.findall(s2)
#结尾匹配
# pattern=re.compile('you$')
# s1='what are you'
# s2='what you are'
# print pattern.findall(s1)
# print pattern.findall(s2)
#匹配个数
# pattern1=re.compile('abc{4}')
# pattern2=re.compile('abc{2,4}')
# pattern3=re.compile('abc{2,}')
# pattern4=re.compile('abc*')#'c' repeated 0 or more times, greedy; same as {0,}
# pattern5=re.compile('abc+')#'c' repeated 1 or more times, greedy; same as {1,}
# s1='ab abc abcc abccc abcccc'
# print pattern1.findall(s1)
# print pattern2.findall(s1)
# print pattern3.findall(s1)
# print pattern4.findall(s1)
# print pattern5.findall(s1)
#非贪婪模式
# pattern1=re.compile('abc+?')#非贪婪
# pattern2=re.compile('abc+')#贪婪
# s1='abc abcc abccc abcccc'
# print pattern1.findall(s1)
# print pattern2.findall(s1)
#转义字符
# re.findall('\d','dert5322ws') #十进制数字
# re.findall('\D','defds846sdbv') #非十进制数字
# re.findall('\s','se4 se23e 21342 d44,s') #空白字符
# re.findall('\S','se4 se23e 21342 d44,s') #非空字符
# re.findall('\w','se4 se23e 21342 d44,s') #数字字母字符
# re.findall('\W','se4 se23e a42 d44,s') #非数字字母字符
# re.findall('s.{4}','se4 se23e a42 d44,s') #.可表示任意字符
#筛选有用信息
# pattern1=re.compile('name=(.+),')
# pattern2=re.compile('name=(.+),age=(\d+)')
# s1='name=zhangweiguo,age=18'
# print pattern1.findall(s1)
# print pattern2.findall(s1)
#多个筛选通道
# pattern1=re.compile('(good|bad)')
# s1='apple is good, while weilong is bad'
# print pattern1.findall(s1)
#搜索选项
# pattern1=re.compile('zw[123]',re.I)#不区分大小写
# pattern2=re.compile('zw[123]')
# s1='Zw1 zw2 ZW3'
# print pattern1.findall(s1)
# print pattern2.findall(s1)
#
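# pattern3=re.compile('^Begin.+end$',re.M)#re.M (MULTILINE): ^ and $ also match at the start/end of every line
# pattern4=re.compile('^Begin.+end$',re.S)#re.S (DOTALL): '.' also matches '\n', so one match can span several lines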
# pattern5=re.compile('^Begin.+end$')
# s2='Begin You do end\nBegin I do end'
# print pattern3.findall(s2)
# print pattern4.findall(s2)
# print pattern5.findall(s2)
#有时一次性找出所有的太占内存,采用迭代器finditer
# pattern1=re.compile('(Liu.+?)\s',re.I|re.S)
# s1='Liusan and Liusi are all named by \nLiuke and liumei '
# S=pattern1.finditer(s1)
# for string in S:
#     print string.group(),string.start(),string.end()
#有时并不需要findall,那么来试试match和search
# pattern1=re.compile('Chinese',re.I)
# s1='Do you know Chinese is a language, and chinese is good'
# s2='Chinese is a kind language'
# p1=pattern1.search(s1)#全局匹配,无则返回None
# p2=pattern1.match(s1)#matches only at the start of the string; returns None if there is no match, otherwise a match object
# p3=pattern1.match(s2)
# print p1.span(),p1.start(),p1.end(),p1.group()
# print p2
# print p3.span(),p3.group()
#还有替换与分割的功能:split、sub(subn)
# pattern1=re.compile('go+d')
# s1='the best good food is made by god'
# print pattern1.sub('fool',s1)#返回更改后的字符串
# print pattern1.subn('fool_copy',s1)#返回更改后的字符串与更换次数
#约等价于以下
# s1=s1.replace('good','fool')
# s1=s1.replace('god','fool')
# pattern2=re.compile('[\+\-\*/]')
# pattern3=re.compile('<.*?>')
# s2='12+13*34/34'
# s3='Bod<thin>Amily<fat>Bill<>Youself<the best>Newton'
# ss=pattern2.split(s2)
# print ss
# print pattern3.split(s3)
# 更高级的功能(一):生成字典(?P<name>)
# s=[r'^(?P<first_word>\w+)',
# r'[\s,\.]*(\w+)[\s,\.]*',
# r'(?P<last_word>\w+)\.$']
# S='I am a student, and you are my teacher.'
# for i in s:
#     pattern=re.compile(i)
#     p1=pattern.findall(S)
#     p2=pattern.search(S)
#     print '模式:%-10s;findall匹配到:%-10s'%(i,p1)
#     print 'search匹配到:%-10s,生成的字典是:%-10s'%(p2.group(),p2.groupdict())
# 更高级的功能(二):前向或后向,你希望匹配的字符前面或后面出现的东西
# pattern1=re.compile('(?<=name=)\w+')#目的字符前面出现的内容,这里是:name=
# pattern2=re.compile('\d+(?=,)')#目的字符后面应出现的内容,这里是:,
# pattern3=re.compile('(?<!,)\w+(?=,)')#target must not be preceded by ',' and must be followed by ','
# pattern4=re.compile('\d+(?!,)')#目的字符后面不出现的内容,这里是:,
# s1='myinfo:name=zhangweiguo,age=12,id=100,sex=female'
# print pattern1.search(s1).group(),pattern1.findall(s1)
# print pattern2.search(s1).group(),pattern2.findall(s1)
# print pattern3.search(s1).group(),pattern3.findall(s1)
# print pattern4.search(s1).group(),pattern4.findall(s1)
# 下面是几个简单的实例
# 实例一:电话号码格式
# pattern1=re.compile('^\d{3}-\d{7}$')
# s1='021-2823456'
# s2='201-3456789'
# s3='12d-1235675'
# print(pattern1.findall(s1))
# print(pattern1.search(s2))
# print(pattern1.match(s3))
# 实例二:密码规则
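# Requirement: only upper-case letters, lower-case letters and digits, at least 8 characters.
# NOTE: the pattern below only checks the character set and minimum length from the start of
# the string; it does not require all three classes to be present, and without a trailing '$'
# match() still accepts strings whose later characters fall outside the set.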
# pattern=re.compile('[A-Za-z\d]{8,}')
# s1='Python520'
# s2='#python520'
# s3='python5'
# print pattern.match(s1)
# print pattern.match(s2)
# print pattern.match(s3)
# 实例三:邮箱格式验证
# pattern=re.compile('[\d\w-]+@(qq|163|126|gmail|sina|Outlook|yahoo)\.com')
# s1='420943404@qq.com'
# s2='dehu_des@163.com'
# p1=pattern.findall(s1)#findall只返回括号内的内容,并且多个括号时返回元组的形式
# p2=pattern.match(s1)#match和search返回全部匹配的内容
# p3=pattern.search(s2)
# print p1
# print p2.group()
# print p3.group()