import re
#分组
# Match a number from 0 to 100 (note: the pattern below only covers 0-99)
# n='09'
# result=re.match('[1-9]?\d',n)
# print(result)
# Grouping with alternation: (word|word|word) matches one whole alternative,
# whereas a character class such as [abc] matches a single character, not a
# word.
# Validate a mailbox hosted on one of the accepted providers: 163, 126 or qq.
EMAIL_PATTERN = re.compile(r'\w{5,20}@(163|126|qq)\.com')

email = '738473800@qq.com'
# fullmatch() anchors both ends of the string; plain match() only anchors the
# start, so text appended after ".com" would otherwise still be accepted.
result = EMAIL_PATTERN.fullmatch(email)
print(result)
# An 11-digit mobile number (leading 1) that must not end in 4 or 7.
# The final character class lists the allowed last digits: 0-3, 5, 6, 8, 9.
MOBILE_PATTERN = re.compile(r'1\d{9}[0-35689]')

phone = '15901018868'
# fullmatch() replaces match() with a trailing "$": "$" also matches just
# before a final newline, which would let '15901018868\n' pass validation.
result = MOBILE_PATTERN.fullmatch(phone)
print(result)
# Scraping-style extraction: split a landline number into its parts.
phone = '010-123456789'
result = re.compile(r'(\d{3}|\d{4})-(\d{9})$').match(phone)
print(result)
# Parentheses create capture groups. Group 0 is the whole match, group 1 is
# the area code and group 2 is the subscriber number; print one per line.
print(*result.group(0, 1, 2), sep='\n')
msg = '<html>abc</html>'
msg1 = '<h1>hello</h1>'

# Loose form: the closing tag is matched by its own character class, so the
# pattern does not require it to repeat the opening tag's name.
result = re.compile(r'<([0-9a-zA-Z]+)>(.+)</[0-9a-zA-Z]+>').match(msg)
print(result)
print(result.group(1))

# Numbered back-reference: \1 must repeat exactly what group 1 captured,
# so the closing tag has to equal the opening tag.
result = re.compile(r'<([0-9a-zA-Z]+)>(.+)</\1>$').match(msg1)
print(result)
print(result.group(2))
# Nested tags: closing tags appear in the reverse order of the opening ones,
# so the inner tag (\2) is closed before the outer tag (\1). A wrongly nested
# string such as '<html><h1>abc</html></h1>' does not match.
msg = '<html><h1>abc</h1></html>'
result = re.compile(
    r'<([0-9a-zA-Z]+)><([0-9a-zA-Z]+)>(.+)</\2></\1>$'
).match(msg)
print(result)
# groups() yields the captures in order: outer tag, inner tag, text content.
print(*result.groups(), sep='\n')
执行结果:
<re.Match object; span=(0, 16), match='738473800@qq.com'>
<re.Match object; span=(0, 11), match='15901018868'>
<re.Match object; span=(0, 13), match='010-123456789'>
010-123456789
010
123456789
<re.Match object; span=(0, 16), match='<html>abc</html>'>
html
<re.Match object; span=(0, 14), match='<h1>hello</h1>'>
hello
<re.Match object; span=(0, 25), match='<html><h1>abc</h1></html>'>
html
h1
abc