1. 基本语法
1 正则匹配三步走
import re
# Step 1: compile the pattern (three digits, a hyphen, four digits).
pattern = re.compile(r'\d\d\d-\d\d\d\d')
# Step 2: search() returns a Match object rather than the matched text.
phonenum = pattern.search('my num is 123-1121')
# Step 3: group(0) is the whole match, the same text a bare group() returns.
print(phonenum.group(0))
>>>123-1121
2 匹配更多模式
# 2.1 Grouping with parentheses (separate the area code from the phone number)
pattern21 = re.compile(r'(\d\d\d)-(\d\d\d-\d\d\d\d)')
mo = pattern21.search('my num is 110-234-1234')
# group() accepts several indices at once and returns them as a tuple;
# index 0 stands for the entire match.
print(*mo.group(1, 2, 0))
>>>110 234-1234 110-234-1234
# 2.2 Pipe alternation inside a group (match one of several expressions)
pattern22 = re.compile(r'bat(man|mobile|bat)')
mo22 = pattern22.search('batmobile is beautiful')
# Whole match first, then the alternative captured by group 1.
print(*mo22.group(0, 1))
>>>batmobile mobile
# 2.3 Optional matching: ? matches the group before it zero times or once
pattern23 = re.compile(r'bat(wo)?man')
mo23 = pattern23.search('the adventure of batman')
# The optional (wo) group is absent from the text, yet the match succeeds.
print(mo23.group(0))
>>>batman
# 2.4 Star: match the group before it zero or more times
pattern24 = re.compile(r'(wo)*man')
mo24 = pattern24.search('this is batwowowowoman')
# Indexing a Match object with 0 yields the whole match, like group().
print(mo24[0])
>>>wowowowoman
# 2.5 Plus sign: match the group before it one or more times
pattern25 = re.compile(r'(wo)+man')
# The text has no 'wo' in front of 'man', so search() returns None.
mo25 = pattern25.search('this is man')
# None is a singleton, so test for it with `is` rather than `==`.
print(mo25 is None)
>>>True
# 2.6 Curly braces: match the group before it a specified number of times
pattern26 = re.compile(r'(wo){3,5}')
mo26 = pattern26.search('this is wowowowowowman')
# Matching is greedy by default, so it takes as many repetitions as the
# {3,5} range allows.
print(mo26[0])
>>>wowowowowo
3 贪心匹配
# 3 Greedy matching takes the longest possible string; non-greedy matching
# takes the shortest one and is requested by appending ? after the quantifier.
pattern3 = re.compile(r'(wo){3,5}?')
mo3 = pattern3.search('wowowowowowman')
# Only the minimum of three repetitions is consumed.
print(mo3.group(0))
>>>wowowo
4 findall()方法
# 4.1 Without groups, findall() returns a list of the matched strings
pattern41 = re.compile(r'\d\d\d-\d\d\d\d')
# 4.2 With groups, findall() returns a list of tuples, one entry per group
pattern42 = re.compile(r'(\d\d\d)-(\d\d\d\d)')
# Both patterns scan the same text, so the two results can be compared directly.
mo41 = pattern41.findall('this is my num 123-1253,421-3412')
mo42 = pattern42.findall('this is my num 123-1253,421-3412')
# One result per line: the plain list first, then the list of tuples.
print(mo41, mo42, sep='\n')
>>>['123-1253', '421-3412']
>>>[('123', '1253'), ('421', '3412')]
5 字符分类
缩写字符分类 | 表示 |
\d | 0-9的任何数字 |
\D | 除0-9数字之外的任何字符 |
\w | 任何字母、数字、下划线(可认为是匹配单词) |
\W | 除字母、数字、下划线之外的字符 |
\s | 空格、制表符、换行符(可被认为是匹配‘空白’字符) |
\S | 除空格、制表符、换行符之外的任何字符 |
6 开始与结束
# 6 The caret ^ anchors a match to the start of the text and the dollar
# sign $ anchors it to the end; only ^ is demonstrated here.
pattern6 = re.compile(r'^hello')
mo6 = pattern6.findall('hello beautiful world')
print(mo6)
>>>['hello']
7 通配符
# 7.1 (.*) captures any run of characters (by default the dot stops at a
# newline)
pattern71 = re.compile(r'my name is (.*)')
mo71 = pattern71.findall('my name is david')
# findall() returns only the captured group, not the whole match.
print(mo71)
# 7.2 With re.DOTALL the dot matches every character, newlines included
pattern72 = re.compile('.*', flags=re.DOTALL)
>>>['david']
8 匹配不分大小写
# 8 Case-insensitive matching
# Use a section-specific name so that section 6's pattern6 is not overwritten.
pattern8 = re.compile(r'robot', re.IGNORECASE)
mo8 = pattern8.findall('is ROboT,ROBOT,roBOT')
# Every spelling is found regardless of letter case.
print(mo8)
>>>['ROboT', 'ROBOT', 'roBOT']
9 sub()替换
# 9 Replacing text with sub()
# Raw string so that \w reaches the regex engine as written instead of being
# read as an (invalid) string escape sequence.
pattern9 = re.compile(r'agent \w+')
# Every non-overlapping match is replaced by the first argument.
mo9 = pattern9.sub('CIA', 'agent A is the supervisor of agent B')
print(mo9)
>>>CIA is the supervisor of CIA
2. 课后练习
强口令检测:至少8个字符;同时有大小写;至少一个数字
(相当麻烦了..感觉正则匹配的顺序是个很大的问题)
#!python3
#-*- coding: utf-8 -*-
# 2018/4/23 0023 14:47
import re
# Minimum number of characters a strong password must have.
MIN_PASSWORD_LENGTH = 8

# One pattern per required character class, compiled once instead of on
# every pass through the input loop.
REQUIRED_CLASSES = (
    re.compile(r'[0-9]'),  # at least one digit
    re.compile(r'[A-Z]'),  # at least one upper-case letter
    re.compile(r'[a-z]'),  # at least one lower-case letter
)


def has_required_classes(password):
    """Return True if password has a digit, an upper-case and a lower-case letter."""
    # search() stops at the first hit, which is all an existence test needs.
    return all(rule.search(password) for rule in REQUIRED_CLASSES)


def is_strong_password(password):
    """Return True if password is long enough and has every required class.

    A strong password has at least MIN_PASSWORD_LENGTH characters and
    contains at least one digit, one upper-case letter and one lower-case
    letter.
    """
    return len(password) >= MIN_PASSWORD_LENGTH and has_required_classes(password)


def main():
    """Prompt repeatedly until the user enters a strong password."""
    while True:
        password = input("请输入口令:")
        # Length is checked first so that a short password gets the length
        # message rather than the format message.
        if len(password) < MIN_PASSWORD_LENGTH:
            print("长度至少为8,请再次输入")
        elif has_required_classes(password):
            print("该口令为强口令")
            break
        else:
            print("格式不对")


# Guarded so that importing this code does not block on input().
if __name__ == "__main__":
    main()
参考:
1. 正则 ?<= 和 ?= 用法:(?=exp) 匹配exp前面的位置;(?<=exp) 匹配exp后面的位置