Python课程第十一章正则表达式习题详解
1、匹配一行文字中的所有开头的字母内容
>>> import re
>>> re.match(r"\D+?","Abc")
>>> re.match(r"\D+?","Abc").group()
'A'
>>> re.match(r"\D+","A12345")
>>> re.match(r"\D+","A12345").goup()
Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
AttributeError: '_sre.SRE_Match' object has no attribute 'goup'
>>> re.match(r"\D+","A12345").group()
'A'
2、匹配一行文字中的所有开头的数字内容
>>> re.match(r"\d+?","1234567")
>>> re.match(r"\d+?","1234567").group()
'1'
>>> re.match(r"\d+","1234567").group()
'1234567'
>>> re.match(r"\d+","1ascdfgh1234").group()
'1'
>>>
3、匹配一行文字中的所有开头的数字内容或字母内容
4、 只匹配包含字母和数字的行
>>> re.search(r'\w+[a-zA-Z0-9]',"acvbghj12345*").group()
'acvbghj12345'
>>> re.search(r'\w[a-zA-Z0-9]+',"acvbghj12345*").group()
'acvbghj12345'
5、写一个正则表达式,使其能同时识别下面所有的字符串:'bat',
'bit', 'but', 'hat', 'hit', 'hut'
>>> re.match(r"\b[bh][aiu]t","bat bit but hat hit hut")
>>> re.match(r"\b[bh][aiu]t","bat bit but hat hit hut").group()
'bat'
>>> re.findall(r"\b[bh][aiu]t","bat bit but hat hit hut")
['bat', 'bit', 'but', 'hat', 'hit', 'hut']
>>>
6、匹配所有合法的python标识符
>>> pattern=r'[a-zA-Z_]\w*'
7、提取每行中完整的年月日和时间字段
>>> test_datetime = u'他的生日是2016-12-12 14:34,是个可爱的小宝贝.二宝的生日2016-12-21 11:34,好可爱的.'
>>> pattern=re.compile(r'\d{4}-\d{1,2}-\d{1,2}\s\d{1,2}\:\d{1,2}')
>>> search=pattern.search(test_datetime)
>>> search=pattern.search(test_datetime)
>>> print search.group()
2016-12-12 14:34
>>> match=pattern.match(test_datetime)
>>> print match.group()
Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
AttributeError: 'NoneType' object has no attribute 'group'
>>> findall=pattern.findall(test_datetime)
>>> print findall
[u'2016-12-12 14:34', u'2016-12-21 11:34']
>>> for datetime in findall:
... print datetime
...
2016-12-12 14:34
2016-12-21 11:34
>>>
>>> a="123yuye2345"
>>> pattern=re.compile(r'[0-9].*')
Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
NameError: name 're' is not defined
>>> import re
>>> a="123yuye2345"
>>> pattern=re.compile(r'[0-9].*')
>>> result=pattern.findall(a)
>>> print result
['123yuye2345']
>>> result=pattern.search(a)
>>> print result
>>> print result.group()
123yuye2345
>>> result=pattern.match(a)
>>> print result
>>> print result.group()
123yuye2345
>>>
8、将每行中的电子邮件地址替换为你自己的电子邮件地址
9、匹配\home关键字:
进阶练习
1、使用正则提取出字符串中的单词
>>> a="qwer qwer sdf cvvb"
>>> pattern='[a-zA-Z]+'
>>> re.findall(pattern,a)
['qwer', 'qwer', 'sdf', 'cvvb']
2、使用正则表达式匹配合法的邮件地址:
>>> add = 'https://www.net.com.hp.ibm.edu'
>>> pattern=re.compile(r'((w{3}\.)(\w+\.)+(com|edu|cn|net))')
>>> result=pattern.findall(add)
>>> print result
[('www.net.com.hp.ibm.edu', 'www.', 'ibm.', 'edu')]
>>> re.findall(r'((w{3}\.)(\w+\.)+(com|edu|cn|net))',add)
[('www.net.com.hp.ibm.edu', 'www.', 'ibm.', 'edu')]
3、国际域名格式如下:
域名由各国文字的特定字符集、英文字母、数字及“-”(即连字符或减号)任意组
合而成, 但开头及结尾均不能含有“-”,“-”不能连续出现。域名中字母不分大
小写。域名最长可达60个字节(包括后缀.com、.net、.org等)。
4、提取字符串中合法的超链接地址
比如:s = '光荣之路官网'
要求,给出的正则表达式能兼顾所有链接地址。
>>> s='光荣之路官网'
>>> matchResult=re.search(r'(.*)',s)
>>> matchResult.group()
'\xb9\xe2\xc8\xd9\xd6\xae\xc2\xb7\xb9\xd9\xcd\xf8'
>>> matchResult.group(1)
'\xb9\xe2\xc8\xd9\xd6\xae\xc2\xb7\xb9\xd9\xcd\xf8'
>>> matchResult=re.search(r'(.*)',s).group(1)
>>> print group(1)
Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
NameError: name 'group' is not defined
>>> print matchResult
光荣之路官网
>>> print matchResult.group()
Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
AttributeError: 'str' object has no attribute 'group'
>>> s='光荣之路官网'
>>> matchResult=re.search(r'(.*)',s)
>>> print matchResult.group()
>>>
5、统计文件中单词个数
6、写一个函数,其中用正则验证密码的强度
#coding=utf-8
import re
def checklen(pwd):
    """Return True when the password is at least 8 characters long.

    Args:
        pwd: The candidate password string.

    Returns:
        bool: True if ``len(pwd) >= 8``, otherwise False.
    """
    return len(pwd) >= 8
def checkContainUpper(pwd):
    """Return True when the password contains an ASCII uppercase letter.

    Args:
        pwd: The candidate password string.

    Returns:
        bool: True if at least one character in ``A-Z`` is present.
    """
    # A single search is enough: only the existence of a match matters,
    # not the full list of matches.
    return re.search('[A-Z]', pwd) is not None
def checkContainNum(pwd):
    """Return True when the password contains an ASCII digit.

    Args:
        pwd: The candidate password string.

    Returns:
        bool: True if at least one character in ``0-9`` is present.
    """
    # Only the existence of a digit matters, so search instead of findall.
    return re.search('[0-9]', pwd) is not None
def checkContainLower(pwd):
    """Return True when the password contains an ASCII lowercase letter.

    Args:
        pwd: The candidate password string.

    Returns:
        bool: True if at least one character in ``a-z`` is present.
    """
    # Only the existence of a lowercase letter matters, so search suffices.
    return re.search('[a-z]', pwd) is not None
def checkSymbol(pwd):
    """Return True when the password contains a non-alphanumeric character.

    Any character outside ``a-z``, ``A-Z`` and ``0-9`` counts as a symbol,
    which includes punctuation, underscore and whitespace.

    Args:
        pwd: The candidate password string.

    Returns:
        bool: True if at least one such character is present.
    """
    # Negated character class: match anything that is not an ASCII
    # letter or digit.
    return re.search('[^a-z0-9A-Z]', pwd) is not None
def checkPassword(pwd):
    """Return True when the password passes every strength check.

    A password passes only if it satisfies all of: minimum length,
    contains an uppercase letter, a lowercase letter, a digit and a symbol.

    Args:
        pwd: The candidate password string.

    Returns:
        bool: True if all individual checks return True, otherwise False.
    """
    checks = (
        checklen,
        checkContainUpper,
        checkContainLower,
        checkContainNum,
        checkSymbol,
    )
    # Each check is a pure predicate, so stopping at the first failure
    # gives the same result as evaluating all of them.
    return all(check(pwd) for check in checks)
def main():
    """Check one sample password and print whether it passed."""
    if checkPassword("Helloworld#123_hkklu&678"):
        print(u'检测通过')
    else:
        print(u'检测未通过')


# Run the demo only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
7、匹配ip的正则表达式:
r'^(([1-9]|[1-9]\d|1\d\d|2[0-4]\d|25[0-5])\.){3}([1-9]|[1-9]\d|1\d\d|2[0-4]\d|25[0-5])$'
>>> res = re.search(r"ip='(?P<ip>([1-9]|[1-9]\d|1\d\d|2[0-4]\d|25[0-5])\.){3}([1-9]|[1-9]\d|1\d\d|2[0-4]\d|25[0-5]).*",s)
>>> print res.group('ip')
168.