在本章中,我们一起来了解 Python 中正则表达式(re 模块)的使用方式。
一、re.match 的函数原型为:re.match(pattern, string, flags=0)。它只从字符串的开头开始匹配;开头不匹配时返回 None。
# -*- coding: gb18030 -*-
# Example 1: re.match only succeeds when the pattern matches at the very
# beginning of the string; otherwise it returns None.
import re

text = "china,Hello"
m = re.match("china", text)
if m is not None:
    # group() with no arguments returns the whole matched text.
    print(m.group())
else:
    print('not match')
# Output => china
# Example 2: "." matches any single character except a newline, so the
# pattern ".end" matches " end" (the "." consumes the leading space).
pattn = ".end"
m = re.match(pattn, " end")
if m is not None:
    print('result:' + m.group())
else:
    print('not match')
# Output => result: end
二、re.search 的函数原型为:re.search(pattern, string, flags=0)。它从左到右扫描整个字符串,返回第一个匹配的结果;找不到匹配时返回 None。
# -*- coding: gb18030 -*-
import re

# re.search scans the string from left to right and returns the first
# match, so the pattern does not have to sit at the start of the string.
text = "Hello china"
m = re.search("china", text)
if m is not None:
    print('result:' + m.group())
else:
    print('not search')
# Output => result:china
三、搜索并替换 re.sub() 或re.subn()
# -*- coding: gb18030 -*-
import re

# re.sub returns the string with the matches of the pattern replaced.
replaced = re.sub("B", "I", "CHBNA")
print(replaced)  # Output => CHINA

# re.subn returns a tuple (new_string, number_of_replacements).
replaced_with_count = re.subn("B", "I", "CHBNA")
print(replaced_with_count)  # Output => ('CHINA', 1); 1 is the replacement count
四、split
# -*- coding: gb18030 -*-
import re

# Splitting on a literal comma: re.split and str.split give the same list.
parts_by_regex = re.split(",", "a,b,c")
print(parts_by_regex)  # Output => ['a', 'b', 'c']
parts_by_str = "a,b,c".split(",")
print(parts_by_str)  # Output => ['a', 'b', 'c']

# Splitting on a pattern: one whitespace character followed by two word
# characters (" aa", " bb"). A raw string keeps the backslashes intact.
parts_by_pattern = re.split(r"\s\w{2}", "1 aa2 bb3")
print(parts_by_pattern)  # Output => ['1', '2', '3']
五、综合
# -*- coding: gb18030 -*-
import re

# Anchored at the start of the string: an "a" or "b", followed by 3 or 4
# word characters (\w matches letters, digits and underscore -- not only
# letters), followed by one or more digits.
pattn = r"^[ab]\w{3,4}\d+"
li = ["aabb123", "bbaba456", "bcdefghi789", "abce"]
for item in li:
    m = re.search(pattn, item)
    if m is not None:
        print('result:' + m.group())
    else:
        print('not match')
# Output:
# result:aabb123
# result:bbaba456
# not match
# not match