一、正则表达式库的引用
在Python中使用正则表达式,需要先导入标准库模块re。
import re
二、正则表达式函数说明:
match =》尝试在字符串的开头运用模式,返回一个match对象,如果没有匹配则返回None
fullmatch =》尝试对整个字符串运用模式,返回一个match对象,如果没有匹配或部分匹配则返回None
search =》 扫描整个字符串来匹配模式,返回一个match对象,如果没有找到匹配则返回None
sub =》 找到模式匹配的字符串并替换掉。返回替换了的字符串
subn =》找到模式匹配的字符串并替换掉。 返回替换了的字符串和匹配的次数
split =》根据模式匹配来分割字符串,返回子字符串列表,列表中可能包含空的字符串。
findall =》 查找所有非重叠的匹配,返回由匹配到的字符串组成的列表(如果模式中含有分组,则返回分组内容的列表)
三、简单模式的使用
# Simplest use of re.match: the pattern must match at the very start of the
# subject string, otherwise None is returned.
# NOTE(review): the pattern "foo" and the subject "food of the sea" are
# reconstructed from the message printed below -- confirm against the
# original tutorial.
m = re.match("foo", "food of the sea")
if m is not None:
    # The subject starts with "foo", so group() yields "foo".
    print("food of the sea里面开头匹配foo:", m.group())
print("")
print("匹配多个字符串符号|")
# "|" is alternation: the pattern matches any one of the listed words.
pattern = "bet|bat|bit"
# "bat" is one of the alternatives and matches; "blt" is not, so re.match
# returns None.  Each report names the string that was actually tested.
for text in ("bat", "blt"):
    m = re.match(pattern, text)
    matched = m.group() if m is not None else "None"
    print("%s里面开头匹配%s = %s" % (text, pattern, matched))
print("")
print(".匹配任何单个的字符,除了\\n")
# "." stands for exactly one character, and by default never for a newline.
pattern = ".end"
# "tend"  -> matches ("t" is consumed by the dot)
# "end"   -> None (no character is available for the dot)
# "\nend" -> None (the dot does not match the leading newline)
for text in ("tend", "end", "\nend"):
    m = re.match(pattern, text)
    matched = m.group() if m is not None else "None"
    print("%s里面开头匹配%s = %s" % (text, pattern, matched))
print(".不能匹配\\n")
print("")
print("创建字符串类,[]")
# Each [...] is a character class that matches exactly one of the characters
# listed inside it, so this pattern matches four-character strings.
pattern = "[cr][23][dp][t2]"
# "c3pt" and "c2d2" satisfy every class; "c2dd" fails on the last class
# because "d" is not in [t2].
for text in ("c3pt", "c2d2", "c2dd"):
    m = re.match(pattern, text)
    matched = m.group() if m is not None else "None"
    print("%s里面开头匹配%s = %s" % (text, pattern, matched))
print("")
print("匹配特殊字符")
# Raw string so that "\w" and "\." reach the regex engine untouched instead
# of being treated as (invalid) string escape sequences.
# \w+ is the user name, (\w+\.)? is an optional single sub-domain level, and
# \w+\.com is the domain ending in ".com".
pattern = r"\w+@(\w+\.)?\w+\.com"
# Zero or one sub-domain level matches; two levels ("apex.dd.tw.com") do not,
# because the optional group can be used at most once.
for text in ("jacky@xxx.com", "jacky@apex.tw.com", "jacky@apex.dd.tw.com"):
    m = re.match(pattern, text)
    matched = m.group() if m is not None else "None"
    print("%s里面开头匹配%s = %s" % (text, pattern, matched))
四、正则表达式函数的使用
print("")
print("re模式匹配函数测试")
print("测试fullmatch方法")
# re.fullmatch succeeds only when the pattern covers the whole string.
pattern = ".end"
# The sentence merely contains "end", so fullmatch returns None; "tend" is
# covered completely by ".end" and matches.
for text in ("this is the end of film", "tend"):
    m = re.fullmatch(pattern, text)
    matched = m.group() if m is not None else "None"
    print("%s里面整体匹配%s = %s" % (text, pattern, matched))
print("")
print("测试search方法")
# re.search scans the whole string and returns the first match found anywhere.
# The pattern is set here explicitly so this section does not rely on a value
# left over from earlier code.
pattern = ".end"
# The first sentence contains "dend", which is found; the second contains no
# "end" at all, so search returns None.
for text in ("this is the dend of film", "this is the nd of film"):
    m = re.search(pattern, text)
    matched = m.group() if m is not None else "None"
    print("%s里面搜索匹配%s = %s" % (text, pattern, matched))
print("")
print("测试sub方法")
# re.sub replaces every match of the pattern and returns the new string.
pattern = "[aoe]"
# "mark the amoment" -> "mork tho omomont" (every a/o/e becomes "o")
# "pip pip pug"      -> unchanged, it contains none of a/o/e
for text in ("mark the amoment", "pip pip pug"):
    m = re.sub(pattern, "o", text)
    print("%s被模式%s替换后的字符串%s" % (text, pattern, m))
print("")
print("测试subn方法")
# re.subn works like re.sub but returns a (new_string, replacement_count) tuple.
pattern = "[aoe]"
# "mark the amoment" -> ('mork tho omomont', 5)
# "pip pip pug"      -> ('pip pip pug', 0)
for text in ("mark the amoment", "pip pip pug"):
    m = re.subn(pattern, "o", text)
    print("%s被模式%s替换后的字符串%s" % (text, pattern, m))
print("")
print("测试split方法")
# re.split cuts the string at every match of the pattern.
pattern = "of|the"
text = "the book of the school, the nineth."
m = re.split(pattern, text)
# The string starts with a separator ("the"), so the first element is an
# empty string: ['', ' book ', ' ', ' school, ', ' nineth.']
print("%s被模式%s分割后的列表%s" % (text, pattern, m))
print("")
print("测试findall方法")
# re.findall returns every non-overlapping match, in order of appearance.
pattern = "of|the"
text = "the book of the school, the nineth."
m = re.findall(pattern, text)
# Matches found: ['the', 'of', 'the', 'the']
print("%s被模式%s找到的字符串列表%s" % (text, pattern, m))