import re
"""
lst = re.findall(正则表达式,要匹配的字符串)
返回列表,按照正则表达式匹配到的内容都扔到列表里
"""
# Predefined character classes.  Every pattern is a raw string so that the
# backslash reaches the regex engine untouched (a non-raw "\d" is an invalid
# string escape and triggers a warning on current Python versions).

# \d matches a digit, \D matches anything that is not a digit.
lst = re.findall(r"\d", "asdlfkjasdklfjkla_()()*(*(23423sdf4")
print(lst)
lst = re.findall(r"\D", "asdlfkjasdklfjkla_()()*(*(23423sdf4")
print(lst)

# \w matches a word character (letter, digit, underscore; for str patterns
# this includes Unicode letters), \W matches everything else.
lst = re.findall(r"\w", "sdf&&&s9%%89!!!_你好")
print(lst)
lst = re.findall(r"\W", "sdf&&&s9%%89!!!_你好")
print(lst)

# \s matches whitespace, \S matches non-whitespace.
lst = re.findall(r"\s", " ")
print(lst)
lst = re.findall(r"\S", " aa bb")
print(lst)

# \n matches a newline.
strvar = """
xboy 他是一个 神秘男孩
"""
lst = re.findall(r"\n", strvar)
print(lst)

# \t matches a tab.
strvar = """
xboy 他是一个 神秘 男孩
"""
lst = re.findall(r"\t", strvar)
print(lst)

# [...] matches exactly one character out of the listed set.
lst = re.findall("[abc]", "oputuopctyauobpt")
print(lst)
print(re.findall('a[abc]b', 'aab abb acb adb'))
# A range such as 0-9 is shorthand for listing every character in it.
print(re.findall('a[0123456789]b', 'a1b a2b a3b acb ayb'))
print(re.findall('a[0-9]b', 'a1b a2b a3b acb ayb'))
print(re.findall('a[abcdefg]b', 'a1b a2b a3b acb ayb adb'))
print(re.findall('a[a-g]b', 'a1b a2b a3b acb ayb adb'))
print(re.findall('a[ABCDEFG]b', 'a1b a2b a3b aAb aDb aYb'))
print(re.findall('a[A-G]b', 'a1b a2b a3b aAb aDb aYb'))
print(re.findall('a[0-9a-zA-Z]b', 'a-b aab aAb aWb aqba1b'))
# 0-z spans a code-point range, so it also covers the punctuation that sits
# between the digits and the letters (for example "@").
print(re.findall('a[0-z]b', 'a@b aab aAb aWb aqba1b'))
print(re.findall('a[0-9][*#/]b', 'a1/b a2b a29b a56b a456b'))
# A leading ^ negates the set: any character except - + * /.
print(re.findall('a[^-+*/]b', "a%b ccaa*bda&bd"))

# Inside a set, ^ and - must be escaped to be taken literally.
lst = re.findall(r"e[\^\-]f", "e^f e-f")
print(lst)

# Matching a literal backslash: the pattern needs \\ and the subject is a
# raw string holding one backslash.
lst = re.findall(r"a\\c", r"a\c")
print(lst)
print(lst[0])

# Disabled examples kept from the original notes:
#
# res = re.findall(r'a\\b',r'a\b')
# res = re.findall(r'a\\n',r'a\n')
# print(res)
# print(res[0])
# # \b -> escape character: backspace
# # a[\^\\]b against "a^b a\b": why does this only yield a^b?
# res = re.findall(r"a[\^\\]b",r"a^b a\b")
# print(res) # ['a^b', 'a\\b']
# print(res[1])
# ```python  (stray Markdown code fence, commented out so the file parses as Python)
import re
# Quantifiers, each applied to the letter "a" that precedes a "b".
for regex, text in (
    ('a?b', 'abbzab abb aab'),                  # 1) ?     zero or one "a"
    ('a+b', 'b ab aaaaaab abb'),                # 2) +     one or more "a"
    ('a*b', 'b ab aaaaaab abbbbbbb'),           # 3) *     zero or more "a"
    ('a{1,3}b', 'aaab ab aab abbb aaz aabb'),   # 4) {m,n} between m and n "a"
    ('a{2}b', 'aaab ab aab abbb aaz aabb'),     #    {m}   exactly m "a"
    ('a{2,}b', 'aaab ab aab abbb aaz aabb'),    #    {m,}  at least m "a"
):
    print(re.findall(regex, text))

# Greedy versus non-greedy matching:
#   greedy     - the default; a quantifier takes as many repetitions as it
#                can, and the engine backtracks when the rest fails.
#   non-greedy - a quantifier takes as few repetitions as it can; written by
#                putting a "?" right after the quantifier.
#   backtracking - match left to right as far as possible; once nothing more
#                matches, step back to the last position that still works.
#   "." matches any character except the newline \n.
strvar = "刘能和刘老根和刘铁棍子777子888"
for regex in (
    # greedy forms
    "刘.",
    "刘.?",
    "刘.+",
    "刘.*",
    "刘.{1,21}",
    "刘.*子",
    # non-greedy counterparts
    "刘.??",
    "刘.+?",
    "刘.*?",
    "刘.{1,21}?",
    "刘.*?子",
):
    lst = re.findall(regex, strvar)
    print(lst)

# Notes for the word-boundary examples that follow:
#   In an ordinary string literal \b is the backspace escape; in a regex
#   (written as a raw string) \b is a word boundary.
#   Pin the left edge of a word:  \bw
#   Pin the right edge of a word: d\b
# Word-boundary (\b) patterns, all run against the same three-word subject.
strvar = "word pwd scf"
for regex in (
    r".*d\b",      # greedy: runs up to the last "d" that ends a word
    r".*?d\b",     # lazy: stops at each "d" that ends a word
    r"\bw",        # a "w" that starts a word
    r"\bw.*?",     # lazy tail may match nothing at all
    r"\bw.*? ",    # lazy tail forced to reach the next space
    r"\bw\S*",     # the whole word that starts with "w"
):
    lst = re.findall(regex, strvar)
    print(lst)

# Notes for the anchor examples that follow:
#   ^  the match must start at the beginning of the string
#   $  the match must end at the end of the string
#   When ^ or $ is present, treat the subject string as one whole unit.
# Anchors ^ and $ applied to one fixed subject.
strvar = "大哥大嫂大爷"
for regex in (
    '大.',
    '^大.',
    '大.$',
    '^大.$',
    '^大.*?$',
    '^大.*?大$',
    '^大.*?爷$',
):
    print(re.findall(regex, strvar))

# The same anchors against assorted ASCII subjects (pattern, subject).
for regex, text in (
    ('^g.*? ', 'giveme 1gfive gay'),
    ('five$', 'aassfive'),
    ('^giveme$', 'giveme'),
    ('^giveme$', 'giveme giveme'),
    ('giveme', 'giveme giveme'),
    ("^g.*e", 'giveme 1gfive gay'),
):
    print(re.findall(regex, text))
import re
# 1. Numbered groups: \N repeats whatever group N matched.
strvar = "<div>明天就放假了,很开心</div>"

# Three independent groups: opening tag, content, closing tag.
lst = re.findall(r"<(.*?)>(.*?)<(.*?)>", strvar)
print(lst)

# The closing tag must be "/" followed by the text captured by group 1.
lst = re.findall(r"<(.*?)>(.*?)<(/\1)>", strvar)
print(lst)

strvar = "a1b2cab"
obj = re.search(r"(.*?)\d(.*?)\d(.*?)\1\2", strvar)
print(obj)

# group() is the whole match, groups() is the tuple of captured groups.
res = obj.group()
print(res)
res = obj.groups()
print(res)

# 2. Named groups
#    (?P<name>regex)  gives the group a name
#    (?P=name)        refers back to the text matched by the named group

# Named groups can still be referenced by number ...
strvar = "a1b2cab"
obj = re.search(
    r"(?P<tag1>.*?)\d(?P<tag2>.*?)\d(?P<tag3>.*?)\1\2",
    strvar,
)
print(obj.group())

# ... or by name.
strvar = "a1b2cab"
obj = re.search(
    r"(?P<tag1>.*?)\d(?P<tag2>.*?)\d(?P<tag3>.*?)(?P=tag1)(?P=tag2)",
    strvar,
)
print(obj.group())
import re
# With a capturing group, findall returns the group's text instead of the
# whole match; (?:...) groups without capturing, so the whole match comes back.
print(re.findall('.*?_good', 'wusir_good alex_good secret男_good'))
print(re.findall('(.*?)_good', 'wusir_good alex_good secret男_good'))
print(re.findall('(?:.*?)_good', 'wusir_good alex_good secret男_good'))

# | is alternation: match either side.
strvar = "abcddd"
lst = re.findall("a|b", strvar)
print(lst)

# Alternatives are tried left to right and the first that matches wins.
# To avoid a short alternative cutting a longer match off, put the
# harder-to-match (longer) alternative first and the easier one after it.
strvar = "abc24234234ddabcd234234"
lst = re.findall("abcd|abc", strvar)
print(lst)

# .   matches any character except \n
# \   strips the special meaning from a metacharacter, or gives an ordinary
#     character a special meaning
# \.  a plain dot character
strvar = "5.33 3.13 34 34. .98 9.99 sdfsdf ......"

# Decimals only.
lst = re.findall(r"\d+\.\d+", strvar)
print(lst)

# Decimals or integers.
lst = re.findall(r"\d+\.\d+|\d+", strvar)
print(lst)

# findall prefers to return the content of parentheses; use (?: ) when the
# parentheses are only for grouping and the full match should be returned.
lst = re.findall(r"\d+(?:\.\d+)?", strvar)
print(lst)

# Phone numbers starting with 135 or 171 followed by eight digits.  The
# pattern is a raw string so that \d is not read as a string escape.
strvar = "asdfasd234 13591199444 17188886666 19145547744"
lst = re.findall(r"(?:135|171)\d{8}", strvar)
print(lst)

# Anchored version: the whole string must be such a phone number.
strvar = "13591199444"
lst = re.findall("^(?:135|171)[0-9]{8}$", strvar)
print(lst)

# Notes for the search examples that follow:
#   findall  collects every match and returns a list; it cannot show the
#            group contents and the full match at the same time.
#   search   returns a match object for the first match only; it can show
#            both the group contents and the full match.
#   group :  match.group()  -> the matched text
#   groups:  match.groups() -> the contents of the groups
# findall with non-capturing groups returns the whole match.  Patterns are
# raw strings so that \. is a regex escape and not an invalid string escape.
strvar = "www.baidu.com"
lst = re.findall(r"(?:www)\.(?:baidu|oldboy)\.(?:com)", strvar)
print(lst)

# search returns a match object that exposes both the match and its groups.
obj = re.search(r"(www)\.(baidu|oldboy)\.(com)", strvar)
print(obj)
print(obj.group())
print(obj.groups())
print(obj.group(1))
print(obj.group(2))
print(obj.group(3))

# NOTE: search returns None when nothing matches, and None has no group().

# Evaluate "5*6-7/3" by hand, one operator at a time.
strvar = "5*6-7/3"

# First multiplication/division found: 5*6.
obj = re.search(r"\d+[*/]\d+", strvar)
print(obj)
strvar1 = obj.group()
print(strvar1)
n1, n2 = strvar1.split("*")
res1 = int(n1) * int(n2)
print(res1)

# Substitute the product back into the expression.
strvar2 = strvar.replace(strvar1, str(res1))
print(strvar2)

# Next multiplication/division: 7/3.
obj = re.search(r"\d+[*/]\d+", strvar2)
strvar3 = obj.group()
print(strvar3)
n1, n2 = strvar3.split("/")
res2 = int(n1) / int(n2)
print(res2)

# Substitute the quotient back, leaving a single subtraction.
strvar4 = strvar2.replace(strvar3, str(res2))
print(strvar4)
n1, n2 = strvar4.split("-")
res_finally = float(n1) - float(n2)
print(res_finally)
import re
# search with one capturing group between two numbers.
strvar = "1+2 3*4"
obj = re.search(r"\d+(.*?)\d+", strvar)
print(obj.group())
res = obj.groups()
print(res)

# search with a leading ^ behaves like match: both only succeed at the very
# start of the string.  (With the subject "y13566688771" neither would match.)
strvar = "13566688771"
obj = re.search(r"^\d+", strvar)
print(obj.group())
obj = re.match(r"\d+", strvar)
print(obj.group())

# split: cut the string at every separator the pattern matches.
strvar = "alex|xboy&wusir-egon"
res = re.split("[|&-]", strvar)
print(res)
strvar = "alex23423411122334xboyww90909045wusir78"
res = re.split(r"\d+", strvar)
print(res)

# sub: replace matches; count limits how many are replaced.  count is passed
# by keyword because passing it positionally is deprecated.
strvar = "alex|xboy&wusir-egon"
res = re.sub("[|&-]", "%", strvar)
print(res)
res = re.sub("[|&-]", "%", strvar, count=1)
print(res)

# subn: like sub, but returns (new_string, number_of_replacements).
res = re.subn("[|&-]", "%", strvar)
print(res)
res = re.subn("[|&-]", "%", strvar, count=1)
print(res)
# Iterator lives in collections.abc; the old alias in collections was removed
# in Python 3.10, so importing it from there raises ImportError.
from collections.abc import Iterator

# finditer returns an iterator that yields one match object per match.
strvar = "sdfasf234234你好23423"
it = re.finditer(r"\d+", strvar)
print(isinstance(it, Iterator))

# Disabled alternative kept from the original notes, with its sample output:
#
# lst = list(it)
# print(lst)
# [
# <_sre.SRE_Match object; span=(6, 12), match='234234'>,
# <_sre.SRE_Match object; span=(14, 19), match='23423'>
# ]
# 234234
# 23423

for i in it:
    print(i.group())

# Notes for the compile examples that follow:
#   Normally a pattern is compiled each time it is executed.  When the same
#   pattern is used repeatedly that wastes resources such as memory and CPU.
#   compile builds the pattern once so it can be reused without recompiling.
# Compile once, then reuse the pattern object's own search/findall methods.
# The pattern is a raw string so that \d is not read as a string escape.
strvar = "sdfasf234234你好23423"
pattern = re.compile(r"\d+")
print(pattern)
obj = pattern.search(strvar)
print(obj.group())
lst = pattern.findall(strvar)
print(lst)

# re.I: ignore case, so </h1> in the pattern matches </H1> in the subject.
strvar = "<h1>123</H1>"
pattern = re.compile("<h1>(.*?)</h1>", flags=re.I)
obj = pattern.search(strvar)
print(obj)
print(obj.group())

# re.M: ^ and $ match at the start and end of every line.
strvar = """<h1>123</H1>
<p>123</p>
<div>123</div>
"""
pattern = re.compile("^<.*?>(?:.*?)<.*?>$", flags=re.M)
print(pattern.findall(strvar))

# re.S: "." also matches the newline, so a match can span lines.
strvar = """give
1112233mefive
"""
pattern = re.compile("(.*?)mefive", flags=re.S)
obj = pattern.search(strvar)
print(obj.group())
# Printed result:
# give
# 1112233mefive
import re
# Arithmetic expression used as input for the regex-based calculator defined
# below.  eval() is applied only to this hard-coded literal, to print the
# value Python itself computes for it.
strvar = "1-2*((60-30+(-40/5)*(9-2*5/3+7/3*99/4*2998+10*568/14))-(-4*3)/(16-3*2))"
print(eval(strvar))
# A non-negative number: digits, optional fraction, optional exponent.  The
# exponent is accepted because str(float) switches to scientific notation
# (for example "1e-05") for very small or very large intermediate results.
_NUMBER = r"\d+(?:\.\d+)?(?:[eE][+-]?\d+)?"
# One multiplication or division; the right operand may carry a sign.
_MUL_DIV_RE = re.compile(_NUMBER + r"[*/][+-]?" + _NUMBER)
# One signed term of a sum.
_TERM_RE = re.compile(r"[+-]?" + _NUMBER)
# Two or more adjacent sign characters.
_SIGN_RUN_RE = re.compile(r"[+-]{2,}")
# An innermost bracket pair: no nested brackets inside.
_BRACKET_RE = re.compile(r"\([^()]+\)")


def sign_exp(strvar):
    """Collapse every run of adjacent + and - signs into a single sign.

    A run with an odd number of minus signs becomes "-", otherwise "+".
    Runs of any length are handled, so "1---2" becomes "1-2".

    :param strvar: expression text
    :return: the expression with each sign run reduced to one character
    """
    def collapse(match):
        return "-" if match.group().count("-") % 2 else "+"

    return _SIGN_RUN_RE.sub(collapse, strvar)


def calc_exp(strvar):
    """Evaluate a single multiplication or division such as "2*-3" or "7/2".

    :param strvar: text of the form <number>*<number> or <number>/<number>
    :return: the result as a float
    :raises ValueError: if the text contains neither "*" nor "/" (or an
        operand is not a number)
    :raises ZeroDivisionError: when dividing by zero
    """
    if "*" in strvar:
        left, right = strvar.split("*")
        return float(left) * float(right)
    if "/" in strvar:
        left, right = strvar.split("/")
        return float(left) / float(right)
    # Returning None here would be turned into the text "None" by callers.
    raise ValueError("no '*' or '/' operator in %r" % (strvar,))


def opreate_exp(strvar):
    """Evaluate an expression that contains no nested brackets.

    Multiplications and divisions are reduced left to right first; what is
    left is a sum of signed terms.  Any surrounding bracket characters are
    simply ignored by the patterns.

    :param strvar: expression text without nested brackets
    :return: the numeric value (0 if the text contains no number)
    """
    while True:
        # Normalise sign runs first so that "2*--3" is seen as "2*+3".
        strvar = sign_exp(strvar)
        match = _MUL_DIV_RE.search(strvar)
        if match is None:
            break
        # Splice the result in by position.  A text replace would also hit
        # the same digits inside a longer number ("2*3" inside "12*3").
        value = calc_exp(match.group())
        strvar = strvar[:match.start()] + str(value) + strvar[match.end():]
    return sum(float(term) for term in _TERM_RE.findall(strvar))


def remove_bracket(strvar):
    """Replace bracketed sub-expressions by their values, innermost first.

    :param strvar: expression text
    :return: the expression text with no evaluable brackets left
    """
    while True:
        match = _BRACKET_RE.search(strvar)
        if match is None:
            return strvar
        value = opreate_exp(match.group())
        strvar = strvar[:match.start()] + str(value) + strvar[match.end():]


def main(strvar):
    """Evaluate an arithmetic expression made of numbers, + - * / and brackets.

    :param strvar: expression text; spaces are ignored
    :return: the numeric value of the expression
    """
    strvar = strvar.replace(" ", "")
    strvar = remove_bracket(strvar)
    return opreate_exp(strvar)
# Evaluate the expression held in strvar with the regex-based calculator
# (main, defined above) and print the result.
res = main(strvar)
print(res)