正则表达式_不死者之王ftp-CSDN博客

本文链接：https://blog.csdn.net/qq_42281053/article/details/80705076

1. 正则表达式表示单个字符

# coding:utf-8
import re

# 1.匹配某个字符串
# text = "china America Korea England"
# ret = re.match("ch", text)
# print (ret.group())

# 2.点“.”：匹配任意的字符，但不能匹配换行符"\n"
# text = "china America Korea England"
# ret = re.match(".", text)
# print (ret.group())

# 3."\d": 匹配任意的数字（0~9）
# text = "china America Korea England"
# ret = re.match("\d", text)
# print (ret.group())

# 4."\D": 匹配任意的非数字
# text = "china America Korea England"
# ret = re.match("\D", text)
# print (ret.group())

# 5."\s": 匹配空白字符(\n,\t,\r,空格)
# text = "china America Korea England"
# ret = re.match("\s", text)
# print (ret.group())

# 6."\w": 匹配a-z、A-Z、数字、下划线
# text = "china America Korea England"
# ret = re.match("\w", text)
# print (ret.group())

# 7."\W": 匹配和"\w"相反
# text = "china America Korea England"
# ret = re.match("\W", text)
# print (ret.group())

# 8.[]组合方式，只要满足中括号中的字符，就可以匹配
# text = "010-98654223532"
# ret = re.match("[\d\-]+", text)
# print (ret.group())

# 8.1.[]形式代替\d
# text = "010-98654223532"
# ret = re.match("[0-9]", text)
# print (ret.group())

# 8.2.[]形式代替\D
# text = "w010-98654223532"
# ret = re.match("[^0-9]", text)
# print (ret.group())

# 8.3.[]形式代替\w
# text = "w010-98654223532"
# ret = re.match("[a-zA-Z0-9_]", text)
# print (ret.group())

# 8.4. A negated bracket class used in place of \W: match a single character
# that is not an ASCII letter, a digit, or an underscore.
text = "+w010-98654223532"
_non_word_char = re.compile("[^a-zA-Z0-9_]")
ret = _non_word_char.match(text)
print(ret.group())

2.匹配多个字符

# coding:utf-8
import re
# 1."*"匹配0个或者任意多个字符
# text = "010-98654223532"
# ret = re.match("\d*", text)
# print (ret.group())

# 2."+"匹配1个或者任意多个字符
# text = "010-98654223532"
# ret = re.match("\d+", text)
# print (ret.group())

# 3."?"匹配1个或者0个，表示可有可无（比如"#?"，表示可以有#号，也可以没有#号）
# text = "010-98654223532"
# ret = re.match("\d?", text)
# print (ret.group())

# 4."{m}"匹配m个
# text = "010-98654223532"
# ret = re.match("\d{3}", text)
# print (ret.group())

# 5. "{m,n}" repeats the preceding item between m and n times; it is greedy,
# so it takes as many repetitions as it can, up to n.
# The pattern is a raw string so that "\d" reaches the regex engine intact
# instead of being parsed as an (invalid) string escape sequence.
text = "01012156-98654223532"
ret = re.match(r"\d{3,5}", text)
print(ret.group())  # -> 01012

3.正则表达式的案例练习

# coding:utf-8
import re

# 1.验证手机号码：首位为1，第二位为3、4、5、7、8之一，后面是9位数字
# text = "15812369500"
# ret = re.match("1[34578]\d{9}", text)
# print ret.group()

# 2.验证邮箱：任意字符，@,任意,.com
# text = "hugj2014@163.com"
# ret = re.match("\w+@[a-z0-9]+\.[a-z]+", text)
# print ret.group()

# 3.验证url: 以http://、https://或者ftp://开头，后面跟任意非空白字符
# text = "http://www.baidu.com"
# ret = re.match("(http|https|ftp)://[^\s]+", text)
# print ret.group()

# 4. Validate an 18-character ID-card number: 17 digits followed by one
# check character, which may be a digit or the letter X (either case).
# The pattern is a raw string so "\d" is not treated as a string escape.
id_card_pattern = re.compile(r"\d{17}[\dXx]")
text = "41256319985201247X"
ret = id_card_pattern.match(text)
# print() as a function call works on both Python 2 and Python 3.
print(ret.group())

4.特殊符号的意义

# coding:utf-8
import re

# 1.^：表示以什么开始
# text = "41256319985201247X"
# ret = re.search("^4", text)
# print ret.group()

# 2.$：表示以什么结尾
# text = "41256319985201@163.com"
# ret = re.match("\w+@163.com$", text)
# print ret.group()

# 3.|：表示匹配多个表达式或字符串
# text = "http15346546424346465435486431"
# ret = re.search("^(http|https)\w+", text)
# print ret.group()

# 4.贪婪模式: + :尽量匹配更多字符
# text = "0123456"
# ret = re.search("\d+", text)
# print ret.group()

# 5. Non-greedy (lazy) quantifier "+?": match as few characters as possible
# while still satisfying "one or more", so only the first digit is taken.
# The pattern is a raw string so "\d" is not treated as a string escape.
text = "0123456"
ret = re.search(r"\d+?", text)
# print() as a function call works on both Python 2 and Python 3.
print(ret.group())  # -> 0

5.特殊符号案例

# coding:utf-8
import re

# 1. Match a whole number from 1 to 100.
# Three cases: a single digit 1-9, two digits 10-99, or exactly 100.
# Both anchors wrap the whole alternation: without "^", search() is free to
# match only the tail of a larger number (e.g. the "2" of "102").
number_pattern = re.compile(r"^(?:[1-9]\d?|100)$")
text = "102"
ret = number_pattern.search(text)
# 102 is out of range, so ret is None here; guard before calling group().
print(ret.group() if ret is not None else None)

6.python中的正则表达式：re库

# encoding:utf-8
import re

# 1.分组:group(1)、group(1, 2)、groups()
# text = "banana apple$256 pee watermaland $125"
# res = re.search(".*(\$\d+).*(\$\d+)", text)
# print res.group(1)
# print res.group(1, 2)
# print res.groups()

# 2.re.findall()返回的是列表
# text = "banana apple$256 pee watermaland $125"
# ret = re.findall("\$\d+", text)
# print ret
# print ret[0]

# 3.sub()函数：替换
# text = "banana apple$256 pee watermaland $125"
# ret = re.sub("\$\d+", "0", text)
# print ret

# 4.sub()函数：替换
# html = """
#   <script type='text/javascript'>
#     var _vwo_code=(function(){
#       var account_id=249272,
#           settings_tolerance=2000,
#           library_tolerance=2500,
#           use_existing_jquery=false,
#           // DO NOT EDIT BELOW THIS LINE
#           f=false,d=document;return{use_existing_jquery:function(){return use_existing_jquery;},library_tolerance:function(){return library_tolerance;},finish:function(){if(!f){f=true;var a=d.getElementById('_vis_opt_path_hides');if(a)a.parentNode.removeChild(a);}},finished:function(){return f;},load:function(a){var b=d.createElement('script');b.src=a;b.type='text/javascript';b.innerText;b.οnerrοr=function(){_vwo_code.finish();};d.getElementsByTagName('head')[0].appendChild(b);},init:function(){settings_timer=setTimeout('_vwo_code.finish()',settings_tolerance);var a=d.createElement('style'),b='body{opacity:0 !important;filter:alpha(opacity=0) !important;background:none !important;}',h=d.getElementsByTagName('head')[0];a.setAttribute('id','_vis_opt_path_hides');a.setAttribute('type','text/css');if(a.styleSheet)a.styleSheet.cssText=b;else a.appendChild(d.createTextNode(b));h.appendChild(a);this.load('//dev.visualwebsiteoptimizer.com/j.php?a='+account_id+'&u='+encodeURIComponent(d.URL)+'&r='+Math.random());return settings_timer;}};}());_vwo_settings_timer=_vwo_code.init();
#   </script>
# """
# # 注意使用非贪婪模式“？”
# ret = re.sub("<.+?>", "", html)
# print ret

# 5.split()函数分隔
# text = "banana /apple$256 pee wat/ermaland $125"
# ret = re.split("\$|/", text)
# print ret

# 6.compile()函数：对于经常要用到的正则表达式，可用compile进行编译，后期直接使用，提高效率
# text = "The number is 12.05"
# # +:1个或多个、？0个或1个、*0个或任意个
# com = re.compile("\d+\.?\d*")
# ret = re.search(com, text)
# print ret.group()

# 7. compile() with re.VERBOSE: whitespace and "#" comments inside the
# pattern are ignored, so each piece of the regex can be annotated in place.
# Quantifier recap: "+" one or more, "?" zero or one, "*" zero or more.
# The three pattern lines are: digits before the decimal point (at least
# one), an optional decimal point, and any number of digits after it.
text = "The number is 12.05"
com = re.compile(r"""
    \d+ # 小数点前面的数字，至少有1个
    \.? # 小数点，可有可无，最多1个
    \d* # 小数点后面的数字，可有可无，可以有很多个
""", re.VERBOSE)
# Call search() on the compiled pattern directly rather than passing it
# back through the module-level re.search().
ret = com.search(text)
# print() as a function call works on both Python 2 and Python 3.
print(ret.group())  # -> 12.05