class match_singe_character(object):
    r"""Demonstrations of regex tokens that each consume exactly one character.

    Every method runs a fixed set of ``re.match`` examples against strings
    that share a common prefix and prints the matched text of each example,
    one per line.
    """

    def digital(self):
        r"""Print the matches for the digit (``\d``, ``[...]``) and non-digit (``\D``) examples."""
        samples = (
            (r"速度与激情\d\d", "速度与激情98"),
            (r"速度与激情[12345678]", "速度与激情8"),
            (r"速度与激情[1-8]", "速度与激情8"),
            # Two ranges in one class: 1-3 and 5-8.
            (r"速度与激情[1-35-8]", "速度与激情5"),
            (r"速度与激情\D", "速度与激情h"),
        )
        # Run every match before printing anything, then print in order.
        hits = [re.match(pattern, text) for pattern, text in samples]
        for hit in hits:
            print(hit.group())

    def word(self):
        r"""Print the matches for the word (``\w``, letter ranges) and non-word (``\W``) examples."""
        samples = (
            # \w consumes a single character, so only "a" follows the prefix.
            (r"速度与激情\w", "速度与激情aA8_"),
            (r"速度与激情[a-z]", "速度与激情b"),
            (r"速度与激情[A-Z]", "速度与激情Z"),
            (r"速度与激情\W", "速度与激情。"),
        )
        hits = [re.match(pattern, text) for pattern, text in samples]
        for hit in hits:
            print(hit.group())

    def space(self):
        r"""Print the matches for the whitespace (``\s``), non-whitespace (``\S``) and ``.`` examples."""
        samples = (
            (r"速度与激情\s", "速度与激情 1"),
            (r"速度与激情\S", "速度与激情啊"),
            (r"速度与激情.", "速度与激情¥"),
        )
        hits = [re.match(pattern, text) for pattern, text in samples]
        for hit in hits:
            print(hit.group())
class match_multiple_characters(object):
    """Demonstrations of regex quantifiers that match a run of characters."""

    def digital(self):
        r"""Print the matches for the ``{m,n}``, ``{n}``, ``?``, ``*`` and ``+`` examples.

        The ``\d*`` example is matched against an empty string, so it prints
        an empty line.
        """
        samples = (
            # {1,3} is greedy: it takes three digits when they are available.
            (r"123\d{1,3}", "1234567"),
            # -? makes the separator optional.
            (r"\d{3}-?\d{8}", "02112345678"),
            # * allows zero repetitions, so an empty string still matches.
            (r"\d*", ""),
            (r"\d+", "21212"),
        )
        hits = [re.match(pattern, text) for pattern, text in samples]
        for hit in hits:
            print(hit.group())
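# re.match always starts matching at the beginning of the string; ^ anchors the start and $ anchors the end; \w also matches Chinese characters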
def Check_if_the_variable_is_valid(variables=None):
    """Report which candidate strings are valid identifier-style names.

    A name is accepted when it starts with an ASCII letter or underscore and
    continues with ASCII letters, digits or underscores only. One line is
    printed per candidate.

    Args:
        variables: Iterable of strings to check. When omitted, the built-in
            sample list is used.
    """
    if variables is None:
        variables = ['sdf_', 'dsf df ', '1_2df']
    # \Z anchors at the true end of the string; "$" would also match just
    # before a trailing newline and so accept names such as "abc\n".
    pattern = re.compile(r"^[a-zA-Z_][a-zA-Z_0-9]*\Z")
    for variable in variables:
        re_variable = pattern.match(variable)
        if re_variable:
            print("variable: %s, re_variable: %s NO problem" % (variable, re_variable.group()))
        else:
            # One-element tuple keeps %-formatting safe for any candidate value.
            print("variable: %s has a problem" % (variable,))
def Check_if_the_email_is_valid(emails=None):
    """Report which candidate strings are accepted 163.com / gmail.com addresses.

    An address is accepted when its local part is 4 to 20 ASCII letters,
    digits or underscores and its domain is exactly ``163.com`` or
    ``gmail.com``. For each accepted address a second line names the
    provider (the second capture group).

    Args:
        emails: Iterable of strings to check. When omitted, the built-in
            sample list is used.
    """
    if emails is None:
        emails = ['dsff@163.com', 'sdfdfdsf@163.comcom ', 'asfs@163acom', 'fdsf333@gmail.com']
    # \Z anchors at the true end of the string; "$" would also match just
    # before a trailing newline and so accept "user@163.com\n".
    pattern = re.compile(r"^([a-zA-Z_0-9]{4,20})@(163|gmail)\.com\Z")
    for email in emails:
        re_email = pattern.match(email)
        if re_email:
            print("email: %s, re_email: %s NO problem" % (email, re_email.group()))
            print('%s 是 %s邮箱' % (email, re_email.group(2)))
        else:
            # One-element tuple keeps %-formatting safe for any candidate value.
            print("email: %s has a problem" % (email,))
def Check_the_html():
    r"""Show two ways of requiring a closing tag to repeat the opening tag's name.

    The same sample element is matched once with a numbered back-reference
    (``\1``) and once with a named group (``(?P<p1>...)`` / ``(?P=p1)``);
    the matched text of each is printed.
    """
    sample = "<h1>dsfsdfdsf</h1>"
    patterns = (
        r"<(\w*)>.*</\1>",             # numbered back-reference
        r"<(?P<p1>\w*)>.*</(?P=p1)>",  # named back-reference
    )
    hits = [re.match(pattern, sample) for pattern in patterns]
    for hit in hits:
        print(hit.group())
def search_findall_sub_split():
    """Demonstrate ``re.search``, ``re.findall``, ``re.sub`` and ``re.split``.

    Prints, in order: the first run of digits in the sample text, the list of
    all digit runs, the text with every run replaced by ``250`` (followed by
    the untouched original), the text with every run rewritten by ``sub_add``
    (followed by the original), and a second string split on ``:`` or space.
    """
    text = "电话110,电话119"
    digits = r"\d+"

    first_hit = re.search(digits, text)
    every_hit = re.findall(digits, text)
    # re.sub returns a new string; the original text is printed next to it
    # to show that it is left unchanged.
    fixed_replacement = re.sub(digits, '250', text)
    # The replacement may also be a callable that receives each match object.
    computed_replacement = re.sub(digits, sub_add, text)
    pieces = re.split(r":| ", "name:fuck you")

    print(first_hit.group())
    print(every_hit)
    print(fixed_replacement, text)
    print(computed_replacement, text)
    print(pieces)
# re.sub accepts either a replacement string or a function as its replacement argument
def sub_add(temp):
    """Return the integer matched by ``temp``, incremented by one, as a string.

    Intended as a replacement callback for ``re.sub``.

    Args:
        temp: Match object whose full match is the text of an integer.

    Returns:
        The decimal string of the matched integer plus one.
    """
    return str(int(temp.group()) + 1)
def html_clear(path='content.html'):
    """Print the contents of an HTML file with its markup removed.

    Every ``<p> -`` prefix and every individual tag is deleted; the text
    between tags is kept.

    Args:
        path: File to read. Defaults to ``content.html`` in the current
            working directory.
    """
    with open(path, 'r') as f:
        content = f.read()
    # "<[^>]*>" removes one tag at a time. A greedy "<.*>" would run from the
    # first "<" to the last ">" on a line and delete the text between tags.
    re_content = re.sub(r"<p> -|<[^>]*>", '', content)
    print(re_content)