这节课讲解了正则表达式。正则表达式的内容又多又杂,但 Python 的强大之处就在于已经有人替我们造好了轮子,很多匹配规则其实不用我们自己写。
NUMB3RS
检测 IPv4 地址(形如 #.#.#.#),每一段的取值范围都在 0 到 255 之间;符合规则就返回 True,否则返回 False。
numb3rs.py
import re
import sys
def main():
    """Prompt for an IPv4 address and print the result of validating it."""
    address = input("IPv4 Address: ")
    print(validate(address))
def validate(ip):
    """Return True if *ip* is a dotted-quad IPv4 address, else False.

    A valid address consists of exactly four dot-separated fields, each made
    up only of ASCII decimal digits and each with a value from 0 to 255.

    Args:
        ip: The candidate address as a string.

    Returns:
        bool: True when all four fields are in-range numbers, False otherwise.
    """
    # Only ASCII digits may appear in a field. Capturing ``.+`` and relying on
    # int() would let through text that int() tolerates but an address must
    # not contain: a sign ("-1"), surrounding whitespace (" 1") or digit
    # separators ("1_0"). fullmatch() also rejects a trailing newline, which a
    # ``$`` anchor would silently allow.
    octets = re.fullmatch(r"([0-9]+)\.([0-9]+)\.([0-9]+)\.([0-9]+)", ip)
    if octets is None:
        return False
    try:
        return all(int(octet) <= 255 for octet in octets.groups())
    except ValueError:
        # Python 3.11+ refuses to convert extremely long digit strings; such
        # a field is out of range anyway.
        return False
# Run the interactive prompt only when this file is executed as a script,
# not when it is imported (for example by the tests).
if __name__ == "__main__":
    main()
test_numb3rs.py
from numb3rs import validate
import pytest
def test_num():
    """Check that in-range addresses pass and any out-of-range octet fails."""
    assert validate("127.0.0.1")
    assert validate("255.255.255.255")
    assert validate("0.0.0.0")
    # Put the out-of-range value in every position so that an implementation
    # which only range-checks some of the octets is caught.
    assert validate("256.255.255.1") is False
    assert validate("255.256.255.1") is False
    assert validate("255.255.256.1") is False
    assert validate("255.255.1.256") is False
def test_str():
    """Check that non-numeric input is rejected."""
    for text in ("cat", "cat.set.la.kk"):
        assert validate(text) == False
# Allow running this test file directly as a script; pytest then collects
# and runs the tests itself.
if __name__ == "__main__":
    pytest.main()
Watch on YouTube
这是一个最简形式的链接:http://youtube.com/embed/xvFZjo5PgG0
我们要做的是把形如 http://www.youtube.com/embed/... 的链接转换成 https://youtu.be/...,
也就是说 http 要替换成 https,去掉 www.,youtube.com 替换成 youtu.be,去掉 embed 及其后面的 /,
只保留 https://youtu.be/...,其中 ... 是视频 ID(例如 xvFZjo5PgG0)。
watch.py
import re
import sys
def main():
    """Prompt for an HTML snippet and print the result of parsing it."""
    html = input("HTML: ")
    print(parse(html))
def parse(s):
    """Extract a YouTube embed URL from *s* and return it as a youtu.be link.

    The URL must start with ``http://`` or ``https://``, optionally followed
    by ``www.``, then ``youtube.com/embed/<id>``, and must be terminated by a
    double quote.

    Args:
        s: Text (typically an HTML snippet) to search.

    Returns:
        ``https://youtu.be/<id>`` for the first such URL found, or None when
        *s* contains none.
    """
    # One search is enough: the id is the only part that is kept, so it is
    # the only capture group. The closing quote sits outside the group so it
    # delimits the id without becoming part of it.
    found = re.search(r"https?://(?:www\.)?youtube\.com/embed/(.+?)\"", s)
    if found is None:
        return None
    return f"https://youtu.be/{found.group(1)}"
# Run the interactive prompt only when this file is executed as a script,
# not when it is imported.
if __name__ == "__main__":
    main()
Working 9 to 5
9:00 AM to 5:00 PM => 09:00 to 17:00
把12小时制转换成24小时制,并且补零;记得校验小时数和分钟数是否合法。
对于不符合规则的抛出ValueError异常
working.py
import re
import sys
def main():
    """Prompt for a 12-hour time range and print its converted form."""
    hours = input("Hours: ").strip()
    print(convert(hours))
def convert(s):
    """Convert a 12-hour time range to 24-hour format.

    Accepts ``"9 AM to 5 PM"`` or ``"9:00 AM to 5:00 PM"`` (the minutes are
    optional on each side independently) and returns ``"09:00 to 17:00"``.

    Args:
        s: The range to convert, e.g. ``"10:30 PM to 8:50 AM"``.

    Returns:
        str: Both times as zero-padded ``HH:MM``, joined by ``" to "``.

    Raises:
        ValueError: If *s* is not in an accepted format, an hour is outside
            1-12, or a minute is outside 00-59.
    """
    # One clock reading: 1-2 hour digits, an optional ":MM" with exactly two
    # digits, a space, then AM or PM. Being this strict rejects inputs such
    # as "9:5 AM", "9::00 AM" or "9:00:00 AM" instead of echoing them back
    # in a malformed result.
    clock = r"([0-9]{1,2})(?::([0-9]{2}))? (AM|PM)"
    found = re.fullmatch(f"{clock} to {clock}", s)
    if found is None:
        raise ValueError(f"not a valid 12-hour time range: {s!r}")
    start = _to_24_hour(*found.group(1, 2, 3))
    end = _to_24_hour(*found.group(4, 5, 6))
    return f"{start} to {end}"


def _to_24_hour(hour, minute, meridiem):
    """Return one validated 12-hour clock reading as a zero-padded HH:MM string."""
    hour = int(hour)
    # A missing minute group means the short form ("9 AM"), i.e. :00.
    minute = 0 if minute is None else int(minute)
    # A 12-hour clock runs from 1 to 12; hour 0 is not a valid reading.
    if not 1 <= hour <= 12 or minute > 59:
        raise ValueError("hour must be 1-12 and minute must be 00-59")
    # 12 AM is 00:xx and 12 PM is 12:xx, so fold 12 to 0 before the PM shift.
    hour %= 12
    if meridiem == "PM":
        hour += 12
    # Format integers, not strings: a zero-padding spec applied to a str
    # is rejected by Python versions before 3.10.
    return f"{hour:02}:{minute:02}"
# Run the interactive prompt only when this file is executed as a script,
# not when it is imported (for example by the tests).
if __name__ == "__main__":
    main()
test_working.py
from working import convert
import pytest
def test_just_h():
    """Hour-only input is converted and padded with ``:00`` minutes."""
    assert convert("9 AM to 5 PM") == "09:00 to 17:00"
    assert convert("1 AM to 5 PM") == "01:00 to 17:00"
    # 12 is the special case of the 12-hour clock: 12 AM is midnight (00)
    # and 12 PM is noon (12).
    assert convert("12 AM to 12 PM") == "00:00 to 12:00"
def test_h_m():
    """Input that includes minutes keeps them in the converted range."""
    expected = {
        "10:30 PM to 8:50 AM": "22:30 to 08:50",
        "8:30 AM to 5:50 PM": "08:30 to 17:50",
    }
    for text, result in expected.items():
        assert convert(text) == result
def test_pm_am():
    """Ranges that stay within AM or within PM are converted correctly."""
    expected = {
        "8:30 AM to 11:50 AM": "08:30 to 11:50",
        "2:30 PM to 6:50 PM": "14:30 to 18:50",
    }
    for text, result in expected.items():
        assert convert(text) == result
# The next three tests cover the three kinds of invalid input: wrong format, wrong minutes, wrong hours.
def test_wrong_format():
    """A range that is not separated by ``to`` raises ValueError."""
    malformed = "9 AM - 9 PM"
    with pytest.raises(ValueError):
        convert(malformed)
def test_wrong_minute():
    """A minute value of 60 raises ValueError."""
    bad_minutes = "9:60 AM to 9:60 PM"
    with pytest.raises(ValueError):
        convert(bad_minutes)
def test_wrong_hour():
    """An hour value above 12 raises ValueError."""
    bad_hours = "13 PM to 17 PM"
    with pytest.raises(ValueError):
        convert(bad_hours)
Regular, um, Expressions
um.py
import re
import sys
def main():
    """Prompt for some text and print the result of counting its "um"s."""
    text = input("Text: ")
    print(count(text))
def count(s):
    """Return how many times "um" occurs in *s* as a standalone word.

    Matching is case-insensitive. Word boundaries on both sides keep the
    letters "um" inside a longer word (for example "yummy" or "lump") from
    being counted.

    Args:
        s: The text to search.

    Returns:
        int: The number of standalone occurrences; 0 when there are none.
    """
    # findall() returns one entry per match, so the length of that list is
    # the count; an empty list gives 0 without a separate branch.
    return len(re.findall(r"\bum\b", s, re.IGNORECASE))
# Run the interactive prompt only when this file is executed as a script,
# not when it is imported (for example by the tests).
if __name__ == "__main__":
    main()
test_um.py
from um import count
import pytest
def test_alone_um():
    """A lone "um" counts once; text without it counts zero."""
    for text, total in (("um", 1), ("text", 0)):
        assert count(text) == total
def test_um_in_world():
    """Only a standalone "um" counts, not "um" inside another word."""
    expected = {"um... yes": 1, "um... lump": 1, "lum... lump": 0}
    for text, total in expected.items():
        assert count(text) == total
def test_other_subtitle():
    """Punctuation next to "um" does not stop it from being counted."""
    expected = {"um?": 1, "Yum?": 0, "um... Ah um...": 2}
    for text, total in expected.items():
        assert count(text) == total
def test_upper_lower():
    """Counting ignores letter case."""
    for text in ("UM", "Um", "uM"):
        assert count(text) == 1
Response Validation
GitHub - python-validators/validators: Python Data Validation for Humans™.:
>>> import validators
>>>
>>> validators.email('someone@example.com')
True
应该不需要再多说什么了吧。
response.py
import validators
def main():
    """Prompt for an email address and print the result of checking it."""
    # The prompt previously misspelled "address" as "addres".
    email = input("What's your email address: ")
    print(check_email(email))
def check_email(s):
    """Return "Valid" if validators.email() reports a truthy result for *s*, else "Invalid"."""
    return "Valid" if validators.email(s) else "Invalid"
# Run the interactive prompt only when this file is executed as a script,
# not when it is imported.
if __name__ == "__main__":
    main()