CS50P Problem Sets7 | 问题集7

这节课讲解了正则表达式,正则表达式的内容挺多挺杂的,但Python的强大之处就在于有人给我们造轮子了,有很多匹配规则其实是不用我们自己写的

NUMB3RS

检测IPv4地址(以#.#.#.#的形式),每个数字的取值范围在0到255之间,符合规则就返回True,否则返回False

 numb3rs.py

import re
import sys


def main():
    """Prompt for an IPv4 address and print the result of validating it."""
    address = input("IPv4 Address: ")
    print(validate(address))


def validate(ip):
    """Return True if *ip* is a dotted-decimal IPv4 address, else False.

    A valid address consists of exactly four groups of ASCII digits
    separated by single dots, with every group in the range 0-255.
    Leading zeros are tolerated ("001.2.3.4" is accepted).  Anything else
    (signs, whitespace, underscores, letters, a wrong number of groups,
    a trailing newline) is rejected.
    """
    # Match digits only: int() on its own would also accept "-1", " 1",
    # "+1" and "1_0", so the shape must be pinned down by the pattern.
    # fullmatch (rather than ^...$) also rejects a trailing newline.
    octets = re.fullmatch(r"([0-9]+)\.([0-9]+)\.([0-9]+)\.([0-9]+)", ip)
    if octets is None:
        return False
    try:
        return all(int(octet) <= 255 for octet in octets.groups())
    except ValueError:
        # int() can refuse extremely long digit strings; such input is
        # certainly not a valid octet.
        return False


# Run the interactive prompt only when this file is executed as a script.
if __name__ == "__main__":
    main()

 test_numb3rs.py

from numb3rs import validate
import pytest

def test_num():
    """In-range addresses are accepted; an octet above 255 is rejected."""
    for accepted in ("127.0.0.1", "255.255.255.255"):
        assert validate(accepted)
    for rejected in ("256.255.255.1", "255.256.255.1"):
        assert validate(rejected) == False


def test_str():
    """Non-numeric input is rejected, with or without dots."""
    for rejected in ("cat", "cat.set.la.kk"):
        assert validate(rejected) == False

# When this test module is executed directly, hand control to pytest.main().
if __name__ == "__main__":
    pytest.main()

 Watch on YouTube

 这是一个最低限度的链接http://youtube.com/embed/xvFZjo5PgG0

我们要做的是把含有http://www.youtube.com/embed/...的链接转换成https://youtu.be/...,

也就是说http要替换成https,www去掉,youtube.com替换成youtu.be,去掉embed还有其后的/,

只保留https://youtu.be/...,其中...是视频ID(例如xvFZjo5PgG0)

watch.py 

import re
import sys

def main():
    """Prompt for a snippet of HTML and print what parse() extracts from it."""
    html = input("HTML: ")
    print(parse(html))

def parse(s):
    """Extract a YouTube embed URL from *s* and return its youtu.be form.

    Looks for ``http://`` or ``https://``, an optional ``www.``, then
    ``youtube.com/embed/<id>`` terminated by a double quote, and returns
    ``https://youtu.be/<id>``.  Returns None when no such URL is present.
    """
    # One pass is enough: capture only the video ID and rebuild the short
    # URL from it.  The ID must be non-empty and may not contain a quote,
    # so an empty ``embed/"`` cannot swallow the attributes that follow it.
    embed = re.search(r'https?://(?:www\.)?youtube\.com/embed/([^"\n]+)"', s)
    if embed is None:
        return None
    return f"https://youtu.be/{embed.group(1)}"

# Run the interactive prompt only when this file is executed as a script.
if __name__ == "__main__":
    main()

Working 9 to 5 

9:00 AM to 5:00 PM => 09:00 to 17:00

把12小时变成24小时,并且补零,记得检验小时数和分钟数 

对于不符合规则的抛出ValueError异常

 working.py

import re
import sys


def main():
    """Prompt for a 12-hour time range and print its 24-hour conversion."""
    hours = input("Hours: ").strip()
    print(convert(hours))


def convert(s):
    """Convert a 12-hour time range to 24-hour ``HH:MM to HH:MM`` form.

    Accepted input looks like ``9 AM to 5 PM`` or ``9:00 AM to 5:00 PM``:
    each side is an hour of one or two digits, optionally followed by a
    colon and exactly two minute digits, then a space and AM or PM.  The
    two sides are separated by `` to ``.

    Raises:
        ValueError: if *s* is not in that format, if an hour is outside
            1-12, or if a minute value is above 59.
    """
    # A flat pattern with bounded repetition: no nested quantifiers, and
    # minutes are either absent or exactly ":MM".
    match = re.fullmatch(
        r"([0-9]{1,2})(?::([0-9]{2}))? (AM|PM) to "
        r"([0-9]{1,2})(?::([0-9]{2}))? (AM|PM)",
        s,
    )
    if match is None:
        raise ValueError(f"not a valid time range: {s!r}")

    start_h, start_m, start_meridiem, end_h, end_m, end_meridiem = match.groups()
    start = _to_24_hour(start_h, start_m, start_meridiem)
    end = _to_24_hour(end_h, end_m, end_meridiem)
    return f"{start} to {end}"


def _to_24_hour(hour, minute, meridiem):
    """Return ``HH:MM`` for one 12-hour time given as captured strings.

    *hour* is a digit string, *minute* is a two-digit string or None when
    the input had no minutes, and *meridiem* is "AM" or "PM".  Raises
    ValueError when the hour is not 1-12 or the minute is above 59.
    """
    hour = int(hour)
    minute = 0 if minute is None else int(minute)
    if not 1 <= hour <= 12 or minute > 59:
        raise ValueError(f"invalid 12-hour time: {hour}:{minute:02} {meridiem}")
    # 12 AM is midnight (00) and 12 PM is noon (12): fold 12 down to 0
    # before adding the PM offset.
    hour %= 12
    if meridiem == "PM":
        hour += 12
    return f"{hour:02}:{minute:02}"


# Run the interactive prompt only when this file is executed as a script.
if __name__ == "__main__":
    main()

test_working.py

from working import convert
import pytest

def test_just_h():
    """Hour-only times are rendered with ':00' minutes."""
    expected = {
        "9 AM to 5 PM": "09:00 to 17:00",
        "1 AM to 5 PM": "01:00 to 17:00",
    }
    for hours, result in expected.items():
        assert convert(hours) == result

def test_h_m():
    """Times with explicit minutes keep those minutes in the output."""
    expected = {
        "10:30 PM to 8:50 AM": "22:30 to 08:50",
        "8:30 AM to 5:50 PM": "08:30 to 17:50",
    }
    for hours, result in expected.items():
        assert convert(hours) == result

def test_pm_am():
    """Ranges whose two ends share the same meridiem convert correctly."""
    expected = {
        "8:30 AM to 11:50 AM": "08:30 to 11:50",
        "2:30 PM to 6:50 PM": "14:30 to 18:50",
    }
    for hours, result in expected.items():
        assert convert(hours) == result

# Check three kinds of invalid input
def test_wrong_format():
    """A range written with '-' instead of 'to' raises ValueError."""
    malformed = "9 AM - 9 PM"
    with pytest.raises(ValueError):
        convert(malformed)


def test_wrong_minute():
    """Minute values of 60 raise ValueError."""
    bad_minutes = "9:60 AM to 9:60 PM"
    with pytest.raises(ValueError):
        convert(bad_minutes)


def test_wrong_hour():
    """Hour values above 12 raise ValueError."""
    bad_hours = "13 PM to 17 PM"
    with pytest.raises(ValueError):
        convert(bad_hours)

Regular, um, Expressions

um.py

import re
import sys


def main():
    """Prompt for some text and print how many times count() finds "um"."""
    text = input("Text: ")
    print(count(text))


def count(s):
    """Return how many times "um" appears in *s* as a standalone word.

    Matching is case-insensitive and uses word boundaries, so "um" inside
    a longer word such as "yummy" or "lump" is not counted.
    """
    # findall returns one entry per match, so its length is the count.
    return len(re.findall(r"\bum\b", s, re.IGNORECASE))

# Run the interactive prompt only when this file is executed as a script.
if __name__ == "__main__":
    main()

test_um.py

from um import count
import pytest

def test_alone_um():
    """A lone "um" counts once; text without it counts zero."""
    expected = {"um": 1, "text": 0}
    for text, total in expected.items():
        assert count(text) == total

def test_um_in_world():
    """ "um" embedded inside a longer word is not counted."""
    expected = {
        "um... yes": 1,
        "um... lump": 1,
        "lum... lump": 0,
    }
    for text, total in expected.items():
        assert count(text) == total

def test_other_subtitle():
    """Punctuation around "um" does not prevent it from being counted."""
    expected = {
        "um?": 1,
        "Yum?": 0,
        "um... Ah um...": 2,
    }
    for text, total in expected.items():
        assert count(text) == total

def test_upper_lower():
    """Every capitalisation of "um" counts as one occurrence."""
    for spelling in ("UM", "Um", "uM"):
        assert count(spelling) == 1

Response Validation

GitHub - python-validators/validators: Python Data Validation for Humans™.:

>>> import validators
>>> 
>>> validators.email('someone@example.com')
True

应该不需要说什么了吧

response.py 

import validators

def main():
    """Prompt for an email address and print check_email()'s verdict."""
    # Prompt text corrected: the original read "addres".
    answer = input("What's your email address: ")
    print(check_email(answer))


def check_email(s):
    """Return "Valid" if validators.email() accepts *s*, else "Invalid"."""
    return "Valid" if validators.email(s) else "Invalid"

# Run the interactive prompt only when this file is executed as a script.
if __name__ == "__main__":
    main()

 

  • 15
    点赞
  • 10
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值