莫烦 Python 基础 Set & 正则表达式

我的视频学习笔记
Set

# A set keeps only unique elements, so building one from a list drops the duplicates.
char_list = ['a', 'b', 'c', 'c', 'd', 'd', 'd']
sentence = 'Welcome Back to This Tutorial'
print(set(char_list))  # {'b', 'd', 'c', 'a'}  (element order of a set is arbitrary)
print(type(set(char_list)))  # <class 'set'>
print(type({1: 2}))  # <class 'dict'>  (braces holding key: value pairs build a dict, not a set)
print(set(sentence))  # unique characters: {' ', 'h', 'k', 's', 'u', 't', 'B', 'e', 'a', 'r', 'T', 'i', 'l', 'o', 'W', 'c', 'm'}

unique_char = set(char_list)
# Adding elements
unique_char.add('x')
print(unique_char)  # {'b', 'c', 'x', 'd', 'a'}
unique_char.add('a')  # 'a' is already present, so the set does not change
print(unique_char)  # {'b', 'c', 'x', 'd', 'a'}
# Elements must be hashable: passing a list to add() raises TypeError.
# Removing elements
unique_char.remove('x')
print(unique_char)  # {'a', 'b', 'd', 'c'}
# remove() raises KeyError when the element is missing (e.g. 'y' here) ...
unique_char.discard('y')  # ... whereas discard() leaves the set untouched and raises nothing
# Clearing
unique_char.clear()
print(unique_char)  # the set is now empty: set()

对比两个set

# Rebuild the set of unique characters and extend it with one extra element.
unique_char = set(char_list)
unique_char.add('x')
# Compare two sets; set1 is another name for the same set object as unique_char.
set2 = {'a', 'e', 'i'}
set1 = unique_char
print(set1 - set2)  # elements of set1 that are missing from set2: {'d', 'c', 'x', 'b'}
print(set1 & set2)  # elements present in both sets: {'a'}

正则表达式

import re  # 引入正则表达式的模块

# Plain substring matching with the `in` operator
pattern1 = "cat"
pattern2 = "bird"
string = "dog runs to cat"
for keyword in (pattern1, pattern2):
    # True for "cat" (it occurs in the string), False for "bird"
    print(keyword in string)

# The same two lookups through the regular-expression engine
for keyword in (pattern1, pattern2):
    # "cat"  -> <re.Match object; span=(12, 15), match='cat'>
    # "bird" -> None
    print(re.search(keyword, string))

# Several alternatives at one position: the class [au] accepts "run" or "ran"
ptn = r"r[au]n"
print(re.search(ptn, string))  # <re.Match object; span=(4, 7), match='run'>

# Character ranges inside a class cover even more possibilities
for range_pattern, text in (
    (r"r[A-Z]n", "dog runs to cat"),  # None ('u' is not an upper-case letter)
    (r"r[a-z]n", "dog runs to cat"),  # <re.Match object; span=(4, 7), match='run'>
    (r"r[0-9]n", "dog r1ns to cat"),  # <re.Match object; span=(4, 7), match='r1n'>
    (r"r[0-9a-z]n", "dog runs to cat"),  # <re.Match object; span=(4, 7), match='run'>
):
    print(re.search(range_pattern, text))

# Special sequences and metacharacters
# Digits
# \d : any decimal digit, similar to [0-9]
print(re.search(r"r\dn", "run r4n"))  # <re.Match object; span=(4, 7), match='r4n'>
# \D : any character that is not a decimal digit
print(re.search(r"r\Dn", "run r4n"))  # <re.Match object; span=(0, 3), match='run'>
# Whitespace
# \s : any whitespace character [ \t\n\r\f\v]
print(re.search(r"r\sn", "r\nn r4n"))  # <re.Match object; span=(0, 3), match='r\nn'>
# \S : opposite of \s, any non-whitespace character
print(re.search(r"r\Sn", "r\nn r4n"))  # <re.Match object; span=(4, 7), match='r4n'>
# Word characters
# \w : letters, digits and underscore ([a-zA-Z0-9_] for ASCII text)
print(re.search(r"r\wn", "r\nn r4n"))  # <re.Match object; span=(4, 7), match='r4n'>
# \W : opposite of \w, any non-word character
print(re.search(r"r\Wn", "r\nn r4n"))  # <re.Match object; span=(0, 3), match='r\nn'>
# Word boundaries
# \b : the empty string, but only at the start or end of a word
print(re.search(r"\bruns\b", "dog runs to cat"))  # <re.Match object; span=(4, 8), match='runs'>
# "runs" has a space directly on each side, so both \b assertions hold.
# \B : the empty string, but only where there is NO word boundary
print(re.search(r"\B runs \B", "dog  runs  to cat"))  # <re.Match object; span=(4, 10), match=' runs '>
# Each \B sits between two spaces (two non-word characters), which is why
# the text needs at least two spaces on either side of "runs".
# Literal backslash and the wildcard
# \\ : match a literal backslash
# The subject text is a raw string: a bare backslash before a space in a
# normal literal is an invalid escape sequence and warns on recent Pythons.
print(re.search(r"runs\\", r"runs\ to me"))  # <re.Match object; span=(0, 5), match='runs\\'>
# . : match any single character except a newline
print(re.search(r"r.n", "r[ns to me"))  # <re.Match object; span=(0, 3), match='r[n'>
# Start and end anchors
# ^ : match at the beginning of the string
print(re.search(r"^dog", "dog  runs  to cat"))  # <re.Match object; span=(0, 3), match='dog'>
# $ : match at the end of the string
print(re.search(r"cat$", "dog  runs  to cat"))  # <re.Match object; span=(14, 17), match='cat'>
# Optional parts
# ? : the preceding item (here the group "day") may occur once or not at all
print(re.search(r"Mon(day)?", "Monday"))  # <re.Match object; span=(0, 6), match='Monday'>
print(re.search(r"Mon(day)?", "Mon"))  # <re.Match object; span=(0, 3), match='Mon'>
# Multi-line matching
string = """
dog runs to cat.
I run to dog.
"""
# Without a flag, ^ anchors only at the very start of the whole string,
# and this string starts with a newline rather than with "I".
print(re.search(r"^I", string))  # None
# re.MULTILINE (short name re.M) makes ^ and $ anchor at every line.
print(re.search(r"^I", string, re.MULTILINE))  # <re.Match object; span=(18, 19), match='I'>

# Quantifiers, each tried on text with no 'b' at all and on text with four of them
for quantified, text in (
    # * : the preceding item occurs 0 or more times
    (r"ab*", "a"),  # zero b's still match: <re.Match object; span=(0, 1), match='a'>
    (r"ab*", "abbbb"),  # <re.Match object; span=(0, 5), match='abbbb'>
    # + : the preceding item occurs 1 or more times
    (r"ab+", "a"),  # at least one b is required: None
    (r"ab+", "abbbb"),  # <re.Match object; span=(0, 5), match='abbbb'>
    # {n,m} : the preceding item occurs between n and m times
    (r"ab{2,10}", "a"),  # fewer than two b's: None
    (r"ab{2,10}", "abbbb"),  # four b's are within range: <re.Match object; span=(0, 5), match='abbbb'>
):
    print(re.search(quantified, text))
# Capturing groups: a Match object can be indexed by group number
match = re.search(r"(\d+), Date:(.+)", "ID: 021523, Date: Wed/03/2020")
print(match[0])  # group 0 is the whole match: 021523, Date: Wed/03/2020
print(match[1])  # first parenthesised group: 021523
print(match[2])  # second group; .+ also captures the space after "Date:", giving " Wed/03/2020"
# Named groups (?P<id>...) and (?P<date>...) keep many groups easy to tell apart
match = re.search(r"(?P<id>\d+), Date:(?P<date>.+)", "ID: 021523, Date: Wed/03/2020")
print(match["id"])  # 021523
print(match["date"])  # " Wed/03/2020" (again with the leading space)
# Finding every match
# findall returns all non-overlapping matches as a list
print(re.findall(r"r[au]n", "ran run ren"))  # ['ran', 'run']
# | : alternation, either side may match
print(re.findall(r"run|ran", "ran run ren"))  # ['ran', 'run']
# Replacing
# re.sub(pattern, replacement, text) swaps every match for the replacement
print(re.sub(r"r[au]ns", "catchs", "dog runs to cat"))  # dog catchs to cat
# Splitting
# re.split cuts the text at every match; \. is a literal dot, not the wildcard
print(re.split(r"[,;\.]", "a, n, b; c. t"))  # ['a', ' n', ' b', ' c', ' t']
# Compiling
# Compile the pattern once, then call search() on the compiled object
compiled_re = re.compile(r"r[au]n")
print(compiled_re.search("dog runs to cat"))  # <re.Match object; span=(4, 7), match='run'>
  • 1
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值