Python第六章（正则表达式，import re等等，字符串的相关操作）

爱莉希雅&&&

已于 2024-07-17 12:02:10 修改

阅读量705

点赞数 30

文章标签： python

于 2024-06-09 20:58:20 首次发布

本文链接：https://blog.csdn.net/2301_81704123/article/details/139179950

版权

格式化字符串

代码演示

# Three ways to format a string, all fed with the same sample record.
name = "马冬梅"
age = 18
sorce = 98

# 1) printf-style "%" placeholders: %s = str, %d = integer, %f = float.
#    %f prints six decimals by default; %.2f limits the output to two.
record = (name, age, sorce)
print("name%s,age:%d,sorce:%f" % record)    # name马冬梅,age:18,sorce:98.000000
print("name%s,age:%d,sorce:%.2f" % record)  # name马冬梅,age:18,sorce:98.00
print()

# 2) f-strings (Python 3.6+): expressions are written directly inside {}.
print(f"name:{name},age:{age},sorce:{sorce}")  # name:马冬梅,age:18,sorce:98

# 3) str.format(): the number inside {} is the index of the positional
#    argument, so the arguments may be supplied in any order.
print("name:{0},age:{1},sorce:{2}".format(name, age, sorce))  # name:马冬梅,age:18,sorce:98
print("name:{1},age:{2},sorce:{0}".format(sorce, name, age))  # name:马冬梅,age:18,sorce:98

代码运行结果

name马冬梅,age:18,sorce:98.000000
name马冬梅,age:18,sorce:98.00

name:马冬梅,age:18,sorce:98
name:马冬梅,age:18,sorce:98
name:马冬梅,age:18,sorce:98

数据的验证

代码的演示及结果

# str.isdigit(): plain decimal digits pass; Chinese numerals and a prefixed
# literal such as "0b10111" (it contains the letter "b") do not.
for text in (
    "123",      # True
    "一二三",    # False
    "0b10111",  # False
):
    print(text.isdigit())
print()

# str.isnumeric(): broader than isdigit() - it also accepts characters that
# carry a numeric value, such as Chinese numerals.
for text in (
    "123",      # True
    "一二三",    # True
    "0b10111",  # False
):
    print(text.isnumeric())
print()

# str.isalpha(): every character must be a letter. Chinese characters count
# as letters, including the numerals "一二三"; the digit "1" does not.
for text in (
    "hellow你好",        # True
    "hellow你好1",       # False
    "hellow你好一二三",   # True
):
    print(text.isalpha())
print()

# str.isalnum(): every character must be a letter or a number.
for text in (
    "hellow你好",        # True
    "hellow你好1",       # True
    "hellow你好一二三",   # True
):
    print(text.isalnum())
print()

# Case checks. Chinese characters have no case, so they are ignored and only
# the cased (Latin) letters decide the result.
print("Hellow".islower())      # False: "H" is uppercase
print("hellow你好".islower())   # True
print("HELLOW你好".isupper())   # True
print()

# str.istitle(): each run of letters must start with an uppercase letter and
# continue with lowercase (or uncased) characters only.
for text in (
    "Hellow",                # True
    "HelloWorld",            # False: uppercase "W" follows a lowercase letter
    "Hellowworld你好一二三",   # True: uncased Chinese characters do not break it
    "Hellow world",          # False: the second word starts with lowercase
    "Hellow World",          # True
):
    print(text.istitle())
print()

# str.isspace(): True only for whitespace characters.
for text in (
    "\t",  # True
    "\n",  # True
    "\a",  # False: the bell character is a control character, not whitespace
    " ",   # True
):
    print(text.isspace())

字符串的编码和解码

代码演示：

s = "伟大的中国梦"

# Encoding: str -> bytes.
# No codec is named here, so encode() falls back to UTF-8, which spends
# 3 bytes on each of these Chinese characters.
scode = s.encode(errors="replace")
print(scode)
# GBK spends 2 bytes on each of these Chinese characters.
scode_gbk = s.encode("gbk", errors="replace")
print(scode_gbk)

# Error handling: the heart symbol cannot be encoded as GBK.
s2 = "爱❤"
# errors="ignore" silently drops what cannot be encoded -> b'\xb0\xae'
scode2 = s2.encode("gbk", errors="ignore")
print(scode2)
# errors="strict" would raise a "can't encode character" error instead:
#     s2.encode("gbk", errors="strict")
# errors="replace" substitutes "?" for what cannot be encoded -> b'\xb0\xae?'
scode2 = s2.encode("gbk", errors="replace")
print(scode2)

# Decoding: bytes -> str. Always decode with the codec used for encoding.
for raw, codec in ((scode_gbk, "gbk"), (scode, "utf-8")):
    print(raw.decode(codec))  # 伟大的中国梦

代码结果

b'\xe4\xbc\x9f\xe5\xa4\xa7\xe7\x9a\x84\xe4\xb8\xad\xe5\x9b\xbd\xe6\xa2\xa6'
b'\xce\xb0\xb4\xf3\xb5\xc4\xd6\xd0\xb9\xfa\xc3\xce'
b'\xb0\xae'
b'\xb0\xae?'
伟大的中国梦
伟大的中国梦

字符串的相关处理

s1 = "Hellow World"
print(s1)

# Case conversion returns new strings.
s2 = s1.lower()  # all lowercase
print(s2)
s3 = s1.upper()  # all uppercase
print(s3)

# split(sep) cuts the string at every occurrence of the separator.
e_mail = "dyh@123.com"
lst = e_mail.split("@")
mailbox, domain = lst
print("邮箱名：", mailbox, "服务器域名：", domain)

# Counting and searching.
letter = "o"
print(s1.count(letter))  # number of occurrences of "o"
print(s1.find(letter))   # index of the first "o"
print(s1.find("p"))      # -1 means "not found"
print(s1.index(letter))  # like find(), but raises an error when not found

# Prefix / suffix tests are case-sensitive.
print("demo.py".endswith("py"))      # True
print("demo.txt".endswith(".txt"))   # True
print(s1.startswith("h"))            # False
print(s1.startswith("H"))            # True

字符串的相关处理2

s1 = "hellowworld"
print(s1)

# replace(old, new[, count]): old substring first, replacement second;
# the optional third argument caps the number of replacements.
s2 = s1.replace("o", "你好")
print(s2)  # hell你好ww你好rld
s3 = s1.replace("o", "你好", 1)
print(s3)  # hell你好wworld

# center(width, fillchar): centre the string inside the given width.
print(s1.center(20, "*"))  # ****hellowworld*****

# Without arguments the strip family removes surrounding whitespace:
# strip() on both sides, lstrip() on the left, rstrip() on the right.
s4 = "    hellow  world    "
for trim in (s4.strip, s4.lstrip, s4.rstrip):
    print(trim())

# With an argument, the argument is a set of characters to remove from the
# ends; their order does not matter.
print(s1.strip("he"))      # llowworld
print(s1.strip("hellow"))  # rld
s5 = "lddl123dl"
for trim in (s5.strip, s5.lstrip, s5.rstrip):
    print(trim("ld"))  # 123 / 123dl / lddl123

代码结果：

hellowworld
hell你好ww你好rld
hell你好wworld
****hellowworld*****
hellow  world
hellow  world    
    hellow  world
llowworld
rld
123
123dl
lddl123

字符串的拼接

s1 = "hellow"
s2 = "world"

# 1) The "+" operator.
print(s1 + s2)

# 2) str.join(): the string it is called on is the separator (may be empty).
parts = [s1, s2]
print("".join(parts))
print("*".join(parts))

# 3) Adjacent string literals are merged into one literal.
print("hellow" "world")

# 4) Any of the formatting mechanisms.
print("%s%s" % (s1, s2))
print(f"{s1}{s2}")
print("{0}{1}".format(s1, s2))

代码结果

hellowworld
hellowworld
hellow*world
hellowworld
hellowworld
hellowworld
hellowworld

字符串的去重操作

s = "hellowworldhellowworldffgghhjj"

# (1) Walk the characters and keep each one the first time it is seen.
kept = []
for ch in s:
    if ch in kept:
        continue
    kept.append(ch)
new_s = "".join(kept)
print(new_s)
print()

# (2) Index-based: keep the character at position i only when i is the
#     position of its first occurrence in s.
new_s2 = "".join(s[i] for i in range(len(s)) if s.index(s[i]) == i)
print(new_s2)
print()

# (3) A set removes duplicates but has no defined order, so the characters
#     are sorted back by their first position in the original string.
new_s3 = set(s)
print(new_s3)
lst = sorted(new_s3, key=s.index)
print("".join(lst))

代码结果

helowrdfgj

helowrdfgj

{'o', 'f', 'w', 'l', 'd', 'e', 'h', 'j', 'r', 'g'}
helowrdfgj

findall方法的使用

import re
# Raw string: keeps the backslashes of \d and \. intact for the regex engine
# and avoids Python's invalid-escape-sequence warning.
# \d  = one digit 0-9, \. = a literal dot, \d+ = one or more digits.
# The pattern therefore matches "digit.digits"; a bare integer does not match.
pattern = r"\d\.\d+"
s = "I study python3.11 every day python2.7 i love you"  # text to search
# findall() returns a list with every non-overlapping match, left to right.
lst = re.findall(pattern, s)
print(lst)  # ['3.11', '2.7']

代码结果

['3.11', '2.7']

format的格式控制

s = 'hellowworld'

# Fill and alignment: width 20, padded with "*".
# "<" left-aligns, ">" right-aligns, "^" centres.
for layout in (
    "{0:*<20}",  # hellowworld*********
    "{0:*>20}",  # *********hellowworld
    "{0:*^20}",  # ****hellowworld*****
):
    print(layout.format(s))

# The "," thousands separator works for integers and floats.
for number in (88888888, 8888888.99998):
    print("{0:,}".format(number))

# For floats, ".2f" sets the number of decimal places.
print("{0:.2f}".format(8888.22222))

# For strings, the precision is the maximum number of characters shown.
print("{0:.2}".format("hellow"))  # he

# Integer presentation types: b = binary, d = decimal, o = octal, x = hex.
print("二进制：{0:b}，十进制：{0:d},八进制：{0:o},十六进制：{0:x}".format(123))

# Float presentation types: f = fixed point, e / E = scientific, % = percent.
print("{0:.2f},{0:e},{0:E},{0:.2%}".format(3.1415926))  # 3.14,3.141593e+00,3.141593E+00,314.16%

代码结果
hellowworld*********
*********hellowworld
****hellowworld*****
88,888,888
8,888,888.99998
8888.22
he
二进制：1111011，十进制：123,八进制：173,十六进制：7b
3.14,3.141593e+00,3.141593E+00,314.16%

match方法的使用

import re
# Raw string: keeps the backslashes of \d and \. intact for the regex engine
# and avoids Python's invalid-escape-sequence warning.
# \d = one digit 0-9, \. = a literal dot, \d+ = one or more digits.
pattern = r"\d\.\d+"

# re.match() only tries to match at the very beginning of the string.
s = "I study python every day"  # does not start with "digit.digits"
# re.I makes matching case-insensitive (it has no effect on a digits-only pattern).
match = re.match(pattern, s, re.I)
print(match)  # None

s2 = "3.11 python I study every day"  # starts with a match, so this one succeeds
match2 = re.match(pattern, s2)
print(match2)  # <re.Match object; span=(0, 4), match='3.11'>
print("匹配值的开始位置：", match2.start())   # start index of the match
print("匹配值的结束位置：", match2.end())     # end index (exclusive)
print("匹配区间的位置元素：", match2.span())  # (start, end) tuple
print("待匹配的字符串：", match2.string)      # the string that was searched
print("匹配的数据：", match2.group())         # the matched text

代码结果

None
<re.Match object; span=(0, 4), match='3.11'>
匹配值的开始位置： 0
匹配值的结束位置： 4
匹配区间的位置元素： (0, 4)
待匹配的字符串： 3.11 python I study every day
匹配的数据： 3.11

search方法的使用

import re
# Raw string: keeps the backslashes of \d and \. intact for the regex engine
# and avoids Python's invalid-escape-sequence warning.
# \d = one digit 0-9, \. = a literal dot, \d+ = one or more digits, so the
# pattern matches "digit.digits"; a bare integer does not match.
pattern = r"\d\.\d+"
s = "I study python3.11 every day python2.7 i love you"  # text to search
# re.search() scans the whole string and returns only the first match.
match = re.search(pattern, s)
print(match)          # <re.Match object; span=(14, 18), match='3.11'>
print(match.group())  # 3.11

代码结果

<re.Match object; span=(14, 18), match='3.11'>
3.11

sub和split的使用

import re
# re.sub(): replace every occurrence of the listed words with "**".
# Outside a character class, "|" means "either alternative".
pattern = "破解|黑客"
s = "我想学习黑客去破解别人的密码"
new_s = re.sub(pattern, "**", s)
print(new_s)  # 我想学习**去**别人的密码
print()

# re.split(): cut the string at every "?" or "%".
# Inside [...] each character is literal, so no "|" is needed between them;
# writing "[?|%]" would make "|" a separator as well.
s2 = "https://mbd.baidu.com/newspage/data/landingsuper?context=%7B%22nid%22%3A%22"
pattern = r"[?%]"
lst = re.split(pattern, s2)
print(lst)

代码结果

我想学习**去**别人的密码

['https://mbd.baidu.com/newspage/data/landingsuper', 'context=', '7B', '22nid', '22', '3A', '22']