整体文章目录
一、 当前章节目录
二、常见的字符串操作
2.1 字符串的格式化
# Format strings with the % operator
str1 = "version"
num = 1.0
# Named "formatted" rather than "format" so the builtin format() is not shadowed
formatted = "%s" % str1
print(formatted)
# %d converts the float 1.0 to the integer 1, so this prints "version 1"
formatted = "%s %d" % (str1, num)
print(formatted)
运行结果:
version
version 1
# Formatting with an explicit precision
print("浮点型数字: %f" % 1.25) # default float format: six digits after the decimal point
print("浮点型数字: %.1f" % 1.25) # one decimal place; 1.25 is an exact tie and rounds to the even digit, giving 1.2
print("浮点型数字: %.2f" % 1.254) # two decimal places
运行结果:
浮点型数字: 1.250000
浮点型数字: 1.2
浮点型数字: 1.25
# Format a string from a dictionary: %(key)s / %(key).1f look the value up by key
print("%(version)s: %(num).1f" % {"version": "version", "num": 2})
运行结果:
version: 2.0
# String alignment
word = "version3.0"
# center the word in a field of width 20, padded with spaces
print(word.center(20))
# same, but padded with "*"
print(word.center(20, "*"))
# width 0 is shorter than the string, so ljust() returns it unchanged
# NOTE(review): possibly intended ljust(20) to mirror the other calls — confirm
print(word.ljust(0))
# right-align in a field of width 20
print(word.rjust(20))
# %30s right-aligns in a field of width 30
print("%30s" % word)
运行结果:
2.2 字符串的转义字符
# Printing escape sequences
# In a normal literal, \t and \n are a tab and a newline (one character each)
path = "hello\tworld\n"
print(path)
print(len(path))
# In a raw string the backslashes are kept literally, so \t and \n are two characters each
path = r"hello\tworld\n"
print(path)
print(len(path))
运行结果:
# strip() removes leading/trailing whitespace such as \t and \n
word = "\thello world\n"
print("直接输出:", word)
# strip(): both ends; lstrip(): left end only; rstrip(): right end only
print("strip()后输出:", word.strip())
print("lstrip()后输出:", word.lstrip())
print("rstrip()后输出:", word.rstrip())
运行结果:
2.3 字符串的合并
# Join strings with the "+" operator
first, second = "hello ", "world "
third, last = "hello ", "China "
# build the first three pieces in one expression, then append the fourth
result = first + second + third
result = result + last
print(result)
运行结果:
hello world hello China
# Join a list of strings with str.join()
strs = ["hello ", "world ", "hello ", "China "]
# the string join() is called on is the separator; empty here
separator = ""
result = separator.join(strs)
print(result)
运行结果:
hello world hello China
# Join strings by folding the list with reduce()
import operator
from functools import reduce

strs2 = ["hello ", "world ", "hello ", "China "]
# operator.concat(a, b) is a + b for sequences; "" is the starting value
result = reduce(operator.concat, strs2, "")
print(result)
运行结果:
hello world hello China
2.4 字符串的截取
# Take a single character by index (indices start at 0, so 4 is the fifth character)
word = "world"
print(word[4])
运行结果:
d
# Take substrings with slices
str1 = "hello world"
# characters at indices 0, 1 and 2
print(str1[0:3])
# every second character, starting at index 0
print(str1[::2])
# every second character, starting at index 1
print(str1[1::2])
运行结果:
hel
hlowrd
el ol
# Get substrings with split()
sentence = "Bob said: 1, 2, 3, 4"
# no argument: split on runs of whitespace
print("使用空格获取子串:", sentence.split())
# split on every comma
print("使用逗号获取子串:", sentence.split(","))
# second argument 2: perform at most two splits, leaving the rest intact
print("使用两个逗号获取子串:", sentence.split(",", 2))
运行结果:
使用空格获取子串: ['Bob', 'said:', '1,', '2,', '3,', '4']
使用逗号获取子串: ['Bob said: 1', ' 2', ' 3', ' 4']
使用两个逗号获取子串: ['Bob said: 1', ' 2', ' 3, 4']
# Compare the identity of a string with that of a concatenation result:
# str1 + "b" is a new object, so the two id() values differ
str1 = "a"
print(id(str1))
print(id(str1 + "b"))
运行结果:
2716228415920
2716234269488
2.5 字符串的比较
# Comparing strings
str1 = 1
str2 = "1"
# An int never compares equal to a str, so this prints "不相同"
if str1 == str2:
    print("相同")
else:
    print("不相同")
# After converting the int with str(), the two values compare equal
if str(str1) == str2:
    print("相同")
else:
    print("不相同")
运行结果:
不相同
相同
# Compare the start and the end of a string
word = "hello world"
# slice comparison and startswith() are two ways to test the prefix
print("hello" == word[0:5])
print(word.startswith("hello"))
# test the suffix of the part starting at index 6
print(word.endswith("ld", 6))
# end index 10 is exclusive: only word[6:10] ("worl") is examined, hence False
print(word.endswith("ld", 6, 10))
# end index len(word) covers the whole tail again
print(word.endswith("ld", 6, len(word)))
运行结果:
True
True
True
False
True
2.6 字符串的反转
# Reverse a string with an explicit loop
def reverse(s):
    """Return the characters of s in reverse order.

    The result is built one character at a time by walking s from its
    last character to its first. An empty string yields an empty string.
    """
    out = ""
    # reversed() walks s back to front without manual index arithmetic
    for ch in reversed(s):
        out += ch
    return out


print(reverse("hello"))
运行结果:
olleh
# Reverse a string with list.reverse()
def reverse2(s):
    """Return s reversed, using the in-place list.reverse() method.

    Strings are immutable, so the characters are copied into a list,
    the list is reversed in place, and the result is joined back together.
    """
    li = list(s)
    li.reverse()
    return "".join(li)


print(reverse2("hello"))
运行结果:
olleh
# Reverse a string with a slice
def reverse3(s):
    """Return s reversed via a slice with step -1."""
    return s[::-1]


print(reverse3("hello"))
运行结果:
olleh
2.7 字符串的查找和替换
# Searching within a string
sentence = "This is a apple."
# find() returns the index of the first occurrence
print(sentence.find("a"))
sentence = "This is a apple."
# rfind() returns the index of the last occurrence
print(sentence.rfind("a"))
运行结果:
8
10
# Replacing within a string
sentence = "hello world, hello China"
# replace every occurrence
print(sentence.replace("hello", "hi"))
# third argument 1: replace only the first occurrence
print(sentence.replace("hello", "hi", 1))
# the substring is not present, so the string comes back unchanged
print(sentence.replace("abc", "hi"))
运行结果:
hi world, hi China
hi world, hello China
hello world, hello China
2.8 字符串与日期的转换
import time,datetime # 导入time、datetime模块
# Time -> string: format the current local time
now_text = time.strftime("%Y-%m-%d %X", time.localtime())
print(now_text)
# String -> time: parse the date, then pick year/month/day off the struct_time
t = time.strptime("2019-10-08", "%Y-%m-%d")
y, m, d = t.tm_year, t.tm_mon, t.tm_mday
print(datetime.datetime(y, m, d))
运行结果:
2021-06-11 17:24:05
2019-10-08 00:00:00
三、正则表达式应用
3.1 正则表达式简介
3.2 使用re模块处理正则表达式
import re # 导入re模块
# Using ^ and $
s = "HELLO WORLD"
# ^ anchors at the start; matching is case-sensitive, so nothing is found
print(re.findall(r"^hello", s))
# re.I makes the match case-insensitive
print(re.findall(r"^hello", s, re.I))
# $ anchors at the end of the string
print(re.findall("WORLD$", s))
print(re.findall(r"wORld$", s, re.I))
# \b\w+\b matches each run of word characters between word boundaries
print(re.findall(r"\b\w+\b", s))
运行结果:
[]
['HELLO']
['WORLD']
['WORLD']
['HELLO', 'WORLD']
import re # 导入re模块
# Replacing with re.sub(pattern, replacement, string)
s = "hello world"
print(re.sub("hello", "hi", s))
# s[-4:] is "orld", which contains no "hello", so it is returned unchanged
print(re.sub("hello", "hi", s[-4:]))
# s[-5:] is "world", which is replaced as a whole
print(re.sub("world", "China", s[-5:]))
运行结果:
hi world
orld
China
import re # 导入re模块
# Using special character classes.
# Each class is shown twice: re.sub() gives the replaced string, and
# re.subn() returns a (new_string, count) tuple whose [1] is the number of replacements.
s = "你好 WORLD2"
# \w: word characters (letters, digits, underscore, and here also the Chinese characters)
print("匹配字母、数字、下划线、汉字字符:" + re.sub(r"\w", "hi", s))
print("替换次数:" + str(re.subn(r"\w", "hi", s)[1]))
# \W: everything \w does not match
print("匹配非字母、数字、下划线、汉字的字符:" + re.sub(r"\W", "hi", s))
print("替换次数:" + str(re.subn(r"\W", "hi", s)[1]))
# \s: whitespace
print("匹配空白字符:" + re.sub(r"\s", "*", s))
print("替换次数:" + str(re.subn(r"\s", "*", s)[1]))
# \S: non-whitespace
print("匹配非空白字符:" + re.sub(r"\S", "hi", s))
print("替换次数:" + str(re.subn(r"\S", "hi", s)[1]))
# \d: digits
print("匹配数字:" + re.sub(r"\d", "2.0", s))
print("替换次数:" + str(re.subn(r"\d", "2.0", s)[1]))
# \D: non-digits
print("匹配非数字:" + re.sub(r"\D", "hi", s))
print("替换次数:" + str(re.subn(r"\D", "hi", s)[1]))
# . : any character (all nine characters of s here)
print("匹配任意字符:" + re.sub(r".", "hi", s))
print("替换次数:" + str(re.subn(r".", "hi", s)[1]))
运行结果:
匹配字母、数字、下划线、汉字字符:hihi hihihihihihi
替换次数:8
匹配非字母、数字、下划线、汉字的字符:你好hiWORLD2
替换次数:1
匹配空白字符:你好*WORLD2
替换次数:1
匹配非空白字符:hihi hihihihihihi
替换次数:8
匹配数字:你好 WORLD2.0
替换次数:1
匹配非数字:hihihihihihihihi2
替换次数:8
匹配任意字符:hihihihihihihihihi
替换次数:9
import re # 导入re模块
# Using quantifiers
# {n} repeats the preceding item exactly n times: the pattern accepts either
# 3 digits + "-" + 8 digits, or 4 digits + "-" + 7 digits
tell = "0791-1234567"
print(re.findall(r"\d{3}-\d{8}|\d{4}-\d{7}", tell))
tel2 = "010-12345678"
print(re.findall(r"\d{3}-\d{8}|\d{4}-\d{7}", tel2))
# ? makes the preceding item optional: an optional "(" before the first group
# of digits and an optional ")", space or "-" after it
tel3 = "(010)12345678"
print(re.findall(r"[\(]?\d{3}[\) -]?\d{8}|[\(]?\d{4}[\) -]?\d{7}", tel3))
运行结果:
['0791-1234567']
['010-12345678']
['(010)12345678']
import re # 导入re模块
# Precompile a pattern with compile()
s = "1abc23def45"
# the compiled pattern object offers the same operations as the module-level functions
p = re.compile(r"\d+")
print(p.findall(s))
# .pattern holds the source string the object was compiled from
print(p.pattern)
运行结果:
['1', '23', '45']
\d+
import re # 导入re模块
# Groups
# (abc) is group 1; \1 is a backreference that must repeat what group 1 matched
p = re.compile(r"(abc)\1")
m = p.match("abcabcabc")
# group(0) and group() are the whole match; group(1) is the first parenthesised group
print(m.group(0))
print(m.group(1))
print(m.group())
# (?P<one>...) names the group "one"; (?P=one) is a backreference to it by name
p = re.compile(r"(?P<one>abc)(?P=one)")
m = p.search("abcabcabc")
print(m.group("one"))
# groupdict() maps each group name to the text it matched
print(m.groupdict().keys())
print(m.groupdict().values())
# m.re is the pattern object that produced the match
print(m.re.pattern)
运行结果:
abcabc
abc
abcabc
abc
dict_keys(['one'])
dict_values(['abc'])
(?P<one>abc)(?P=one)
四、习题
习题:
- 存在字符串“I,love,python”,取出love并输出。
- 存在字符串“aabbccddee”,将dd替换为ff。
- 存在字符串“ab2b3n5n2n67mm4n2”,编程实现下面要求:
(1) 使用re取出字符串中所有的数字,并组合成一个新的字符串输出。
(2) 统计字符串中字母n出现的次数。
(3) 统计每个字符出现的次数,使用字典输出,如{'a':1,'b':2}.
答案:
- 代码如下:
import re
# Named "text" rather than "str" so the builtin str() is not shadowed
text = "I,love,python"
# The exercise asks to extract "love" and print it, so search for it
# (re.sub(r"love", "", ...) would delete it and leave "I,,python")
match = re.search(r"love", text)
print(match.group())  # prints: love
运行结果:
I,python
- 代码如下:
import re
# Named "text" rather than "str" so the builtin str() stays usable afterwards
text = "aabbccddee"
# replace "dd" with "ff"
result = re.sub(r"dd", "ff", text)
print(result)  # prints: aabbccffee
运行结果:
aabbccffee
- 代码如下:
import re
st = "ab2b3n5n2n67mm4n2"
# (1) Collect every digit with re and join them into one new string
#     (re.sub(r"\d", "", st) would do the opposite and keep only the letters)
digits = "".join(re.findall(r"\d", st))
print(digits)  # prints: 23526742
# (2) Count how often the letter n occurs
n_count = st.count("n")
print(n_count)  # prints: 4
# (3) Count every character and report the result as a dict.
#     Plain dict counting leaves st intact and does not treat characters as
#     regex patterns, so it also works for characters such as "." or "*".
char_counts = {}
for ch in st:
    char_counts[ch] = char_counts.get(ch, 0) + 1
print(char_counts)
运行结果:
abbnnnmmn
4
a : 1
b : 2
2 : 3
3 : 1
n : 4
5 : 1
6 : 1
7 : 1
m : 2
4 : 1