目录
首先,导入正则表达式模块:
import re
1.简单的匹配
# Find the first run of digits in a sentence.
text = "There are 123 apples"
pattern = r'\d+'  # one or more digits
match = re.search(pattern, text)
first_number = match.group(0)  # group 0 is the whole match
print(first_number)  # Output: 123
2.匹配所有出现
# Collect every non-overlapping match as a list of strings.
# Reuses `pattern` and `text` assigned in the preceding example.
matches = re.findall(pattern, text)
print(matches) # Output: ['123']
3.替换文本
# Replace every match of `pattern` in `text` with the literal '456'.
# Reuses `pattern` and `text` assigned in an earlier example.
new_text = re.sub(pattern, '456', text)
print(new_text) # Output: There are 456 apples
4.拆分字符串
# Split on runs of whitespace; reuses `text` assigned in an earlier example.
split_text = re.split(r'\s+', text)
print(split_text) # Output: ['There', 'are', '123', 'apples']
5.从字符串开头匹配
# re.match only tries to match at the very start of the string.
# Reuses `text` assigned in an earlier example.
match = re.match(r'There', text)
print(match.group()) # Output: There
6.使用组
# Capture the digits that precede the word "apples" in group 1.
# Reuses `text` assigned in an earlier example.
pattern = r'(\d+)\s+apples'
match = re.search(pattern, text)
print(match.group(1)) # Output: 123
7.非贪婪匹配
# Non-greedy quantifier: `.*?` stops at the first '>' instead of the last one.
html = "<div><span>Test</span></div>"
pattern = r'<.*?>'
match = re.search(pattern, html)
print(match.group(0))  # Output: <div>
8.匹配数字
# Extract every integer that appears in a sentence.
sentence = "There are 3 cats and 4 dogs"
pattern = r'\d+'
numbers = re.findall(pattern, sentence)
print(numbers)  # Output: ['3', '4']
9.匹配单词边界
# Word boundaries: match "cat" only as a whole word, not inside "catwalk".
text = "The cat is on the catwalk"
pattern = r'\bcat\b'
match = re.search(pattern, text)
print(match.group(0))  # Output: cat
10.忽略大小写匹配
# Case-insensitive search: the flag makes 'cat' match 'Cat' as well.
text = "The Cat is on the catwalk"
pattern = r'cat'
matches = re.findall(pattern, text, flags=re.IGNORECASE)
print(matches)  # Output: ['Cat', 'cat']
11.匹配多行文本
# With MULTILINE, '^' anchors at the start of every line, not just the string.
text = "cat\nDog\ncat"
pattern = r'^cat'
matches = re.findall(pattern, text, flags=re.MULTILINE)
print(matches)  # Output: ['cat', 'cat']
12.替换匹配项的函数
def replace_function(match):
    """Return the matched integer doubled, as a string.

    Used as the ``repl`` callback of ``re.sub``: it is called once per
    match and its return value replaces the matched text.
    """
    return str(int(match.group()) * 2)


# Double every number in the sentence by passing a function as the replacement.
pattern = r'\d+'
text = "There are 3 cats and 4 dogs"
new_text = re.sub(pattern, replace_function, text)
print(new_text)  # Output: There are 6 cats and 8 dogs
13.匹配任意字符
# '.' stands for any single character (except a newline by default).
text = "cat cut cot"
pattern = r'c.t'
matches = re.findall(pattern, text)
print(matches)  # Output: ['cat', 'cut', 'cot']
14.匹配可选项
# '?' makes the preceding 'u' optional, so both spellings match.
text = "color colour"
pattern = r'colou?r'
matches = re.findall(pattern, text)
print(matches)  # Output: ['color', 'colour']
15.匹配前导空白字符
# Match the whitespace at the very beginning of the string.
text = " leading spaces"
pattern = r'^\s+'
match = re.search(pattern, text)
print(match.group(0))  # Output: the leading whitespace
16.匹配结尾空白字符
# Match the whitespace at the very end of the string.
text = "trailing spaces "
pattern = r'\s+$'
match = re.search(pattern, text)
print(match.group(0))  # Output: the trailing whitespace
17.匹配开头和结尾
# '^' and '$' together require the whole string to be exactly "hello".
text = "hello"
pattern = r'^hello$'
match = re.match(pattern, text)
print(match is not None)  # Output: True
18.匹配单个字符
# A single '.' accepts any one character in the second position.
text = "hello hallo hxllo"
pattern = r'h.llo'
matches = re.findall(pattern, text)
print(matches)  # Output: ['hello', 'hallo', 'hxllo']
19.匹配重复出现的字符
# {2,4} repeats the preceding token between 2 and 4 times.
# The word boundaries make the pattern match whole numbers only; without
# them the 5-digit "12345" would contribute its first four digits ('1234')
# and the result would be ['123', '1234', '1234'].
pattern = r'\b\d{2,4}\b'
text = "123 12345 1234"
matches = re.findall(pattern, text)
print(matches)  # Output: ['123', '1234']
20.分组和命名组
# Named groups: (?P<name>...) lets a captured group be fetched by name.
text = "John Doe"
pattern = r'(?P<first_name>\w+) (?P<last_name>\w+)'
match = re.search(pattern, text)
# Passing several names to group() returns the captures as a tuple.
first_name, last_name = match.group('first_name', 'last_name')
print(first_name)  # Output: John
print(last_name)  # Output: Doe
21.正向先行断言(lookahead)匹配
# Positive lookahead: match "cat" only when "walk" follows; "walk" itself
# is not part of the match.
text = "The catwalk"
pattern = r'cat(?=walk)'
match = re.search(pattern, text)
print(match.group(0))  # Output: cat
22.负向先行断言(negative lookahead)匹配
# Negative lookahead: match "cat" only when it is NOT followed by "walk".
text = "The cat is on the catwalk"
pattern = r'cat(?!walk)'
matches = re.findall(pattern, text)
print(matches)  # Output: ['cat']
23.正向后行断言(lookbehind)匹配
# Positive lookbehind: match a word only when it is preceded by "The "
# (case-sensitive, so the lowercase "the " does not qualify).
text = "The cat in the hat"
pattern = r'(?<=The )\w+'
matches = re.findall(pattern, text)
print(matches)  # Output: ['cat']
24.负向后行断言(negative lookbehind)匹配
# Negative lookbehind: match each word that is NOT preceded by "The ".
# "cat" is the only word excluded; the leading "The" has nothing before it,
# so the lookbehind cannot match there and the word is kept.
pattern = r'(?<!The )\b\w+'
text = "The cat in the hat"
matches = re.findall(pattern, text)
print(matches)  # Output: ['The', 'in', 'the', 'hat']
25.查找所有字符
# A lone '.' matches one character at a time, so findall yields every character.
text = "Hello"
pattern = r'.'
matches = re.findall(pattern, text)
print(matches)  # Output: ['H', 'e', 'l', 'l', 'o']
26.非数字字符
# \D is the complement of \d: runs of non-digit characters.
text = "123abc456def"
pattern = r'\D+'
matches = re.findall(pattern, text)
print(matches)  # Output: ['abc', 'def']
27.非单词字符
# \W is the complement of \w: runs of punctuation and whitespace.
text = "Hello, world!"
pattern = r'\W+'
matches = re.findall(pattern, text)
print(matches)  # Output: [', ', '!']
28.非空白字符
# \S is the complement of \s: runs of non-whitespace characters.
text = "Hello world"
pattern = r'\S+'
matches = re.findall(pattern, text)
print(matches)  # Output: ['Hello', 'world']
29.查找字符串中的位置
# Report where the first occurrence starts and ends (end is exclusive).
text = "The cat is on the catwalk"
pattern = r'cat'
match = re.search(pattern, text)
start, end = match.span()  # same values as match.start() and match.end()
print(start)  # Output: 4
print(end)  # Output: 7
30.查找所有匹配的位置
# Report the start/end offsets of every occurrence, not just the first.
pattern = r'cat'
text = "The cat is on the catwalk"
matches = re.finditer(pattern, text)  # iterator of match objects
for match in matches:
    # Prints "4 7" and then "18 21"; end offsets are exclusive.
    print(match.start(), match.end())
31.匹配字符串中最后一组数字(借助先行断言)
# Find the last number: digits that are followed only by non-digits up to
# the end of the string.
text = "abc123def456"
pattern = r'\d+(?=\D*$)'
match = re.search(pattern, text)
print(match.group(0))  # Output: 456
32.匹配不在某字符集中的字符
# A leading '^' inside [...] negates the set: runs of characters other
# than 'a', 'b' or 'c'.
text = "abc123def"
pattern = r'[^abc]+'
matches = re.findall(pattern, text)
print(matches)  # Output: ['123def']
33.匹配 Unicode 字符
# \w also matches non-ASCII word characters such as CJK ideographs.
text = "你好,世界"
pattern = r'\w+'
matches = re.findall(pattern, text, flags=re.UNICODE)
print(matches)  # Output: ['你好', '世界']
34.多种分隔符拆分字符串
# Split on a semicolon, comma or whitespace, swallowing any spaces that follow.
text = "apple;banana,orange grape"
pattern = r'[;,\s]\s*'
split_text = re.split(pattern, text)
print(split_text)  # Output: ['apple', 'banana', 'orange', 'grape']
35.搜索并替换多次
# `count` caps the number of substitutions: only the first two matches change.
text = "The cat is on the catwalk. That cat is cute."
pattern = r'cat'
new_text = re.sub(pattern, 'dog', text, count=2)
print(new_text)  # Output: The dog is on the dogwalk. That cat is cute.
-
以上示例涵盖了 Python 正则表达式的许多常见用法,包括匹配、查找、替换、分组和断言等。请根据实际需求调整并应用这些示例代码。