十分钟搞定Python进行正则表达式操作的35个常见操作示例_python 匹配、搜索、替换的用法-CSDN博客

本文链接：https://blog.csdn.net/xyh2004/article/details/140405913

首先,导入正则表达式模块：

import re

1.简单的匹配

# Find the first run of digits anywhere in the text.
pattern = r'\d+'  # one or more digits
text = "There are 123 apples"
match = re.search(pattern, text)
print(match.group())  # Output: 123

2.匹配所有出现

# Collect every non-overlapping match as a list of strings
# (uses the `pattern` and `text` defined in example 1).
matches = re.findall(pattern, text)
print(matches)  # Output: ['123']

3.替换文本

# Replace every match of `pattern` in `text` with the literal '456'
# (uses the `pattern` and `text` defined in example 1).
new_text = re.sub(pattern, '456', text)
print(new_text)  # Output: There are 456 apples

4.拆分字符串

# Split `text` (from example 1) on runs of whitespace.
split_text = re.split(r'\s+', text)
print(split_text)  # Output: ['There', 'are', '123', 'apples']

5.从字符串开头匹配

# re.match only succeeds if the pattern matches at the very start of `text`
# (`text` is still the string from example 1).
match = re.match(r'There', text)
print(match.group())  # Output: There

6.使用组

# Capture the digits that precede "apples"; group(1) is the first parenthesised group.
# (`text` is still the string from example 1.)
pattern = r'(\d+)\s+apples'
match = re.search(pattern, text)
print(match.group(1))  # Output: 123

7.非贪婪匹配

# `.*?` is non-greedy: it stops at the first '>' instead of the last one.
pattern = r'<.*?>'
html = "<div><span>Test</span></div>"
match = re.search(pattern, html)
print(match.group())  # Output: <div>

8.匹配数字

# Extract every run of digits from a sentence.
pattern = r'\d+'
numbers = re.findall(pattern, "There are 3 cats and 4 dogs")
print(numbers)  # Output: ['3', '4']

9.匹配单词边界

# `\b` anchors on word boundaries, so "cat" inside "catwalk" is not matched.
pattern = r'\bcat\b'
text = "The cat is on the catwalk"
match = re.search(pattern, text)
print(match.group())  # Output: cat

10.忽略大小写匹配

# re.IGNORECASE makes the match case-insensitive; note that the "cat" in
# "catwalk" is also found because there is no word-boundary anchor.
pattern = r'cat'
text = "The Cat is on the catwalk"
matches = re.findall(pattern, text, re.IGNORECASE)
print(matches)  # Output: ['Cat', 'cat']

11.匹配多行文本

# With re.MULTILINE, `^` matches at the start of every line, not just the string.
pattern = r'^cat'
text = "cat\nDog\ncat"
matches = re.findall(pattern, text, re.MULTILINE)
print(matches)  # Output: ['cat', 'cat']

12.替换匹配项的函数

def replace_function(match):
    """Return the matched number doubled, rendered back as a string."""
    doubled = 2 * int(match.group(0))
    return f"{doubled}"


# re.sub calls replace_function once per match and splices in its return value.
pattern = r'\d+'
text = "There are 3 cats and 4 dogs"
new_text = re.sub(pattern, replace_function, text)
print(new_text)  # Output: There are 6 cats and 8 dogs

13.匹配任意字符

# `.` matches any single character (except a newline by default).
pattern = r'c.t'
text = "cat cut cot"
matches = re.findall(pattern, text)
print(matches)  # Output: ['cat', 'cut', 'cot']

14.匹配可选项

# `u?` makes the "u" optional, so both spellings match.
pattern = r'colou?r'
text = "color colour"
matches = re.findall(pattern, text)
print(matches)  # Output: ['color', 'colour']

15.匹配前导空白字符

# Match the run of whitespace at the start of the string.
pattern = r'^\s+'
text = "   leading spaces"
match = re.search(pattern, text)
print(match.group())  # Output: (the three leading spaces)

16.匹配结尾空白字符

# Match the run of whitespace at the end of the string.
pattern = r'\s+$'
text = "trailing spaces   "
match = re.search(pattern, text)
print(match.group())  # Output: (the three trailing spaces)

17.匹配开头和结尾

# `^...$` requires the pattern to cover the whole string.
pattern = r'^hello$'
text = "hello"
match = re.match(pattern, text)
print(bool(match))  # Output: True

18.匹配单个字符

# `.` stands in for exactly one character between "h" and "llo".
pattern = r'h.llo'
text = "hello hallo hxllo"
matches = re.findall(pattern, text)
print(matches)  # Output: ['hello', 'hallo', 'hxllo']

19.匹配重复出现的字符

# `{2,4}` matches between two and four consecutive digits (greedy).
pattern = r'\d{2,4}'
text = "123 12345 1234"
matches = re.findall(pattern, text)
# "12345" contributes '1234'; the leftover '5' is too short to match on its own.
# Use r'\b\d{2,4}\b' instead if whole numbers of 2-4 digits are wanted.
print(matches)  # Output: ['123', '1234', '1234']

20.分组和命名组

# `(?P<name>...)` creates a named group that can be fetched by name.
pattern = r'(?P<first_name>\w+) (?P<last_name>\w+)'
text = "John Doe"
match = re.search(pattern, text)
print(match.group('first_name'))  # Output: John
print(match.group('last_name'))   # Output: Doe

21.正向先行断言（lookahead）匹配

# Positive lookahead: match "cat" only when it is followed by "walk";
# "walk" itself is not part of the match.
pattern = r'cat(?=walk)'
text = "The catwalk"
match = re.search(pattern, text)
print(match.group())  # Output: cat

22.负向先行断言（negative lookahead）匹配

# Negative lookahead: match "cat" only when it is NOT followed by "walk",
# so only the standalone word is found.
pattern = r'cat(?!walk)'
text = "The cat is on the catwalk"
matches = re.findall(pattern, text)
print(matches)  # Output: ['cat']

23.正向后行断言（lookbehind）匹配

# Positive lookbehind: match a word only when it is preceded by "The ".
# The check is case-sensitive, so "hat" (preceded by "the ") is not matched.
pattern = r'(?<=The )\w+'
text = "The cat in the hat"
matches = re.findall(pattern, text)
print(matches)  # Output: ['cat']

24.负向后行断言（negative lookbehind）匹配

# Negative lookbehind: match a whole word only when it is NOT preceded by "The ".
# "cat" is the one word that is skipped; "The" itself matches, and "hat" matches
# because the lookbehind is case-sensitive ("the " is not "The ").
pattern = r'(?<!The )\b\w+'
text = "The cat in the hat"
matches = re.findall(pattern, text)
print(matches)  # Output: ['The', 'in', 'the', 'hat']

25.查找所有字符

# A bare `.` matches one character at a time, yielding each character in turn.
pattern = r'.'
text = "Hello"
matches = re.findall(pattern, text)
print(matches)  # Output: ['H', 'e', 'l', 'l', 'o']

26.非数字字符

# `\D` is the complement of `\d`: runs of non-digit characters.
pattern = r'\D+'
text = "123abc456def"
matches = re.findall(pattern, text)
print(matches)  # Output: ['abc', 'def']

27.非单词字符

# `\W` is the complement of `\w`: runs of non-word characters (punctuation, spaces).
pattern = r'\W+'
text = "Hello, world!"
matches = re.findall(pattern, text)
print(matches)  # Output: [', ', '!']

28.非空白字符

# `\S` is the complement of `\s`: runs of non-whitespace characters.
pattern = r'\S+'
text = "Hello world"
matches = re.findall(pattern, text)
print(matches)  # Output: ['Hello', 'world']

29.查找字符串中的位置

# start()/end() give the half-open index range [start, end) of the first match.
pattern = r'cat'
text = "The cat is on the catwalk"
match = re.search(pattern, text)
print(match.start())  # Output: 4
print(match.end())    # Output: 7

30.查找所有匹配的位置

# Iterate over every match object to report each match's [start, end) span.
pattern = r'cat'
text = "The cat is on the catwalk"
matches = re.finditer(pattern, text)
for match in matches:
    # The loop body must be indented; prints "4 7" and then "18 21".
    print(match.start(), match.end())

31.匹配字符串中最后一组数字

# Match a run of digits that is followed only by non-digits up to the end of
# the string, i.e. the last number in the text.
pattern = r'\d+(?=\D*$)'
text = "abc123def456"
match = re.search(pattern, text)
print(match.group())  # Output: 456

32.匹配不在某字符集中的字符

# `[^abc]` is a negated character class: any character other than a, b or c.
pattern = r'[^abc]+'
text = "abc123def"
matches = re.findall(pattern, text)
print(matches)  # Output: ['123def']

33.匹配 Unicode 字符

# `\w` matches Unicode word characters, so CJK text is split at the comma.
# NOTE: for str patterns in Python 3, Unicode matching is already the default,
# so passing re.UNICODE here is redundant (harmless, kept for illustration).
pattern = r'\w+'
text = "你好,世界"
matches = re.findall(pattern, text, re.UNICODE)
print(matches)  # Output: ['你好', '世界']

34.多种分隔符拆分字符串

# Split on a semicolon, comma or whitespace character, plus any whitespace after it.
pattern = r'[;,\s]\s*'
text = "apple;banana,orange grape"
split_text = re.split(pattern, text)
print(split_text)  # Output: ['apple', 'banana', 'orange', 'grape']

35.限制替换次数（count 参数）

# `count=2` limits the substitution to the first two matches; the third "cat" is kept.
pattern = r'cat'
text = "The cat is on the catwalk. That cat is cute."
new_text = re.sub(pattern, 'dog', text, count=2)
print(new_text)  # Output: The dog is on the dogwalk. That cat is cute.