- 知识点:
1、读取 docx 文件:先执行 `pip install python-docx` 安装依赖,再通过 `from docx import Document` 导入
2、将 docx 文件的内容读取成一个字符串,再用正则表达式匹配,即可统计括号出现的次数并提取括号里的内容
import re
from docx import Document
def read_word_to_string(doc_path):
    """Return the text of every paragraph in a .docx file as one string.

    Args:
        doc_path: Location of the .docx file; passed unchanged to
            ``docx.Document``.

    Returns:
        The paragraph texts in document order, each one followed by a
        single space (so a non-empty document ends with a trailing space).
        A document with no paragraphs yields ``''``.
    """
    doc = Document(doc_path)
    # Only ``doc.paragraphs`` is read here.
    # NOTE(review): text held elsewhere in the document (e.g. table cells)
    # is presumably not included -- confirm against the python-docx docs.
    return ''.join(paragraph.text + ' ' for paragraph in doc.paragraphs)
def count_parentheses(text):
    """Count the parenthesis pairs in *text*.

    Only the totals of ``(`` and ``)`` are compared; nesting and ordering
    are not checked, so ``")("`` counts as one pair.

    Args:
        text: The string to scan.

    Returns:
        The number of ``(`` characters when it equals the number of ``)``
        characters. When the two totals differ, a warning is printed to
        standard output and ``None`` is returned.
    """
    # Plain substring counting is enough for single literal characters.
    left_parentheses_count = text.count('(')
    right_parentheses_count = text.count(')')
    if left_parentheses_count != right_parentheses_count:
        print("Warning: Unbalanced parentheses detected!")
        return None
    return left_parentheses_count
def match_parentheses_content(text, limit=10):
    """Extract the word characters found inside parenthesized spans.

    Args:
        text: The string to search.
        limit: Upper bound applied by slicing the list of matches, so only
            the first ``limit`` spans are returned.

    Returns:
        A list with one string per matched ``(...)`` span, in order of
        appearance, with every run of non-word characters removed. An
        empty pair ``()`` contributes ``''``.
    """
    # Lazy ``.*?`` stops at the first closing parenthesis, so each span is
    # the shortest possible one; ``.`` does not cross line breaks.
    pattern = r'\((.*?)\)'
    matches = re.findall(pattern, text)[:limit]
    # Strip spaces, punctuation and other non-word characters from each span.
    return [re.sub(r'\W+', '', match) for match in matches]
# Script entry: read the document, normalize it, then report the results.
doc_path = r"D:\test\book\test.docx"
word_content = read_word_to_string(doc_path)
# Convert full-width parentheses (U+FF08 / U+FF09, common in Chinese text)
# to ASCII so they are counted and matched like ordinary parentheses.
# Escapes are used so the characters cannot be silently flattened to ASCII
# when the file is copied or re-encoded.
word_content = word_content.replace('\uff08', '(')
word_content = word_content.replace('\uff09', ')')
# Upper-case everything so the extracted letters are reported uniformly.
word_content = word_content.upper()
print("括号出现的次数: ", count_parentheses(word_content))
print("括号里的字母: ", match_parentheses_content(word_content, 10))
- 结果
![运行结果截图](https://i-blog.csdnimg.cn/blog_migrate/9242ba3f8746d7c37f8a011683e1fdb7.png)