# 安装Python扩展库python-docx,然后读取一个Word文档中所有段落的文本,查找并输出其中所有AABB形式的词语,例如踏踏实实、密密麻麻、简简单单、时时刻刻。
import docx
import re
# Matches one AABB reduplicated word: a CJK character repeated twice,
# followed by a *different* CJK character repeated twice.  The negative
# lookahead (?!\1) rejects AAAA runs, which would otherwise be reported as
# AABB and could swallow a real AABB word that overlaps them.
# Compiled once at module level so it is not rebuilt for every paragraph.
_AABB_PATTERN = re.compile(r'([\u4e00-\u9fa5])\1(?!\1)([\u4e00-\u9fa5])\2')


def find_aabb_words(text: str) -> list:
    """Return every AABB-form word found in *text*.

    Args:
        text: The string to scan.

    Returns:
        A list of the matched four-character words (whole matches only, not
        the individual capture groups), in order of appearance.  Duplicates
        are kept.  The list is empty when nothing matches.
    """
    # finditer + group(0) yields the full word; re.findall would instead
    # return a tuple of capture groups for each match.
    return [match.group(0) for match in _AABB_PATTERN.finditer(text)]


def main(path: str = "test.docx") -> None:
    """Print all AABB-form words found in the paragraphs of a Word document.

    Args:
        path: Path of the .docx file to read; defaults to "test.docx".
    """
    document = docx.Document(path)
    alist = []
    # Iterate the paragraphs directly and scan each one exactly once.
    for paragraph in document.paragraphs:
        alist.extend(find_aabb_words(paragraph.text))
    print(alist)


if __name__ == "__main__":
    main()