import docx
import re
from docx import Document
import string
# Print every whitespace-separated token of a .docx file, one per line, after
# removing all characters other than ASCII letters, ASCII digits and code
# points in the range U+4E00-U+9FA5.
path = '/Users/ya/Desktop/11246441.docx'
document = Document(path)

# Negated character class: matches anything that is NOT kept in the output.
# Compiled once here because the pattern never changes between tokens.
rule = re.compile("[^a-zA-Z0-9\u4e00-\u9fa5]")

# NOTE(review): the original file had lost its indentation; this assumes the
# print belongs inside the per-token loop (one output line per token) -- confirm.
for paragraph in document.paragraphs:
    # strip().split() yields an empty list for a blank paragraph, so blank
    # paragraphs are skipped without an explicit length check.
    for token in paragraph.text.strip().split():
        # A token made up solely of removed characters prints as an empty line.
        print(rule.sub('', token))