背景：需要分析一批高层次人才简历数据。
环境
python 3.6
pip install python-docx
案例代码
def parse(path="xx简历.docx"):
    """Extract the embedded images of a .docx file and save them beside it.

    Each image is written to ``<path without extension>_<image name>``,
    for example ``xx简历_image1.jpeg``. Using the image's own name keeps its
    real extension (png, jpeg, ...) and prevents documents with several
    images from overwriting a single output file.

    Args:
        path: Path of the Word document to read. Defaults to the sample
            file name used by the original snippet.
    """
    # Function-scope imports keep this snippet self-contained.
    import os

    import docx

    doc = docx.Document(path)
    # splitext strips only the final extension, so dots elsewhere in the
    # path (e.g. "./data/v1.2/cv.docx") are preserved.
    base = os.path.splitext(path)[0]

    for rel in doc.part.rels.values():
        # External relationships (hyperlinks, linked pictures) have no part
        # inside the package, so there is no blob to save.
        if rel.is_external:
            continue
        if "image" not in rel.target_ref:  # e.g. "media/image1.jpeg"
            continue
        print(rel.target_ref)
        # target_ref is "/"-separated; its last segment is the file name.
        # rsplit also copes with a reference that contains no "/".
        img_name = rel.target_ref.rsplit("/", 1)[-1]
        img_path = f"{base}_{img_name}"
        with open(img_path, "wb") as f:
            f.write(rel.target_part.blob)