1. 做科研的时候,下载的 Word 文档里全是别人的图,但我不想要这些图,应该如何解决?
- 每一页都有这样的图,总不可能一张一张手动删除吧。
2. 解决办法,直接看代码:
import zipfile
import os
import shutil
from xml.etree import ElementTree as ET
def _strip_drawings(xml_bytes):
    """Return *xml_bytes* re-serialized with every ``w:drawing`` element removed.

    The original namespace prefixes and declarations are kept so that the
    result stays as close as possible to what the producing application wrote.

    Note: prefixes are registered through ``ET.register_namespace``, which
    updates ElementTree's process-wide prefix registry.
    """
    import io  # local import: the file's import header does not provide io

    w_ns = "http://schemas.openxmlformats.org/wordprocessingml/2006/main"
    drawing_tag = "{%s}drawing" % w_ns

    # Parse while recording every prefix declaration; ElementTree discards
    # them otherwise and would invent ns0, ns1, ... when writing.
    parser = ET.iterparse(io.BytesIO(xml_bytes), events=("start-ns",))
    prefix_to_uri = {}
    for _event, (prefix, uri) in parser:
        prefix_to_uri[prefix] = uri
    root = parser.root

    registered = []
    for prefix, uri in prefix_to_uri.items():
        if not prefix:
            # A default namespace is left to ElementTree's own handling.
            continue
        try:
            ET.register_namespace(prefix, uri)
        except ValueError:
            # "ns<digits>" prefixes are reserved by ElementTree; it will
            # pick a prefix itself for this namespace.
            continue
        registered.append(prefix)

    # findall() returns a list, so removing children while looping is safe.
    for parent in root.findall(".//w:drawing/..", {"w": w_ns}):
        for child in list(parent):
            if child.tag == drawing_tag:
                parent.remove(child)

    # ElementTree only declares namespaces that occur in element or attribute
    # names.  Attribute *values* such as mc:Ignorable="w14 wp14" refer to
    # prefixes by name, so every originally declared but now unused namespace
    # is re-declared explicitly on the root element.
    used_uris = set()
    for element in root.iter():
        for name in (element.tag, *element.attrib):
            if isinstance(name, str) and name[:1] == "{":
                used_uris.add(name[1:name.index("}")])
    for prefix in registered:
        uri = prefix_to_uri[prefix]
        if uri not in used_uris:
            root.set("xmlns:" + prefix, uri)

    buffer = io.BytesIO()
    ET.ElementTree(root).write(buffer, encoding="UTF-8", xml_declaration=True)
    return buffer.getvalue()


def remove_images_from_docx(docx_path, output_path):
    """Write a copy of a .docx file with its drawings and media files removed.

    Every ``w:drawing`` element is removed from ``word/document.xml`` and all
    entries under ``word/media/`` are left out of the output archive.  All
    other parts are copied unchanged, in their original order.

    The archive is processed in memory: nothing is unpacked to disk, so no
    temporary directory is created or left behind, and ``output_path`` may be
    the same file as ``docx_path``.

    Args:
        docx_path: Path of the .docx file to read.
        output_path: Path of the .docx file to create (overwritten if present).

    Raises:
        FileNotFoundError: If ``docx_path`` does not exist or the archive
            contains no ``word/document.xml`` part.
        zipfile.BadZipFile: If ``docx_path`` is not a valid ZIP archive.
        xml.etree.ElementTree.ParseError: If ``word/document.xml`` is not
            well-formed XML.
    """
    document_part = "word/document.xml"
    media_prefix = "word/media/"

    # Read everything that will be kept before opening the output, so that
    # the input is fully consumed even when both paths name the same file.
    kept_parts = []
    with zipfile.ZipFile(docx_path, "r") as docx_in:
        if document_part not in docx_in.namelist():
            raise FileNotFoundError(
                "%s not found in %s" % (document_part, docx_path)
            )
        for info in docx_in.infolist():
            name = info.filename
            if name.endswith("/") or name.startswith(media_prefix):
                continue
            data = docx_in.read(info)
            if name == document_part:
                data = _strip_drawings(data)
            kept_parts.append((name, data))

    # NOTE: relationship parts (word/_rels/*.rels) are copied unchanged, so
    # they may still list targets under word/media/ that no longer exist.
    with zipfile.ZipFile(output_path, "w", zipfile.ZIP_DEFLATED) as docx_out:
        for name, data in kept_parts:
            docx_out.writestr(name, data)
if __name__ == "__main__":
    # Example invocation; replace both placeholder paths with real .docx paths.
    # Guarded so that importing this file does not trigger the conversion.
    remove_images_from_docx('path_to_your_input.docx', 'path_to_your_output.docx')
Ok,完美