使用 python-docx 库批量读取当前目录下的 docx 文件时,运行了如下代码:
import os
from docx import Document
# Collect the paragraphs of every .docx file in the current working directory.
# os.listdir() returns bare entry names (no directory prefix), so each name
# only resolves when the process's working directory is the listed directory.
filnames = os.listdir('./')
# Keep the entries whose last dot-separated component is exactly "docx".
docx_files = [f for f in filnames if f.split('.')[-1] == "docx"]
print(docx_files)

# Accumulates every item of doc.paragraphs across all opened documents.
docxs = []
for docx_file in docx_files:
    # Document() raises PackageNotFoundError when no package is found at this path.
    doc = Document(docx_file)
    for x in doc.paragraphs:
        docxs.append(x)
print(docxs)
运行后出现了如下错误:
--------------------------------------------------------------------------
PackageNotFoundError Traceback (most recent call last)
<ipython-input-9-0acb64b59e38> in <module>
9
10 for docx_file in docx_files:
---> 11 doc = Document(docx_file)
12 for x in doc.paragraphs:
13 docxs.append(x)
~/.local/lib/python3.6/site-packages/docx/api.py in Document(docx)
23 """
24 docx = _default_docx_path() if docx is None else docx
---> 25 document_part = Package.open(docx).main_document_part
26 if document_part.content_type != CT.WML_DOCUMENT_MAIN:
27 tmpl = "file '%s' is not a Word file, content type is '%s'"
~/.local/lib/python3.6/site-packages/docx/opc/package.py in open(cls, pkg_file)
126 *pkg_file*.
127 """
--> 128 pkg_reader = PackageReader.from_file(pkg_file)
129 package = cls()
130 Unmarshaller.unmarshal(pkg_reader, package, PartFactory)
~/.local/lib/python3.6/site-packages/docx/opc/pkgreader.py in from_file(pkg_file)
30 Return a |PackageReader| instance loaded with contents of *pkg_file*.
31 """
---> 32 phys_reader = PhysPkgReader(pkg_file)
33 content_types = _ContentTypeMap.from_xml(phys_reader.content_types_xml)
34 pkg_srels = PackageReader._srels_for(phys_reader, PACKAGE_URI)
~/.local/lib/python3.6/site-packages/docx/opc/phys_pkg.py in __new__(cls, pkg_file)
29 else:
30 raise PackageNotFoundError(
---> 31 "Package not found at '%s'" % pkg_file
32 )
33 else: # assume it's a stream and pass it to Zip reader to sort out
PackageNotFoundError: Package not found at '
解决此问题最好的方法是使用 os.walk(path) 来遍历目录。
os.walk() 的使用方法可参考《Python os.walk() 方法》。同时,将 Python 的工作目录切换到 docx 文件所在的目录下,这样就不会出现上述错误。