# 1、安装tika包(下面这条命令在终端/shell中执行,不是Python代码)
pip install tika
# 2、加载读取文件
from tika import parser
def read_doc(file_path):
    """Extract the text content of a document via tika's parser.

    Args:
        file_path: Path of the file to parse (for example a ``.doc`` file).
            It is passed unchanged to ``parser.from_file``.

    Returns:
        The value stored under the ``'content'`` key of the parse result,
        or ``None`` when the result carries no such key.
    """
    parsed = parser.from_file(file_path)
    # .get() instead of indexing: a parse result without a 'content' entry
    # then yields None rather than raising KeyError.
    return parsed.get('content')
# Example: read 4344315995.doc and print the text returned by read_doc.
file_path = '4344315995.doc'
extracted_text = read_doc(file_path)
print(extracted_text)
PS:
1、系统版本: Ubuntu 18.04.1 LTS (GNU/Linux 4.15.0-193-generic x86_64)
2、初次执行:tika 会自动下载 Tika 服务端 jar 包并在本地启动该服务(需要本机已安装 Java),因此耗时会稍微长一些