import os
import docx
import sys
# Extensions (leading dot included) of the files that get_all_path collects.
file_type = [
    '.json', '.py', '.txt',
    '.docx', '.doc', '.md',
    '.html', '.css', '.js',
    '.data', '.java',
]
def get_all_path(rootDir, ):
    """Recursively collect the paths of mergeable files under ``rootDir``.

    A file is kept when its extension, as reported by ``os.path.splitext``,
    is listed in the module-level ``file_type`` collection (the comparison is
    case-sensitive). Each sub-directory is descended into exactly once.

    Args:
        rootDir: Directory to scan.

    Returns:
        A list of paths, each built with ``os.path.join`` starting from
        ``rootDir``. The entries of every directory are visited in sorted
        name order, so the result is deterministic.

    Raises:
        OSError: If ``rootDir`` or one of its sub-directories cannot be listed.
    """
    path_list = []
    # os.listdir returns names in arbitrary order; sort for a stable merge order.
    for name in sorted(os.listdir(rootDir)):
        com_path = os.path.join(rootDir, name)
        if os.path.isfile(com_path):
            # splitext yields "" when there is no extension, so a file that is
            # literally named "txt" or "py" is not mistaken for a match.
            if os.path.splitext(name)[1] in file_type:
                path_list.append(com_path)
        elif os.path.isdir(com_path):
            # Recurse once and keep the result; a second, discarded recursive
            # call would double the work at every nesting level.
            path_list.extend(get_all_path(com_path))
    return path_list
def readFile(path, fw):
    """Copy the non-blank lines of one source file into ``fw``.

    A file whose extension is exactly ``.docx`` is opened with
    ``docx.Document`` and its paragraphs are copied; every other file is read
    as UTF-8 text. Trailing whitespace is stripped from each line/paragraph,
    blank ones are skipped, and every kept one is written followed by a
    single newline character.

    Args:
        path: Path of the file to read.
        fw: Destination object; only its ``write(str)`` method is used.

    Raises:
        OSError: If ``path`` cannot be opened.
        UnicodeDecodeError: If a non-.docx file is not valid UTF-8.
    """
    # NOTE(review): ".doc" is listed in file_type but is handled as plain text
    # here; a binary Word document will most likely fail to decode - confirm
    # whether ".doc" should be supported at all.
    if os.path.splitext(path)[1] != ".docx":
        # "utf-8-sig" decodes ordinary UTF-8 as well, but drops a leading BOM
        # so it does not end up in the middle of the merged output. The
        # context manager closes the handle even if writing fails.
        with open(path, "r", encoding="utf-8-sig") as f:
            for line in f:
                line = line.rstrip()
                if line:
                    fw.write(line + "\n")
    else:
        document = docx.Document(path)
        for para in document.paragraphs:
            text = para.text.rstrip()
            if text:
                fw.write(text + "\n")
class _DocxParagraphWriter:
    """Adapt a ``docx`` document to the ``write(str)`` interface readFile uses."""

    def __init__(self, document):
        self._document = document

    def write(self, text):
        # readFile ends every chunk with "\n"; a paragraph is already a line
        # of its own, so drop that terminator instead of storing it.
        self._document.add_paragraph(text.rstrip("\n"))


def docx_new(rootDir, newFile):
    """Merge every mergeable file under ``rootDir`` into a new .docx file.

    Each non-blank line/paragraph that ``readFile`` emits becomes one
    paragraph of the new document, which is then saved to ``newFile``.

    Args:
        rootDir: Directory whose files (found via ``get_all_path``) are merged.
        newFile: Path the resulting document is saved to.
    """
    doc_new = docx.Document()
    # A Document object has no write() method, so handing it to readFile
    # directly fails; route the writes through an adapter instead.
    writer = _DocxParagraphWriter(doc_new)
    for path in get_all_path(rootDir):
        readFile(path, writer)
    doc_new.save(newFile)
def txt_new(rootDir, newFile):
    """Append the contents of every mergeable file under ``rootDir`` to ``newFile``.

    ``newFile`` is opened in append mode as UTF-8, so running the merge again
    adds to the existing content. The path of each merged file is printed
    before it is read.

    Args:
        rootDir: Directory whose files (found via ``get_all_path``) are merged.
        newFile: Path of the text file that receives the merged lines.
    """
    with open(newFile, 'a', encoding='utf8') as fw:
        # The output file exists by the time the tree is listed. If it lives
        # under rootDir with a mergeable extension it would be read while
        # being appended to, duplicating (or endlessly growing) its content.
        target = os.path.normcase(os.path.realpath(newFile))
        for path in get_all_path(rootDir):
            if os.path.normcase(os.path.realpath(path)) == target:
                continue
            print(path)
            readFile(path, fw)
if __name__ == "__main__":
    # Two positional arguments are required: the directory to merge and the
    # output file. Fail with a usage hint instead of a bare IndexError.
    if len(sys.argv) < 3:
        sys.exit("usage: python {} <root_dir> <new_file>".format(sys.argv[0]))
    rootDir = sys.argv[1]
    newFile = sys.argv[2]
    txt_new(rootDir, newFile)
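# Usage example (Python 3.6.6, Windows console):
#   E:\PycharmProject\多格式文件合并> python merge.py <absolute path of the directory containing the files to merge> <absolute path of the new merged file>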