# coding=gbk
import os
from docx import Document
from docx.shared import Pt
import re
# Root directory that is scanned for Java source files.
# In a raw string every backslash is literal, so each path separator is
# written once (r'D:\\x' would contain two backslashes per separator).
url = r'D:\idea_workspace\landpatrol-master\app\src'
# Module-level list that accumulates the paths of the files to export.
fileList = []
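# Earlier recursive implementation of getAllFile, kept for reference only
# (the os.walk based definition below is the one in use):
# def getAllFile(path, fileList):
#     dirList = []  # sub-directories found at this level
#     files = os.listdir(path)
#     for f in files:
#         if(os.path.isdir(path + '/' + f)):
#             dirList.append(path + '/' + f)
#         if(os.path.isfile(path + '/' + f)):
#             fileList.append(path + '/' + f)
#     for dir in dirList:
#         getAllFile(dir, fileList)  # recurse so files of every sub-folder end up in fileList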
def getAllFile(path, fileList):
    """Append the path of every ``.java`` file under *path* to *fileList*.

    The directory tree rooted at *path* is walked top-down with ``os.walk``.
    Directories and file names are visited in sorted order so that repeated
    runs over the same tree produce the same list (``os.walk`` itself makes
    no ordering promise).

    Args:
        path: Root directory to scan. A missing directory yields no entries
            because ``os.walk`` ignores listing errors by default.
        fileList: List that is extended in place with the matching paths,
            each built with ``os.path.join(root, name)``.

    Returns:
        None. The result is delivered through *fileList*.
    """
    for root, dirs, files in os.walk(path):
        # Sorting in place steers the order in which os.walk descends.
        dirs.sort()
        fileList.extend(
            os.path.join(root, name)
            for name in sorted(files)
            # Keep only Java sources (case-sensitive suffix match).
            if name.endswith('.java')
        )
def _isCodeLine(line):
    """Return True when *line* should be copied into the document.

    Blank or whitespace-only lines are rejected, and so are lines whose first
    non-blank characters are ``//``, ``/*`` or ``*`` (line comments, block
    comment openers and the usual block-comment continuation/closing lines).
    Only the start of the line is inspected, so code such as ``a * b``, a
    string containing ``//`` or a statement with a trailing comment is kept.
    Block-comment lines that do not start with ``*`` are not detected.
    """
    stripped = line.strip()
    if not stripped:
        return False
    return not stripped.startswith(('//', '/*', '*'))


def saveDocFile(save_file=r'D:\text.doc', max_lines=3050):
    """Write the code lines of the files in ``fileList`` to a Word document.

    Every path in the module-level ``fileList`` is read as UTF-8 text. Lines
    accepted by ``_isCodeLine`` are appended as runs to a single paragraph
    until *max_lines* lines have been written, then the document is saved and
    ``'all done'`` is printed.

    Args:
        save_file: Destination path handed to ``Document.save``.
            NOTE(review): python-docx writes the .docx (OOXML) format; the
            ``.doc`` extension is kept from the original script - confirm
            that the target application opens it as intended.
        max_lines: Maximum number of code lines to export. The default of
            3050 comes from the original script, whose note says it keeps
            the print-out from going much beyond 60 pages.

    Raises:
        OSError: If a listed file cannot be opened or the document cannot be
            written.
        UnicodeDecodeError: If a listed file is not valid UTF-8.
    """
    from docx.enum.text import WD_LINE_SPACING

    doc = Document()
    # All exported code goes into this single paragraph.
    p = doc.add_paragraph('')

    # 'Normal' is the body-text style: 8 pt Times New Roman.
    normal = doc.styles['Normal']
    normal.font.name = 'Times New Roman'
    normal.font.size = Pt(8)

    # Fixed line height of 12.9 pt; per the original author's note this gives
    # about 50 lines of code per page. The rule is set on the paragraph
    # format - assigning it to the Paragraph object itself has no effect.
    paragraph_format = normal.paragraph_format
    paragraph_format.line_spacing = Pt(12.9)
    paragraph_format.line_spacing_rule = WD_LINE_SPACING.EXACTLY

    codeNum = 0  # number of lines written so far
    for f in fileList:
        if codeNum >= max_lines:
            break
        with open(f, encoding='utf-8') as file:
            for line in file:
                if not _isCodeLine(line):
                    continue
                # A last line without a newline would otherwise be glued to
                # the first line of the next file.
                p.add_run(line if line.endswith('\n') else line + '\n')
                codeNum += 1
                if codeNum >= max_lines:
                    break

    doc.save(save_file)
    print('all done')
if __name__ == '__main__':
    # Script entry point: gather the .java paths under `url` into `fileList`,
    # then export their code lines to the Word document.
    getAllFile(url, fileList)
    saveDocFile()
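# Source article title: "Python: read all files under a folder and write them into a document"
# (web-page footer: "Latest recommended article, published 2024-07-20 17:12:48")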