为了批量处理大量文档的属性信息(如作者、编辑时间等),写了这段代码。
个人原因,懒得打包了;需要自行修改的地方也不多,一般只有文件夹路径。
如果有其他更好、更快捷的方法,欢迎交流。
import os
import random
from docx import Document
from datetime import datetime, timedelta
def random_date(start_date, end_date):
    """Return a random date between ``start_date`` and ``end_date``, inclusive.

    The result is ``start_date`` shifted forward by a whole number of days,
    so any time-of-day component of ``start_date`` is carried over unchanged.

    Args:
        start_date: Lower bound (a ``date`` or ``datetime``).
        end_date: Upper bound of the same type; must not precede
            ``start_date``.

    Returns:
        ``start_date`` plus a randomly chosen offset of 0..N days, where N is
        the number of whole days between the two bounds.

    Raises:
        ValueError: If ``end_date`` is earlier than ``start_date``.
    """
    # Fail with a clear message instead of randint's "empty range" error.
    if end_date < start_date:
        raise ValueError(
            f'end_date ({end_date}) must not be earlier than '
            f'start_date ({start_date})'
        )
    whole_days = (end_date - start_date).days
    return start_date + timedelta(days=random.randint(0, whole_days))
def process_docx(file_path):
    """Scrub identifying metadata from one .docx file and randomize its dates.

    The document's title, author and last-modified-by core properties are
    cleared. Its creation date is set to a random day within the past 365
    days, and its last-modified date to a random day between that creation
    date and now. The file is then saved in place, overwriting the original.

    Args:
        file_path: Path of the .docx file to rewrite.

    Returns:
        None. Failures are reported on stdout rather than raised, so that a
        batch run can continue with the remaining files.
    """
    # Local import: ``timezone`` is not among the file-level imports.
    from datetime import timezone

    try:
        doc = Document(file_path)
        props = doc.core_properties

        # Remove the identifying text fields.
        props.title = ''
        props.author = ''
        props.last_modified_by = ''

        # python-docx writes these timestamps with a literal "Z" (UTC) suffix
        # and performs no timezone conversion, so work in naive UTC; a naive
        # local time would be mislabelled as UTC and could land in the future.
        now_utc = datetime.now(timezone.utc).replace(tzinfo=None)

        # The modification date is drawn after the creation date so that it
        # can never precede it.
        created_at = random_date(now_utc - timedelta(days=365), now_utc)
        modified_at = random_date(created_at, now_utc)
        props.created = created_at
        props.modified = modified_at

        doc.save(file_path)
    except Exception as e:
        # Deliberate best-effort: one unreadable file must not abort the batch.
        print(f'Error processing {file_path}: {e}')
def process_folder(folder_path):
    """Run ``process_docx`` on every .docx file under ``folder_path``.

    The directory tree is walked recursively. The extension check is
    case-insensitive, so ``REPORT.DOCX`` is handled as well. Files whose name
    starts with ``~$`` are skipped: these are the lock files Word creates
    next to an open document, not real documents.

    Args:
        folder_path: Root directory to search.

    Returns:
        None.
    """
    for root, _dirs, files in os.walk(folder_path):
        for name in files:
            if name.startswith('~$'):
                continue
            if not name.lower().endswith('.docx'):
                continue
            process_docx(os.path.join(root, name))
# Root folder to process; edit this path to point at the documents to rewrite.
folder_path = 'D:\\20230724'
# NOTE(review): this call runs whenever the module is loaded, including on
# import — consider an ``if __name__ == "__main__":`` guard if that is unwanted.
process_folder(folder_path)