素材
1.文章模板文档
2.文章属性文档
3.文章标题文档
拼接文章原理
先从文章属性文档的每个工作表中随机抽取内容、拼接成随机段落,再用这些段落替换文章模板中的占位符(如 {首段}、{地区}、{标题}),从而生成一篇新文章并保存到本地。
技术名词解释
- Python:本文脚本使用的编程语言
- openpyxl:读写 Excel(.xlsx)文件的第三方库
- random:用于随机选取模板、段落和地区的标准库
- os:用于检查和创建保存目录的标准库
- re:正则表达式标准库,用于清理文件名中的非法字符
技术代码
import openpyxl
import random
import os
import re
"""文章模板路径"""
mubanlujing = [r"E:\workspace\python\合成文章\文章-多动症\文章-多动症\多动症模板1.txt",
r"E:\workspace\python\合成文章\文章-多动症\文章-多动症\多动症模板2.txt",
r"E:\workspace\python\合成文章\文章-多动症\文章-多动症\多动症模板3.txt"]
"""文章模板标题"""
mubanbiaoti = r"E:\workspace\python\合成文章\文章-多动症\文章-多动症\文章标题.xlsx"
#这是工作表支持多个
mubanbiaotisheets = ["医院"]
"""文章模板变量属性"""
mubanshuxing = r"E:\workspace\python\合成文章\文章-多动症\文章-多动症\多动症-素材.xlsx"
#这是属性的工作表,遍历多个属性
mubanshuxingsheets = ["首段","尾段","赞美","治疗","注意事项","症状","费用","病因","费用首段","介绍","表现","原因","医院"]
"""自定义地名字典,包含一些地区名称"""
#这是自定义的地区,用来替换标题中地区关键词,或者替换模板中的地区关键词
region_names = ["北京", "天津", "上海", "重庆","河北"]
"""自定义文章保存路径"""
wenzhangbaocun = r"E:\workspace\python\合成文章\文章-多动症\文章-多动症\content"
def run():
    """Ensure the output directory exists, then generate every article.

    Creates the directory named by the module-level ``wenzhangbaocun`` and
    hands over to ``bianlibiaoti()``, which walks the title workbook.
    """
    # exist_ok=True replaces the separate exists() check, which could race
    # with another process creating the directory in between.
    os.makedirs(wenzhangbaocun, exist_ok=True)
    bianlibiaoti()
def chulimoban(title):
    """Build one article for ``title`` from a random template and save it.

    A fresh set of random attribute paragraphs is loaded, one template is
    picked at random from ``mubanlujing``, every ``{attribute}`` placeholder
    is filled in, the title and region placeholders are substituted, and the
    result is written through ``baocunwenjian``.

    Args:
        title: The article title; it is also used as the output file name.

    Returns:
        ``False`` when ``title`` is empty, otherwise ``None``.

    Raises:
        SystemExit: With status 1 when no attribute data could be loaded or
            the chosen template file is empty.
    """
    if not title:
        return False

    shuxing_data = shuxing_int()
    if not shuxing_data:
        print("缺少属性文档")
        # A missing input is a failure, so exit with a non-zero status.
        raise SystemExit(1)

    mubanlujing2 = random.choice(mubanlujing)
    with open(mubanlujing2, 'r', encoding='utf-8') as file:
        content = file.read()
    if not content:
        print("缺少模板")
        raise SystemExit(1)

    # Fill the first occurrence of each placeholder and remember how many
    # occurrences of it are still left in the text.
    leftovers = []
    for key, value in shuxing_data.items():
        placeholder = "{" + str(key) + "}"
        content = content.replace(placeholder, value, 1)
        remaining = content.count(placeholder)
        if remaining > 0:
            leftovers.append(remaining)

    # Each extra pass fills one more occurrence per placeholder, using a
    # newly drawn random paragraph, so repeated placeholders get different
    # text.
    if leftovers:
        for _ in range(max(leftovers)):
            content = tihuanshuxing(content)

    content = content.replace("{title}", title)
    content = chulidiqu(title, content)
    content = content.replace("{标题}", title)
    baocunwenjian(title, content, mubanlujing2)
def baocunwenjian(title, content, mubanlujing2):
    """Write one generated article to ``<wenzhangbaocun>/<title>.txt``.

    The file holds the title on the first line followed by the content.

    Args:
        title: Article title; also the file name once characters that are
            not allowed in file names have been removed.
        content: The article body.
        mubanlujing2: Path of the template that was used; only reported in
            the success message.

    Raises:
        SystemExit: With status 1 when the file cannot be written.
    """
    # Remove the characters matched by the pattern so the title is usable
    # as a file name.
    file_name = re.sub(r'[\\/:"*?<>|]', '', title)
    # os.path.join replaces the hand-written "\" separator, which was an
    # invalid escape sequence inside the f-string.
    file_path = os.path.join(wenzhangbaocun, f"{file_name}.txt")
    try:
        # Templates are read as UTF-8, so write UTF-8 as well instead of
        # depending on the platform's default encoding.
        with open(file_path, 'w', encoding='utf-8') as file:
            file.write(f"{title}\n{content}")
    except (OSError, UnicodeError) as e:
        print(f"保存文件时出错:{e}")
        raise SystemExit(1) from e
    print(f"文件 '{file_path}',模板 '{mubanlujing2}' 已成功保存到目录 。")
def chulidiqu(title, content):
    """Fill the ``{地区}`` placeholder with the region found in ``title``.

    Args:
        title: Article title that is searched for a known region name.
        content: Article text that may contain ``{地区}`` placeholders.

    Returns:
        ``content`` with every ``{地区}`` replaced by the detected region,
        or ``content`` unchanged when the title names no known region.
    """
    region = huoqubiaotizhongdediqu(title)
    return content.replace("{地区}", region) if region else content
# Extract the region mentioned in a title so it can be substituted into the
# article text.
def huoqubiaotizhongdediqu(title, regions=None):
    """Return the longest known region name contained in ``title``.

    Args:
        title: The article title to search.
        regions: Candidate region names. Defaults to the module-level
            ``region_names`` list.

    Returns:
        The matching region name, preferring longer names over shorter
        ones, or ``''`` when no candidate occurs in ``title``.
    """
    if regions is None:
        regions = region_names
    # sorted() works on a copy, so the shared list is no longer reordered
    # on every call; longest names come first so they win over shorter
    # names that are substrings of them.
    by_length = sorted(regions, key=len, reverse=True)
    return next((region for region in by_length if region in title), '')
def tihuanshuxing(content):
    """Fill one more occurrence of every attribute placeholder.

    A new set of random paragraphs is drawn through ``shuxing_int()`` and,
    for each attribute, only the first remaining ``{attribute}`` placeholder
    in ``content`` is replaced.

    Args:
        content: Article text that may still contain placeholders.

    Returns:
        The text after this replacement pass.
    """
    for name, paragraph in shuxing_int().items():
        content = content.replace(f"{{{name}}}", paragraph, 1)
    return content
def bianlibiaoti():
    """Generate one article for every title in the title workbook.

    Opens the workbook named by ``mubanbiaoti``, visits every worksheet
    listed in ``mubanbiaotisheets`` and, for each non-empty cell below the
    first row, prepares the title with ``chulibiaoti`` and builds the
    article with ``chulimoban``.
    """
    biaotiwork = openpyxl.load_workbook(mubanbiaoti)
    for sheet_name in mubanbiaotisheets:
        sheet = biaotiwork[sheet_name]
        # min_row=2 skips the first row of *every* worksheet. The previous
        # single flag was only reset once, so the first row of the second
        # and later sheets was processed as if it were a title.
        for row in sheet.iter_rows(min_row=2, values_only=True):
            for cell in row:
                if cell:
                    chulimoban(chulibiaoti(cell))
def chulibiaoti(title, regions=None):
    """Replace the ``地区`` keyword in a title with a random region name.

    Args:
        title: Raw title value as read from the worksheet. Non-string
            values (for example numbers) are converted with ``str``.
        regions: Candidate region names. Defaults to the module-level
            ``region_names`` list.

    Returns:
        The title as a string, with every ``地区`` replaced by one randomly
        chosen region. The title is returned without replacement when it
        does not contain ``地区``, when there are no candidate regions, or
        when the chosen region is an empty string.
    """
    if regions is None:
        regions = region_names
    # Worksheet cells may hold non-text values; later steps need a string.
    if not isinstance(title, str):
        title = str(title)
    # Guard against an empty list: random.choice would raise IndexError.
    if "地区" in title and regions:
        chengshi = random.choice(regions)
        if chengshi:
            title = title.replace("地区", chengshi)
    return title
def shuxing_int():
    """Draw one random paragraph per attribute worksheet.

    Opens the workbook named by ``mubanshuxing``. For every worksheet listed
    in ``mubanshuxingsheets`` one non-empty cell is picked at random from
    each column (rows 2 and below), and the picks are concatenated in
    column order.

    Returns:
        A dict mapping worksheet name to the concatenated random text.
        Worksheets with no usable content below the first row are left out.
    """
    shuxing_data = {}
    shuxingwork = openpyxl.load_workbook(mubanshuxing)
    for shuxing in mubanshuxingsheets:
        sheet = shuxingwork[shuxing]
        # A sheet with at most one row has no content. Skip just this sheet;
        # the previous `break` silently dropped all remaining sheets too.
        if sheet.max_row <= 1:
            continue

        parts = []
        for column in sheet.iter_cols(min_row=2, values_only=True):
            # Choosing among the non-empty cells is equivalent to retrying
            # random rows until one is non-empty, but it cannot loop forever
            # when a column holds no data at all.
            candidates = [value for value in column if value]
            if candidates:
                # str() keeps join() from failing on numeric cell values.
                parts.append(str(random.choice(candidates)))

        if parts:
            shuxing_data[shuxing] = "".join(parts)
    return shuxing_data
# Script entry point: generate the articles only when this file is executed
# directly, not when it is imported.
if __name__ == "__main__":
    run()
小结
以上代码可以优化成:每个文章工作表的标题,可以使用对应的文章模板。
感谢 ChatGPT 的帮助。