使用 Python，根据素材文档和文章模板生成随机文章并保存到本地

最新推荐文章于 2024-10-02 10:53:34 发布

10年php菜鸟

最新推荐文章于 2024-10-02 10:53:34 发布

阅读量122

点赞数

文章标签： python 前端

本文链接：https://blog.csdn.net/wei042/article/details/133681946

版权

素材

1.文章模板文档
2.文章属性文档
3.文章标题文档

拼接文章原理

从文章属性文档的每个工作表中随机抽取内容拼成段落，再用这些段落替换文章模板中对应的占位符（例如 {首段}、{标题}、{地区}），从而得到一篇新的文章，并保存到本地。

技术名词解释

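Python：本文脚本所使用的编程语言。
openpyxl：读写 Excel（.xlsx）文件的第三方库，这里用来读取标题文档和属性文档。
random：标准库中的随机模块，用来随机挑选模板、地区和素材单元格。
os：标准库模块，用来检查并创建文章保存目录。
re：标准库中的正则表达式模块，用来去掉标题里不能用作文件名的字符。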

技术代码

import openpyxl
import random
import os
import re

"""文章模板路径"""
# Candidate article templates; one of them is picked at random per article.
mubanlujing = [
    r"E:\workspace\python\合成文章\文章-多动症\文章-多动症\多动症模板1.txt",
    r"E:\workspace\python\合成文章\文章-多动症\文章-多动症\多动症模板2.txt",
    r"E:\workspace\python\合成文章\文章-多动症\文章-多动症\多动症模板3.txt",
]

# Workbook holding the article titles, plus the worksheets to read from it
# (several worksheets may be listed).
mubanbiaoti = r"E:\workspace\python\合成文章\文章-多动症\文章-多动症\文章标题.xlsx"
mubanbiaotisheets = ["医院"]

# Workbook holding the replacement material. Every worksheet listed here is
# read, and its name doubles as the "{name}" placeholder in the templates.
mubanshuxing = r"E:\workspace\python\合成文章\文章-多动症\文章-多动症\多动症-素材.xlsx"
mubanshuxingsheets = [
    "首段",
    "尾段",
    "赞美",
    "治疗",
    "注意事项",
    "症状",
    "费用",
    "病因",
    "费用首段",
    "介绍",
    "表现",
    "原因",
    "医院",
]

# Custom region names, used to replace the region keyword in titles and to
# fill the region placeholder in templates.
region_names = ["北京", "天津", "上海", "重庆", "河北"]

# Directory the generated articles are saved to.
wenzhangbaocun = r"E:\workspace\python\合成文章\文章-多动症\文章-多动症\content"

def run():
    """Script entry point: prepare the output directory, then build all articles.

    Ensures the directory named by ``wenzhangbaocun`` exists and then calls
    ``bianlibiaoti()`` to process the title workbook.

    Raises:
        FileExistsError: if ``wenzhangbaocun`` exists but is not a directory.
    """
    # exist_ok=True replaces the separate os.path.exists() check, which left a
    # window between the check and the creation. The name is only read here,
    # so no ``global`` declaration is needed.
    os.makedirs(wenzhangbaocun, exist_ok=True)

    bianlibiaoti()

def chulimoban(title):
    """Build one article for *title* from a randomly chosen template and save it.

    Placeholders of the form ``{sheet name}`` are replaced with random
    material from ``shuxing_int()``; ``{title}`` and ``{标题}`` are replaced
    with the title, and ``{地区}`` is handled by ``chulidiqu()``. The result
    is written by ``baocunwenjian()``.

    Args:
        title: Article title; also inserted into the article body.

    Returns:
        False when *title* is empty, otherwise None.

    Raises:
        SystemExit: with status 1 when no attribute data is available or the
            chosen template is empty.
    """
    if not title:
        return False

    shuxing_data = shuxing_int()
    if not shuxing_data:
        print("缺少属性文档")
        # A failure must not exit with status 0, and the bare ``exit`` helper
        # is only present when the ``site`` module is loaded.
        raise SystemExit(1)

    mubanlujing2 = random.choice(mubanlujing)
    with open(mubanlujing2, 'r', encoding='utf-8') as file:
        content = file.read()
    if not content:
        print("缺少模板")
        raise SystemExit(1)

    # First pass: fill the first occurrence of each placeholder and record how
    # many occurrences of it are still left in the template.
    remaining = []
    for key, value in shuxing_data.items():
        placeholder = "{" + str(key) + "}"
        content = content.replace(placeholder, value, 1)
        count = content.count(placeholder)
        if count > 0:
            remaining.append(count)

    # Each extra pass fills one more occurrence per placeholder with freshly
    # drawn material, so repeated placeholders get different text.
    if remaining:
        for _ in range(max(remaining)):
            content = tihuanshuxing(content)

    content = content.replace("{title}", title)
    content = chulidiqu(title, content)
    content = content.replace("{标题}", title)
    baocunwenjian(title, content, mubanlujing2)


def baocunwenjian(title, content, mubanlujing2, save_dir=None):
    """Write an article to ``<save_dir>/<sanitised title>.txt`` as UTF-8.

    The file contains the title on the first line followed by *content*.

    Args:
        title: Article title; characters that are illegal in Windows file
            names are stripped to form the file name.
        content: Article body.
        mubanlujing2: Path of the template that was used (only reported in
            the success message).
        save_dir: Target directory. Defaults to the module-level
            ``wenzhangbaocun`` when omitted.

    Raises:
        SystemExit: with status 1 when the file cannot be written.
    """
    if save_dir is None:
        save_dir = wenzhangbaocun

    file_name = re.sub(r'[\\/:"*?<>|]', '', title)
    # os.path.join replaces the f-string that contained the invalid escape
    # sequence "\{".
    file_path = os.path.join(save_dir, f"{file_name}.txt")
    try:
        # The template is read as UTF-8, so write UTF-8 explicitly instead of
        # relying on the platform default encoding.
        with open(file_path, 'w', encoding='utf-8') as file:
            file.write(f"{title}\n{content}")
    except (OSError, UnicodeError) as e:
        print(f"保存文件时出错：{e}")
        # Exit with a non-zero status so the failure is visible to the caller.
        raise SystemExit(1)
    else:
        print(f"文件 '{file_path}',模板 '{mubanlujing2}' 已成功保存到目录 。")

def chulidiqu(title, content):
    """Fill the ``{地区}`` placeholder in *content* with the region found in *title*.

    The region is looked up with ``huoqubiaotizhongdediqu(title)``. When no
    region is found, *content* is returned unchanged.
    """
    region = huoqubiaotizhongdediqu(title)
    return content.replace("{地区}", region) if region else content

# Extract the region mentioned in a title so it can be substituted into the article.
def huoqubiaotizhongdediqu(title, regions=None):
    """Return the longest known region name that occurs in *title*.

    Args:
        title: Title text to search.
        regions: Candidate region names. Defaults to the module-level
            ``region_names`` when omitted.

    Returns:
        The longest matching region name (the earliest one in *regions* when
        several share that length), or ``''`` when nothing matches.
    """
    if regions is None:
        regions = region_names

    # Pick the longest match directly instead of sorting the shared list in
    # place, which used to reorder the global ``region_names`` on every call.
    # max() keeps the first of equally long matches, the same result the
    # stable longest-first sort produced.
    return max((name for name in regions if name in title), key=len, default='')


def tihuanshuxing(content):
    """Replace one occurrence of every attribute placeholder in *content*.

    Fresh material is drawn with ``shuxing_int()``; for each entry the first
    remaining ``{name}`` placeholder is replaced by that entry's text. The
    updated text is returned.
    """
    for name, text in shuxing_int().items():
        placeholder = f"{{{name!s}}}"
        content = content.replace(placeholder, text, 1)
    return content


def bianlibiaoti():
    """Generate one article for every non-empty title cell in the title workbook.

    Opens the workbook at ``mubanbiaoti`` and walks each worksheet listed in
    ``mubanbiaotisheets``. Every truthy cell below the header row is passed
    through ``chulibiaoti()`` and then to ``chulimoban()``.
    """
    biaotiwork = openpyxl.load_workbook(mubanbiaoti)
    for sheet_name in mubanbiaotisheets:
        sheet = biaotiwork[sheet_name]
        # min_row=2 skips the header row of every worksheet. The previous
        # single flag was cleared after the first worksheet, so later
        # worksheets had their header row turned into an article title.
        for row in sheet.iter_rows(min_row=2, values_only=True):
            for cell in row:
                if cell:
                    chulimoban(chulibiaoti(cell))


def chulibiaoti(title):
    """Replace the keyword ``地区`` in *title* with a randomly chosen region.

    Args:
        title: Raw title text from the title workbook.

    Returns:
        The title with every ``地区`` replaced by one region drawn from
        ``region_names``. The title is returned unchanged when it has no
        ``地区``, when ``region_names`` is empty, or when the drawn region is
        an empty string.
    """
    # Check the cheap conditions first: random.choice() raises IndexError on
    # an empty list, and there is nothing to draw when the keyword is absent.
    if "地区" not in title or not region_names:
        return title

    chengshi = random.choice(region_names)
    return title.replace("地区", chengshi) if chengshi else title
def shuxing_int():
    """Draw random material for every attribute worksheet.

    Loads the workbook at ``mubanshuxing`` and, for each worksheet named in
    ``mubanshuxingsheets``, picks one random non-empty cell below the header
    row from every column and concatenates the picks in column order.

    Returns:
        dict mapping worksheet name to the concatenated text. Worksheets that
        have no rows below the header are omitted.
    """
    shuxing_data = {}
    shuxingwork = openpyxl.load_workbook(mubanshuxing)
    for shuxing in mubanshuxingsheets:
        sheet = shuxingwork[shuxing]

        # Header only (or empty): skip just this worksheet. ``break`` here
        # used to discard every worksheet listed after it as well.
        if sheet.max_row <= 1:
            continue

        column_data = []
        # One random pick per column, taken from the rows below the header.
        for column in sheet.iter_cols(min_row=2, values_only=True):
            candidates = [value for value in column if value]
            # A column with no data is skipped; retrying random rows until a
            # non-empty cell appears would never terminate for it.
            if candidates:
                # str() lets numeric cells be joined with text cells.
                column_data.append(str(random.choice(candidates)))
        shuxing_data[shuxing] = "".join(column_data)
    return shuxing_data
# Run the generator only when this file is executed as a script, not on import.
if __name__ == "__main__":
    run()