epub解压的多个html制作单个html

最新推荐文章于 2024-04-21 10:06:15 发布

W.H.

最新推荐文章于 2024-04-21 10:06:15 发布

阅读量726

点赞数

本文链接：https://blog.csdn.net/wsl985/article/details/108877504

版权

这是一个Python脚本，用于将EPUB文件解压缩后的text文件夹中的所有HTML文件合并成一个单一的HTML文件。它首先检查脚本是否位于正确的目录下，然后递归遍历指定目录，找到所有HTML文件并读取其body部分，最后将内容写入新的HTML文件中。此脚本适用于整理和整合电子书内容。

摘要由CSDN通过智能技术生成


'''
运行方法：在命令行执行 python <build_one_html_from_multi_html_in_epub.py 的完整路径（含文件名）>
'''

import os
import sys

# Directory that contains this script, normalised to forward slashes.
# sys.argv[0] has no directory part when the script is started by its bare
# file name from inside its own folder; fall back to "." in that case so that
# os.listdir(srcpth) works and joins such as srcpth + "/all.html" stay inside
# that folder instead of resolving against the filesystem root.
srcpth = os.path.dirname(sys.argv[0]).replace("\\", "/") or "."

print(srcpth)


def dirlist(path, ext="", allfile=None):
    """Recursively collect the paths found under *path*.

    Every entry below *path* whose full path ends with *ext* is returned,
    with backslashes replaced by forward slashes. Directories are entries
    too: a matching directory is listed right after its own contents.

    Args:
        path: Directory to walk.
        ext: Suffix (or tuple of suffixes) a path must end with. The default
            "" matches every entry, directories included.
        allfile: List to append to; a new list is created when omitted.
            Used internally to accumulate results across recursive calls.

    Returns:
        The list of matching paths (the same object as *allfile* when one
        was passed in).

    Raises:
        OSError: If *path* or one of its sub-directories cannot be listed.
    """
    if allfile is None:
        allfile = []
    # os.listdir() returns entries in arbitrary order; sort them so the
    # result is the same on every run and every platform.
    for filename in sorted(os.listdir(path)):
        filepath = os.path.join(path, filename)
        if os.path.isdir(filepath):
            dirlist(filepath, ext, allfile)
        # str.endswith("") is always True, so the default ext keeps everything.
        if filepath.endswith(ext):
            allfile.append(filepath.replace("\\", "/"))
    return allfile

# The merge step reads its input from the "text" folder that sits next to
# this script, so stop early when that folder is missing.
# os.path.isdir() checks that one path directly: it needs no walk of the
# whole tree and it rejects a regular file that merely happens to be named
# "text".
if not os.path.isdir(srcpth + "/text"):
    print("请放在epub改后缀zip解压后text文件夹的同级目录下！")
    # sys.exit() is always available (the bare exit() helper is only injected
    # by the site module); a non-zero status reports the failure to the caller.
    sys.exit(1)

# Fixed XHTML prologue written once at the top of the merged document; it
# ends with the opening <body> tag that the final write below closes.
mark1 = "<?xml version='1.0' encoding='utf-8'?>\n<html xmlns=\"http://www.w3.org/1999/xhtml\" xml:lang=\"en\">\n  <head>\n    <title>Burnham’s Celestial Handbook</title>\n    <link rel=\"stylesheet\" type=\"application/vnd.adobe-page-template+xml\" href=\"page-template.xpgt\"/>\n    <meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\"/>\n  <link href=\"stylesheet.css\" rel=\"stylesheet\" type=\"text/css\"/>\n<link href=\"page_styles.css\" rel=\"stylesheet\" type=\"text/css\"/>\n</head>\n  <body class=\"calibre\">\n"

# File-name suffixes (compared case-insensitively) that mark a chapter file.
_HTML_EXTS = (".html", ".xhtml", ".htm")


def _iter_body_lines(path):
    """Yield the lines of *path* that lie between its <body> and </body> lines.

    The line carrying the opening tag and the line carrying the closing tag
    are not yielded. Tags are recognised at the start of a line regardless of
    leading whitespace.
    """
    inside_body = False
    with open(path, encoding="utf-8") as chapter:
        for line in chapter:
            stripped = line.lstrip()
            if stripped.startswith("<body"):
                # Copying starts with the line after the opening tag.
                inside_body = True
                continue
            if stripped.startswith("</body>"):
                inside_body = False
            if inside_body:
                yield line


# "with" closes all.html even if reading one of the chapters fails.
with open(srcpth + "/all.html", "w", encoding="utf-8") as fp:
    fp.write(mark1)
    # Sort so chapters are merged in file-name order instead of whatever
    # order the directory listing happens to return.
    for src in sorted(dirlist(srcpth + "/text")):
        # The listing also contains directories and possibly non-HTML files;
        # neither can be read as a chapter.
        if not (os.path.isfile(src) and src.lower().endswith(_HTML_EXTS)):
            continue
        print(src)
        for line in _iter_body_lines(src):
            # Chapters link to resources one level up; the merged file is
            # written one level above the chapters, so drop the "../" prefix.
            fp.write(line.replace("../", ""))
    fp.write("</body></html>\n")