'''
Usage: run "python " followed by the path to "build_one_html_from_multi_html_in_epub.py".
'''
import os
import sys
# Directory part of the path this script was launched with, normalised to
# forward slashes. It is an empty string when sys.argv[0] carries no directory
# component (the script was started by bare file name).
srcpth = os.path.dirname(sys.argv[0]).replace("\\", "/")
print(srcpth)
'''
Recursive directory traversal
'''
def dirlist(path, ext="", allfile=None):
    """Recursively collect the entries found under *path*.

    Every entry (regular files and directories alike) whose path ends with
    *ext* is appended to the result with backslashes converted to forward
    slashes. A directory's contents are listed before the directory itself.
    Entries of each directory are visited in sorted name order, because
    ``os.listdir`` makes no ordering guarantee and callers concatenate the
    files in the order returned here.

    NOTE(review): the indentation of this function was reconstructed from a
    flattened paste; directories are assumed to go through the same *ext*
    filter as files - confirm against the original script.

    Args:
        path: Directory to scan.
        ext: Required path suffix (anything ``str.endswith`` accepts). The
            default ``""`` matches every entry.
        allfile: Optional list to extend in place; a new list is created
            when omitted.

    Returns:
        The list of matching paths (``allfile`` itself when it was given).

    Raises:
        OSError: If *path* or one of its sub-directories cannot be listed.
    """
    if allfile is None:
        allfile = []
    for filename in sorted(os.listdir(path)):
        filepath = os.path.join(path, filename)
        if os.path.isdir(filepath):
            # Descend first so that children precede their parent directory.
            dirlist(filepath, ext, allfile)
        # endswith("") is always true, so the default ext keeps everything.
        if filepath.endswith(ext):
            allfile.append(filepath.replace("\\", "/"))
    return allfile
# Fixed prologue of the merged document: XML declaration, <html>/<head> with
# the stylesheet links, and the opening <body> tag. The closing tags are
# written by main() once every chapter has been appended.
mark1 = "<?xml version='1.0' encoding='utf-8'?>\n<html xmlns=\"http://www.w3.org/1999/xhtml\" xml:lang=\"en\">\n <head>\n <title>Burnham’s Celestial Handbook</title>\n <link rel=\"stylesheet\" type=\"application/vnd.adobe-page-template+xml\" href=\"page-template.xpgt\"/>\n <meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\"/>\n <link href=\"stylesheet.css\" rel=\"stylesheet\" type=\"text/css\"/>\n<link href=\"page_styles.css\" rel=\"stylesheet\" type=\"text/css\"/>\n</head>\n <body class=\"calibre\">\n"


def extract_body_lines(lines):
    """Yield the lines located between the ``<body`` and ``</body>`` lines.

    The scan is line based: the line that opens the body and the line that
    closes it are both dropped in full, and only the lines in between are
    produced. Leading whitespace before either tag is ignored so the match
    does not depend on how the source file happens to be indented. Every
    ``"../"`` in a produced line is removed, because the merged file is
    written one directory above the chapter files.

    Args:
        lines: Iterable of text lines (an open text file works).

    Yields:
        The body lines, in order, with ``"../"`` stripped.
    """
    in_body = False
    for line in lines:
        tag = line.lstrip()
        if tag.startswith("<body"):
            in_body = True
            continue  # the opening line itself is never copied
        if tag.startswith("</body>"):
            in_body = False
        if in_body:
            yield line.replace("../", "")


def main():
    """Merge every file under ``<script dir>/text`` into ``all.html``.

    Exits with status 1 (after printing a hint) when no ``text`` directory
    sits next to the script.
    """
    # srcpth is empty when the script was started by bare file name; fall
    # back to the current directory instead of handing "" to os.listdir.
    base = srcpth or "."
    text_dir = base + "/text"
    if not os.path.isdir(text_dir):
        print("请放在epub改后缀zip解压后text文件夹的同级目录下!")
        sys.exit(1)
    with open(base + "/all.html", "w", encoding="utf-8") as fp:
        fp.write(mark1)
        for src in dirlist(text_dir):
            # dirlist also reports directories; those cannot be opened.
            if os.path.isdir(src):
                continue
            print(src)
            with open(src, encoding="utf-8") as chapter:
                fp.writelines(extract_body_lines(chapter))
        fp.write("</body></html>\n")


if __name__ == "__main__":
    main()
# epub解压的多个html制作单个html
# 最新推荐文章于 2024-04-21 10:06:15 发布
# 这是一个Python脚本,用于将EPUB文件解压缩后的text文件夹中的所有HTML文件合并成一个单一的HTML文件。它首先检查脚本是否位于正确的目录下,然后递归遍历指定目录,找到所有HTML文件并读取其body部分,最后将内容写入新的HTML文件中。此脚本适用于整理和整合电子书内容。
# 摘要由CSDN通过智能技术生成