import logging
from ebooklib import epub
import urllib3
import certifi
from lxml import html
import re
# Module logger: INFO and above, emitted through a stream handler with a
# "timestamp - level - message" layout.
formatter = logging.Formatter(
    fmt='%(asctime)s - %(levelname)s - %(message)s',
    datefmt='%Y-%m-%d %H:%M:%S',
)
log_handler = logging.StreamHandler()
log_handler.setFormatter(formatter)

log = logging.getLogger(__name__)
log.addHandler(log_handler)
log.setLevel(logging.INFO)
# Running state shared with the download loop: the next chapter number, the
# nested table of contents, and the reading order (which starts with 'nav').
chapter_count = 1
toc_list = []
spine_list = ['nav']

# Connection pool that requires certificate verification against certifi's
# CA bundle.
http = urllib3.PoolManager(
    ca_certs=certifi.where(),
    cert_reqs='CERT_REQUIRED',
)

# Volume index pages are requested as f'{base}{vol_number}'.
base = 'https://www.qingyunian.net/qingyunian'

# EPUB container and its metadata.
book = epub.EpubBook()
# NOTE(review): the content stored under 'cover.jpg' is an HTML snippet, not
# image bytes -- confirm against the ebooklib set_cover documentation.
book.set_cover(
    file_name='cover.jpg',
    content='<h2>庆余年</h2><b>猫腻 </b>',
    create_page=True,
)
book.set_identifier('id123456')
book.set_title('庆余年')
book.set_language('zh')
book.add_author('猫腻')
# Compiled once up front: matches a run of one or more non-breaking spaces.
nbsp_run = re.compile(r'(\xa0)+')


def _fetch_page(url):
    """Download *url* and return it parsed as an lxml HTML tree.

    The response body is decoded as UTF-8. A non-200 status is only logged
    as a warning; the body is still parsed and returned, so callers proceed
    exactly as they would without the check.
    """
    response = http.request(method='GET', url=url)
    if response.status != 200:
        log.warning('GET %s returned HTTP %s', url, response.status)
    return html.fromstring(response.data.decode('utf-8'))


# Walk volume index pages 1..7. Each volume contributes one TOC entry of the
# form (Section, (Link, Link, ...)), and each chapter becomes one XHTML file
# named ch_NNN.xhtml, numbered continuously across volumes.
for vol_number in range(1, 8):
    vol_page = _fetch_page(f'{base}{vol_number}')
    vol_title = vol_page.xpath('/html/body/div[@class="main"]/h1/a/text()')[0]
    vol_title_short = vol_title.replace('庆余年 ', '')
    log.info('vol_title_short: %s', vol_title_short)

    # The volume heading links to the first chapter of that volume, i.e. the
    # chapter number that is current before any of its chapters are added.
    vol_toc_list = [
        epub.Section(title=vol_title_short, href=f'ch_{chapter_count:03}.xhtml'),
    ]

    chapter_list = vol_page.xpath(
        '/html/body/div[@class="main"]/div[@class="content"]/ul/li'
    )
    ch_toc_list = []
    for ch_item in chapter_list:
        # Link text carries the volume title as a prefix; strip it together
        # with leading spaces and trailing '?' characters.
        ch_title = ch_item.xpath('a/text()')[0]
        ch_title = ch_title.replace(vol_title, '').lstrip(' ').rstrip('?')
        ch_href = ch_item.xpath('a/@href')[0]
        log.debug('%s: %s', ch_title, ch_href)

        ch_page = _fetch_page(ch_href)
        paragraph_list = ch_page.xpath(
            '/html/body/div[@class="main"]/div[@class="content"]/p'
        )
        # The final <p> is skipped -- presumably site boilerplate rather than
        # chapter text; TODO confirm against a live page.
        ch_content = ''.join(
            html.tostring(p, encoding='utf-8').decode('utf-8')
            for p in paragraph_list[:-1]
        )
        ch_content = nbsp_run.sub(' ', ch_content)
        ch_content = ch_content.replace('\n', '')

        ch_uid = f'ch_{chapter_count:03}'
        ch_file = f'{ch_uid}.xhtml'
        chapter = epub.EpubHtml(title=ch_title, file_name=ch_file, lang='zh')
        chapter.content = f'<h1>{ch_title}</h1>{ch_content}'
        book.add_item(chapter)
        spine_list.append(chapter)
        ch_toc_list.append(epub.Link(href=ch_file, title=ch_title, uid=ch_uid))
        chapter_count += 1

    vol_toc_list.append(tuple(ch_toc_list))
    toc_list.append(tuple(vol_toc_list))

book.toc = tuple(toc_list)
# Register the NCX and Nav navigation documents, in that order.
for nav_item in (epub.EpubNcx(), epub.EpubNav()):
    book.add_item(nav_item)

# Stylesheet packaged inside the book at style/nav.css.
style = 'BODY {color: white;}'
nav_css = epub.EpubItem(
    uid="style_nav",
    file_name="style/nav.css",
    media_type="text/css",
    content=style,
)
book.add_item(nav_css)

# Reading order: the 'nav' entry first, then every chapter in the order it
# was appended to spine_list.
book.spine = spine_list

# Serialize the assembled book to disk.
epub.write_epub('庆余年.epub', book)
# Python Create EPUB
# Latest recommended article published on 2024-08-09 07:10:01