# extract_novel
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup
def chapters(base_url="http://www.timeread.com/book/45274/list", timeout=10):
    """Fetch the chapter index page and return the chapter links it lists.

    Args:
        base_url: URL of the chapter list page. Defaults to the book this
            script was originally written for.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block forever on a stalled connection.

    Returns:
        A list of ``(url, title)`` tuples, one for every ``<li>`` inside the
        ``<ul id="htmlChapterList">`` element that contains a link with an
        ``href``, in page order. Relative hrefs are resolved against the URL
        the page was actually served from. The list is empty when the page
        has no such ``<ul>``.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: On connection failures or timeouts.
    """
    resp = requests.get(base_url, timeout=timeout)
    # Fail loudly on an error page instead of parsing it as a chapter list.
    resp.raise_for_status()
    resp.encoding = "utf-8"
    soup = BeautifulSoup(resp.text, "html.parser")

    chapter_list = soup.find("ul", id="htmlChapterList")
    if chapter_list is None:
        # Page layout differs from what we expect: nothing to download.
        return []

    data = []
    for li in chapter_list.find_all("li"):
        link_a = li.find("a")
        # Skip list items that carry no usable link.
        if link_a is None or not link_a.get("href"):
            continue
        # urljoin leaves absolute hrefs untouched and resolves relative ones,
        # so the returned URL can always be passed straight to requests.get.
        data.append((urljoin(resp.url, link_a["href"]), link_a.get_text()))
    return data
def content_novel(url, timeout=10):
    """Download one chapter page and return the text of its content block.

    Args:
        url: Absolute URL of the chapter page.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block forever on a stalled connection.

    Returns:
        The text of the ``<div id="htmlContent">`` element, or ``None`` when
        the page has no such element.

    Raises:
        requests.RequestException: On connection failures or timeouts.
    """
    res = requests.get(url, timeout=timeout)
    res.encoding = "UTF-8"
    soup = BeautifulSoup(res.text, "html.parser")

    # Look the element up once and reuse it rather than searching twice.
    content = soup.find("div", id="htmlContent")
    if content is None:
        return None
    return content.get_text()
# Script body: download every chapter of the book into "<title>.txt" in the
# current working directory, printing progress as it goes.

# Characters that are path separators or are rejected in file names by common
# file systems; each one is replaced with "_" before the title is used.
_UNSAFE_FILENAME_CHARS = str.maketrans(
    {ch: "_" for ch in '\\/:*?"<>|\r\n\t'}
)

# Fetch the chapter index once and reuse it for both the total and the loop.
novels = chapters()
numbers = len(novels)
ids = 0  # stays 0 when there are no chapters at all
for ids, (url, title) in enumerate(novels, start=1):
    print("download: %d,total:%d" % (ids, numbers))

    # Download the chapter exactly once, and do it *before* opening the output
    # file so that a chapter without content does not leave an empty file.
    text = content_novel(url)
    if text is None:
        continue

    filename = title.strip().translate(_UNSAFE_FILENAME_CHARS)
    with open("%s.txt" % filename, "w", encoding="UTF-8") as fwrite:
        fwrite.write(text)