import requests
import parsel
def _extractTitle(htmlContent):
    """Return the text between the "headline" marker and the next "</h1>".

    Returns an empty string when either marker is missing, instead of
    slicing with the -1 that ``str.find`` reports for "not found".
    """
    markerIndex = htmlContent.find("headline")
    if markerIndex == -1:
        return ""
    # Skips the 8-character marker plus 2 more characters; presumably the
    # `">` that closes the tag -- TODO confirm against the page markup.
    titleStart = markerIndex + 10
    # Search for the closing tag only after the title start so an earlier
    # "</h1>" on the page cannot produce an empty or reversed slice.
    titleEnd = htmlContent.find("</h1>", titleStart)
    if titleEnd == -1:
        return ""
    return htmlContent[titleStart:titleEnd]


def _extractContent(htmlContent):
    """Return the chapter text from the first "<p>" up to the next "</div>".

    Paragraph tags are removed and a newline is inserted after every "。".
    Returns an empty string when the page contains no "<p>"; when no
    "</div>" follows, everything after the first "<p>" is kept.
    """
    contentStart = htmlContent.find("<p>")
    if contentStart == -1:
        return ""
    contentEnd = htmlContent.find("</div>", contentStart)
    if contentEnd == -1:
        mainContent = htmlContent[contentStart:]
    else:
        mainContent = htmlContent[contentStart:contentEnd]
    mainContent = mainContent.replace("</p>", "").replace("<p>", "")
    return mainContent.replace("。", "。\n")


def getBook(chapterNum):
    """Fetch one chapter page and print its title and body text.

    The page requested is
    ``https://m.xinshuhaige.net/50656/<chapterNum + 393828>.html``.

    Args:
        chapterNum: Integer offset added to 393828 to form the page id.

    Returns:
        None. The title and the body are written to standard output; either
        may be an empty string when the expected markers are not in the page.

    Raises:
        requests.RequestException: If the request fails or times out.
    """
    pageId = chapterNum + 393828
    url = "https://m.xinshuhaige.net/50656/" + str(pageId) + ".html"
    # Without a timeout a single stalled connection would block forever.
    response = requests.get(url, timeout=10)
    # When the Content-Type header carries no charset, requests falls back to
    # ISO-8859-1 for text responses, which would garble non-Latin text; use
    # the encoding detected from the body in that case.
    if response.encoding is None or response.encoding.lower() == "iso-8859-1":
        response.encoding = response.apparent_encoding
    htmlContent = response.text
    print(_extractTitle(htmlContent))
    print(_extractContent(htmlContent))
if __name__ == '__main__':
    # When run as a script, print the chapters for offsets 0 through 100
    # in ascending order.
    for chapterOffset in range(0, 101):
        getBook(chapterOffset)
# python爬取小说 (Scraping a novel with Python)
# 最新推荐文章于 2024-05-01 21:57:08 发布 (latest recommended article published 2024-05-01 21:57:08)