# Python crawler for the Dingdian (顶点) novel site: scraping 《三寸人间》
# Step 1: fetch the whole page
import requests
from bs4 import BeautifulSoup
# Fetch one chapter page and pull the chapter title and body out of its HTML.
url = "https://www.23us.so/files/article/html/0/43/3615671.html"
r = requests.get(url, timeout=30)
r.raise_for_status()  # raise requests.HTTPError on a 4xx/5xx response
r.encoding = 'utf-8'  # decode the body as UTF-8 so the Chinese text reads correctly
# BeautifulSoup parses markup (str/bytes), not a Response object, so hand it
# the decoded body text rather than `r` itself.
soup = BeautifulSoup(r.text, "html.parser")
# String form of the list of every <h1> tag (the chapter title).
a = str(soup.find_all('h1'))
# `attrs` must be a dict mapping attribute name to value; the previous set
# literal referenced an undefined name and raised NameError.
# String form of the list of <dd id="contents"> tags (the novel text).
b = str(soup.find_all('dd', {'id': 'contents'}))
# Step 2: write the result to a .txt file
dingdian = open("顶点.txt","a",encoding = "utf-8"