import requests
from bs4 import BeautifulSoup
from docx import Document
from requests.packages.urllib3.exceptions import InsecureRequestWarning
# Scrape every chapter of the novel from 51shucheng.net and collect the text
# into a single Word document, one "Heading 1" section per chapter.

# Silence the InsecureRequestWarning that urllib3 would otherwise emit for
# every request made with verify=False below.
requests.packages.urllib3.disable_warnings(InsecureRequestWarning)

# Book title; reused for the document heading, progress messages and the
# output file name.
BOOK_TITLE = '偷偷藏不住'
# Chapters are numbered 1..CHAPTER_COUNT.
CHAPTER_COUNT = 89
# The page for chapter N is served at "<BASE_URL>/<N + PAGE_ID_OFFSET>.html".
PAGE_ID_OFFSET = 248176
BASE_URL = 'https://www.51shucheng.net/yanqing/toutoucangbuzhu'
# Seconds to wait for the server before giving up on a chapter; without a
# timeout a single stalled connection would hang the whole run.
REQUEST_TIMEOUT = 10

# Create the output document and add the book title as the top-level heading.
doc = Document()
doc.add_heading(BOOK_TITLE, 0)

# A session reuses the underlying connection across the chapter requests.
with requests.Session() as session:
    for page in range(1, CHAPTER_COUNT + 1):
        print(f"正在爬取《{BOOK_TITLE}》第{page}集···")
        url = f'{BASE_URL}/{page + PAGE_ID_OFFSET}.html'

        # NOTE(review): verify=False disables TLS certificate validation.
        # It is kept from the original script; confirm it is still required.
        try:
            response = session.get(url, verify=False, timeout=REQUEST_TIMEOUT)
        except requests.RequestException as exc:
            # Skip this chapter instead of aborting, so the chapters that
            # were already downloaded still get saved at the end.
            print(f"《{BOOK_TITLE}》第{page}集爬取失败:{exc}")
            continue

        if response.status_code != 200:
            print(response.status_code)
            continue

        response.encoding = 'utf-8'
        soup = BeautifulSoup(response.text, "html.parser")

        # The chapter text is read from <div id="neirong">. find() returns
        # None when the element is absent, so guard before using it.
        frame = soup.find("div", id="neirong")
        if frame is None:
            print(f"《{BOOK_TITLE}》第{page}集未找到正文内容,已跳过")
            continue

        # Add the chapter heading only once content was found, then one
        # document paragraph per <p> element.
        doc.add_paragraph(f"第{page}集", style='Heading 1')
        for p in frame.find_all('p'):
            doc.add_paragraph(p.text)
        print(f"《{BOOK_TITLE}》第{page}集爬取成功")

# Save the document.
doc.save(f'《{BOOK_TITLE}》.docx')