import requests
import time
from lxml import etree
from bs4 import BeautifulSoup
import re
def get_html(url):
    """Download *url* and return the response body as decoded text.

    A desktop-browser User-Agent header is sent with the request.

    Args:
        url: Absolute URL of the page to fetch.

    Returns:
        The body of the HTTP response as a ``str`` (``Response.text``).

    Raises:
        requests.exceptions.RequestException: On connection problems or
            when the server does not answer within the timeout.
    """
    headers = {
        # No leading space in the value: Requests rejects header values
        # that start with whitespace.
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.77 Safari/537.36 Edg/91.0.864.41',
    }
    # `headers` must be passed by keyword: the second positional parameter
    # of requests.get() is `params`, which would put the User-Agent into the
    # query string instead of the request headers.
    # The timeout keeps a stalled server from hanging the script forever.
    return requests.get(url, headers=headers, timeout=30).text
def judge(url):
    """Extract the novel's base URL from *url*.

    Args:
        url: Text containing a link of the form
            ``https://www.uubiqu.com/read/<digits>/...``.

    Returns:
        The first ``https://www.uubiqu.com/read/<digits>/`` prefix found
        in *url*.

    Raises:
        IndexError: If *url* contains no such link.
    """
    # Dots are escaped so that they match a literal "." only, not any
    # character (the unescaped form also accepted e.g. "wwwXuubiqu.com").
    match = re.search(r'https://www\.uubiqu\.com/read/[0-9]+/', url)
    if match is None:
        # IndexError is what the previous `findall(...)[0]` raised on a
        # miss; the type is kept so callers catching it still work.
        raise IndexError(f"no uubiqu.com novel link found in {url!r}")
    return match.group(0)
# Script entry: ask for a novel's index page, then download every chapter
# listed there into a single UTF-8 text file.
url = input("输入你要下载小说的链接:")
# Doubles as validation: judge() raises if the link is not a novel URL.
target_url = judge(url)

# Parse the index page and collect the chapter entries (<dd> elements
# inside <div id="list"><dl>).
index_soup = BeautifulSoup(get_html(url), 'lxml')
dd_list = index_soup.find('div', id='list').dl.find_all("dd")

# `with` guarantees the file is flushed and closed even if a chapter
# request fails midway.
with open("./测试.txt", 'w', encoding="utf-8") as fp:
    # The first 9 <dd> entries are skipped.
    # NOTE(review): presumably a "latest chapters" block that duplicates
    # entries further down - confirm against the site's markup.
    for dd in dd_list[9:]:
        # get_text() also works when the link contains nested tags, where
        # `.string` would be None and break the concatenations below.
        title = dd.a.get_text()
        chapter_html = get_html('https://www.uubiqu.com' + dd.a['href'])
        chapter_soup = BeautifulSoup(chapter_html, 'lxml')

        fp.write(title + "\n")
        # One line per <p> that is a direct child of the ".content" element.
        for paragraph in chapter_soup.select('.content>p'):
            fp.write(paragraph.get_text() + "\n")

        print(title + "完成")
        # Short pause between chapter requests.
        time.sleep(0.1)
# 写于 2021-06-09
# 新手，如有问题，希望大家批评指正。