import os
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup
def find_book():
    """Prompt for a book title and locate its page on so.gushiwen.cn.

    Reads the title from standard input, requests the site's search page,
    and takes the first link inside the first ``div.sonspic`` element of
    the response.

    Returns:
        tuple[str, str]: The title as entered (surrounding whitespace
        removed) and the absolute URL of the matching book page.

    Raises:
        ValueError: If the entered title is empty.
        LookupError: If the search page holds no usable result link.
        requests.RequestException: On network failure, timeout, or an
            HTTP error status.
    """
    book = input('你要阅读的书集是:').strip()
    if not book:
        # The search form needs the first character of the title
        # (``valuej``), so an empty title cannot be queried.
        raise ValueError('book title must not be empty')

    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36'}
    # Hand the query to requests via ``params`` so it is percent-encoded;
    # titles containing '&', '#' or spaces would otherwise corrupt the URL.
    content = requests.get(
        'https://so.gushiwen.cn/search.aspx',
        params={'value': book, 'valuej': book[0]},
        headers=headers,
        timeout=10,
    )
    content.raise_for_status()
    content.encoding = 'utf-8'
    soup = BeautifulSoup(content.text, "html.parser")

    # ``find`` returns None when nothing matches; check each step so a
    # search without results produces a clear error.
    result = soup.find('div', attrs={'class': 'sonspic'})
    link = result.find('a') if result is not None else None
    if link is None or not link.get('href'):
        raise LookupError(f'no search result found for {book!r}')

    # urljoin copes with both site-relative and already-absolute hrefs.
    new_url = urljoin('https://so.gushiwen.cn', link['href'])
    return book, new_url
# Script body: ask which book to read, then download every chapter listed
# on the book page into a directory named after the book, one file per
# chapter.
name, url = find_book()
save_dir = name
# exist_ok avoids the check-then-create race of a separate exists() test.
os.makedirs(save_dir, exist_ok=True)

base_url = 'https://so.gushiwen.cn'
# Browser-like User-Agent sent with every request made below.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36'}

content = requests.get(url, headers=headers, timeout=10)
content.raise_for_status()
content.encoding = 'utf-8'
soup = BeautifulSoup(content.text, "html.parser")

# The chapter links are the <span> elements of the first <ul> inside the
# first div.bookcont; fail with a clear message if that structure is absent.
bookcont = soup.find('div', attrs={'class': 'bookcont'})
chapter_list = bookcont.find('ul') if bookcont is not None else None
if chapter_list is None:
    raise SystemExit(f'no chapter list found at {url}')
name1 = chapter_list.find_all('span')

num = 1
for n in name1:
    link = n.find('a')
    if link is None or not link.get('href'):
        # A span without a link is not a chapter entry; it gets no number.
        continue
    # Build the chapter URL without any stray whitespace; urljoin accepts
    # both site-relative and absolute hrefs.
    url1 = urljoin(base_url, link['href'])
    print(link)
    book = requests.get(url1, headers=headers, timeout=10)
    book.raise_for_status()
    book.encoding = 'utf-8'
    chapter_soup = BeautifulSoup(book.text, "html.parser")
    # BeautifulSoup's keyword for filtering on the CSS class is ``class_``
    # (trailing underscore).
    chapter_body = chapter_soup.find('div', class_='contson')
    if chapter_body is None:
        print(f'skipped {url1}: no chapter text found')
    else:
        with open(f'{save_dir}/第{num}回.text', mode='w', encoding='utf-8') as f:
            f.write(chapter_body.get_text())
    # Advance even when a chapter was skipped so that file numbers keep
    # matching the chapter's position in the list.
    num += 1
# Scraping a novel with Python
# (Web-page residue) Latest recommended article published on 2024-09-09 23:28:21