import os
import re
from concurrent.futures import ThreadPoolExecutor
from datetime import datetime
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup
def down_text(url):
    """Download one chapter page and save its text under ``txt/``.

    The page is fetched with the module-level ``headers`` dict, which the
    script entry point assigns before any download is started. The chapter
    title is read from the ``h1.wap_none`` element and the body text from
    the ``div#chaptercontent`` element. Pages lacking either element are
    reported and skipped.

    Args:
        url: Absolute URL of the chapter page.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
        OSError: If the output file cannot be written.
    """
    # Without a timeout a single stalled connection would block its worker
    # thread forever.
    response = requests.get(url, headers=headers, timeout=30)
    # Do not parse (and save) an error page as if it were a chapter.
    response.raise_for_status()

    soup = BeautifulSoup(response.text, 'lxml')
    title = soup.find('h1', class_='wap_none')
    content = soup.find('div', id='chaptercontent')
    if title is None or content is None:
        print(f'{url}解析失败,已跳过')
        return

    title = title.get_text().strip()
    content = content.get_text()

    # Characters that are path separators or are rejected in filenames by
    # common filesystems would make open() fail or write outside txt/.
    file_name = re.sub(r'[\\/:*?"<>|\r\n\t]', '_', title)

    # exist_ok makes this safe when several worker threads get here at once.
    os.makedirs('txt', exist_ok=True)
    # Explicit UTF-8: the platform default encoding may be unable to
    # represent the chapter text.
    with open(os.path.join('txt', f'{file_name}.txt'), 'w', encoding='utf-8') as f:
        f.write(content)
    print(f'{title}下载完毕......')
if __name__ == '__main__':
    # Script entry point: read the book's index page, collect every chapter
    # link, download the chapters concurrently and report the elapsed time.
    start_time = datetime.now().timestamp()
    index_url = 'https://www.bqka.cc/book/159995/'
    # down_text reads this module-level name, so it must be assigned before
    # any download task is submitted.
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/127.0.0.0 Safari/537.36"
    }

    response = requests.get(index_url, headers=headers, timeout=30)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'lxml')

    listing = soup.find('div', class_='listmain')
    if listing is None:
        # Fail with a readable message instead of an AttributeError on None.
        raise SystemExit('未找到章节列表(div.listmain),页面结构可能已变化')

    urls = []
    for item in listing.find_all('a'):
        href = item.get('href')
        # The chapter list contains a javascript: pseudo-link (the "show
        # more" toggle) and may contain anchors without href; neither is a
        # downloadable chapter.
        if not href or href.startswith('javascript:'):
            continue
        # Site-relative hrefs are resolved against the index page URL.
        urls.append(urljoin(index_url, href))

    # Create the output directory once before fanning out the downloads.
    os.makedirs('txt', exist_ok=True)

    with ThreadPoolExecutor(max_workers=50) as exe:
        tasks = [(chapter_url, exe.submit(down_text, chapter_url)) for chapter_url in urls]
        # An exception raised inside a worker is stored on its future and is
        # lost unless it is read back, so surface each failure here.
        for chapter_url, future in tasks:
            error = future.exception()
            if error is not None:
                print(f'{chapter_url}下载失败: {error!r}')

    # The executor's with-block has waited for every task, so this covers
    # the full download time.
    end_time = datetime.now().timestamp()
    print(f'总共花了{end_time - start_time}seconds')