"""Single-book downloader for the 88dushu novel site.

Example book URL: https://www.88dus.com/xiaoshuo/97/97208/

Async version: aiohttp is used to fetch all chapter pages concurrently.
"""
import asyncio
import os
import sys
import time
from urllib.parse import urljoin

import requests
from aiohttp import ClientSession
from bs4 import BeautifulSoup

# Index page of the book that is downloaded when no URL is passed to star().
url = 'https://www.x88dushu.com/xiaoshuo/18/18713/'  # 88 novel site


class books:
    """Download every chapter of one book into a single UTF-8 text file."""

    def __init__(self, save_dir='D:/娱乐/E小说/'):
        """Create a downloader.

        Args:
            save_dir: Directory the finished ``.txt`` file is written to.
                It is created on demand just before the file is written.
        """
        self.save_dir = save_dir
        # Sent with the index request and with every chapter request.
        self.headers = {
            'User-Agent': "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/22.0.1207.1 Safari/537.1"}

    def star(self, book_url=None):
        """Download the whole book and append its chapters to one text file.

        The output file is ``<save_dir>/<bookname>_<author>.txt``. Chapter
        pages are fetched concurrently, then written in table-of-contents
        order. A chapter page that lacks the expected elements is reported
        and skipped; the remaining chapters are still written.

        Args:
            book_url: URL of the book's index page. Defaults to the
                module-level ``url``.

        Raises:
            SystemExit: If the output file already exists.
        """
        if book_url is None:
            book_url = url
        soup = self.request(book_url)

        info = soup.find('div', class_='jieshao').find('div', class_='rt')
        # Book title
        bookname = info.find('h1').get_text()
        # Author; the first three characters are dropped (presumably a
        # label prefix in front of the name — confirm against the page).
        author = info.find('div', class_='msg').find('em').get_text()[3:]

        # Refuse to download a book that is already on disk.
        path = os.path.join(self.save_dir, bookname + '_' + author + '.txt')
        if os.path.exists(path):
            print('\n《%s》_%s 已存在\n' % (bookname, author))
            sys.exit()
        print('\n《%s》_%s 下载开始\n' % (bookname, author))

        # Table of contents: one <a> per chapter.
        chapter_urls = []
        for link in soup.find('div', class_='mulu').find_all('a'):
            print(link)
            # urljoin also copes with absolute or root-relative hrefs.
            chapter_urls.append(urljoin(book_url, link['href']))

        # asyncio.run() creates and closes its own event loop, so no
        # module-level loop object is needed.
        pages = asyncio.run(self._fetch_chapters(chapter_urls))

        chapters = []
        for html in pages:
            page = BeautifulSoup(html, "lxml")
            try:
                # Chapter body text
                content = page.find('div', class_='yd_text2').get_text()
                # Chapter title
                title = page.find("div", class_="novel").find("h1").get_text()
            except AttributeError as e:
                # find() returned None: the page does not have the expected
                # layout. Skip this chapter only, not the rest of the book.
                print("AttributeError:%s" % e)
                continue
            chapters.append((title, content))

        # The file is only created when there is something to write, so a
        # completely failed run does not leave an empty file behind that
        # would block the next attempt.
        if chapters:
            if self.save_dir:
                os.makedirs(self.save_dir, exist_ok=True)
            with open(path, 'a', encoding='utf-8') as book_file:
                for title, content in chapters:
                    book_file.write(
                        '\n' + title + '\n\n\n' + content + '\n\n\n')
                    print(bookname + ' ' + title + '--------下载完成!')

        print('《' + bookname + '》' + ' 下载完成!**************')
        print(path)

    def request(self, url):
        """Fetch *url* synchronously and return the parsed page.

        The response is decoded as GBK and parsed with html5lib.

        Args:
            url: Page address to fetch.

        Returns:
            A ``BeautifulSoup`` document for the page.
        """
        response = requests.get(url, headers=self.headers)
        response.encoding = 'gbk'
        return BeautifulSoup(response.text, 'html5lib')

    async def _fetch_chapters(self, chapter_urls):
        """Fetch all chapter pages concurrently over one shared session.

        Returns the page texts in the same order as *chapter_urls*.
        """
        async with ClientSession(headers=self.headers) as session:
            pages = await asyncio.gather(
                *(self.async_response(chapter_url, session)
                  for chapter_url in chapter_urls)
            )
        # aiohttp's graceful-shutdown guidance: give SSL connections a
        # moment to close before the event loop itself is closed.
        await asyncio.sleep(0.250)
        return pages

    async def async_response(self, url, session=None):
        """Fetch *url* asynchronously and return its text.

        The body is decoded as GB18030.

        Args:
            url: Page address to fetch.
            session: Optional open ``ClientSession`` to reuse. When omitted,
                a temporary session is created for this single request.

        Returns:
            The decoded response body as ``str``.
        """
        if session is not None:
            async with session.get(url) as response:
                return await response.text(encoding="gb18030")
        async with ClientSession(headers=self.headers) as own_session:
            async with own_session.get(url) as response:
                return await response.text(encoding="gb18030")


if __name__ == '__main__':
    downloader = books()
    downloader.star()
# Novel download (asynchronous coroutines)
# Latest recommended article published on 2022-10-25 16:19:09