import re
import time
import json
import aiohttp
import asyncio
import execjs
import requests
from fake_useragent import UserAgent
import nest_asyncio
nest_asyncio.apply()
class Spider:
    """Async coroutine spider template.

    Workflow: log in synchronously with ``requests`` (so simple form/cookie
    logins are easy), then hand the resulting cookies to an ``aiohttp``
    session and crawl concurrently with coroutines.
    """

    def __init__(self):
        # requests.Session produced by login(); its cookie jar is copied
        # into the aiohttp session in main(). None until login() is called.
        self.session = None

    # Synchronous entry point for the whole program.
    def run(self):
        """Drive the async main() to completion and return its data."""
        task = [asyncio.ensure_future(self.main())]
        loop = asyncio.get_event_loop()
        data = loop.run_until_complete(asyncio.gather(*task))[0]
        return data

    # Synchronous login that produces the authenticated session.
    def login(self, username, password, **kwargs):
        """Log in with requests and keep the authenticated session.

        NOTE(review): template placeholder — fill in the real login URL and
        payload. The original assigned ``requests.post()`` (no arguments),
        which raises TypeError; a Session object is what main() expects.
        """
        headers = {
            'User-Agent': UserAgent().random,
        }
        self.session = requests.Session()
        # TODO: perform the actual login request, e.g.
        # self.session.post(login_url, data={'u': username, 'p': password},
        #                   headers=headers, **kwargs)
        return self.session

    # Async crawling entry point.
    async def main(self, url=None):
        """Share login cookies with aiohttp, POST to ``url``, fan out requests."""
        # Copy cookies from the sync login session into the async session.
        # Guard against login() never having been called (self.session is None).
        cookies = {}
        if self.session is not None:
            cookies = requests.utils.dict_from_cookiejar(self.session.cookies)
        async with aiohttp.ClientSession(cookies=cookies) as session:
            data = await self.post(url, session)
            # Nested async requests reusing the same session.
            await self.get_data(session)
            return data

    # Fan-out of concurrent requests.
    async def get_data(self, session=None):
        """Run the request coroutines concurrently and return their results.

        Template placeholder: populate ``tasks`` with real coroutines, e.g.
        ``[asyncio.ensure_future(self.get(u, session)) for u in urls]``.
        Inside a coroutine we must ``await asyncio.gather`` — the original
        called ``loop.run_until_complete`` from within async code, which only
        appeared to work because of nest_asyncio.
        """
        tasks = []
        data = await asyncio.gather(*tasks)
        return data

    # aiohttp GET helper: returns the response body as text.
    async def get(self, url, session, headers=None):
        async with session.get(url, headers=headers) as r:
            return await r.text()

    # aiohttp POST helper: returns parsed JSON on 200, else a status dict.
    async def post(self, url, session, headers=None, json=None):
        async with session.post(url, headers=headers, json=json) as r:
            if r.status == 200:
                return await r.json()
            else:
                # Surface the HTTP status and the raw response for the caller.
                return {'code': r.status, 'msg': r}
# Async coroutine spider template
# (blog footer: latest recommended article published 2024-04-29 09:14:59)