# Fast crawler: log in with the pyppeteer library, then fetch pages with requests.
import asyncio
import pyppeteer as pyp
import bs4
import requests
import re
def sessionGetHtml(session, url, timeout=30):
    """Fetch ``url`` through ``session`` and return the response body as text.

    Args:
        session: Object whose ``get(url, headers=..., timeout=...)`` performs
            the request (presumably a ``requests.Session`` -- confirm against
            the caller).
        url: Address of the page to request.
        timeout: Forwarded unchanged to ``session.get``. Defaults to 30 so a
            stalled server cannot block the crawler forever; pass ``None``
            to wait without limit, as the function did before this
            parameter existed.

    Returns:
        The page text, or ``""`` if the request or decoding raised.
    """
    # Request header that imitates a desktop browser. The value is built
    # from adjacent literals instead of a backslash continuation inside the
    # string, so source indentation can never leak into the header value.
    headers = {
        'User-Agent': (
            'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
            'AppleWebKit/537.36 (KHTML, like Gecko) '
            'Chrome/81.0.4044.138 Safari/537.36 Edg/81.0.416.77'
        ),
    }
    try:
        result = session.get(url, headers=headers, timeout=timeout)
        # Decode with the response's apparent_encoding (in requests, the
        # encoding guessed from the body) before reading the text.
        result.encoding = result.apparent_encoding
        return result.text
    except Exception as e:
        # Deliberate best-effort: report the failure and hand back an empty
        # page so the caller can keep crawling.
        print(e)
        return ""
async def makeSession(page): # 返回一个session,将其内部cookies修改成pypeteer浏览器页面对象中的cookies
cookies = await page.cookies() #cookies是一个列表,每个元素都是一个字典
cookies1 = {}
for cookie in cookie