https://blog.csdn.net/mouday/article/details/89574718
https://blog.csdn.net/ywdhzxf/article/details/94649327
https://www.jianshu.com/p/fd9eb385a70e scrapy整合
https://github.com/Python3WebSpider/ScrapyPyppeteer scrapy整合
https://www.cnblogs.com/dyfblog/p/10170959.html 笔记
# -*- coding: utf-8 -*-
import asyncio
from pyppeteer import launch
from pyquery import PyQuery as pq
# 示例一: 渲染页面
async def crawl_page():
brower = await launch({
"headless": False, # 设置模式, 默认无头
'userDataDir': r'C:\DataDir', # 设置缓存目录
'args':
[
'--no-sandbox',
'--disable-infobars', # 隐藏 ‘浏览器正在被监控提示条’
], 'dumpio': True # 防止阻塞
})
# 打开新页面
page = await brower.newPage()
# 设置页面视图大小
await page.setViewport(viewport={'width': 1280, 'height': 800})
# 是否启用JS,enabled设为False,则无渲染效果
await page.setJavaScriptEnabled(enabled