1. 在命令行窗口中启动 Chrome 浏览器,并将远程调试端口指定为 9222:
chrome.exe --disable-infobars --remote-debugging-port=9222 --user-data-dir="设置路径"
--disable-infobars 表示关闭信息提示栏;--remote-debugging-port 指定远程调试端口;--user-data-dir 指定用户数据目录。
2. 启动之后,Chromium 可通过 http://localhost:9222/json 得到调试信息,Chrome 可通过 http://127.0.0.1:9222/json/version 得到调试信息,
其中 webSocketDebuggerUrl 即为 pyppeteer 连接所用的 ws 地址(该地址中的 ID 在每次启动浏览器后都会变化,需要重新获取)。
页面显示为
{ "Browser": "Chrome/78.0.3904.70", "Protocol-Version": "1.3", "User-Agent": "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.70 Safari/537.36", "V8-Version": "7.8.279.17", "WebKit-Version": "537.36 (@edb9c9f3de0247fd912a77b7f6cae7447f6d3ad5)", "webSocketDebuggerUrl": "ws://127.0.0.1:9222/devtools/browser/8fc97fd6-a7dd-4ff2-b760-3f6b25b7419b" }
3.案例
import asyncio
import random
from pyppeteer.launcher import connect
# Pool of desktop Chrome 81 user-agent strings (Windows NT 6.0 - 10.0, 64-bit);
# one entry is picked at random for every new page.
useragents = [
    'Mozilla/5.0 (Windows NT 6.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Safari/537.36',
    'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Safari/537.36',
    'Mozilla/5.0 (Windows NT 6.2; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Safari/537.36',
    'Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Safari/537.36',
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Safari/537.36',
    'Mozilla/5.0 (Windows NT 6.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.129 Safari/537.36',
    'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.129 Safari/537.36',
    'Mozilla/5.0 (Windows NT 6.2; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.129 Safari/537.36',
]
async def main(url):
    """Open ``url`` in a new page of an already-running Chrome instance.

    Connects through the hard-coded DevTools WebSocket endpoint, opens a
    page with a randomly chosen user agent, a 60 s navigation timeout,
    JavaScript enabled and a 1300x750 viewport, navigates to ``url``, and
    finally closes the page and the browser.

    Args:
        url: Address to navigate to.

    Navigation errors are printed (prefixed with ``e1:``) instead of being
    raised. Errors from connecting or from configuring the page propagate
    to the caller, but the page and the browser are still closed first.
    """
    connect_params = {
        # Value of "webSocketDebuggerUrl" reported by
        # http://127.0.0.1:9222/json/version for the browser to attach to.
        'browserWSEndpoint': 'ws://127.0.0.1:9222/devtools/browser/8fc97fd6-a7dd-4ff2-b760-3f6b25b7419b',
        'logLevel': 3,
    }
    browser = await connect(connect_params)
    try:
        page = await browser.newPage()  # open a new browser page
        try:
            await page.setUserAgent(random.choice(useragents))
            # Page navigation timeout, in milliseconds.
            page.setDefaultNavigationTimeout(1000 * 60)  # 60s
            # Enable JavaScript.
            await page.setJavaScriptEnabled(True)
            await page.setViewport({'width': 1300, 'height': 750})  # page size
            try:
                await page.goto(url)  # visit the target page
            except Exception as e1:
                # Best-effort navigation: report the failure and fall
                # through to the cleanup below.
                print('e1:', e1)
        finally:
            # Release the page even when one of the setup calls raised.
            await page.close()
    finally:
        # NOTE(review): close() on a connected browser presumably shuts the
        # remote Chrome down as well; if it should stay running,
        # browser.disconnect() may be the better call - confirm.
        await browser.close()
    return
if __name__ == '__main__':
    # Script entry point: run the coroutine for a single page to completion
    # on the default event loop.
    url = 'https://www.baidu.com'
    loop = asyncio.get_event_loop()
    loop.run_until_complete(main(url))
# Method 2: let pyppeteer look up the WebSocket endpoint from the debugging port.
import asyncio
import json

from anti_useragent import UserAgent
from pyppeteer.launcher import connect

# User-agent source restricted to Windows platforms, minimum version 80.
ua = UserAgent(platform='windows', min_version=80)


async def main():
    """Attach to the Chrome listening on port 9222 and open Baidu in a new page.

    The page gets a 60 s navigation timeout and the user agent supplied by
    ``ua.chrome`` before navigating. The page and the browser connection
    are left open when the coroutine finishes.
    """
    port = 9222
    # pyppeteer documents ``browserURL`` as ``http://host:port`` and resolves
    # the /json/version metadata endpoint itself, so only the base URL is
    # passed here.
    options = {'browserURL': f'http://127.0.0.1:{port}'}
    browser = await connect(options=options)
    page = await browser.newPage()  # open a new browser page
    page.setDefaultNavigationTimeout(1000 * 60)  # 60s
    # presumably ua.chrome yields a Chrome user-agent string - confirm
    await page.setUserAgent(ua.chrome)
    await page.goto('https://www.baidu.com')


loop = asyncio.get_event_loop()
loop.run_until_complete(main())