playwright下载及基本使用
1. 下载playwright库
pip install playwright==1.33.0
playwright install
2. playwright使用
2.1导入库
from playwright.sync_api import sync_playwright
2.2 基本使用
- 启动引擎,实例化对象
browser = playwright.chromium.launch( headless=False,channel='chrome')
- 创建上下文 context
context = browser.new_context()
- 创建页面对象
page= context.new_page()
- 页面请求
page.goto('https://www.baidu.com/')
- 页面关闭
# Close the context first, then the browser it was created from.
context.close()
browser.close()
- 禁止加载图片
import re


def cancel_request(route):
    """Abort the intercepted request so the resource is never downloaded."""
    route.abort()


# Block image requests to speed up page loading.
# The dot is escaped so that only URLs containing a literal ".png" / ".jpg"
# are matched; an unescaped "." would match any character before "png"/"jpg".
page.route(re.compile(r"\.(png|jpg)"), cancel_request)
- 窗口最大化
- 设置参数 args=['--start-maximized'] 和 no_viewport=True
with sync_playwright() as playwright:
    # Launch the engine and instantiate the browser object.
    browser = playwright.chromium.launch(
        headless=False,
        args=['--start-maximized'],  # open the window at maximum size
    )
    # no_viewport=True makes the page use the maximized window.
    context = browser.new_context(no_viewport=True)
    page = context.new_page()
    page.goto(url)
    context.close()
    browser.close()
- 设置参数
- 请求百度总代码
from playwright.sync_api import sync_playwright

url = 'https://www.baidu.com/'

with sync_playwright() as playwright:
    # Launch the engine and instantiate the browser object.
    browser = playwright.chromium.launch(
        headless=True,  # headless mode
        args=['--start-maximized'],  # open the window at maximum size
        channel='chrome',  # open with the Google Chrome browser
    )
    # no_viewport=True shows the window at maximum size.
    # A context has to be created; this is a new concept compared with
    # selenium and makes it easier to manage cookies and IP proxies later.
    context = browser.new_context(no_viewport=True)
    page = context.new_page()
    page.goto(url)
    context.close()
    browser.close()
3.XPATH元素定位方法
- xpath教程
3.1 xpath定位语法
- 定位时在xpath语句前加 xpath= 即可:
input_xpath = 'xpath= //input[@id="su"]'
3.2 相关操作
-
点击元素
page.locator(input_xpath).click()
-
获取元素属性
attribute1 = page.get_attribute(input_xpath, 'value')  # value attribute of the element
attribute2 = page.get_attribute(input_xpath, 'href')  # href attribute of the element
attribute3 = page.get_attribute(input_xpath, 'src')  # src attribute of the element
attribute4 = page.get_attribute(input_xpath, 'id')  # id attribute of the element
-
获取元素文本
text1 = page.locator(ul).all_inner_texts()  # list with the texts of all matched elements
text2 = page.locator(ul).inner_text()  # all of the text as one single string
4. 等待及缓存
4.1 等待操作
- 强制等待
import time

time.sleep(n)  # n is the number of seconds to wait
- playwright强制等待
page.wait_for_timeout(1000) # 单位ms
- 设置访问最长等待时长
- 请求操作设置时长
page.goto(url, timeout=1000) # 单位ms
- 点击操作设置时长
page.locator(href_xpath).click(timeout=1000) # 单位ms
- 请求操作设置时长
4.2 添加缓存
- 添加缓存,提高加载速度
import os

from playwright.sync_api import Playwright, sync_playwright

# The original snippet used root_path without defining it. The current working
# directory is used here as a placeholder; point it at wherever the browser
# data should live.
root_path = os.getcwd()


def run(playwright: Playwright) -> None:
    """Open Baidu in a persistent Firefox context so cached data is reused."""
    # launch_persistent_context returns a browser context rather than a
    # Browser, hence the local name.
    context = playwright.firefox.launch_persistent_context(
        user_data_dir=os.path.join(root_path, 'user_data'),  # where the cache is saved
        headless=False,
        # NOTE(review): --start-maximized is a Chromium switch; confirm it has
        # any effect when launching Firefox.
        args=['--start-maximized'],
        accept_downloads=True,
        slow_mo=250,
    )
    page = context.new_page()
    page.goto('https://www.baidu.com/')
    context.close()


with sync_playwright() as playwright:
    run(playwright)