1. 自动等待
1.1 Click Action
- Attached to DOM
- Visible, Stable & Enabled
- Receive Pointer Events
Example:
from playwright.sync_api import sync_playwright
with sync_playwright() as pw:
    # Headed Chromium, slowed down so every step can be followed by eye.
    chromium = pw.chromium.launch(headless=False, slow_mo=500)
    tab = chromium.new_page()
    tab.goto("https://bootswatch.com/default")

    # click() runs the auto-wait checks listed in the notes (attached,
    # visible, stable, enabled, receives pointer events) before acting;
    # no explicit timeout is passed here.
    tab.locator("a.dropdown-item").first.click()

    chromium.close()
Run,由于下拉菜单尚未展开,a.dropdown-item 不可见,click() 会一直自动等待,直到默认超时(30 秒)后出现如下超时报错信息:
from playwright.sync_api import sync_playwright
with sync_playwright() as pw:
    # Headed Chromium, slowed down so every step can be followed by eye.
    chromium = pw.chromium.launch(headless=False, slow_mo=500)
    tab = chromium.new_page()
    tab.goto("https://bootswatch.com/default")

    # Same click as the previous example, but the wait is capped with
    # timeout=2_000 (Playwright timeouts are given in milliseconds).
    first_dropdown_item = tab.locator("a.dropdown-item").first
    first_dropdown_item.click(timeout=2_000)

    chromium.close()
Run,设置 timeout=2_000 后,click() 只等待 2 秒就会出现如下超时报错信息:
2. 页面访问事件
# Navigate to the page; wait_until selects which lifecycle event marks the
# navigation as finished. Exactly one value is active at a time.
page.goto(
"https://bootswatch.com/default",
wait_until='load'
# Alternative values described in the notes (swap in one at a time):
# wait_until='domcontentloaded'
# wait_until='commit'
# wait_until='networkidle'
)
wait_until 各取值(load、domcontentloaded、commit、networkidle)的比较:
load:html,图片,document等都加载完成
domcontentloaded:html加载完成,不会等待图片,icon等资源加载完毕
commit:收到网络响应且文档开始加载时,即认为导航完成
networkidle:至少 500 毫秒内没有任何网络连接时,才认为页面加载完成
3. 自定义等待
Example:
初始页面,点击‘2015’:
出现页面加载图标
页面加载完成
from playwright.sync_api import sync_playwright
from time import perf_counter
with sync_playwright() as pw:
    chromium = pw.chromium.launch(headless=False, slow_mo=500)
    tab = chromium.new_page()

    # wait_until is left at its default; 'load' / 'domcontentloaded' could
    # be passed as a second argument to experiment.
    tab.goto("https://www.scrapethissite.com/pages/ajax-javascript/")

    # Trigger the AJAX request for the 2015 data.
    tab.get_by_role("link", name="2015").click()
    print("Loading oscars for 2015...")

    # Time how long it takes for the first film-title cell to show up.
    started_at = perf_counter()
    tab.locator("td.film-title").first.wait_for()
    elapsed = perf_counter() - started_at
    print(f"...movies are loaded, in {round(elapsed, 2)}s!")

    chromium.close()
上述代码可以做如下修改:
# Locator-based wait from the full example above, kept for comparison:
# first_table_data = page.locator("td.film-title").first
# first_table_data.wait_for()
# Replacement suggested by the notes: wait on the selector via the page.
# NOTE(review): Playwright's docs discourage wait_for_selector in favour of
# locator.wait_for() — confirm which form is preferred here.
page.wait_for_selector(selector="td.film-title")
4. 事件监听
Example 1:
from playwright.sync_api import sync_playwright
from time import perf_counter
def on_load(page):
    """Print the object handed to the "load" event listener."""
    print("Page loaded:", page)


with sync_playwright() as pw:
    chromium = pw.chromium.launch(headless=False, slow_mo=500)
    tab = chromium.new_page()

    # The listener must be registered before navigating, otherwise the
    # "load" event has already fired by the time it is attached.
    tab.on("load", on_load)
    tab.goto("https://bootswatch.com/default")

    chromium.close()
我们也可以监听domcontentloaded,close,response,request等。
注意:如果 page.on("load", on_load) 写在 page.goto("https://bootswatch.com/default") 之后,那么 load 事件触发时监听器还没有注册,回调不会被调用,什么也不会发生。
如果不想继续监听,可以用remove_listener();如果只想监听一次,可以用once()。
from playwright.sync_api import sync_playwright
def on_load(page):
    """Print the object handed to the "load" event listener."""
    print("Page loaded:", page)


def on_request(request):
    """Print the request object handed to the "request" event listener."""
    print("Request sent:", request)


def on_filechooser(file_chooser):
    """Announce the file chooser and select "file.txt" in it."""
    print("File chooser opened")
    file_chooser.set_files("file.txt")


with sync_playwright() as pw:
    chromium = pw.chromium.launch(headless=False, slow_mo=500)
    tab = chromium.new_page()

    tab.on("load", on_load)
    # Optional: also log every request with tab.on("request", on_request).
    tab.on("filechooser", on_filechooser)
    # To react to the event only once, use tab.once("load", on_load) instead.

    tab.goto("https://bootswatch.com/default")

    # Clicking the file input opens the chooser handled by on_filechooser.
    tab.get_by_label("Default file input example").click()

    # Stop listening for "load" once it is no longer needed.
    tab.remove_listener("load", on_load)

    chromium.close()
Example 2:
from playwright.sync_api import sync_playwright
from time import perf_counter
def on_load(page):
    """Print the object handed to the "load" event listener."""
    print("Page loaded:", page)


def on_request(request):
    """Print the request object handed to the "request" event listener."""
    print("Request sent:", request)


with sync_playwright() as pw:
    chromium = pw.chromium.launch(headless=False, slow_mo=500)
    tab = chromium.new_page()

    # Only the "request" listener is active in this example; on_load is
    # defined but deliberately not registered.
    tab.on("request", on_request)
    tab.goto("https://bootswatch.com/default")

    chromium.close()
Example 3:
from playwright.sync_api import sync_playwright
from time import perf_counter
def on_load(page):
    """Print the object handed to the "load" event listener."""
    print("Page loaded:", page)


def on_request(request):
    """Print the request object handed to the "request" event listener."""
    print("Request sent:", request)


def on_filechooser(file_chooser):
    """Announce the file chooser and select "file.txt" in it."""
    print("File chooser opened")
    file_chooser.set_files("file.txt")


with sync_playwright() as pw:
    chromium = pw.chromium.launch(headless=False, slow_mo=500)
    tab = chromium.new_page()

    # Only the "filechooser" listener is active in this example; on_load and
    # on_request are defined but deliberately not registered.
    tab.on("filechooser", on_filechooser)
    tab.goto("https://bootswatch.com/default")

    # Clicking the file input opens the chooser handled by on_filechooser.
    tab.get_by_label("Default file input example").click()

    chromium.close()
5. 对话框处理
5.1 Alert
from playwright.sync_api import sync_playwright
with sync_playwright() as pw:
    chromium = pw.chromium.launch(headless=False, slow_mo=1000)
    tab = chromium.new_page()
    tab.goto("https://testpages.herokuapp.com/styled/alerts/alert-test.html")

    # Open the alert box. No "dialog" listener is registered in this snippet.
    # NOTE(review): Playwright's docs say unhandled dialogs are dismissed
    # automatically — confirm against the installed version.
    tab.get_by_text("Show alert box").click()

    chromium.close()
5.2 Confirm
from playwright.sync_api import sync_playwright
def on_dialog(dialog):
    """Print the dialog and accept it (call dialog.dismiss() to cancel)."""
    print("Dialog opened:", dialog)
    dialog.accept()


with sync_playwright() as pw:
    chromium = pw.chromium.launch(headless=False, slow_mo=1000)
    tab = chromium.new_page()
    tab.goto("https://testpages.herokuapp.com/styled/alerts/alert-test.html")

    # Attach the handler before the click that opens the confirm box.
    tab.on("dialog", on_dialog)
    tab.get_by_text("Show confirm box").click()

    chromium.close()
5.3 Prompt
from playwright.sync_api import sync_playwright
def on_dialog(dialog):
    """Print the dialog and accept it, passing "Playwright is cool" to accept().

    Call dialog.dismiss() instead to cancel the prompt.
    """
    print("Dialog opened:", dialog)
    dialog.accept("Playwright is cool")


with sync_playwright() as pw:
    chromium = pw.chromium.launch(headless=False, slow_mo=1000)
    tab = chromium.new_page()
    tab.goto("https://testpages.herokuapp.com/styled/alerts/alert-test.html")

    # Attach the handler before the click that opens the prompt box.
    tab.on("dialog", on_dialog)
    tab.get_by_text("Show prompt box").click()

    chromium.close()
6. 下载文件
from playwright.sync_api import sync_playwright
with sync_playwright() as pw:
    chromium = pw.chromium.launch(headless=False, slow_mo=200)
    tab = chromium.new_page()
    tab.goto("https://unsplash.com/photos/skyscraper-covered-with-fog-at-daytime-OrwkD-iWgqg")

    download_link = tab.get_by_role("link", name="Download free")

    # Perform the click inside expect_download() so the resulting download
    # is captured and can be read from .value afterwards.
    with tab.expect_download() as pending:
        download_link.click()

    pending.value.save_as("pink.jpg")

    chromium.close()
from playwright.sync_api import sync_playwright
def on_download(download):
    """Announce the download and save it as "yellow.jpg"."""
    print("Download received!")
    download.save_as("yellow.jpg")


with sync_playwright() as pw:
    chromium = pw.chromium.launch(headless=False, slow_mo=200)
    tab = chromium.new_page()
    tab.goto("https://unsplash.com/photos/a-close-up-of-a-bunch-of-orange-flowers--aNEDY_t-VM")

    # Saving is delegated to the "download" event listener in this variant.
    tab.on("download", on_download)

    download_link = tab.get_by_role("link", name="Download free")

    # The click still runs inside expect_download(); its result is not used.
    # The listener-free alternative would bind it ("as download_info") and
    # call download_info.value.save_as("pink.jpg") after the block.
    with tab.expect_download():
        download_link.click()

    chromium.close()
7. Sync和Async
Sync(同步):操作按先后顺序执行,前一个执行完成之后才会执行下一个
Async(异步):操作之间不必互相等待,多个操作可以并发执行
8. Async Playwright
import asyncio
from playwright.async_api import async_playwright
async def main():
    """Open playwright.dev in headed Chromium and print the page title."""
    async with async_playwright() as pw:
        chromium = await pw.chromium.launch(headless=False)
        tab = await chromium.new_page()
        await tab.goto("https://playwright.dev")

        # Every call in the async API is a coroutine and must be awaited.
        title = await tab.title()
        print(title)

        await chromium.close()


asyncio.run(main())