import html
import re

from playwright.sync_api import Playwright, sync_playwright, expect
# Scrape the Toutiao hot board with Playwright and append the numbered
# headlines to a local text file.

TOUTIAO_URL = "https://www.toutiao.com/"
CHROME_PATH = 'C:/Program Files/Google/Chrome/Application/chrome.exe'
OUTPUT_FILE = "热点.txt"
# The polling loop stops once at least this many headlines have been written.
TARGET_COUNT = 50
# Selector for the 10th entry of the hot-board list; waiting for it is used as
# the signal that the dynamically rendered list is present.
HOT_BOARD_LAST_ITEM = (
    '#root > div > div.main-content > div.right-container > div:nth-child(4) > div > div > div.ttp-hot-board > ol > li:nth-child(10) > a'
)
# Compiled once instead of on every polling pass.
HOT_ITEM_PATTERN = re.compile(r'<li><a aria-label="(?P<content>.*?)"')


def extract_hot_titles(page_source):
    """Return the headlines found in *page_source*, in document order.

    A headline is the ``aria-label`` value of an ``<a>`` that directly follows
    an ``<li>`` tag. The values come from serialized HTML, so character
    references such as ``&amp;`` or ``&quot;`` are decoded back to plain text.

    Args:
        page_source: HTML text of the page.

    Returns:
        A list of decoded headline strings; empty when nothing matches.
    """
    return [
        html.unescape(match.group("content"))
        for match in HOT_ITEM_PATTERN.finditer(page_source)
    ]


def run(playwright: "Playwright") -> None:
    """Open Toutiao in Chrome and append hot-board headlines to OUTPUT_FILE.

    Args:
        playwright: The object yielded by ``sync_playwright()``.

    Raises:
        RuntimeError: If the page yields no headline at all on a pass, which
            would otherwise keep the loop spinning forever.
    """
    browser = playwright.chromium.launch(executable_path=CHROME_PATH, headless=False)
    try:
        context = browser.new_context()
        page = context.new_page()
        page.goto(TOUTIAO_URL)

        written = 0  # running number of headlines written so far
        # Open the output file once instead of re-opening it for every line.
        with open(OUTPUT_FILE, "a+", encoding="utf-8") as f:
            f.write("\n以下数据来自今日头条\n")
            while written < TARGET_COUNT:
                # Give the dynamic content time to load.
                page.wait_for_timeout(1000)
                page.wait_for_selector(HOT_BOARD_LAST_ITEM)

                titles = extract_hot_titles(page.content())
                if not titles:
                    # Without this guard the counter never advances and the
                    # loop never terminates.
                    raise RuntimeError(
                        "no hot-board entries matched; the page markup may have changed"
                    )

                for title in titles:
                    written += 1
                    f.write(f"{written}:{title}\n")

                # As in the original script, ask for the next batch only when
                # the running total sits on a multiple of ten.
                if written % 10 == 0:
                    page.get_by_role("button", name="换一换", exact=True).click()
    finally:
        # Release the browser even when scraping fails part-way.
        browser.close()
    print("写入成功")


def main() -> None:
    """Script entry point: start Playwright and run the scraper."""
    with sync_playwright() as playwright:
        run(playwright)


if __name__ == "__main__":
    main()
# 爬取今日头条的热点榜单
# 最新推荐文章于 2025-03-30 10:00:32 发布