# 爬取今日头条的热点榜单 (scrape the Toutiao trending-topics board)

import html
import re

from playwright.sync_api import Playwright, sync_playwright, expect
# Local Chrome binary that Playwright drives; adjust for the machine at hand.
CHROME_PATH = 'C:/Program Files/Google/Chrome/Application/chrome.exe'
# Headlines are appended to this file, so repeated runs accumulate.
OUTPUT_PATH = "热点.txt"
# Stop once at least this many headlines have been written.
TARGET_COUNT = 50
# The board is treated as loaded once its 10th entry exists.
# NOTE(review): this presumes each refresh shows 10 entries -- confirm
# against the live page.
BOARD_READY_SELECTOR = (
    '#root > div > div.main-content > div.right-container > div:nth-child(4) > div > div > div.ttp-hot-board > ol > li:nth-child(10) > a')
# Compiled once: captures the aria-label of every "<li><a ...>" entry.
HEADLINE_PATTERN = re.compile(r'<li><a aria-label="(?P<content>.*?)"')


def extract_headlines(page_source: str) -> list:
    """Return the headlines found in *page_source*, in document order.

    A headline is the ``aria-label`` value of an ``<a>`` that directly
    follows an ``<li>``. The input is serialized HTML, so entities such as
    ``&amp;`` and ``&quot;`` are decoded back to the characters they stand
    for. An input without any match yields an empty list.
    """
    return [
        html.unescape(match.group("content"))
        for match in HEADLINE_PATTERN.finditer(page_source)
    ]


def main() -> None:
    """Scrape the Toutiao hot board and append the headlines to OUTPUT_PATH.

    Opens the Toutiao home page in Chrome, repeatedly reads the hot board
    and clicks the "换一换" (refresh) button until at least TARGET_COUNT
    headlines have been written as numbered lines.

    Raises:
        RuntimeError: if the board is present but no headline can be
            extracted from the page source (e.g. the markup changed).
    """
    with sync_playwright() as playwright:
        browser = playwright.chromium.launch(executable_path=CHROME_PATH, headless=False)
        try:
            context = browser.new_context()
            page = context.new_page()
            page.goto("https://www.toutiao.com/")

            written = 0
            # One handle for the whole run instead of re-opening per headline.
            with open(OUTPUT_PATH, "a+", encoding="utf-8") as f:
                f.write("\n以下数据来自今日头条\n")
                while written < TARGET_COUNT:
                    # Give the dynamically loaded board time to (re)render.
                    page.wait_for_timeout(1000)
                    page.wait_for_selector(BOARD_READY_SELECTOR)

                    headlines = extract_headlines(page.content())
                    if not headlines:
                        # Without this guard the loop would never advance.
                        raise RuntimeError(
                            "hot board is present but no headline matched; "
                            "the page markup may have changed")

                    for hot in headlines:
                        written += 1
                        f.write(f"{written}:{hot}\n")

                    # Refresh after every batch, whatever its size, so the
                    # same list is never scraped twice; skip the click once
                    # enough headlines have been collected.
                    if written < TARGET_COUNT:
                        page.get_by_role("button", name="换一换", exact=True).click()
        finally:
            # Close the browser even when a step above fails.
            browser.close()

    print("写入成功")


if __name__ == "__main__":
    main()

评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值