Selenium 爬虫示例:爬取号角瓦 CN 赛区从今日起的比赛赛程

import pandas as pd
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
import time
import os

# Destination workbook for the scraped schedule. The path is hard-coded;
# change it to a writable location on the machine running this script.
excel_file_path = 'C:\\Users\\valorant_25.xlsx'

# Absolute XPaths of the filter buttons, in the order they are clicked.
# The labels come from the variable names used in the original script
# (saiqu = region, dengji = tier, zhuangtai = status). Whether a click
# selects or deselects a filter depends on the page's default state.
# NOTE(review): absolute XPaths break on any layout change of the page.
filter_button_xpaths = (
    # region: CN
    "/html/body/div[1]/div/div/div[2]/div/div[2]/div/div[1]/div/div/div[1]/div[2]/span[3]",
    # region: Pacific
    "/html/body/div[1]/div/div/div[2]/div/div[2]/div/div[1]/div/div/div[1]/div[2]/span[4]",
    # region: EMEA
    "/html/body/div[1]/div/div/div[2]/div/div[2]/div/div[1]/div/div/div[1]/div[2]/span[5]",
    # region: America
    "/html/body/div[1]/div/div/div[2]/div/div[2]/div/div[1]/div/div/div[1]/div[2]/span[6]",
    # tier
    "/html/body/div[1]/div/div/div[2]/div/div[2]/div/div[1]/div/div/div[2]/div[2]/span[2]",
    # status
    "/html/body/div[1]/div/div/div[2]/div/div[2]/div/div[1]/div/div/div[3]/div[2]/span[1]",
)

# Rows of [date, time, home team, away team, region] collected from the page.
data = []

# Launch an Edge WebDriver session.
driver = webdriver.Edge()
try:
    time.sleep(3)
    # Open the schedule page.
    driver.get("https://web.haojiao.cc/wiki/schedule/t2Ud5pOQlscKLbRC")

    # Maximize the window, then give the page time to finish loading.
    driver.maximize_window()
    time.sleep(4)

    # Click each filter button in turn, pausing one second between locating
    # and clicking it, and one more second afterwards so the list can refresh.
    for xpath in filter_button_xpaths:
        button = driver.find_element(By.XPATH, xpath)
        time.sleep(1)
        button.click()
        time.sleep(1)

    # The schedule is rendered inside its own scrollable container.
    scroll_container = driver.find_element(By.ID, 'wiki_schedule_scroll_list')
    last_height = driver.execute_script(
        "return arguments[0].scrollHeight", scroll_container
    )

    # Keep scrolling the container to its bottom until its scrollHeight stops
    # growing, i.e. no further content was appended after the last scroll.
    while True:
        driver.execute_script(
            "arguments[0].scrollTo(0, arguments[0].scrollHeight);",
            scroll_container,
        )
        time.sleep(2)  # wait for newly loaded content

        new_height = driver.execute_script(
            "return arguments[0].scrollHeight", scroll_container
        )
        if new_height == last_height:
            break
        last_height = new_height

    # One container per match date.
    # NOTE(review): the locator value joins two class names with '.', which
    # relies on Selenium turning CLASS_NAME into a CSS selector - confirm
    # against the installed Selenium version.
    date_containers = scroll_container.find_elements(
        By.CLASS_NAME, '_1M0NY._1Y3Y6'
    )

    for container in date_containers:
        # Date label shared by every match in this container.
        match_date = container.find_element(By.CLASS_NAME, '_2m1xv').text

        # All matches listed under this date.
        for match in container.find_elements(By.CLASS_NAME, '_1-ZM3'):
            match_time = match.find_element(By.CLASS_NAME, '_23Pc7').text

            # The first two team elements are taken as home and away.
            teams = match.find_elements(By.CLASS_NAME, '_2iA0F')
            home_team = teams[0].text
            away_team = teams[1].text

            # Region label of the match.
            region = match.find_element(By.CLASS_NAME, '_3Vlq1').text

            data.append([match_date, match_time, home_team, away_team, region])
finally:
    # Always close the browser, even when scraping fails part-way, so no
    # Edge/driver process is left running.
    driver.quit()

# Build the table and write it to Excel. Only plain strings were collected,
# so the browser session is no longer needed at this point.
df = pd.DataFrame(data, columns=['日期', '时间', '主场方', '客场方','赛区'])
df.to_excel(excel_file_path, index=False)

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值