Selenium爬取动态网页

yuwu.

已于 2024-06-26 14:40:46 修改

阅读量261

点赞数 3

文章标签： selenium 测试工具

于 2024-06-26 14:28:16 首次发布

本文链接：https://blog.csdn.net/m0_66302373/article/details/139988334

版权

本文在静态网页爬虫代码的基础上进行修改，改用 Selenium 驱动浏览器，爬取由 JavaScript 动态加载的网页内容。

from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
import time
import re

def fetch_weather_data(url, driver_path='path/to/chromedriver', wait_seconds=5):
    """Scrape the per-day forecast cards from a JavaScript-rendered page.

    A headless Chrome session loads ``url``, waits until at least one
    element with class ``weatherWrap`` is present, and reads the date,
    weather description, wind direction, wind strength and the high/low
    temperatures out of every such element.

    Args:
        url: Address of the forecast page to load.
        driver_path: Filesystem path of the chromedriver executable handed
            to ``Service``. The default is the placeholder used by the
            original script and must be replaced with a real path.
        wait_seconds: Maximum number of seconds to wait for the forecast
            cards to appear before giving up.

    Returns:
        A list of dicts with the keys ``date``, ``weather``,
        ``wind_direction``, ``wind_volume``, ``high_temp`` and ``low_temp``.
        The list is empty when no forecast card shows up within
        ``wait_seconds``.

    Raises:
        selenium.common.exceptions.NoSuchElementException: If a forecast
            card lacks one of the ``date``/``desc``/``windd``/``winds``
            child elements.
    """
    # Local imports: these names are not part of the file-level import block.
    from selenium.common.exceptions import TimeoutException
    from selenium.webdriver.support import expected_conditions as EC
    from selenium.webdriver.support.ui import WebDriverWait

    # Configure Chrome to run without a visible window.
    chrome_options = Options()
    chrome_options.add_argument("--headless")
    chrome_options.add_argument("--disable-gpu")
    chrome_options.add_argument("--no-sandbox")

    # Start the browser.
    service = Service(driver_path)
    driver = webdriver.Chrome(service=service, options=chrome_options)

    weather_data = []
    try:
        driver.get(url)

        # Wait explicitly for the cards instead of sleeping a fixed time:
        # returns as soon as they exist and gives up after wait_seconds.
        try:
            weather_wraps = WebDriverWait(driver, wait_seconds).until(
                EC.presence_of_all_elements_located((By.CLASS_NAME, 'weatherWrap'))
            )
        except TimeoutException:
            # No card appeared in time; report "no data" like an empty page.
            return weather_data

        for wrap in weather_wraps:
            date = wrap.find_element(By.CLASS_NAME, 'date').text.strip()
            weather_desc = wrap.find_element(By.CLASS_NAME, 'desc').text.strip()
            wind_direction = wrap.find_element(By.CLASS_NAME, 'windd').text.strip()
            wind_volume = wrap.find_element(By.CLASS_NAME, 'winds').text.strip()

            # Match on the "tmp" class token. The previous selector,
            # div[class*="tmp.tmp_lte"], searched the class attribute for the
            # literal text "tmp.tmp_lte"; class tokens are separated by
            # spaces, so it never matched and temperatures were always 'N/A'.
            # NOTE(review): assumes the temperature divs carry a "tmp" class
            # (e.g. class="tmp tmp_lte_30") - confirm against the live page.
            temperature_tags = wrap.find_elements(By.CSS_SELECTOR, 'div.tmp')
            if len(temperature_tags) >= 2:
                high_temp = temperature_tags[0].text.strip()
                low_temp = temperature_tags[1].text.strip()
            else:
                high_temp = 'N/A'
                low_temp = 'N/A'

            weather_data.append({
                'date': date,
                'weather': weather_desc,
                'wind_direction': wind_direction,
                'wind_volume': wind_volume,
                'high_temp': high_temp,
                'low_temp': low_temp,
            })
    finally:
        # Always shut the browser down, even when scraping fails.
        driver.quit()

    return weather_data

def save_to_file(data, filename):
    """Write each item of ``data`` on its own line of a UTF-8 text file.

    Args:
        data: Iterable of items; each one is formatted with an f-string
            (i.e. ``format(item)``) and followed by a newline.
        filename: Path of the file to create or overwrite.
    """
    with open(filename, 'w', encoding='utf-8') as out:
        out.writelines(f"{record}\n" for record in data)

if __name__ == "__main__":
    # Script entry point: scrape the forecast page, then persist and echo
    # whatever records came back.
    weather_url = "http://www.nmc.cn/publish/forecast/ASH/pudong.html"

    weather_data = fetch_weather_data(weather_url)

    if not weather_data:
        # An empty result means nothing was scraped.
        print("No weather data found or failed to retrieve the page.")
    else:
        save_to_file(weather_data, 'weather.txt')
        print("Weather data has been saved to weather.txt")
        # One record per line, same as printing each dict in a loop.
        print(*weather_data, sep="\n")