Selenium爬取动态网页

基于静态网页爬取的代码进行修改,改用 Selenium 爬取动态网页。

from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
import time
import re

def fetch_weather_data(url, driver_path='path/to/chromedriver', wait_seconds=5):
    """Scrape per-day forecast entries from a JavaScript-rendered weather page.

    Opens ``url`` in headless Chrome, waits a fixed time for the page to
    render, then reads every element with class ``weatherWrap``.

    Args:
        url: Address of the forecast page to load.
        driver_path: Filesystem path to the chromedriver executable. The
            default is a placeholder and must be replaced with a real path.
        wait_seconds: Seconds to sleep after navigation so that dynamically
            inserted content has time to appear.

    Returns:
        A list of dicts with the keys ``date``, ``weather``,
        ``wind_direction``, ``wind_volume``, ``high_temp`` and ``low_temp``.
        The two temperature values are ``'N/A'`` when fewer than two
        temperature elements are found inside an entry. The list is empty
        when the page contains no ``weatherWrap`` elements.

    Raises:
        selenium.common.exceptions.NoSuchElementException: If an entry lacks
            a ``date``, ``desc``, ``windd`` or ``winds`` child element.
        selenium.common.exceptions.WebDriverException: If the browser cannot
            be started or the page cannot be loaded.
    """
    # Configure Chrome to run without a visible window.
    chrome_options = Options()
    chrome_options.add_argument("--headless")
    chrome_options.add_argument("--disable-gpu")
    chrome_options.add_argument("--no-sandbox")

    # Start the browser.
    service = Service(driver_path)
    driver = webdriver.Chrome(service=service, options=chrome_options)

    weather_data = []
    try:
        driver.get(url)
        time.sleep(wait_seconds)  # Give client-side scripts time to render.

        for wrap in driver.find_elements(By.CLASS_NAME, 'weatherWrap'):
            date = wrap.find_element(By.CLASS_NAME, 'date').text.strip()
            weather_desc = wrap.find_element(By.CLASS_NAME, 'desc').text.strip()
            wind_direction = wrap.find_element(By.CLASS_NAME, 'windd').text.strip()
            wind_volume = wrap.find_element(By.CLASS_NAME, 'winds').text.strip()

            # The previous selector, div[class*="tmp.tmp_lte"], looked for the
            # literal text "tmp.tmp_lte" inside the class attribute. Class
            # tokens are separated by spaces, not dots, so it never matched
            # and both temperatures were always 'N/A'. Match the "tmp" class
            # token plus any class containing "tmp_lte" instead.
            # NOTE(review): presumes the page marks temperatures as
            # class="tmp tmp_lte_<n>" - confirm against the live markup.
            temperature_tags = wrap.find_elements(
                By.CSS_SELECTOR, 'div.tmp[class*="tmp_lte"]'
            )
            if len(temperature_tags) >= 2:
                high_temp = temperature_tags[0].text.strip()
                low_temp = temperature_tags[1].text.strip()
            else:
                high_temp = 'N/A'
                low_temp = 'N/A'

            weather_data.append({
                'date': date,
                'weather': weather_desc,
                'wind_direction': wind_direction,
                'wind_volume': wind_volume,
                'high_temp': high_temp,
                'low_temp': low_temp,
            })
    finally:
        # Always shut the browser down, even if scraping failed part-way.
        driver.quit()

    return weather_data

def save_to_file(data, filename):
    """Write every item of ``data`` to ``filename``, one per line, as UTF-8.

    Each item is rendered with its ``str()`` form followed by a newline.
    An existing file at ``filename`` is overwritten.
    """
    with open(filename, 'w', encoding='utf-8') as out:
        # Build the generator inside the ``with`` so the file is opened
        # (and truncated) before ``data`` is touched.
        out.writelines(f"{record}\n" for record in data)

if __name__ == "__main__":
    # Forecast page to scrape.
    weather_url = "http://www.nmc.cn/publish/forecast/ASH/pudong.html"

    weather_data = fetch_weather_data(weather_url)

    if not weather_data:
        # Nothing was scraped: report it and leave the output file untouched.
        print("No weather data found or failed to retrieve the page.")
    else:
        # Persist the records, then echo them to the console.
        save_to_file(weather_data, 'weather.txt')
        print("Weather data has been saved to weather.txt")
        for data in weather_data:
            print(data)

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值