python爬虫天气

import os
import re
import csv
import time
import datetime
import requests

from bs4 import BeautifulSoup
from user_agent import make_headers

import matplotlib.pyplot as plt


class WeatherCrawler:
    """Download historical daily weather for one city and save it as CSV/PNG.

    Monthly pages are fetched from ``lishi.tianqi.com``; each page holds one
    ``<li>`` per day inside ``<ul class="thrui">``.  Parsed values are kept in
    parallel lists (``days``, ``max_ts``, ``min_ts``, ``weathers``, ``winds``,
    ``wind_ls``), optionally appended to ``<path>/<city>_weather.csv`` and
    optionally plotted to ``<path>/<city>_weather.png``.
    """

    # Default end year/month: the date at which the class was defined.
    year = datetime.date.today().year
    month = datetime.date.today().month

    # Font settings needed to render the Chinese labels.  They are applied
    # only while drawing (see draw_fig) instead of mutating the global
    # matplotlib rcParams as a side effect of defining the class.
    FONT_RC = {'font.family': ['sans-serif'], 'font.sans-serif': ['SimSun']}

    CSV_HEADER = ["日期", "星期", "最高温", "最低温", "天气", "风向", "风级"]
    REQUEST_TIMEOUT = 10   # seconds; without a timeout requests can block forever
    MAX_ATTEMPTS = 4       # attempts per month before that month is skipped
    RETRY_DELAY = 1        # seconds to wait between attempts
    # savefig uses dpi=300 and the Agg canvas rejects images of 2**16 pixels
    # or more per side, so the width (inches) is capped: 200 * 300 = 60000.
    MAX_FIG_WIDTH = 200

    def __init__(self, city: str, path: str, year=year, month=month, ALL=False, SAVE_CSV=True, SAVE_FIG=True,
                 SHOW_WEATHER=False, year_start=2011, month_start=1):
        """Configure the crawler and prepare the output directory/CSV file.

        Args:
            city: City identifier used in the URL path (e.g. ``"huhehaote"``).
            path: Output directory; created if missing.
            year: End year (or the single year when ``ALL`` is false).
            month: End month (or the single month when ``ALL`` is false).
            ALL: If true fetch every month from ``year_start``/``month_start``
                through ``year``/``month``; otherwise only ``year``/``month``.
            SAVE_CSV: Write the CSV file (default True).
            SAVE_FIG: Save the PNG figure after ``run`` (default True).
            SHOW_WEATHER: Annotate each day's weather on the figure.
            year_start: First year of the range when ``ALL`` is true.
            month_start: First month of the range when ``ALL`` is true.
        """
        self.base_url = "http://lishi.tianqi.com/"
        self.city = city
        self.path = path
        self.ALL = ALL
        self.SAVE_CSV = SAVE_CSV
        self.SAVE_FIG = SAVE_FIG
        self.SHOW_WEATHER = SHOW_WEATHER
        self.year_start = year_start
        self.month_start = month_start
        self.year = year
        self.month = month
        self.url_list = self.create_url_list()  # every monthly page to fetch
        self.save_path = self.path + "/" + self.city + "_weather"  # output path without extension
        self.create_file()

        self.days = []      # date strings
        self.max_ts = []    # daily highs (int)
        self.min_ts = []    # daily lows (int)
        self.weathers = []  # weather descriptions
        self.winds = []     # wind directions
        self.wind_ls = []   # wind levels

        self.length = None  # figure width in inches, set by run()

    def format_url(self, year, month):
        """Return the page URL for ``year``/``month`` (month zero-padded)."""
        # base_url already ends with "/"; strip it so the path has no "//".
        return f"{self.base_url.rstrip('/')}/{self.city}/{year}{month:02d}.html"

    def create_file(self):
        """Create the output directory and, if SAVE_CSV, a CSV with its header."""
        # The directory is always needed: the PNG is saved there as well.
        os.makedirs(self.path, exist_ok=True)
        if not self.SAVE_CSV:
            return
        # utf-8-sig writes a BOM so spreadsheet tools detect the encoding of
        # the Chinese header instead of relying on the platform default.
        with open(self.save_path + ".csv", "w", newline="", encoding="utf-8-sig") as f:
            csv.writer(f).writerow(self.CSV_HEADER)

    def save_data(self, data):
        """Append one row to the CSV file (no-op when SAVE_CSV is false)."""
        self._append_rows([data])

    def _append_rows(self, rows):
        """Append ``rows`` to the CSV in a single open/close; honours SAVE_CSV."""
        if not self.SAVE_CSV or not rows:
            return
        # Plain utf-8 here: the BOM was already written with the header.
        with open(self.save_path + ".csv", "a", newline="", encoding="utf-8") as f:
            csv.writer(f).writerows(rows)

    def create_url_list(self):
        """Build the list of monthly page URLs to fetch.

        With ``ALL`` false only ``year``/``month`` is returned.  With ``ALL``
        true the range runs from ``year_start``/``month_start`` through
        ``year``/``month`` inclusive; a start after the end yields ``[]``.
        """
        if not self.ALL:
            return [self.format_url(self.year, self.month)]

        url_list = []
        for year in range(self.year_start, self.year + 1):
            first = self.month_start if year == self.year_start else 1
            last = self.month if year == self.year else 12
            url_list.extend(self.format_url(year, m) for m in range(first, last + 1))
        return url_list

    def request_url(self, url):
        """Fetch ``url`` and hand the HTML to ``handle_request``.

        Raises whatever ``requests`` raises on timeout/connection/HTTP errors,
        plus any parsing error from ``handle_request``; ``run`` retries on these.
        """
        headers = make_headers()
        resp = requests.get(url, headers=headers, timeout=self.REQUEST_TIMEOUT)
        resp.raise_for_status()  # fail fast on 4xx/5xx instead of parsing an error page
        self.handle_request(resp.text)

    def draw_fig(self):
        """Plot daily highs/lows and save the figure as ``<save_path>.png``."""
        if not self.days:
            print("没有可绘制的数据,已跳过绘图!")
            return

        if self.ALL:
            title = f"{self.city} {self.year_start}-{self.month_start}至{self.year}-{self.month}天气变化图"
        else:
            title = f"{self.city} {self.year}-{self.month}天气变化图"

        # run() normally sets self.length; fall back to the single-month
        # width if draw_fig is called directly, and keep the canvas within
        # what the renderer can allocate.
        width = 10 if self.length is None else self.length
        width = max(1, min(width, self.MAX_FIG_WIDTH))

        with plt.rc_context(self.FONT_RC):
            fig = plt.figure(figsize=(width, 12))
            try:
                plt.xlabel('日期')
                plt.xticks(rotation=90)
                plt.ylabel('温度')
                plt.title(title)
                ax = plt.subplot()
                ax.spines['bottom'].set_position(('data', 0))  # x axis drawn at y = 0
                plt.plot(self.days, self.max_ts, '-', label='最高温', color='blue', linewidth=1.0)
                plt.plot(self.days, self.min_ts, '-', label='最低温', color='red', linewidth=1.0)
                if self.ALL:
                    # Over a multi-month range only label the first day of each month.
                    ticks = [d for d in self.days if d.split("-")[-1] == "01"]
                    plt.xticks(ticks=ticks, rotation=90)
                if self.SHOW_WEATHER:
                    plt.scatter(self.days, self.max_ts)
                    for i, (day, y) in enumerate(zip(self.days, self.max_ts)):
                        # Compare with the previous day (the first day has none)
                        # to place the label above a rise and below otherwise.
                        prev = self.max_ts[i - 1] if i > 0 else y
                        offset = 1 if y > prev else -1
                        plt.annotate(self.weathers[i], xy=(day, y), xytext=(day, y + offset))

                plt.legend(loc='upper right')
                plt.savefig(self.save_path + ".png", dpi=300)
            finally:
                plt.close(fig)  # release the figure; pyplot keeps it alive otherwise

    @staticmethod
    def _parse_day(text):
        """Split one ``<li>`` text block into its seven string fields.

        Expects newline-separated lines: ``"<date> <weekday>"``, high, low,
        weather, ``"<wind direction> <wind level>"`` (after a leading newline).
        Returns ``(date, week, max_t, min_t, weather, wind_from, wind_l)``.
        Raises IndexError if the block does not have that layout.
        """
        data = text.split('\n')
        date_parts = data[1].split(" ")
        wind_parts = data[5].split(" ")
        return (
            date_parts[0],
            date_parts[1],
            data[2].split("℃")[0],
            data[3].split("℃")[0],
            data[4],
            wind_parts[0],
            wind_parts[1],
        )

    def handle_request(self, resp):
        """Parse one monthly HTML page and store/save all of its days.

        The whole page is parsed and validated before anything is stored, so
        a malformed page raises without leaving partial data behind; this
        keeps a retry in ``run`` from duplicating rows.

        Raises:
            ValueError: the ``<ul class="thrui">`` list is missing, or a
                temperature is not an integer.
            IndexError: a day entry does not have the expected layout.
        """
        soup = BeautifulSoup(resp, "html.parser")
        thrui = soup.find("ul", attrs={"class": "thrui"})
        if thrui is None:
            raise ValueError('weather list <ul class="thrui"> not found in page')

        rows = [self._parse_day(li.text) for li in thrui.find_all("li")]
        highs = [int(row[2]) for row in rows]
        lows = [int(row[3]) for row in rows]

        # Everything parsed cleanly: write first (the only step that can
        # still fail), then commit to the in-memory lists.
        self._append_rows(rows)
        self.days.extend(row[0] for row in rows)
        self.max_ts.extend(highs)
        self.min_ts.extend(lows)
        self.weathers.extend(row[4] for row in rows)
        self.winds.extend(row[5] for row in rows)
        self.wind_ls.extend(row[6] for row in rows)

    def run(self):
        """Fetch every URL in ``url_list`` with retries, then draw if SAVE_FIG.

        Each month is attempted up to ``MAX_ATTEMPTS`` times; a month that
        keeps failing is skipped and the figure width reduced by one.
        """
        total = 0
        n = len(self.url_list)
        self.length = n * 4 if n > 1 else 10

        for url in self.url_list:
            # The page name is "<yyyymm>.html"; anchor on it so digits
            # elsewhere in the URL cannot be picked up by mistake.
            match = re.search(r'(\d+)\.html$', url)
            label = match.group(1) if match else url
            print(f"正在请求: {label}的天气状况!", end="")
            for attempt in range(1, self.MAX_ATTEMPTS + 1):
                try:
                    self.request_url(url)
                except Exception as err:  # network and parsing errors are both retried
                    # Name the error so repeated failures can be diagnosed.
                    print(f"请求失败({type(err).__name__}),", end="")
                    if attempt == self.MAX_ATTEMPTS:
                        print("已跳过!", end="")
                        self.length -= 1
                    else:
                        print(f"正在尝试第{attempt + 1}次;", end="")
                        time.sleep(self.RETRY_DELAY)
                else:
                    total += 1
                    break
            print()
        print(f"已完成!总计{total}个月份记录!")
        if self.SAVE_FIG:
            self.draw_fig()


if __name__ == '__main__':
    # Example run: crawl Hohhot from 2023-01 through 2024-01 into ./weather,
    # saving the figure with per-day weather annotations.
    # Every keyword below has a default and may be omitted.
    options = dict(
        year_start=2023,
        month_start=1,
        year=2024,
        month=1,
        ALL=True,
        SAVE_FIG=True,
        SHOW_WEATHER=True,
    )
    crawler = WeatherCrawler("huhehaote", "weather", **options)
    crawler.run()

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值