天气爬取程序

以下是911天气爬取的程序
爬取内容包括时间,图片,天气,温度,湿度,风力,风级,降水量,体感温度,云量,不过最近911没数据了,于是又换了天气爬取网站,对应文章:https://blog.csdn.net/weixin_43128028/article/details/108391587

import requests
from  bs4 import BeautifulSoup
from collections import defaultdict
from dateutil.relativedelta import relativedelta
from datetime import datetime


class weather_data:
    def __init__(self,city,start_year,end_year,start_month=1,end_month = 12):
        """

        :param city: 需爬取的城市全拼
        :param start_year: 爬取开始年份
        :param end_year: 爬取结束年份
        :param start_month: 爬取开始月份
        :param end_month: 爬取结束月份
        """
        self.city = city
        self.start_time = datetime.strptime(f"{start_year}-{start_month}",'%Y-%m')
        self.end_time = datetime.strptime(f"{end_year}-{end_month}",'%Y-%m')

    def _get_original_html(self):
        """
            网页爬取
        """

        url = f"https://tianqi.911cha.com/{self.city}/{self.start_time.year}-{self.start_time.month}.html"
        print(url)
        header = {
            "User-Agent": "……"}#填写自己浏览器内容
        response = requests.get(url, headers=header)
        return response.content.decode("utf-8")

    def _parse_data(self):
        #一次解析一个月
        soup = BeautifulSoup(self.html,"html.parser")
        data = defaultdict(dict)
        for n, tr in enumerate(soup.find_all("tr")):
            if n == 0:
                continue

            if n%2!=0:
                date = tr.find("a").get_text()
                #创建日期字典
                #[时间,图片,天气,温度,湿度,风力,风级,降水量,体感温度,云量]
                data[date]["Day"] = {str(self.start_time.year)+'-'+key:con.get_text() for key,con in zip(['time','image','weather','temperature','humidity','wind_force','wind_scale',
                                                                 'precipitation','sendible_temperature','cloud_amount'],tr.find_all("td"))}

            else:
                data[date]["Night"] = {key: con.get_text() for key, con in zip(
                    ['time', 'image', 'weather', 'temperature', 'humidity', 'wind_force', 'wind_scale',
                     'precipitation', 'sendible_temperature', 'cloud_amount'], tr.find_all("td"))}
        return data

    def main(self):

        data = []
        while self.start_time<=self.end_time:
            self.html = self._get_original_html()
            data.append(self._parse_data())
            self.start_time+=relativedelta(months=1)

        return data



if __name__ == "__main__":
    T = weather_data(city="jinan",start_year=2017,end_year=2020,start_month=1,end_month=2)
    with open('weather_dict.txt','w',encoding='UTF-8') as f:
        for line in T.main():
            f.writelines(str(line))
评论 2
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值