python 爬虫实践-天气数据

最新推荐文章于 2024-07-22 15:15:29 发布

论一个测试的养成

最新推荐文章于 2024-07-22 15:15:29 发布

阅读量94

点赞数 2

文章标签： python 爬虫 开发语言

本文链接：https://blog.csdn.net/weixin_44517891/article/details/138116758

版权

import requests
from lxml import etree
import csv
import pandas as pd
from pyecharts import options as opts
from pyecharts.charts import Bar, Timeline

class TQ:
    '''
    Scrape one year of daily Beijing weather history from lishi.tianqi.com,
    store it as CSV and render a per-month bar-chart timeline with pyecharts.

    Typical use is ``TQ().main(year)``; the individual steps (``req``,
    ``get_data``, ``data_write``, ``draw_weather``) can also be called directly.
    '''

    def __init__(self):
        # Parsed daily records; each is a dict keyed by the CSV column names.
        self.weather_info = []
        # Browser-like headers sent with every page request.
        self.headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36',
            'Referer': 'https://lishi.tianqi.com/beijing/202302.html',
        }

    def req(self, year=2023, timeout=10):
        '''
        Download the twelve monthly history pages of Beijing for one year.

        :param year: year to fetch (int or str); interpolated into the URL as-is
        :param timeout: seconds handed to ``requests.get`` so that a stalled
            connection cannot block the run forever
        :return: list of twelve HTML strings, January first
        '''
        data_html = []
        for month in range(1, 13):
            url = f'https://lishi.tianqi.com/beijing/{year}{month:02d}.html'
            res = requests.get(url, headers=self.headers, timeout=timeout).text
            data_html.append(res)
        return data_html

    def get_data(self, text):
        '''
        Parse one monthly page and append its daily records to
        ``self.weather_info``.

        Each ``<li>`` below ``<ul class="thrui">`` is treated as one day; the
        first four text nodes inside its ``<div>`` elements are read as date,
        highest temperature, lowest temperature and weather, in that order.
        Rows with fewer than four text nodes are skipped instead of raising.

        :param text: HTML source of a monthly page
        :return: None
        '''
        if not text:
            return
        html = etree.HTML(text)
        # NOTE(review): etree.HTML appears to return None when no element can
        # be parsed out of the input; treat that as "no rows on this page".
        if html is None:
            return
        for row in html.xpath('//ul[@class="thrui"]//li'):
            fields = row.xpath('.//div//text()')
            if len(fields) < 4:
                continue
            self.weather_info.append({
                # Keep only the part of the first node before the first space.
                '日期': fields[0].split(' ')[0],
                '最高气温': fields[1],
                '最低气温': fields[2],
                '天气': fields[3],
            })

    def data_write(self, filename='wetather.csv'):
        '''
        Write every record in ``self.weather_info`` to a UTF-8 CSV file.

        :param filename: path of the CSV file to create or overwrite
        :return: None
        '''
        with open(filename, 'w', newline='', encoding='utf-8') as csvfile:
            key_list = ["日期", "最高气温", "最低气温", "天气"]
            csv_f = csv.DictWriter(csvfile, fieldnames=key_list)
            csv_f.writeheader()
            csv_f.writerows(self.weather_info)

    def draw_weather(self, origin_filename, result_file):
        '''
        Render a timeline of horizontal bar charts, one per month, showing how
        many days each weather type occurred.

        :param origin_filename: CSV produced by ``data_write``
        :param result_file: path of the HTML file the timeline is rendered to
        :return: None
        '''
        df = pd.read_csv(origin_filename, encoding='utf-8')
        df['日期'] = pd.to_datetime(df['日期'])
        df['month'] = df['日期'].dt.month

        # Build the title from the years present in the data rather than a
        # fixed "2023", so charts for other years are labelled correctly.
        years = sorted(df['日期'].dt.year.dropna().unique())
        if not years:
            title = '天气'
        elif len(years) == 1:
            title = f'{int(years[0])}年天气'
        else:
            title = f'{int(years[0])}-{int(years[-1])}年天气'

        df_egg = df.groupby(['month', '天气']).size().reset_index()
        df_egg.columns = ['month', 'tianqi', 'count']

        timeline = Timeline()
        timeline.add_schema(play_interval=1000, is_auto_play=True)
        for month in df_egg['month'].unique():
            monthly = (df_egg[df_egg['month'] == month]
                       .sort_values(by='count', ascending=True))
            bar = Bar()
            bar.add_xaxis(monthly['tianqi'].tolist())
            bar.add_yaxis('', monthly['count'].tolist())
            bar.reversal_axis()
            # Series options only affect series that already exist, so this
            # has to come after add_yaxis for the label position to apply.
            bar.set_series_opts(label_opts=opts.LabelOpts(position='right'))
            bar.set_global_opts(
                title_opts=opts.TitleOpts(title=title),
                tooltip_opts=opts.TooltipOpts(
                    is_show=True, trigger="axis", axis_pointer_type="shadow"),
            )
            timeline.add(bar, str(month))
        timeline.render(result_file)

    def main(self, year='2023'):
        '''
        Run the whole pipeline for one year: download, parse, write
        ``{year}_wetather.csv`` and render ``{year}_wetather.html``.

        :param year: year to process (str or int)
        :return: None
        '''
        # Start from a clean list so that calling main() again on the same
        # instance does not mix an earlier run's rows into this year's CSV.
        self.weather_info = []
        for text in self.req(year):
            self.get_data(text)
        origin_filename = f'{year}_wetather.csv'
        result_filename = f'{year}_wetather.html'
        self.data_write(origin_filename)
        self.draw_weather(origin_filename, result_filename)

# Script entry point: run the full scrape-and-chart pipeline with the
# default year when this file is executed directly.
if __name__ == '__main__':
    TQ().main()