import requests
from lxml import etree
import csv
import pandas as pd
from pyecharts import options as opts
from pyecharts.charts import Bar, Timeline
class TQ:
    '''
    Scrape one year of daily Beijing weather from lishi.tianqi.com, store it
    as CSV and render a month-by-month bar-chart timeline of weather types.

    Parsed rows accumulate in ``self.weather_info`` as dicts keyed by
    "日期", "最高气温", "最低气温" and "天气".
    '''

    def __init__(self):
        # One dict per parsed day; filled by get_data(), consumed by data_write().
        self.weather_info = []
        # Browser-like headers sent with every page request.
        self.headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36',
            'Referer': 'https://lishi.tianqi.com/beijing/202302.html',
        }

    def req(self, year=2023, timeout=10):
        '''
        Download the Beijing history page for every month of a year.

        :param year: year to fetch (int or str); interpolated into the URL
        :param timeout: seconds to wait per request before giving up, so a
            stalled connection cannot hang the whole run
        :return: list of 12 HTML strings, January first
        '''
        pages = []
        for month in range(1, 13):
            url = f'https://lishi.tianqi.com/beijing/{year}{month:02d}.html'
            response = requests.get(url, headers=self.headers, timeout=timeout)
            pages.append(response.text)
        return pages

    @staticmethod
    def _parse_row(texts):
        '''
        Turn the text nodes of one day's list item into a record dict.

        :param texts: strings in page order: date, high, low, weather, ...
        :return: dict with the four CSV fields, or None when fewer than four
            non-blank values are present (e.g. a layout/placeholder row)
        '''
        # Drop whitespace-only nodes so stray newlines cannot shift the columns.
        cells = [t.strip() for t in texts if t and t.strip()]
        if len(cells) < 4:
            return None
        return {
            # The date cell may carry a trailing weekday; keep the date only.
            '日期': cells[0].split()[0],
            '最高气温': cells[1],
            '最低气温': cells[2],
            '天气': cells[3],
        }

    def get_data(self, text):
        '''
        Parse one month page and append its daily records to weather_info.

        Pages that are empty or cannot be parsed, and rows that do not carry
        the four expected values, are skipped instead of raising.

        :param text: HTML source of a month page
        :return: None
        '''
        if not text or not text.strip():
            return
        html = etree.HTML(text)
        if html is None:
            return
        # Every <li> under the "thrui" list is expected to hold one day.
        for item in html.xpath('//ul[@class="thrui"]//li'):
            record = self._parse_row(item.xpath('.//div//text()'))
            if record is not None:
                self.weather_info.append(record)

    def data_write(self, filename='wetather.csv'):
        '''
        Write the collected weather records to a UTF-8 CSV file.

        :param filename: destination path; overwritten if it exists
        :return: None
        '''
        key_list = ["日期", "最高气温", "最低气温", "天气"]
        with open(filename, 'w', newline='', encoding='utf-8') as csvfile:
            writer = csv.DictWriter(csvfile, fieldnames=key_list)
            writer.writeheader()
            writer.writerows(self.weather_info)

    @staticmethod
    def _monthly_weather_counts(df):
        '''
        Count how many days of each weather type occurred in each month.

        :param df: DataFrame with a "日期" (date) and a "天气" (weather) column
        :return: list of ``(month, rows)`` in ascending month order, where
            ``rows`` is a list of ``[weather, count]`` sorted by count ascending
        '''
        months = pd.to_datetime(df['日期']).dt.month
        counts = (df.assign(month=months)
                  .groupby(['month', '天气'])
                  .size()
                  .reset_index(name='count'))
        result = []
        for month, group in counts.groupby('month'):
            rows = (group[['天气', 'count']]
                    .sort_values(by='count', ascending=True)
                    .values.tolist())
            result.append((int(month), rows))
        return result

    def draw_weather(self, origin_filename, result_file, title='2023年天气'):
        '''
        Render a timeline of horizontal bar charts, one frame per month.

        :param origin_filename: CSV produced by data_write()
        :param result_file: path of the HTML file to render
        :param title: chart title shown on every frame
        :return: None
        '''
        df = pd.read_csv(origin_filename, encoding='utf-8')
        timeline = Timeline()
        timeline.add_schema(play_interval=1000, is_auto_play=True)
        for month, rows in self._monthly_weather_counts(df):
            bar = Bar()
            bar.add_xaxis([row[0] for row in rows])
            bar.add_yaxis('', [row[1] for row in rows])
            bar.reversal_axis()
            # Series options only affect series that already exist, so they
            # must be set after add_yaxis() for the label position to apply.
            bar.set_series_opts(label_opts=opts.LabelOpts(position='right'))
            bar.set_global_opts(
                title_opts=opts.TitleOpts(title=title),
                tooltip_opts=opts.TooltipOpts(
                    is_show=True, trigger="axis", axis_pointer_type="shadow"),
            )
            timeline.add(bar, str(month))
        timeline.render(result_file)

    def main(self, year='2023'):
        '''
        Run the full pipeline for one year: download, parse, save, chart.

        :param year: year to process; also used in the output file names
        :return: None
        '''
        # Start from a clean slate so a second run does not mix in old rows.
        self.weather_info = []
        for page in self.req(year):
            self.get_data(page)
        origin_filename = f'{year}_wetather.csv'
        result_filename = f'{year}_wetather.html'
        self.data_write(origin_filename)
        self.draw_weather(origin_filename, result_filename, title=f'{year}年天气')
# Script entry point: run the whole scrape-and-plot pipeline with defaults.
if __name__ == '__main__':
    TQ().main()
# Python web-scraping practice: weather data
# (Source article footer: latest recommended article published 2024-07-22 15:15:29)