python 爬取中国天气网城市温度数据

先是爬取代码

代码功能都很简单,只是想作为以后的模板,方便直接 copy 使用

import requests
from bs4 import BeautifulSoup
import matplotlib.pyplot as plt

# Module-level accumulator: parse_page() appends one
# {'城市': ..., '最低气温': ...} dict per parsed table row, and main() sorts it.
ALL_DATA =  []

#网页的解析函数
def parse_page(url, timeout=10):
    """Scrape one forecast page and record each row's city and minimum temperature.

    Downloads ``url``, looks up the first ``<div class="conMidtab">`` element and,
    for every table inside it, appends ``{'城市': <str>, '最低气温': <int>}`` to
    the module-level ``ALL_DATA`` list for each data row.

    Pages without the expected container, rows with too few cells, and rows
    whose temperature cell is empty or not an integer are skipped instead of
    aborting the whole scrape.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before giving up, so a stalled
            connection cannot hang the script forever.

    Raises:
        requests.RequestException: On connection failure, timeout, or an HTTP
            error status (via ``raise_for_status``).
    """
    headers = {'User-Agent': 'Mozilla/5.0(Windows NT 10.0;Win64;x64) AppleWebkit/537.36 (KHTML, like Geoko) Chrome/70.0.3538.102 safari/537.36'}
    response = requests.get(url, headers=headers, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of trying to parse an error page.
    response.raise_for_status()
    text = response.content.decode('utf-8')
    soup = BeautifulSoup(text, 'html5lib')

    con_mid_tab = soup.find('div', class_='conMidtab')
    if con_mid_tab is None:
        # The page does not have the expected layout; nothing to collect.
        return

    for table in con_mid_tab.find_all('table'):
        # The first two rows of every table are skipped (presumably header
        # rows -- verify against the page markup).
        for index, tr in enumerate(table.find_all('tr')[2:]):
            tds = tr.find_all('td')
            if len(tds) < 2:
                # Both tds[1] and tds[-2] below need at least two cells.
                continue
            # The first remaining row takes the city from the second cell
            # (presumably its first cell holds the province name -- TODO confirm).
            city_td = tds[1] if index == 0 else tds[0]
            # stripped_strings is a generator; only its first item is needed.
            city = next(city_td.stripped_strings, None)
            min_temp = next(tds[-2].stripped_strings, None)
            if city is None or min_temp is None:
                continue
            try:
                lowest = int(min_temp)
            except ValueError:
                # Non-numeric temperature cell: skip this row only.
                continue
            ALL_DATA.append({'城市': city, '最低气温': lowest})
			
def main():
    """Scrape every listed page and return the ten lowest-temperature records.

    Each URL is passed to ``parse_page``, which fills the module-level
    ``ALL_DATA`` list. That list is then sorted in place by the
    ``'最低气温'`` value (ascending) and its first ten entries are returned.
    """
    # Pages to request.
    page_urls = [
        'http://www.weather.com.cn/textFC/hb.shtml',
        'http://www.weather.com.cn/textFC/db.shtml',
        'http://www.weather.com.cn/textFC/hz.shtml',
        'http://www.weather.com.cn/textFC/hn.shtml',
        'http://www.weather.com.cn/textFC/hd.shtml',
        'http://www.weather.com.cn/textFC/xb.shtml',
        'http://www.weather.com.cn/textFC/xn.shtml',
        'http://www.weather.com.cn/textFC/gat.shtml'
    ]
    for page_url in page_urls:
        parse_page(page_url)

    def lowest_temperature(record):
        """Sort key: the record's minimum temperature."""
        return record['最低气温']

    # Sort the shared list in place, coldest first, then keep the first ten.
    ALL_DATA.sort(key=lowest_temperature)
    return ALL_DATA[:10]

if __name__ == '__main__':
    # Collect the records returned by main() and split them into parallel
    # lists of bar labels (city names) and bar heights (minimum temperatures).
    datas = main()
    city = [record['城市'] for record in datas]
    temp = [record['最低气温'] for record in datas]

    # One bar per record, labelled with its city name.
    bar_positions = range(len(city))
    plt.bar(bar_positions, temp, tick_label=city)
    plt.show()

这是简单数据可视化代码

import pandas as pd
import matplotlib.pyplot as plt
from pyecharts import options as opts
from pyecharts.charts import Bar

# Input dataset and the two CSV files derived from it.
path = './china_city_AQI.csv'
path_AQI_top = './china_city_AQI_top10.csv'
path_AQI_tail = './china_city_AQI_tail10.csv'

data_AQI = pd.read_csv(path)

# Summary statistics of the 'AQI' column: maximum, minimum and mean.
AQI_max = data_AQI['AQI'].max()
AQI_min = data_AQI['AQI'].min()
AQI_mean = data_AQI['AQI'].mean()

# The ten rows with the smallest AQI values (ascending sort), saved to CSV.
AQI_top = data_AQI.sort_values(by='AQI').head(10)
AQI_top.to_csv(path_AQI_top)

# The ten rows with the largest AQI values (descending sort), saved to CSV.
AQI_tail = data_AQI.sort_values(by='AQI', ascending=False).head(10)
AQI_tail.to_csv(path_AQI_tail)

def matplotlib_top50():
    """Show a matplotlib bar chart of the 50 rows with the smallest AQI.

    Reads the module-level ``data_AQI`` frame, sorts it ascending by ``'AQI'``,
    keeps the first 50 rows, and plots each row's ``'AQI'`` value as a bar
    labelled with its ``'City'`` value.
    """
    lowest_rows = data_AQI.sort_values(by='AQI')[:50]
    # Gather (city, aqi) pairs row by row, then split them into two lists.
    pairs = [(row['City'], row['AQI']) for _, row in lowest_rows.iterrows()]
    city_labels = [name for name, _ in pairs]
    aqi_values = [value for _, value in pairs]

    plt.bar(range(len(lowest_rows)), aqi_values, tick_label=city_labels)
    plt.show()
    
def pyecharts_top50() -> Bar:
    """Build a pyecharts bar chart of the 50 rows with the smallest AQI.

    Reads the module-level ``data_AQI`` frame, sorts it ascending by ``'AQI'``
    and keeps the first 50 rows. Cities go on the x axis and AQI values on the
    y axis; value labels are hidden and mark lines are added for the minimum,
    maximum and average.

    Returns:
        The configured ``Bar`` chart (not yet rendered).
    """
    lowest_rows = data_AQI.sort_values(by='AQI')[:50]
    # Gather (city, aqi) pairs row by row, then split them into two lists.
    pairs = [(row['City'], row['AQI']) for _, row in lowest_rows.iterrows()]
    city_names = [name for name, _ in pairs]
    aqi_values = [value for _, value in pairs]

    # Chart options are built up front so the chain below stays short.
    title = opts.TitleOpts(title="城市AQI值top50")
    hidden_labels = opts.LabelOpts(is_show=False)
    mark_lines = opts.MarkLineOpts(
        data=[
            opts.MarkLineItem(type_="min", name="最小值"),
            opts.MarkLineItem(type_="max", name="最大值"),
            opts.MarkLineItem(type_="average", name="平均值"),
        ]
    )

    chart = (
        Bar()
        .add_xaxis(city_names)
        .add_yaxis("中国AQI", aqi_values)
        .set_global_opts(title_opts=title)
        .set_series_opts(label_opts=hidden_labels, markline_opts=mark_lines)
    )
    return chart
if __name__ == '__main__':
    # Build the chart, then render it with pyecharts' default arguments.
    chart = pyecharts_top50()
    chart.render()

  • 0
    点赞
  • 4
    收藏
    觉得还不错? 一键收藏
  • 1
    评论
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值