基于大数据爬虫的全国空气质量指数实时数据可视化

from lxml import etree
import urllib
import urllib.request
import xlwt
import pandas as pd
from pyecharts import Geo
import matplotlib.pyplot as plt
import matplotlib as mpl

def getpage(url, timeout=30, encoding="gbk"):
    """Download *url* and return the response body decoded as text.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds passed to ``urlopen`` as its ``timeout`` argument so
            an unresponsive server cannot block the script indefinitely.
        encoding: Codec used to decode the raw response bytes. Defaults to
            "gbk", the codec this function has always used.

    Returns:
        The decoded page body as a ``str``.

    Raises:
        UnicodeDecodeError: If the body is not valid in *encoding*.
        Any exception raised by ``urllib.request.urlopen`` is propagated
        unchanged.
    """
    req = urllib.request.Request(url)
    # Send a desktop-browser User-Agent; replace it with your own if needed.
    req.add_header('User-Agent', "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36")
    # The context manager closes the response even if read/decode fails.
    with urllib.request.urlopen(req, timeout=timeout) as resp:
        return resp.read().decode(encoding)

def getdata(data):
    """Extract the page's list text and group it into rows of five items.

    Args:
        data: HTML markup of the page as a string.

    Returns:
        A list of lists. Each inner list holds up to five consecutive text
        nodes in document order; the final one is shorter when the number of
        text nodes is not a multiple of five. Empty when nothing matches.
    """
    tree = etree.HTML(data)
    # Text nodes beneath every <li> whose id attribute differs from "tr-fixed".
    texts = tree.xpath('//li[@id!="tr-fixed"]//text()')
    # Slice the flat sequence into consecutive chunks of five.
    return [texts[pos:pos + 5] for pos in range(0, len(texts), 5)]

# Write the scraped rows to an Excel workbook
def writeExcel(AQI, path="E:\\python\\aqi.xls"):
    """Save the scraped rows to an .xls workbook with a header row.

    Args:
        AQI: Sequence of rows, each a sequence of cell values. Values are
            written left to right starting at column 0.
        path: Destination file handed to ``Workbook.save``. Defaults to the
            location this function has always written to.
    """
    book = xlwt.Workbook()
    sheet = book.add_sheet('The AQI', cell_overwrite_ok=True)
    headers = ['order', 'city', 'province', 'AQI', 'quality']
    for col, title in enumerate(headers):
        sheet.write(0, col, title)
    # Data rows start at sheet row 1, directly below the header row.
    for row_no, record in enumerate(AQI, start=1):
        for col, cell in enumerate(record):
            sheet.write(row_no, col, cell)
    book.save(path)

if __name__ == "__main__":
    # Scrape the ranking page and store the rows in the workbook.
    url = "http://tianqi.2345.com/air-rank.htm"
    data = getpage(url)
    AQI = getdata(data)
    writeExcel(AQI)

    # Reload the first sheet of the workbook that writeExcel just saved.
    fbook = pd.read_excel("E:\\python\\aqi.xls", 0)
    city = [str(each) for each in fbook['city']]
    value = list(fbook['AQI'])

    # Position of the first row rated "中度污染" or worse. Every level named in
    # the chart title is matched, and the row position is used directly because
    # it is what the slices below need. Falls back to len(city) when no row
    # qualifies, where the old code left `index` undefined.
    # NOTE(review): assumes rows run from cleanest to most polluted, as the
    # chart title implies -- confirm against the live page.
    polluted = {"中度污染", "重度污染", "严重污染"}
    index = next(
        (pos for pos, quality in enumerate(fbook['quality']) if quality in polluted),
        len(city),
    )

    # Map of all cities, rendered to an HTML file.
    geo = Geo("全国空气质量指数", "Data from AQI", title_color="#fff", width=1000, height=600,
              background_color='#404a59')
    geo.add("空气质量指数", city, value, visual_range=[1, 60], maptype='china', type='effectScatter',
            visual_text_color="#fff", effect_scale=5, symbol_size=15, is_visualmap=True, is_random=True, is_roam=False)
    geo.render(path="全国空气质量指数.html")

    # Apply font and tick settings before any figure text is created.
    mpl.rcParams["font.sans-serif"] = ["KaiTi"]
    mpl.rcParams["axes.unicode_minus"] = False
    plt.rcParams['xtick.direction'] = 'in'
    plt.rcParams['ytick.direction'] = 'in'

    if index < len(city):
        fig = plt.figure()
        font = {"size": 15}
        ax = fig.add_subplot(111)
        ax.spines['bottom'].set_linewidth(2)  # thickness of the bottom axis line
        ax.spines['left'].set_linewidth(2)    # thickness of the left axis line
        ax.spines['right'].set_linewidth(2)   # thickness of the right axis line
        ax.spines['top'].set_linewidth(2)     # thickness of the top axis line
        rects = ax.bar(range(len(city)-index), value[index:], width=0.08, tick_label=city[index:], color='m')
        plt.tick_params(labelsize=15)
        # Label each bar with its value at the top of the bar.
        for rect in rects:
            height = rect.get_height()
            ax.text(rect.get_width()/2.0+rect.get_x()-0.04, height, height)
        plt.xlabel('城市', font)
        plt.ylabel('空气质量指数', font)
        plt.title("中度污染、重度污染、严重污染城市", font)
        plt.show()
    else:
        # Nothing to plot; say so instead of failing.
        print("No city is rated 中度污染 or worse; skipping the bar chart.")

在这里插入图片描述
在这里插入图片描述
在这里插入图片描述
爬取到的大数据如下所示:
在这里插入图片描述

评论 6
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值