使用Python爬取腾讯新闻疫情数据,并使用pyecharts可视化,绘制增长人数地图、柱状图、折线图。
文章目录
1.分析网页
- 通过腾讯新闻公布的数据进行爬取
- 网址:https://news.qq.com/zt2020/page/feiyan.htm#/
- 对于静态网页,我们只需要把网页地址栏中的url传到get请求中就可以轻松地获取到网页的数据。本文的疫情数据来自该页面背后的JSON数据接口,因此下文直接对这个接口地址发送get请求。
2.导入模块
import time
import json
import requests
from datetime import datetime
import pandas as pd
import numpy as np
3.抓取数据
def Domestic(url='https://view.inews.qq.com/g2/getOnsInfo?name=disease_h5', timeout=10):
    """Fetch the epidemic dataset from the Tencent News endpoint.

    The response is a JSON envelope whose ``data`` field is itself a
    JSON-encoded string, so the payload is decoded twice.

    Args:
        url: Endpoint to query; defaults to the ``disease_h5`` feed.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The decoded content of the envelope's ``data`` field.

    Raises:
        requests.RequestException: On connection failure, timeout, or an
            HTTP error status.
    """
    response = requests.get(url=url, timeout=timeout)
    # Fail loudly on an HTTP error instead of trying to parse an error page.
    response.raise_for_status()
    return json.loads(response.json()['data'])


domestic = Domestic()
4.提取数据并写入Excel
# Per-region records are the children of the first areaTree entry.
areaTree = domestic['areaTree']
china_data = areaTree[0]['children']
china_list = []        # one dict per region, turned into a DataFrame below
list_province = []     # region names (x-axis of the bar/line charts)
list_nowConfirm = []   # currently confirmed cases
list_conrirm = []      # cumulative confirmed cases
for record in china_data:
    totals = record['total']
    province = record['name']
    confirm = totals['confirm']
    # Currently confirmed = cumulative confirmed minus healed and dead.
    nowConfirm = confirm - totals['heal'] - totals['dead']
    list_province.append(province)
    list_nowConfirm.append(nowConfirm)
    list_conrirm.append(confirm)
    china_list.append({
        'province': province,
        'nowConfirm': nowConfirm,
        'confirm': confirm,
    })
# From here on china_data names the tabular form of the same records.
china_data = pd.DataFrame(china_list)
china_data.to_excel('疫情数据.xlsx', index=False)  # store as an Excel file
china_data.head()
5.国内各地区现有确诊人数地图
import pyecharts.options as opts
from pyecharts.charts import Map
from pyecharts.globals import CurrentConfig, NotebookType
CurrentConfig.NOTEBOOK_TYPE = NotebookType.JUPYTER_LAB
# Map of currently confirmed cases per region, coloured by piecewise buckets.
m = Map()
m.add(
    "",
    [list(z) for z in zip(list(china_data["province"]), list(china_data["nowConfirm"]))],
    maptype="china",
    is_map_symbol_show=False,
)
m.set_global_opts(
    title_opts=opts.TitleOpts(title="COVID-19中国现有地区现有确诊人数地图"),
    visualmap_opts=opts.VisualMapOpts(
        is_piecewise=True,
        # Buckets must not overlap: the zero bucket stops at 0 so that a
        # region with exactly one case belongs to '1-9' only.
        pieces=[
            # No "max" given: the bucket is unbounded above.
            {"min": 5000, "label": '>5000', "color": "#893448"},
            {"min": 1000, "max": 4999, "label": '1000-4999', "color": "#ff585e"},
            {"min": 500, "max": 999, "label": '500-999', "color": "#fb8146"},
            {"min": 101, "max": 499, "label": '101-499', "color": "#ffA500"},
            {"min": 10, "max": 100, "label": '10-100', "color": "#ffb248"},
            {"min": 1, "max": 9, "label": '1-9', "color": "#fff2d1"},
            {"max": 0, "label": '0', "color": "#ffffff"},
        ],
    ),
)
m.render("地图.html")
6.国内各地区现有确诊人数柱状图
from pyecharts import options as opts
from pyecharts.charts import Bar
# Bar chart of currently confirmed cases per region, with a slider zoom.
bar = (
    Bar()
    .add_xaxis(list_province)
    .add_yaxis("柱状图", list_nowConfirm)
    .set_global_opts(
        # Title names the chart type that is drawn (bar chart).
        title_opts=opts.TitleOpts(title="COVID-19中国现有地区现有确诊人数柱状图"),
        yaxis_opts=opts.AxisOpts(name="现有确诊病例"),
        xaxis_opts=opts.AxisOpts(name="地区"),
        datazoom_opts=opts.DataZoomOpts(type_="slider"),
    )
)
bar.render("柱状图.html")
7.国内各地区现有确诊人数折线图
import pyecharts.options as opts
from pyecharts.charts import Line
# Line chart of currently confirmed cases per region, with a slider zoom.
line = (
    Line()
    .add_xaxis(xaxis_data=list_province)
    .add_yaxis(
        series_name="折线图",
        y_axis=list_nowConfirm,
        is_connect_nones=True,
    )
    .set_global_opts(
        title_opts=opts.TitleOpts(title="COVID-19中国现有地区现有确诊人数折线图"),
        yaxis_opts=opts.AxisOpts(name="现有确诊病例"),
        xaxis_opts=opts.AxisOpts(name="地区"),
        datazoom_opts=opts.DataZoomOpts(type_="slider"),
    )
)
line.render("折线图.html")
8.国内各地区累计确诊人数地图
代码如下:
import pyecharts.options as opts
from pyecharts.charts import Map
from pyecharts.globals import CurrentConfig, NotebookType
CurrentConfig.NOTEBOOK_TYPE = NotebookType.JUPYTER_LAB
# Map of cumulative confirmed cases per region. It reads the "confirm"
# column and renders to its own file so the current-cases map is not
# overwritten.
m1 = Map()
m1.add(
    "",
    [list(z) for z in zip(list(china_data["province"]), list(china_data["confirm"]))],
    maptype="china",
    is_map_symbol_show=False,
)
m1.set_global_opts(
    title_opts=opts.TitleOpts(title="COVID-19中国地区累计确诊人数地图"),
    visualmap_opts=opts.VisualMapOpts(
        is_piecewise=True,
        # Buckets must not overlap: the zero bucket stops at 0 so that a
        # region with exactly one case belongs to '1-9' only.
        pieces=[
            # No "max" given: the bucket is unbounded above.
            {"min": 5000, "label": '>5000', "color": "#893448"},
            {"min": 1000, "max": 4999, "label": '1000-4999', "color": "#ff585e"},
            {"min": 500, "max": 999, "label": '500-999', "color": "#fb8146"},
            {"min": 101, "max": 499, "label": '101-499', "color": "#ffA500"},
            {"min": 10, "max": 100, "label": '10-100', "color": "#ffb248"},
            {"min": 1, "max": 9, "label": '1-9', "color": "#fff2d1"},
            {"max": 0, "label": '0', "color": "#ffffff"},
        ],
    ),
)
m1.render("累计确诊地图.html")
9.国内各地区累计确诊人数柱状图
from pyecharts import options as opts
from pyecharts.charts import Bar
# Bar chart of cumulative confirmed cases per region, with a slider zoom.
bar = (
    Bar()
    .add_xaxis(list_province)
    .add_yaxis("累计确诊柱状图", list_conrirm)
    .set_global_opts(
        # Title names the chart type that is drawn (bar chart).
        title_opts=opts.TitleOpts(title="COVID-19中国地区累计确诊人数柱状图"),
        yaxis_opts=opts.AxisOpts(name="累计确诊病例"),
        xaxis_opts=opts.AxisOpts(name="地区"),
        datazoom_opts=opts.DataZoomOpts(type_="slider"),
    )
)
bar.render("累计确诊柱状图.html")
10.国内各地区累计确诊人数折线图
import pyecharts.options as opts
from pyecharts.charts import Line
# Line chart of cumulative confirmed cases per region, with a slider zoom.
line = (
    Line()
    .add_xaxis(xaxis_data=list_province)
    .add_yaxis(
        series_name="累计确诊折线图",
        y_axis=list_conrirm,
        is_connect_nones=True,
    )
    .set_global_opts(
        # The series is cumulative, so the title says cumulative only.
        title_opts=opts.TitleOpts(title="COVID-19中国地区累计确诊人数折线图"),
        yaxis_opts=opts.AxisOpts(name="累计确诊病例"),
        xaxis_opts=opts.AxisOpts(name="地区"),
        datazoom_opts=opts.DataZoomOpts(type_="slider"),
    )
)
line.render("累计确诊折线图.html")
11.完整代码
import time
import json
import requests
from datetime import datetime
import pandas as pd
import numpy as np
import pyecharts.options as opts
from pyecharts.charts import Map
from pyecharts.globals import CurrentConfig, NotebookType
CurrentConfig.NOTEBOOK_TYPE = NotebookType.JUPYTER_LAB
from pyecharts import options as opts
from pyecharts.charts import Bar
import pyecharts.options as opts
from pyecharts.charts import Line
def Domestic(url='https://view.inews.qq.com/g2/getOnsInfo?name=disease_h5', timeout=10):
    """Fetch the epidemic dataset from the Tencent News endpoint.

    The response is a JSON envelope whose ``data`` field is itself a
    JSON-encoded string, so the payload is decoded twice.

    Args:
        url: Endpoint to query; defaults to the ``disease_h5`` feed.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The decoded content of the envelope's ``data`` field.

    Raises:
        requests.RequestException: On connection failure, timeout, or an
            HTTP error status.
    """
    response = requests.get(url=url, timeout=timeout)
    # Fail loudly on an HTTP error instead of trying to parse an error page.
    response.raise_for_status()
    return json.loads(response.json()['data'])


domestic = Domestic()
# Per-region records are the children of the first areaTree entry.
areaTree = domestic['areaTree']
china_data = areaTree[0]['children']
china_list = []        # one dict per region, turned into a DataFrame below
list_province = []     # region names (x-axis of the bar/line charts)
list_nowConfirm = []   # currently confirmed cases
list_conrirm = []      # cumulative confirmed cases
for record in china_data:
    totals = record['total']
    province = record['name']
    confirm = totals['confirm']
    # Currently confirmed = cumulative confirmed minus healed and dead.
    nowConfirm = confirm - totals['heal'] - totals['dead']
    list_province.append(province)
    list_nowConfirm.append(nowConfirm)
    list_conrirm.append(confirm)
    china_list.append({
        'province': province,
        'nowConfirm': nowConfirm,
        'confirm': confirm,
    })
# From here on china_data names the tabular form of the same records.
china_data = pd.DataFrame(china_list)
china_data.to_excel('疫情数据.xlsx', index=False)  # store as an Excel file
china_data.head()
# Map of currently confirmed cases per region, coloured by piecewise buckets.
m = Map()
m.add(
    "",
    [list(z) for z in zip(list(china_data["province"]), list(china_data["nowConfirm"]))],
    maptype="china",
    is_map_symbol_show=False,
)
m.set_global_opts(
    title_opts=opts.TitleOpts(title="COVID-19中国现有地区现有确诊人数地图"),
    visualmap_opts=opts.VisualMapOpts(
        is_piecewise=True,
        # Buckets must not overlap: the zero bucket stops at 0 so that a
        # region with exactly one case belongs to '1-9' only.
        pieces=[
            # No "max" given: the bucket is unbounded above.
            {"min": 5000, "label": '>5000', "color": "#893448"},
            {"min": 1000, "max": 4999, "label": '1000-4999', "color": "#ff585e"},
            {"min": 500, "max": 999, "label": '500-999', "color": "#fb8146"},
            {"min": 101, "max": 499, "label": '101-499', "color": "#ffA500"},
            {"min": 10, "max": 100, "label": '10-100', "color": "#ffb248"},
            {"min": 1, "max": 9, "label": '1-9', "color": "#fff2d1"},
            {"max": 0, "label": '0', "color": "#ffffff"},
        ],
    ),
)
m.render("地图.html")
# Bar chart of currently confirmed cases per region, with a slider zoom.
bar = (
    Bar()
    .add_xaxis(list_province)
    .add_yaxis("柱状图", list_nowConfirm)
    .set_global_opts(
        # Title names the chart type that is drawn (bar chart).
        title_opts=opts.TitleOpts(title="COVID-19中国现有地区现有确诊人数柱状图"),
        yaxis_opts=opts.AxisOpts(name="现有确诊病例"),
        xaxis_opts=opts.AxisOpts(name="地区"),
        datazoom_opts=opts.DataZoomOpts(type_="slider"),
    )
)
bar.render("柱状图.html")
# Line chart of currently confirmed cases per region. The builder calls are
# issued one by one on the same chart object instead of being chained.
line = Line()
line.add_xaxis(xaxis_data=list_province)
line.add_yaxis(series_name="折线图", y_axis=list_nowConfirm, is_connect_nones=True)
line.set_global_opts(
    title_opts=opts.TitleOpts(title="COVID-19中国现有地区现有确诊人数折线图"),
    yaxis_opts=opts.AxisOpts(name="现有确诊病例"),
    xaxis_opts=opts.AxisOpts(name="地区"),
    datazoom_opts=opts.DataZoomOpts(type_="slider"),
)
line.render("折线图.html")
# Map of cumulative confirmed cases per region, coloured by piecewise buckets.
m1 = Map()
m1.add(
    "",
    [list(z) for z in zip(list(china_data["province"]), list(china_data["confirm"]))],
    maptype="china",
    is_map_symbol_show=False,
)
m1.set_global_opts(
    title_opts=opts.TitleOpts(title="COVID-19中国地区累计确诊人数地图"),
    visualmap_opts=opts.VisualMapOpts(
        is_piecewise=True,
        # Buckets must not overlap: the zero bucket stops at 0 so that a
        # region with exactly one case belongs to '1-9' only.
        pieces=[
            # No "max" given: the bucket is unbounded above.
            {"min": 5000, "label": '>5000', "color": "#893448"},
            {"min": 1000, "max": 4999, "label": '1000-4999', "color": "#ff585e"},
            {"min": 500, "max": 999, "label": '500-999', "color": "#fb8146"},
            {"min": 101, "max": 499, "label": '101-499', "color": "#ffA500"},
            {"min": 10, "max": 100, "label": '10-100', "color": "#ffb248"},
            {"min": 1, "max": 9, "label": '1-9', "color": "#fff2d1"},
            {"max": 0, "label": '0', "color": "#ffffff"},
        ],
    ),
)
m1.render("累计确诊地图.html")
# Bar chart of cumulative confirmed cases per region, with a slider zoom.
bar = (
    Bar()
    .add_xaxis(list_province)
    .add_yaxis("累计确诊柱状图", list_conrirm)
    .set_global_opts(
        # Title names the chart type that is drawn (bar chart).
        title_opts=opts.TitleOpts(title="COVID-19中国地区累计确诊人数柱状图"),
        yaxis_opts=opts.AxisOpts(name="累计确诊病例"),
        xaxis_opts=opts.AxisOpts(name="地区"),
        datazoom_opts=opts.DataZoomOpts(type_="slider"),
    )
)
bar.render("累计确诊柱状图.html")
# Line chart of cumulative confirmed cases per region, with a slider zoom.
line = (
    Line()
    .add_xaxis(xaxis_data=list_province)
    .add_yaxis(
        series_name="累计确诊折线图",
        y_axis=list_conrirm,
        is_connect_nones=True,
    )
    .set_global_opts(
        # The series is cumulative, so the title says cumulative only.
        title_opts=opts.TitleOpts(title="COVID-19中国地区累计确诊人数折线图"),
        yaxis_opts=opts.AxisOpts(name="累计确诊病例"),
        xaxis_opts=opts.AxisOpts(name="地区"),
        datazoom_opts=opts.DataZoomOpts(type_="slider"),
    )
)
line.render("累计确诊折线图.html")