1.本章利用python爬取2019-nCoV确诊数据并制作pyecharts可视化地图;
2.主要内容为绘制中国各省疫情数据,疫情数据从四个维度进行可视化展示:累积确诊人数、现存确诊人数、治愈人数、死亡人数。鼠标移动到省份上方,则展示出相关数据;地图上添加了图例,用颜色深浅划分确诊人数的数量范围,直观反映各省份疫情现状,点击图例的颜色框可以让图上的对应区域颜色显现或消失。
3.上图为丁香园数据疫情图,我们可以从腾讯新闻网站搜索相关疫情数据,附上疫情数据接口:
https://view.inews.qq.com/g2/getOnsInfo?name=disease_h5
打开疫情数据接口链接,如下图所示:
4.接口返回的数据密密麻麻,我们用Jupyter Notebook来分析这些疫情数据。
请求疫情数据,代码如下,结果如图所示:
from fake_useragent import UserAgent
import requests
import json

# Generate a random Chrome User-Agent so the request looks like a browser.
ua = UserAgent()
headers = {
    'User-Agent': ua.chrome
}
url = 'https://view.inews.qq.com/g2/getOnsInfo?name=disease_h5'
# headers must be passed by keyword: the second positional parameter of
# requests.get() is `params`, so `requests.get(url, headers)` would send the
# User-Agent as a query-string parameter instead of as an HTTP header.
# The timeout keeps the cell from hanging forever if the server never answers.
response = requests.get(url, headers=headers, timeout=10)
# Decode the JSON response body into a Python object.
list_json = json.loads(response.text)
print(type(list_json))
# Notebook cell output: display the decoded response.
list_json
5.获取疫情数据里的 'data' 数据,
代码如下,结果如图所示:
# Placeholders so that a later eval() of the payload can resolve the JSON
# literals false / null / true, which are not defined names in Python.
# NOTE: every such literal therefore evaluates to an empty string.
global false, null, true
false, null, true = "", "", ""
# Pick the 'data' entry out of the decoded response.
jo = list_json['data']
# Notebook cell output: show the type of the 'data' entry.
type(jo)
6.将获取到的 'data' 字符串数据转换成字典类型,方便数据操作;
代码如下,结果如图所示:
# Decode the 'data' string with the JSON parser instead of eval():
# eval() would execute whatever code the remote server sends, and it only
# worked because false/null/true had been bound to "" beforehand, which
# silently replaced every boolean and null in the payload by an empty string.
# json.loads maps them to False / None / True.
data = json.loads(jo)
# Notebook cell output: display the decoded data.
data
7.获取数据如下图所示
8.获取各个省份确诊人数的信息
9.疫情数据从四个维度进行可视化展示:累积确诊人数、现存确诊人数、治愈人数、死亡人数。
图中为北京疫情数据:
累积确诊人数(confirm)、
现存确诊人数(nowConfirm)、
治愈人数(heal)、
死亡人数(dead);
10.获取并打印数据
代码如下,结果如图所示:
# Look at the first child of the first areaTree node (the article treats
# these children as China's provinces) and print its name and totals.
first_province = data['areaTree'][0]['children'][0]
first_total = first_province['total']
print(first_province['name'])
# Currently confirmed cases
print(first_total['nowConfirm'])
# Cumulative confirmed cases
print(first_total['confirm'])
# Deaths
print(first_total['dead'])
# Recovered
print(first_total['heal'])
11.整理合并各个省份确诊人数的信息:
代码如下,结果如图所示:
# Children of the first areaTree node; the article treats them as China's
# provinces. Iterating over the list itself (instead of a hard-coded
# range(34)) avoids an IndexError when fewer entries are returned and avoids
# silently dropping entries when more are returned.
provinces = data['areaTree'][0]['children']

# Region names
area = [province['name'] for province in provinces]
# Currently confirmed cases
nowConfirm = [province['total']['nowConfirm'] for province in provinces]
# Cumulative confirmed cases
confirm = [province['total']['confirm'] for province in provinces]
# Deaths
dead = [province['total']['dead'] for province in provinces]
# Recovered
heal = [province['total']['heal'] for province in provinces]

# Per-province value list in the order the tooltip formatter indexes it:
# [confirm, dead, heal, nowConfirm]
data_pair = [list(row) for row in zip(confirm, dead, heal, nowConfirm)]

# Pair each name with its values, e.g. ['北京', [325, 923, 9, 589]],
# which is the shape handed to the map chart as data_pair.
testv = [[name, values] for name, values in zip(area, data_pair)]

# Time of the last data update as reported by the payload
lastUpdateTime = data['lastUpdateTime']
print(lastUpdateTime)
# Notebook cell output: display the assembled list.
testv
12.(敲黑板!!!重点!)
画图:获取各个省份确诊人数的信息以及最后更新的时间并制作pyecharts可视化地图
代码如下,结果如图所示:
from pyecharts import options as opts
from pyecharts.charts import Map
from pyecharts.commons.utils import JsCode
import os
import webbrowser
from pathlib import Path

# File the chart is rendered to (relative to the current working directory).
html_path = "China_2019-nCov_map.html"

# Build the map chart and render it to html_path.
# The original had a trailing comma after .render(...), which made `c` a
# 1-tuple; `c` now holds render()'s return value directly.
c = (
    Map()
    # Main method: adds the chart data and the per-series configuration
    .add(
        # This parameter is required; pass it even if it is an empty string,
        # otherwise an error is raised
        series_name="",
        # Data items: [name, [confirm, dead, heal, nowConfirm]] per province
        data_pair=testv,
        # maptype selects the map region; "china" shows the map of China
        maptype="china",
        # Hide the small red marker dots on the map
        is_map_symbol_show=False,
        # Whether to show labels
        label_opts=opts.LabelOpts(is_show=True),
    )
    # Global configuration
    .set_global_opts(
        title_opts=opts.TitleOpts(
            title="2020中国疫情地图",
            subtitle="XinXinM" + "\n\n\n全国各省份确诊人数分布\n\n截至: " + lastUpdateTime,
        ),
        visualmap_opts=opts.VisualMapOpts(
            # Use a piecewise (segmented) visual map
            is_piecewise=True,
            # Custom range, label and style of every segment
            pieces=[
                # Leaving out "max" means max is unbounded (Infinity).
                {"max": 0, "label": "0", "color": "#ffffff"},
                {"min": 1, "max": 10, "color": "#ebb4a8"},
                {"min": 10, "max": 100, "color": "#e09694"},
                {"min": 100, "max": 500, "color": "#cb8382"},
                {"min": 500, "max": 1000, "color": "#b27372"},
                {"min": 1000, "color": "#976461"},
            ],
            # Whether to invert the visualMap component
            is_inverse=True,
            # Position of the visualMap component relative to the right side
            # of the container
            pos_right='right',
        ),
        tooltip_opts=opts.TooltipOpts(
            # Background colour of the tooltip
            background_color='white',
            # Border width of the tooltip
            border_width=1,
            # Text style, see `series_options.TextStyleOpts`
            textstyle_opts=opts.TextStyleOpts(color='#00C791'),
            # Callback of the form (params: Object|Array) => string, where
            # params is the single data item the formatter is invoked for.
            # value[] follows the [confirm, dead, heal, nowConfirm] order
            # used when testv was built.
            formatter=JsCode(
                """
function(params){
return params.name + ' : '
+ '<br/>'
+ '现存确诊:'+params.data.value[3]
+ '<br/>'
+ '累积确诊:'+params.data.value[0]
+ '<br/>'
+ '死亡人数:'+params.data.value[1]
+ '<br/>'
+ '治愈人数:'+params.data.value[2];
}
"""
            ),
        ),
    )
    # Without an argument render() writes render.html; the path argument
    # chooses where the file is saved
    .render(html_path)
)

# Open the rendered page in the default browser. The original
# os.system("<file>.html") relies on the shell launching a document by file
# name, which only works on Windows; webbrowser.open with a file:// URI is
# portable.
webbrowser.open(Path(html_path).resolve().as_uri())
完整代码:
from fake_useragent import UserAgent
import requests
import json
from pyecharts import options as opts
from pyecharts.charts import Map
from pyecharts.commons.utils import JsCode
import os
class nCov_2019:
    """Download and decode the epidemic data from the Tencent News endpoint.

    Usage: ``nCov_2019().main()`` returns the decoded ``data`` payload.
    """

    # Generator of random User-Agent strings, created once for the class.
    # (The original built a second, identical UserAgent() inside __init__.)
    ua = UserAgent()

    def __init__(self):
        # Headers sent with the request so that the server sees a
        # browser-like (Chrome) User-Agent.
        self.headers = {
            'User-Agent': self.ua.chrome
        }
        # Tencent News epidemic data endpoint
        self.url = 'https://view.inews.qq.com/g2/getOnsInfo?name=disease_h5'

    def parse_url(self):
        """Request the endpoint and return the decoded JSON response.

        Raises:
            requests.RequestException: on connection failure, timeout or a
                non-success HTTP status.
        """
        # The timeout prevents the call from blocking indefinitely.
        response = requests.get(self.url, headers=self.headers, timeout=10)
        # Fail loudly on an HTTP error instead of trying to parse its body.
        response.raise_for_status()
        return json.loads(response.text)

    def getDateList(self, list_json):
        """Decode the JSON string stored under ``list_json['data']``.

        The original used ``eval`` after binding the globals false/null/true
        to ``""``. That executes whatever the server sends and replaces every
        boolean/null in the payload by an empty string. ``json.loads`` parses
        the same text safely and yields False / None / True.

        Args:
            list_json: decoded response whose 'data' entry is a JSON string.

        Returns:
            The decoded 'data' payload.
        """
        return json.loads(list_json['data'])

    def main(self):
        """Fetch the response and return its decoded 'data' payload."""
        list_json = self.parse_url()
        data = self.getDateList(list_json)
        return data
# NOTE(review): this rebinds the name nCov_2019 from the class to an
# instance, so the class cannot be instantiated by that name afterwards.
# The binding is left as it was so that existing references keep working.
nCov_2019 = nCov_2019()
data = nCov_2019.main()

# Time of the last data update as reported by the payload
lastUpdateTime = data['lastUpdateTime']

# Children of the first areaTree node; the article treats them as China's
# provinces. Iterating over the list itself (instead of a hard-coded
# range(34)) avoids an IndexError when fewer entries are returned and avoids
# silently dropping entries when more are returned.
provinces = data['areaTree'][0]['children']

# Region names
area = [province['name'] for province in provinces]
# Currently confirmed cases
nowConfirm = [province['total']['nowConfirm'] for province in provinces]
# Cumulative confirmed cases
confirm = [province['total']['confirm'] for province in provinces]
# Deaths
dead = [province['total']['dead'] for province in provinces]
# Recovered
heal = [province['total']['heal'] for province in provinces]

# Per-province value list in the order the tooltip formatter indexes it:
# [confirm, dead, heal, nowConfirm]
data_pair = [list(row) for row in zip(confirm, dead, heal, nowConfirm)]

# Pair each name with its values, e.g. ['北京', [325, 923, 9, 589]],
# which is the shape handed to the map chart as data_pair.
testv = [[name, values] for name, values in zip(area, data_pair)]
# Imported here because they are only needed to open the rendered page.
import webbrowser
from pathlib import Path

# File the chart is rendered to (relative to the current working directory).
html_path = "China_2019-nCov_map.html"

# Build the map chart and render it to html_path.
# The original had a trailing comma after .render(...), which made `c` a
# 1-tuple; `c` now holds render()'s return value directly.
c = (
    Map()
    # Main method: adds the chart data and the per-series configuration
    .add(
        # This parameter is required; pass it even if it is an empty string,
        # otherwise an error is raised
        series_name="",
        # Data items: [name, [confirm, dead, heal, nowConfirm]] per province
        data_pair=testv,
        # maptype selects the map region; "china" shows the map of China
        maptype="china",
        # Hide the small red marker dots on the map
        is_map_symbol_show=False,
        # Whether to show labels
        label_opts=opts.LabelOpts(is_show=True),
    )
    # Global configuration
    .set_global_opts(
        title_opts=opts.TitleOpts(
            title="2020中国疫情地图",
            subtitle="XinXinM" + "\n\n\n全国各省份确诊人数分布\n\n截至: " + lastUpdateTime,
        ),
        visualmap_opts=opts.VisualMapOpts(
            # Use a piecewise (segmented) visual map
            is_piecewise=True,
            # Custom range, label and style of every segment
            pieces=[
                # Leaving out "max" means max is unbounded (Infinity).
                {"max": 0, "label": "0", "color": "#ffffff"},
                {"min": 1, "max": 10, "color": "#ebb4a8"},
                {"min": 10, "max": 100, "color": "#e09694"},
                {"min": 100, "max": 500, "color": "#cb8382"},
                {"min": 500, "max": 1000, "color": "#b27372"},
                {"min": 1000, "color": "#976461"},
            ],
            # Whether to invert the visualMap component
            is_inverse=True,
            # Position of the visualMap component relative to the right side
            # of the container
            pos_right='right',
        ),
        tooltip_opts=opts.TooltipOpts(
            # Background colour of the tooltip
            background_color='white',
            # Border width of the tooltip
            border_width=1,
            # Text style, see `series_options.TextStyleOpts`
            textstyle_opts=opts.TextStyleOpts(color='#00C791'),
            # Callback of the form (params: Object|Array) => string, where
            # params is the single data item the formatter is invoked for.
            # value[] follows the [confirm, dead, heal, nowConfirm] order
            # used when testv was built.
            formatter=JsCode(
                """
function(params){
return params.name + ' : '
+ '<br/>'
+ '现存确诊:'+params.data.value[3]
+ '<br/>'
+ '累积确诊:'+params.data.value[0]
+ '<br/>'
+ '死亡人数:'+params.data.value[1]
+ '<br/>'
+ '治愈人数:'+params.data.value[2];
}
"""
            ),
        ),
    )
    # Without an argument render() writes render.html; the path argument
    # chooses where the file is saved
    .render(html_path)
)

# Open the rendered page in the default browser. The original
# os.system("<file>.html") relies on the shell launching a document by file
# name, which only works on Windows; webbrowser.open with a file:// URI is
# portable.
webbrowser.open(Path(html_path).resolve().as_uri())
生成的"China_2019-nCov_map.html"代码:
<!DOCTYPE html>
<html>
<head>
<meta charset="UTF-8">
<title>Awesome-pyecharts</title>
<script type="text/javascript" src="https://assets.pyecharts.org/assets/echarts.min.js"></script>
<script type="text/javascript" src="https://assets.pyecharts.org/assets/maps/china.js"></script>
</head>
<body>
<div id="486804f5cd794d05b9ce1fcdcef941b8" class="chart-container" style="width:900px; height:500px;"></div>
<script>
var chart_486804f5cd794d05b9ce1fcdcef941b8 = echarts.init(
document.getElementById('486804f5cd794d05b9ce1fcdcef941b8'), 'white', {renderer: 'canvas'});
var option_486804f5cd794d05b9ce1fcdcef941b8 = {
"animation": true,
"animationThreshold": 2000,
"animationDuration": 1000,
"animationEasing": "cubicOut",
"animationDelay": 0,
"animationDurationUpdate": 300,
"animationEasingUpdate": "cubicOut",
"animationDelayUpdate": 0,
"color": [
"#c23531",
"#2f4554",
"#61a0a8",
"#d48265",
"#749f83",
"#ca8622",
"#bda29a",
"#6e7074",
"#546570",
"#c4ccd3",
"#f05b72",
"#ef5b9c",
"#f47920",
"#905a3d",
"#fab27b",
"#2a5caa",
"#444693",
"#726930",
"#b2d235",
"#6d8346",
"#ac6767",
"#1d953f",
"#6950a1",
"#918597"
],
"series": [
{
"type": "map",
"label": {
"show": true,
"position": "top",
"margin": 8
},
"mapType": "china",
"data": [
{
"name": "\u5317\u4eac",
"value": [
928,
9,
595,
324
]
},
{
"name": "\u9999\u6e2f",
"value": [
1268,
7,
1156,
105
]
},
{
"name": "\u4e0a\u6d77",
"value": [
716,
7,
681,
28
]
},
{
"name": "\u56db\u5ddd",
"value": [
596,
3,
581,
12
]
},
{
"name": "\u7518\u8083",
"value": [
167,
2,
154,
11
]
},
{
"name": "\u9655\u897f",
"value": [
320,
3,
311,
6
]
},
{
"name": "\u6cb3\u5317",
"value": [
349,
6,
337,
6
]
},
{
"name": "\u8fbd\u5b81",
"value": [
156,
2,
150,
4
]
},
{
"name": "\u5e7f\u4e1c",
"value": [
1643,
8,
1631,
4
]
},
{
"name": "\u53f0\u6e7e",
"value": [
449,
7,
438,
4
]
},
{
"name": "\u5929\u6d25",
"value": [
199,
3,
194,
2
]
},
{
"name": "\u91cd\u5e86",
"value": [
582,
6,
574,
2
]
},
{
"name": "\u798f\u5efa",
"value": [
363,
1,
360,
2
]
},
{
"name": "\u6fb3\u95e8",
"value": [
46,
0,
45,
1
]
},
{
"name": "\u6d59\u6c5f",
"value": [
1269,
1,
1267,
1
]
},
{
"name": "\u5185\u8499\u53e4",
"value": [
239,
1,
237,
1
]
},
{
"name": "\u4e91\u5357",
"value": [
186,
2,
183,
1
]
},
{
"name": "\u6c5f\u82cf",
"value": [
654,
0,
653,
1
]
},
{
"name": "\u9ed1\u9f99\u6c5f",
"value": [
947,
13,
934,
0
]
},
{
"name": "\u897f\u85cf",
"value": [
1,
0,
1,
0
]
},
{
"name": "\u5e7f\u897f",
"value": [
254,
2,
252,
0
]
},
{
"name": "\u65b0\u7586",
"value": [
76,
3,
73,
0
]
},
{
"name": "\u5409\u6797",
"value": [
155,
2,
153,
0
]
},
{
"name": "\u5c71\u897f",
"value": [
198,
0,
198,
0
]
},
{
"name": "\u6e56\u5317",
"value": [
68135,
4512,
63623,
0
]
},
{
"name": "\u6e56\u5357",
"value": [
1019,
4,
1015,
0
]
},
{
"name": "\u6cb3\u5357",
"value": [
1276,
22,
1254,
0
]
},
{
"name": "\u5b81\u590f",
"value": [
75,
0,
75,
0
]
},
{
"name": "\u8d35\u5dde",
"value": [
147,
2,
145,
0
]
},
{
"name": "\u6d77\u5357",
"value": [
171,
6,
165,
0
]
},
{
"name": "\u9752\u6d77",
"value": [
18,
0,
18,
0
]
},
{
"name": "\u5b89\u5fbd",
"value": [
991,
6,
985,
0
]
},
{
"name": "\u5c71\u4e1c",
"value": [
792,
7,
785,
0
]
},
{
"name": "\u6c5f\u897f",
"value": [
932,
1,
931,
0
]
}
],
"roam": true,
"zoom": 1,
"showLegendSymbol": false,
"emphasis": {}
}
],
"legend": [
{
"data": [
""
],
"selected": {
"": true
},
"show": true,
"padding": 5,
"itemGap": 10,
"itemWidth": 25,
"itemHeight": 14
}
],
"tooltip": {
"show": true,
"trigger": "item",
"triggerOn": "mousemove|click",
"axisPointer": {
"type": "line"
},
"formatter": function(params){ return params.name + ' : ' + '<br/>' + '\u73b0\u5b58\u786e\u8bca\uff1a'+params.data.value[3] + '<br/>' + '\u7d2f\u79ef\u786e\u8bca\uff1a'+params.data.value[0] + '<br/>' + '\u6b7b\u4ea1\u4eba\u6570\uff1a'+params.data.value[1] + '<br/>' + '\u6cbb\u6108\u4eba\u6570\uff1a'+params.data.value[2]; } ,
"textStyle": {
"color": "#00C791"
},
"backgroundColor": "white",
"borderWidth": 1
},
"title": [
{
"text": "2020\u4e2d\u56fd\u75ab\u60c5\u5730\u56fe",
"subtext": "XinXinM\n\n\n\u5168\u56fd\u5404\u7701\u4efd\u786e\u8bca\u4eba\u6570\u5206\u5e03\n\n\u622a\u81f3: 2020-07-05 19:41:18",
"padding": 5,
"itemGap": 10
}
],
"visualMap": {
"show": true,
"type": "piecewise",
"min": 0,
"max": 100,
"inRange": {
"color": [
"#50a3ba",
"#eac763",
"#d94e5d"
]
},
"calculable": true,
"inverse": true,
"splitNumber": 5,
"orient": "vertical",
"right": "right",
"showLabel": true,
"itemWidth": 20,
"itemHeight": 14,
"borderWidth": 0,
"pieces": [
{
"max": 0,
"label": "0",
"color": "#ffffff"
},
{
"min": 1,
"max": 10,
"color": "#ebb4a8"
},
{
"min": 10,
"max": 100,
"color": "#e09694"
},
{
"min": 100,
"max": 500,
"color": "#cb8382"
},
{
"min": 500,
"max": 1000,
"color": "#b27372"
},
{
"min": 1000,
"color": "#976461"
}
]
}
};
chart_486804f5cd794d05b9ce1fcdcef941b8.setOption(option_486804f5cd794d05b9ce1fcdcef941b8);
</script>
</body>
</html>
结果如图所示,爬取2019-nCoV确诊数据并制作pyecharts可视化地图与丁香园疫情图数据相符合:
可点击地图旁边的图例,点击颜色框框选择是否要在地图该区域范围渲染上颜色: