利用Python获取国内疫情数据并进行数据可视化
数据收集
- 利用requests库获取国内疫情相关数据
- 利用json模块解析接口返回的JSON数据,并保存为json格式文件
import requests
import json
# Tencent News endpoint used as the source of the domestic epidemic data.
china_url = 'https://view.inews.qq.com/g2/getOnsInfo?name=disease_h5'
# Each header must be its own key/value pair. Previously the user-agent text
# was silently concatenated onto the referer value (adjacent string literals),
# so the referer was corrupted and no user-agent header was sent at all.
headers = {
    'referer': 'https://news.qq.com/zt2020/page/feiyan.htm',
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36',
}
# A timeout keeps the script from hanging forever if the server never answers.
response = requests.get(url=china_url, headers=headers, timeout=10).json()
print(type(response))
# response['data'] is itself a JSON-encoded string, so it is decoded a second time.
data = json.loads(response['data'])
# Keep a readable local snapshot (non-ASCII characters are written as-is).
with open('./国内疫情数据.json', 'w', encoding='utf-8') as f:
    json.dump(data, f, ensure_ascii=False, indent=2)
<class 'dict'>
获取我们需要的数据
- 分别获取国家、省、市名称及其相应的数据
- 利用pandas转化为DataFrame数据格式
- 拆解today和total两个字段
- 保存为Excel格式文件
import pandas as pd
from openpyxl import load_workbook
# Parse the saved JSON snapshot straight from the file handle.
with open('./国内疫情数据.json', 'r', encoding='utf-8') as f:
    data = json.load(f)
type(data)
# Flatten the area tree into one record per city, tagged with its province.
ChinaArea = data["areaTree"][0]
provincearea = ChinaArea['children']
city_list = []
for province_node in provincearea:
    province_name = province_node['name']
    city_list.extend(
        {
            'province': province_name,
            'city': city_node['name'],
            'today': city_node['today'],
            'total': city_node['total'],
        }
        for city_node in province_node['children']
    )
df = pd.DataFrame(city_list)
# Expand the nested 'total' dicts into flat columns.
# Maps the new column name -> key inside each 'total' dict; insertion order
# determines the resulting column order.
total_columns = {
    'confirm': 'confirm',
    'suspect': 'suspect',
    'dead': 'dead',
    'deadrate': 'deadRate',
    'heal': 'heal',
    'healrate': 'healRate',
}
total_records = df['total'].values.tolist()
for column_name, source_key in total_columns.items():
    df[column_name] = [record[source_key] for record in total_records]

# Same treatment for the nested 'today' dicts.
today_columns = {
    'today_confirm': 'confirm',
    'today_confirmCuts': 'confirmCuts',
}
today_records = df['today'].values.tolist()
for column_name, source_key in today_columns.items():
    df[column_name] = [record[source_key] for record in today_records]

# The nested dict columns are no longer needed once they have been expanded.
df.drop(columns=['total', 'today'], inplace=True)
df
# Local import: this export step is the only place that needs os.
import os

excel_path = '国内疫情.xlsx'

# The sheet is named after the date part of the "date time" update stamp.
# split() also copes with a stamp that has no space, where the old
# find()-based slice would silently drop the last character.
lastUpdateTime = data['lastUpdateTime']
sheet_name = lastUpdateTime.split(' ')[0]

# Write today's data into the workbook, one sheet per date:
#   * if the workbook already exists, append to it and replace the sheet with
#     the same name so re-running on the same day does not create duplicates;
#   * if it does not exist yet, create it (load_workbook used to crash here).
# This replaces the old `writer.book = ...` / `writer.sheets = ...` /
# `writer.save()` sequence, which recent pandas versions no longer allow.
# NOTE: `if_sheet_exists` requires pandas >= 1.3.
if os.path.exists(excel_path):
    writer_options = {'mode': 'a', 'if_sheet_exists': 'replace'}
else:
    writer_options = {'mode': 'w'}

# The context manager saves and closes the workbook exactly once.
with pd.ExcelWriter(excel_path, engine='openpyxl', **writer_options) as writer:
    df.to_excel(writer, index=False, sheet_name=sheet_name)
数据分析和可视化
import matplotlib.pyplot as plt
import numpy as np
%matplotlib inline
# SimHei is used so the Chinese labels render instead of empty boxes.
plt.rcParams['font.family'] = 'SimHei'
plt.figure(figsize=(12, 15), dpi=80)

# Aggregate both metrics together and sort ONCE (by confirmed cases) so the
# two series share the same province order. Sorting them independently made
# the paired bars and the y-axis labels refer to different provinces.
province_totals = (
    df.groupby(by='province')[['confirm', 'heal']]
    .sum()
    .sort_values(by='confirm')
)
province_confirm = province_totals['confirm']
province_heal = province_totals['heal']

index = np.arange(len(province_totals))
bar_width = 0.4
bar_gap = 0.1  # vertical gap between the two bars of one province

plt.barh(index, province_confirm.values.tolist(), height=bar_width, color='b', label='累计确诊人数')
plt.barh(index + bar_width + bar_gap, province_heal.values.tolist(), height=bar_width, color='r', label='累计治愈人数')
# Put each province label midway between its two bars.
plt.yticks(index + (bar_width + bar_gap) / 2, province_totals.index)
plt.title('国内疫情累计确诊和治愈人数')
plt.xlabel('人数')
plt.ylabel('省份')
# Derive the limits from the data instead of hard-coding 34 provinces, and
# start below zero so the lowest bar is not clipped.
plt.ylim(-bar_width, len(index))
plt.legend(loc='best')
plt.show()
使用pyecharts数据可视化-map
from pyecharts import options as opts
import pandas as pd
from pyecharts.charts import Map
# Reload the sheet for the current date and total every column per province.
data = pd.read_excel('./国内疫情.xlsx', sheet_name=sheet_name)
data_groupby = data.groupby(['province'], as_index=False).sum()
data_groupby
# Pair each province name with its confirmed total.
province_names = data_groupby['province'].values.tolist()
confirmed_totals = data_groupby['confirm'].values.tolist()
data_groupby_list = [
    (name, confirmed) for name, confirmed in zip(province_names, confirmed_totals)
]
data_groupby_list
def china_map():
    """Build a piecewise-coloured China map of confirmed cases.

    Reads the module-level ``data_groupby_list`` and passes it to the map
    as its ``data_pair``.

    Returns:
        The configured pyecharts ``Map`` chart (not yet rendered).
    """
    # Colour buckets, from lightest (fewest cases) to darkest.
    pieces = [
        {'max': 9, 'min': 0, 'label': '0-9', 'color': '#FFE4E1'},
        {'max': 99, 'min': 10, 'label': '10-99', 'color': '#FF7F50'},
        {'max': 499, 'min': 100, 'label': '100-499', 'color': '#F08080'},
        {'max': 999, 'min': 500, 'label': '500-999', 'color': '#CD5C5C'},
        {'max': 9999, 'min': 1000, 'label': '1000-9999', 'color': '#990000'},
        # No upper bound: the label promises ">10000", so values above 99999
        # must still land in this bucket instead of matching no piece.
        {'min': 10000, 'label': '>10000', 'color': '#660000'},
    ]
    c = (
        Map()
        .add(series_name='确诊病例', data_pair=data_groupby_list, maptype='china')
        .set_global_opts(
            title_opts=opts.TitleOpts(title='疫情地图'),
            visualmap_opts=opts.VisualMapOpts(is_piecewise=True, pieces=pieces),
        )
    )
    return c
# Build the chart, write it to an HTML file, then display it inline in the notebook.
d_map =china_map()
d_map.render('国内疫情确诊地图.html')
d_map.render_notebook()
<div id="99085df893a346e1973efbeca526ce2d" style="width:900px; height:500px;"></div>