python:疫情数据可视化

数据来源:BlankerL
截止日期:4/10
将 JSON 数据转换为 CSV

import json
import time
import csv

# Flatten the DXY time-series JSON dump into data.csv, one CSV row per record.
# The context manager closes the input file as soon as it has been parsed.
with open('DXYArea-TimeSeries.json', 'r', encoding='utf-8') as file:
    infos = json.load(file)

# Fields copied unchanged from every record, in CSV column order.
# updateTime is handled separately because it needs converting.
copied_fields = [
    'provinceName',
    'currentConfirmedCount', 'confirmedCount',
    'suspectedCount', 'curedCount',
    'deadCount', 'locationId',
    'statisticsData', 'countryName', 'countryEnglishName',
]

# 'w' rather than 'a': the header is written on every run, so appending would
# leave repeated header lines and duplicated rows in the file.
# UTF-8 is set explicitly so the Chinese names do not depend on the platform's
# default encoding.
with open('data.csv', 'w', newline='', encoding='utf-8') as f:
    writer = csv.DictWriter(f, fieldnames=['updateTime'] + copied_fields)
    writer.writeheader()
    for info in infos:
        # Dropping the last three digits turns what appears to be a
        # millisecond epoch into seconds before formatting it as a local date.
        seconds = int(str(info.get("updateTime"))[:-3])
        result = {"updateTime": time.strftime("%Y-%m-%d", time.localtime(seconds))}
        # Each column reads the key of the same name; in particular
        # suspectedCount comes from "suspectedCount", not "confirmedCount".
        for name in copied_fields:
            result[name] = info.get(name)
        writer.writerow(result)

将数据拆分为中国和世界两部分

import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

# Load the flattened CSV and work on a copy so `data` stays as read from disk.
filename = r'C:\Users\liuhao\Desktop\新建文件夹\DXY-COVID-19-Data-master\json\data.csv'
data = pd.read_csv(filename, sep=',',encoding='utf-8',header=0)
test = data.copy()

# Drop the first five characters (the "YYYY-" prefix) so axis labels are "MM-DD".
test['updateTime'] = test['updateTime'].map(lambda x:x[5:])

china = test[test['countryName'] == '中国']
# NOTE(review): locationId 951001 is presumably the country-level row for
# China; it is excluded so only province rows remain. Confirm against the data.
chn_province = china[china['locationId'] != 951001]
chn_province = chn_province.sort_values(['provinceName','confirmedCount','updateTime'],ascending=False)
# The source holds several snapshots per region per day. After the descending
# sort the first row of each (province, day) pair has the largest
# confirmedCount, so keep='first' keeps the daily maximum.
# .copy() detaches the result so the column assignment below writes to an
# independent frame instead of raising SettingWithCopyWarning.
chn_province_unique = chn_province.drop_duplicates(subset=['provinceName','updateTime'],keep='first').copy()
# currentConfirmedCount is partly missing in the source, so recompute it for
# every row as confirmed - cured - dead.
chn_province_unique['currentConfirmedCount'] = (
    chn_province_unique['confirmedCount']
    - chn_province_unique['curedCount']
    - chn_province_unique['deadCount']
)

def func(x):
    """Return the region name ``x`` with administrative suffixes removed.

    Every occurrence of each token is deleted, in the listed order, so for
    example '广西壮族自治区' becomes '广西'. Names without any token are
    returned unchanged.
    """
    cleaned = x
    for token in ('省', '市', '自治区', '壮族', '回族', '维吾尔'):
        cleaned = cleaned.replace(token, '')
    return cleaned
# Clean the province names: the original author notes that the pyecharts Map
# does not support names carrying these suffixes by default.
# Copy first so the assignment writes to an independent frame rather than a
# slice (avoids SettingWithCopyWarning).
chn_province_unique = chn_province_unique.copy()
chn_province_unique['provinceName'] = chn_province_unique['provinceName'].map(func)

# "World" is every row that is not a Chinese province row. The rows excluded
# from chn_province by the locationId filter are therefore still in here.
world = test.drop(chn_province.index.values)
world = world.sort_values(['provinceName','confirmedCount','updateTime'],ascending=False)
# Same de-duplication as for the provinces: keep the row with the largest
# confirmedCount for each (region, day). .copy() makes the column assignment
# below safe.
world_unique = world.drop_duplicates(subset=['provinceName','updateTime'],keep='first').copy()
world_unique['currentConfirmedCount'] = (
    world_unique['confirmedCount']
    - world_unique['curedCount']
    - world_unique['deadCount']
)

# utf-8-sig writes a BOM at the start of each file.
world_unique.to_csv(r'C:\Users\liuhao\Desktop\新建文件夹\DXY-COVID-19-Data-master\json\world.csv',sep=',',index=False,encoding='utf-8-sig')
chn_province_unique.to_csv(r'C:\Users\liuhao\Desktop\新建文件夹\DXY-COVID-19-Data-master\json\chn_province.csv',sep=',',index=False,encoding='utf-8-sig')

观察数据发现,有些地区的时间序列不完整,因此需要补全每个地区在每个日期上的记录。

import pandas as pd
import numpy as np

# Read the per-province CSV; `data` keeps the frame as loaded and `test` is
# the independent working copy used by the following steps.
filename = r'C:\Users\liuhao\Desktop\新建文件夹\DXY-COVID-19-Data-master\json\chn_province.csv'
data = pd.read_csv(
    filename,
    sep=',',
    header=0,
)
test = data.copy(deep=True)

def fill_data(test, first_day='01-22'):
    """Complete the (province, day) grid and fill the gaps it creates.

    For every distinct ``updateTime`` a row is added for each province that
    has no record on that day. Missing values are then filled in two steps:
    rows dated ``first_day`` get 0, and every other gap takes the value of
    the preceding day.

    Args:
        test: DataFrame with at least ``updateTime`` and ``provinceName``
            columns. It is not modified.
        first_day: ``updateTime`` value treated as the start of the series.
            Missing values on this day are set to 0.

    Returns:
        A new DataFrame sorted by ``provinceName`` then ``updateTime``, both
        descending, with a fresh 0..n-1 index.

    Note:
        If ``first_day`` does not occur in the data, nothing is zero-filled
        and the back-fill can carry values across province boundaries.
    """
    all_provinces = set(test['provinceName'])

    # Collect one placeholder row per (day, province) pair that is absent.
    missing_rows = []
    for day in test['updateTime'].unique():
        present = set(test.loc[test['updateTime'] == day, 'provinceName'])
        for name in all_provinces - present:
            missing_rows.append({'updateTime': day, 'provinceName': name})

    if missing_rows:
        # One concat instead of row-by-row DataFrame.append, which was
        # removed in pandas 2.0 and copied the frame on every call.
        test = pd.concat(
            [test, pd.DataFrame(missing_rows)],
            ignore_index=True,
            sort=False,
        )

    # Descending order puts each province's earlier days after its later ones.
    # The index is rebuilt so the masked assignment below sees unique labels.
    test = test.sort_values(
        by=['provinceName', 'updateTime'], ascending=False
    ).reset_index(drop=True)

    # Start every series at zero on the first day.
    is_first = test['updateTime'] == first_day
    if is_first.any():
        test.loc[is_first] = test.loc[is_first].fillna(0)

    # With the descending sort, back-filling pulls the value from the row
    # below, i.e. the previous day's figure.
    return test.bfill()
	
# Run the gap-filling step and export its result. fill_data returns the
# completed frame, so the name must be rebound before writing; otherwise the
# unfilled data would be exported.
test = fill_data(test)
test.to_csv(r'C:\Users\liuhao\Desktop\新建文件夹\DXY-COVID-19-Data-master\json\chn_province_new.csv',index=False,encoding='utf-8-sig')

使用 pyecharts 画图

import pandas as pd
import numpy as np
from pyecharts.charts import Map,Line,Bar,Timeline
import pyecharts.options as opts

# Load the gap-filled province data. `data` stays in file order; `test` is a
# separate frame ordered by date ascending, used by all charts below.
filename = r'C:\Users\liuhao\Desktop\新建文件夹\DXY-COVID-19-Data-master\json\chn_province_new.csv'
data = pd.read_csv(
    filename,
    sep=',',
    header=0,
)

test = data.sort_values(by='updateTime', ascending=True)

def day_data(data):
    """Return the rows of the module-level ``test`` frame for one day.

    ``data`` is the ``updateTime`` value to select. The result keeps only the
    province name and the four count columns.
    """
    wanted = ['provinceName', 'currentConfirmedCount', 'confirmedCount', 'curedCount', 'deadCount']
    on_that_day = test['updateTime'] == data
    return test.loc[on_that_day, wanted]
def city_data(city):
    """Return the rows of the module-level ``test`` frame for one region.

    ``city`` is the ``provinceName`` value to select. The result keeps the
    date, the province name and the four count columns.
    """
    wanted = ['updateTime', 'provinceName', 'currentConfirmedCount', 'confirmedCount', 'curedCount', 'deadCount']
    in_that_city = test['provinceName'] == city
    return test.loc[in_that_city, wanted]

# Distinct days and regions, in order of first appearance in `test`.
data_series = test['updateTime'].unique()
city_series = test['provinceName'].unique()

# One horizontal stacked bar chart per day, played back as a timeline.
tl = Timeline(init_opts=opts.InitOpts(width="1400px", height="800px"))
for day in data_series:
    label = "{0}".format(day)
    daily = day_data(day)
    # Hubei is left out: its figures are on a different order of magnitude
    # from the other regions and would distort the chart's proportions.
    without_hubei = daily[daily['provinceName'] != '湖北']
    ranked = without_hubei.sort_values(by='currentConfirmedCount', ascending=True)

    bar = Bar()
    bar.add_xaxis(ranked['provinceName'].to_list())
    for series_name, column in (
        ('现存确诊', 'currentConfirmedCount'),
        ('治愈数', 'curedCount'),
        ('死亡数', 'deadCount'),
    ):
        bar.add_yaxis(series_name, ranked[column].to_list(), stack="stack1")
    bar.set_series_opts(label_opts=opts.LabelOpts(is_show=False))
    bar.set_global_opts(
        xaxis_opts=opts.AxisOpts(is_show=False),
        title_opts=opts.TitleOpts(label),
    )
    bar.reversal_axis()
    tl.add(bar, label)
tl.render('timeline_bar_reversal.html')

# One line chart per region showing its four count series over time.
tl1 = Timeline(init_opts=opts.InitOpts(width="1400px", height="800px"))
for city in city_series:
    history = city_data(city)
    x_data = history['updateTime'].sort_values(ascending=True).to_list()
    hidden_labels = dict(is_show=False)

    line = Line()
    line.add_xaxis(xaxis_data=x_data)
    for series_name, column in (
        ('现存确诊数', 'currentConfirmedCount'),
        ('累计确诊数', 'confirmedCount'),
        ('治愈数', 'curedCount'),
        ('死亡数', 'deadCount'),
    ):
        line.add_yaxis(
            series_name=series_name,
            y_axis=history[column].to_list(),
            label_opts=opts.LabelOpts(**hidden_labels),
        )

    y_axis_opts = opts.AxisOpts(
        type_="value",
        axistick_opts=opts.AxisTickOpts(is_show=True),
        splitline_opts=opts.SplitLineOpts(is_show=True),
    )
    line.set_global_opts(
        title_opts=opts.TitleOpts(title="{0}疫情图".format(city)),
        tooltip_opts=opts.TooltipOpts(trigger="axis"),
        yaxis_opts=y_axis_opts,
        xaxis_opts=opts.AxisOpts(type_="category", boundary_gap=False),
    )
    tl1.add(line, "{0}".format(city))
tl1.render('timeline_city.html')


# One map of China per day with two series (cumulative and current confirmed
# cases), played back as a timeline.
tl2 = Timeline(init_opts=opts.InitOpts(width="1400px", height="800px"))
for i in data_series:
    day_dta = day_data(i)
    province = day_dta['provinceName'].to_list()
    # Each data item is a [region name, value] pair.
    confirmed_pairs = [list(z) for z in zip(province, day_dta['confirmedCount'].to_list())]
    current_pairs = [list(z) for z in zip(province, day_dta['currentConfirmedCount'].to_list())]
    # Named china_map rather than `map` so the builtin map() is not shadowed
    # for the rest of the module.
    china_map = Map()
    china_map.add('累计确诊', confirmed_pairs, 'china', is_map_symbol_show=False)
    china_map.add('现存确诊', current_pairs, 'china', is_map_symbol_show=False)
    china_map.set_global_opts(
            tooltip_opts=opts.TooltipOpts(
                trigger="item", formatter="{a}:{c}"
            ),
            visualmap_opts=opts.VisualMapOpts(
                max_=1000000,
                is_piecewise=True,
                # NOTE(review): adjacent pieces share their boundary values
                # (0, 100, 500, ...); confirm which bucket ECharts assigns a
                # value that falls exactly on a boundary.
                pieces=[
                        {"max": 0, "min": 0, "label": "0"},
                        {"max": 100, "min": 0, "label": "0-100"},
                        {"max": 500, "min": 100, "label": "100-500"},
                        {"max": 1000, "min": 500, "label": "500-1000"},
                        {"max": 2000, "min": 1000, "label": "1000-2000"},
                        {"max": 1000000, "min": 2000, "label": ">2000"},
                        ],
                is_calculable=True,
                range_color=["lightskyblue", "yellow", "orangered"],
            ),
        )
    tl2.add(china_map, '{0}'.format(i))
tl2.render('timeline_map.html')

本来想让地图像折线图一样,在悬浮提示框中显示该点所有系列的数据。但是 Map 中的 TooltipOpts.formatter 怎么设置都不对:传字符串模板不行,自己写的回调函数也不起作用。
(PS:希望有人能指点一下。)

在这里插入图片描述
在这里插入图片描述
在这里插入图片描述
在这里插入图片描述

更新

关于TooltipOpts.formatter自定义提示框已经找到解决办法,详情参考 pyecharts 维护人员的文章:

pyecharts 之 JsCode 的妙用

  • 7
    点赞
  • 50
    收藏
    觉得还不错? 一键收藏
  • 4
    评论
评论 4
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值