待办:地图可视化部分尚未完成,后续更新。
新冠肺炎感染人数数据分析
库函数引入
#引入库函数
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from datetime import datetime
import seaborn as sns
# Load the raw COVID-19 observation table from the local archive folder.
data = pd.read_csv(filepath_or_buffer="./archive/covid_19_data.csv")
# Preview the first rows.
data.head()
# Inspect the column dtypes and non-null counts.
data.info()
数据过滤
# Approach 1: build a boolean mask first, then select the matching rows.
x = data["Deaths"].gt(100000)
data.loc[x]
# Approach 2: combine two conditions into one element-wise AND mask
# (np.logical_and(a, b) is an alternative spelling of the same selection).
data[(data["Deaths"] > 100000) & (data["Confirmed"] > 1000000)]
数据清洗
# Give rows without a province/state an explicit placeholder label.
data['Province/State'] = data['Province/State'].fillna(value="Unknown")
# Count the missing values that remain in each column.
data.isnull().sum()
# Shorten both location column names in a single rename call.
data = data.rename(columns={"Province/State": "State", "Country/Region": "Country"})
# Store the three case counters as integers.
data = data.astype({'Confirmed': int, 'Deaths': int, 'Recovered': int})
# Active = confirmed cases minus deaths minus recoveries.
data['Active'] = data['Confirmed'].sub(data['Deaths']).sub(data['Recovered'])
data.columns
# Keep a separate copy of the observation date; it is parsed to datetime later.
data['Date'] = data['ObservationDate'].copy()
数据可视化
Covid-19 Cases in the World
# Summary table of the totals over the whole data set.
from pyecharts.components import Table
from pyecharts.options import ComponentTitleOpts

headers = ["Total Confirmed", "Total Deaths", "Total Recovered", "Total Active Cases"]
# A single row: each count column summed over every row of `data`,
# in the same order as the headers above.
rows = [[sum(data[column]) for column in ('Confirmed', 'Deaths', 'Recovered', 'Active')]]

table = Table()
table.add(headers, rows)
table.set_global_opts(title_opts=ComponentTitleOpts(title="World-Covid-19 Cases"))
table.render_notebook()
from pyecharts import options as opts
from pyecharts.charts import Pie
from pyecharts.faker import Faker

# Totals over every row of `data`, one per pie slice.
sumactive = sum(data['Active'])
sumrecovered = sum(data['Recovered'])
sumdeaths = sum(data['Deaths'])
labels = ["Active Cases", "Recovered Cases", "Death Cases"]

# Pie chart of the three totals, drawn left of centre so the legend fits on the right.
c = Pie()
c.add(
    "The percentage of Covid-19 Cases",
    list(zip(labels, (sumactive, sumrecovered, sumdeaths))),
    center=["35%", "50%"],
)
c.set_global_opts(
    title_opts=opts.TitleOpts(title="The percentage of Covid-19 Cases in the world"),
    legend_opts=opts.LegendOpts(pos_left="50%"),
)
# Label each slice as "<name>: <value>".
c.set_series_opts(label_opts=opts.LabelOpts(formatter="{b}: {c}"))
c.render_notebook()
# Parse the date strings into datetimes and order the rows chronologically.
data['Date'] = pd.to_datetime(data['Date'])
data = data.sort_values(by="Date")
# Sum the four count columns per date; "Date" stays a regular column.
datatime = data.groupby("Date", as_index=False)[["Confirmed", "Active", "Recovered", "Deaths"]].sum()
# Bar chart of the per-date sum of active cases.
# `Bar` was used here without ever being imported; import it explicitly.
from pyecharts.charts import Bar

c = (
    Bar()
    .add_xaxis(list(datatime.Date))
    .add_yaxis("Active Cases", list(datatime.Active), color='DarkRed')
    .set_global_opts(
        title_opts=opts.TitleOpts(title="Active Cases - Daily", subtitle="纵坐标感染病例,横坐标为感染时间"),
        datazoom_opts=[
            opts.DataZoomOpts(orient="vertical"),  # zoom control; orient="vertical" targets the Y axis
            opts.DataZoomOpts(type_="inside"),  # allow zooming/scrolling from inside the plot area
        ],
    )
)
c.render_notebook()
# Bar chart of the per-date sum of deaths.
# `Bar` is not imported anywhere else in the notebook; import it here so the
# cell runs on its own.
from pyecharts.charts import Bar

# NOTE(review): the subtitle is the same text as on the active-cases chart and
# talks about infection cases, although this chart plots deaths - confirm wording.
c = (
    Bar()
    .add_xaxis(list(datatime.Date))
    .add_yaxis("Deaths Cases", list(datatime.Deaths), color='DarkSlateGray')
    .set_global_opts(
        title_opts=opts.TitleOpts(title="Deaths Cases - Daily", subtitle="纵坐标感染病例,横坐标为感染时间"),
        datazoom_opts=[
            opts.DataZoomOpts(orient="vertical"),  # zoom control; orient="vertical" targets the Y axis
            opts.DataZoomOpts(type_="inside"),  # allow zooming/scrolling from inside the plot area
        ],
    )
)
c.render_notebook()
import pyecharts.options as opts
from pyecharts.charts import Line

# The thirty countries with the largest summed "Confirmed" values, largest first.
data1 = (
    data.groupby("Country")["Confirmed"]
    .sum()
    .sort_values(ascending=False)
    .head(30)
    .reset_index()
)

# Smoothed, filled line over those countries; point labels are hidden.
c = Line()
c.add_xaxis(data1["Country"].tolist())
c.add_yaxis("Confirmed", data1["Confirmed"].tolist(), is_smooth=True, color='Purple')
c.set_series_opts(
    areastyle_opts=opts.AreaStyleOpts(opacity=0.5),
    label_opts=opts.LabelOpts(is_show=False),
)
c.set_global_opts(
    title_opts=opts.TitleOpts(title="Top 30 Countries with the most Confirmed Cases"),
    xaxis_opts=opts.AxisOpts(
        axistick_opts=opts.AxisTickOpts(is_align_with_label=True),
        is_scale=False,
        boundary_gap=False,
    ),
    legend_opts=opts.LegendOpts(pos_left="50%"),
)
c.render_notebook()
# The thirty countries with the largest summed "Recovered" values, largest first.
data2 = (
    data.groupby("Country")["Recovered"]
    .sum()
    .sort_values(ascending=False)
    .head(30)
    .reset_index()
)

# Smoothed, filled line over those countries; point labels are hidden.
c = Line()
c.add_xaxis(data2["Country"].tolist())
c.add_yaxis("Recovered", data2["Recovered"].tolist(), is_smooth=True, color='DarkGreen')
c.set_series_opts(
    areastyle_opts=opts.AreaStyleOpts(opacity=0.5),
    label_opts=opts.LabelOpts(is_show=False),
)
c.set_global_opts(
    title_opts=opts.TitleOpts(title="Top 30 Countries with the most Recovered Cases"),
    xaxis_opts=opts.AxisOpts(
        axistick_opts=opts.AxisTickOpts(is_align_with_label=True),
        is_scale=False,
        boundary_gap=False,
    ),
    legend_opts=opts.LegendOpts(pos_left="50%"),
)
c.render_notebook()
Covid-19 Cases in China
# Fold the "Mainland China" label into "China" so both spellings form one group.
data['Country'] = data['Country'].replace({'Mainland China': 'China'})
# Keep only the China rows; reset_index() keeps the old index as an "index" column.
data_ch = data.loc[data['Country'].eq('China')].reset_index()
# Summary table of the totals over the China subset.
from pyecharts.components import Table
from pyecharts.options import ComponentTitleOpts

headers = ["Total Confirmed", "Total Deaths", "Total Recovered", "Total Active Cases"]
# A single row: each count column summed over every row of `data_ch`,
# in the same order as the headers above.
rows = [[sum(data_ch[column]) for column in ('Confirmed', 'Deaths', 'Recovered', 'Active')]]

table = Table()
table.add(headers, rows)
table.set_global_opts(title_opts=ComponentTitleOpts(title="China-Covid-19 Cases"))
table.render_notebook()
from pyecharts import options as opts
from pyecharts.charts import Pie
from pyecharts.faker import Faker

# Totals over every row of `data_ch`, one per pie slice.
sumactive = sum(data_ch['Active'])
sumrecovered = sum(data_ch['Recovered'])
sumdeaths = sum(data_ch['Deaths'])
labels = ["Active Cases", "Recovered Cases", "Death Cases"]

# Pie chart of the three totals, drawn left of centre so the legend fits on the right.
c = Pie()
c.add(
    "The percentage of Covid-19 Cases",
    list(zip(labels, (sumactive, sumrecovered, sumdeaths))),
    center=["35%", "50%"],
)
c.set_global_opts(
    title_opts=opts.TitleOpts(title="The percentage of Covid-19 Cases in the China"),
    legend_opts=opts.LegendOpts(pos_left="50%"),
)
# Label each slice as "<name>: <value>".
c.set_series_opts(label_opts=opts.LabelOpts(formatter="{b}: {c}"))
c.render_notebook()
# Per-date sums of the four count columns for the China subset.
# The columns are selected with a list: selecting several group-by columns
# with a bare tuple (["A", "B"] written as ["A","B"] without the inner
# brackets) is deprecated and raises in pandas 2.x.
# The trailing reset_index() keeps the previous index as an "index" column.
data_ch1 = data_ch.groupby("Date")[["Confirmed", "Deaths", "Recovered", "Active"]].sum().reset_index().sort_values("Date").reset_index()
from pyecharts import options as opts
from pyecharts.charts import Scatter
from pyecharts.faker import Faker

# Scatter plot with one series per count column, sharing the date axis.
c = (
    Scatter()
    .add_xaxis(list(data_ch1.Date))
    .add_yaxis("Confirmed", list(data_ch1.Confirmed))
    .add_yaxis("Active", list(data_ch1.Active))
    .add_yaxis("Deaths", list(data_ch1.Deaths))
    .add_yaxis("Recovered", list(data_ch1.Recovered))
    .set_global_opts(
        title_opts=opts.TitleOpts(title="Covid-19 Cases over the time in the China"),
        # Visual map of type "size" over the value range 20..1000000.
        visualmap_opts=opts.VisualMapOpts(type_="size", max_=1000000, min_=20),
        legend_opts=opts.LegendOpts(pos_left="50%"),
    )
)
c.render_notebook()
# Per-state sums for the China subset, ordered by confirmed count (largest first).
# The columns are selected with a list: selecting several group-by columns
# with a bare tuple is deprecated and raises in pandas 2.x.
state_ch = data_ch.groupby(["State"])[["Confirmed", "Active", "Deaths"]].sum().sort_values("Confirmed", ascending=False).reset_index()
from pyecharts.charts import Pie

# NOTE(review): each slice value is Confirmed + Active + Deaths, while the
# title says "Confirmed Cases". Since Active is derived from Confirmed, the
# sum counts cases more than once - confirm whether state_ch.Confirmed alone
# was intended.
c = (
    Pie()
    .add(
        "",
        [
            list(z)
            for z in zip(
                state_ch.State,
                state_ch.Confirmed + state_ch.Active + state_ch.Deaths,
            )
        ],
        center=["40%", "60%"],
    )
    .set_global_opts(
        title_opts=opts.TitleOpts(title="Confirmed Cases in China by States"),
        # Scrollable vertical legend on the right, since there are many states.
        legend_opts=opts.LegendOpts(type_="scroll", pos_left="80%", orient="vertical"),
    )
    # Label each slice as "<name>: <value>".
    .set_series_opts(label_opts=opts.LabelOpts(formatter="{b}: {c}"))
)
c.render_notebook()
本文用到的博客链接
方法一:将 DataFrame 转化为字典
方法二:将 DataFrame 转化为字典
python地图可视化
pandas中遍历DataFrame行
pandas DataFrame 数据转化为 列表 list的方法