Wuhan Coronavirus Data Analytics

Wuhan Coronavirus Data Analytics

"""
Created on Mon Mar  2 11:00:00 2020

@author: xiaoyao
"""

部分输出内容省略

# Show the current working directory (IPython magic; only valid inside a notebook/IPython session).
%pwd
# 导入必要的库
import numpy as np 
import pandas as pd 

# 可视化库
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
sns.set()
from plotly.offline import init_notebook_mode, iplot 
import plotly.graph_objs as go
import plotly.offline as py
import pycountry
py.init_notebook_mode(connected=True)
import folium 
from folium import plugins

# Render inline figures in retina (high-DPI) format (IPython magic).
%config InlineBackend.figure_format = 'retina' 

# Enlarge the default figure size; the colormap override below is left disabled.
plt.rcParams['figure.figsize'] = 8, 5
#plt.rcParams['image.cmap'] = 'viridis'


import os
# Print the path of every file found (recursively) under the dataset directory.
for root, _subdirs, names in os.walk('./2019_nCoV_data'):
    for path in (os.path.join(root, name) for name in names):
        print(path)

# Suppress all warnings for the rest of the session.
import warnings
warnings.filterwarnings('ignore')
# Load the dataset from the local CSV file.
data= pd.read_csv("./2019_nCoV_data/2019_nCoV_data.csv")
data.head()

在这里插入图片描述

# Print a concise summary of the DataFrame (columns, non-null counts, dtypes).
data.info()

在这里插入图片描述

print(type(data))
# Import pandas_profiling and build a profiling report for the dataset.
import pandas_profiling
pandas_profiling.ProfileReport(data)

在这里插入图片描述
在这里插入图片描述
在这里插入图片描述
在这里插入图片描述
在这里插入图片描述
在这里插入图片描述
在这里插入图片描述

在这里插入图片描述
在这里插入图片描述
在这里插入图片描述
在这里插入图片描述
在这里插入图片描述
在这里插入图片描述

# Build the profiling report and save it locally as an HTML file.
report = pandas_profiling.ProfileReport(data)
report.to_file("./reportfile.html")
# Show the dtype of the Date column before conversion. (type() of the column
# would always report the Series class, which says nothing about the format.)
print(data["Date"].dtype)
# Parse the Date column with one vectorized call instead of calling
# pd.to_datetime once per element through apply().
data['Date'] = pd.to_datetime(data['Date'])
# Drop the serial-number column.
data.drop(columns=['Sno'], inplace=True)
data.head()

在这里插入图片描述

截至目前有哪些国家受到新冠病毒影响
# Collect the distinct countries present in the data as a plain Python list
# (duplicates removed) and print it.
countries = pd.unique(data['Country']).tolist()
print(countries)
# Total number of distinct countries affected.
print("\n共计受影响的国家数量: ",len(countries))
['China', 'US', 'Japan', 'Thailand', 'South Korea', 'Mainland China', 'Hong Kong', 'Macau', 'Taiwan', 'Singapore', 'Philippines', 'Malaysia', 'Vietnam', 'Australia', 'Mexico', 'Brazil', 'France', 'Nepal', 'Canada', 'Cambodia', 'Sri Lanka', 'Ivory Coast', 'Germany', 'Finland', 'United Arab Emirates', 'India', 'Italy', 'Sweden', 'Russia', 'Spain', 'UK', 'Belgium', 'Others', 'Egypt']

共计受影响的国家数量:  34
# The list printed above contains both "China" and "Mainland China"; fold the
# latter into "China".
# The result is assigned back to the column: calling replace(inplace=True) on
# the `data['Country']` selection can act on a temporary object and leave
# `data` unchanged (this is the case with pandas Copy-on-Write enabled).
data['Country'] = data['Country'].replace({'Mainland China': 'China'})
countries = data['Country'].unique().tolist()
print(countries)
print("\n共计受影响的国家数量: ",len(countries))
['China', 'US', 'Japan', 'Thailand', 'South Korea', 'Hong Kong', 'Macau', 'Taiwan', 'Singapore', 'Philippines', 'Malaysia', 'Vietnam', 'Australia', 'Mexico', 'Brazil', 'France', 'Nepal', 'Canada', 'Cambodia', 'Sri Lanka', 'Ivory Coast', 'Germany', 'Finland', 'United Arab Emirates', 'India', 'Italy', 'Sweden', 'Russia', 'Spain', 'UK', 'Belgium', 'Others', 'Egypt']

共计受影响的国家数量:  33
# Read the calendar date of the last row straight from its Timestamp instead
# of formatting it as a string and splitting on '-' / whitespace.
# NOTE(review): assumes the rows are in chronological order so the last row
# carries the most recent date - confirm against the CSV.
last_timestamp = data['Date'].iloc[-1]
year, month, day = last_timestamp.year, last_timestamp.month, last_timestamp.day

from datetime import date
# Keep only the rows stamped strictly after midnight of that day.
data_latest = data[data['Date'] > pd.Timestamp(date(year, month, day))]
data_latest.head()

在这里插入图片描述

# Number of distinct countries in the latest snapshot.
Number_of_countries = data_latest['Country'].nunique()

# Sum the confirmed cases per country, expose the country as a regular column
# and number the rows starting at 1.
cases = data_latest.groupby('Country')['Confirmed'].sum().reset_index()[['Confirmed', 'Country']]
cases.index = np.arange(1, Number_of_countries + 1)

# Same table with the country column first.
global_cases = cases.loc[:, ['Country', 'Confirmed']]
global_cases

在这里插入图片描述
在这里插入图片描述

# Load the world_coordinates dataset.
world_coordinates = pd.read_csv('./2019_nCoV_data/world_coordinates.csv')
world_coordinates.head()

在这里插入图片描述

# Join the coordinates with the per-country totals on the Country column
# (default inner join, so countries missing from either table are dropped).
world_data = world_coordinates.merge(global_cases, on='Country')
world_data.head()

在这里插入图片描述

当前世界各地疫情的可视化
# Draw the world map with folium: one red circle marker per country, with a
# popup showing the country name and its confirmed-case count.
# NOTE(review): the built-in 'Stamen Toner' tile set may not be available in
# newer folium releases - confirm against the installed version.
world_map = folium.Map(location=[10, -20], zoom_start=2.3,tiles='Stamen Toner')

for lat, lon, value, name in zip(world_data['latitude'], world_data['longitude'], world_data['Confirmed'], world_data['Country']):
    popup_html = (f'<strong>Country</strong>: {str(name).capitalize()}<br>'
                  f'<strong>Confirmed Cases</strong>: {value!s}<br>')
    marker = folium.CircleMarker(
        [lat, lon],
        radius=10,
        popup=popup_html,
        color='red',
        fill_color='red',
        fill_opacity=0.7,
    )
    marker.add_to(world_map)
world_map

在这里插入图片描述

# Global totals of confirmed cases, deaths and recovered cases in the latest snapshot.
for label, column in (
    ('Globally Confirmed Cases: ', 'Confirmed'),
    ('Global Deaths: ', 'Deaths'),
    ('Globally Recovered Cases: ', 'Recovered'),
):
    print(label, data_latest[column].sum())
Globally Confirmed Cases:  71226
Global Deaths:  1770
Globally Recovered Cases:  10865
# Per-province totals. The aggregation is restricted to the count columns:
# an unrestricted .sum() would also try to add up the datetime 'Date' column,
# which pandas >= 2.0 refuses to do (numeric_only now defaults to False).
data_latest.groupby(['Country','Province/State'])[['Confirmed','Deaths','Recovered']].sum()
# Five countries with the highest death totals.
data_latest.groupby('Country')['Deaths'].sum().sort_values(ascending=False)[:5]
Country
China          1765
Taiwan            1
France            1
Hong Kong         1
Philippines       1
Name: Deaths, dtype: int64
# Five countries with the highest recovered totals.
data_latest.groupby('Country')['Recovered'].sum().sort_values(ascending=False).head(5)
Country
China          10748
Singapore         18
Thailand          14
Japan             12
South Korea        9
Name: Recovered, dtype: int64
# Rows of the latest snapshot that belong to China. An explicit copy is taken
# so the column assignment below works on an independent frame rather than on
# a selection derived from data_latest.
China = data_latest[data_latest['Country']=='China'].copy()
# Lower-case the province names with the vectorized string accessor; unlike
# map(lambda x: x.lower()) it leaves missing values as NaN instead of raising.
# NOTE(review): presumably done to match the names in the coordinates file - confirm.
China['Province/State'] = China['Province/State'].str.lower()
China
# Horizontal bar chart of confirmed vs. recovered cases per Chinese province.
# The first row of `China` is skipped in both layers.
# NOTE(review): presumably the first row is an outlier far beyond the
# 0-400 x-range - confirm.
f, ax = plt.subplots(figsize=(12, 8))

# Confirmed cases, drawn in pastel red.
sns.set_color_codes("pastel")
sns.barplot(data=China[1:], x="Confirmed", y="Province/State",
            color="r", label="Confirmed")

# Recovered cases, drawn over the same axes in muted green.
sns.set_color_codes("muted")
sns.barplot(data=China[1:], x="Recovered", y="Province/State",
            color="g", label="Recovered")

# Legend, axis range and axis labels.
ax.legend(ncol=2, loc="lower right", frameon=True)
ax.set_xlim(0, 400)
ax.set_ylabel("")
ax.set_xlabel("Stats")
sns.despine(left=True, bottom=True)
# Centre point used for the China maps.
latitude, longitude = 39.91666667, 116.383333

# NOTE(review): this close-up map is never displayed here; `china_map` is
# rebound to a new map further down the notebook.
china_map = folium.Map(location=[latitude, longitude], zoom_start=12)

# Load the province coordinates and rename 'name3' to 'Province/State'.
# NOTE(review): presumably so that column can serve as the join key - confirm.
china_coordinates = pd.read_csv("./2019_nCoV_data/china_Province_coordinates.csv").rename(
    columns={'name3': 'Province/State'}
)
china_coordinates.head()
# Join on whatever columns the two frames have in common (pandas default when
# no key is given).
df_china_virus = pd.merge(China, china_coordinates)
df_china_virus.head()
# Build a compact frame with just the columns needed for the province maps.
# NOTE: this rebinds `data`, which until now held the full dataset.
data = pd.DataFrame({
    column: df_china_virus[column].tolist()
    for column in ('name', 'lat', 'lon', 'Confirmed', 'Recovered', 'Deaths')
})

data.head()
# Map of confirmed cases: one red circle marker per Chinese province, with a
# popup showing the province name and its confirmed-case count.
china_map1 = folium.Map(location=[latitude, longitude], zoom_start=4,tiles='Stamen Toner')

for lat, lon, value, name in zip(data['lat'], data['lon'], data['Confirmed'], data['name']):
    # Only the marker is created per row; no extra folium.Map object is built
    # and discarded inside the loop.
    folium.CircleMarker([lat, lon],
                        radius=13,
                        popup = ('Province: ' + str(name).capitalize() + '<br>'
                        'Confirmed: ' + str(value) + '<br>'),
                        color='red',
                        fill_color='red',
                        fill_opacity=0.7 ).add_to(china_map1)
china_map1

在这里插入图片描述

# Map of deaths: one circle marker per Chinese province (black outline, red
# fill), with a popup showing the province name and its death count.
china_map = folium.Map(location=[latitude, longitude], zoom_start=4,tiles='Stamen Toner')

for lat, lon, value, name in zip(data['lat'], data['lon'], data['Deaths'], data['name']):
    # Only the marker is created per row; no extra folium.Map object is built
    # and discarded inside the loop.
    folium.CircleMarker([lat, lon],
                        radius=13,
                        popup = ('Province: ' + str(name).capitalize() + '<br>'
                        'Deaths: ' + str(value) + '<br>'),
                        color='black',
                        fill_color='red',
                        fill_opacity=0.7 ).add_to(china_map)
china_map

在这里插入图片描述

# Map of recovered cases: one green circle marker per Chinese province, with
# a popup showing the province name and its recovered count.
china_map = folium.Map(location=[latitude, longitude], zoom_start=4,tiles='Stamen Toner')

for lat, lon, value, name in zip(data['lat'], data['lon'], data['Recovered'], data['name']):
    popup_html = f'Province: {str(name).capitalize()}<br>Recovered: {value!s}<br>'
    marker = folium.CircleMarker(
        [lat, lon],
        radius=10,
        popup=popup_html,
        color='green',
        fill_color='green',
        fill_opacity=0.7,
    )
    marker.add_to(china_map)

china_map

在这里插入图片描述

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值