第8章 图形化显示地震危机数据(海地)
import pandas as pd
import numpy as np
from pandas import Series,DataFrame
import matplotlib.pyplot as plt
from mpl_toolkits.basemap import Basemap
# Load the haiti.csv dataset from a hard-coded local Windows path into a DataFrame.
data = pd.read_csv(r'E:\pydata-book-2nd-edition\pydata-book-2nd-edition\datasets\haiti\haiti.csv')
# Print the column / dtype / non-null summary of the freshly loaded frame.
data.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3593 entries, 0 to 3592
Data columns (total 10 columns):
Serial 3593 non-null int64
INCIDENT TITLE 3593 non-null object
INCIDENT DATE 3593 non-null object
LOCATION 3592 non-null object
DESCRIPTION 3593 non-null object
CATEGORY 3587 non-null object
LATITUDE 3593 non-null float64
LONGITUDE 3593 non-null float64
APPROVED 3593 non-null object
VERIFIED 3593 non-null object
dtypes: float64(2), int64(1), object(7)
memory usage: 280.8+ KB
# Summary statistics of the frame. The result is not assigned, so it is only
# shown when this runs in an interactive session (IPython / Jupyter).
data.describe()
Out[16]:
Serial LATITUDE LONGITUDE
count 3593.000000 3593.000000 3593.000000
mean 2080.277484 18.611495 -72.322680
std 1171.100360 0.738572 3.650776
min 4.000000 18.041313 -74.452757
25% 1074.000000 18.524070 -72.417500
50% 2163.000000 18.539269 -72.335000
75% 3088.000000 18.561820 -72.293570
max 4052.000000 50.226029 114.174287
# Keep only the reports that have a category and whose coordinates lie strictly
# inside the box 18 < latitude < 20, -75 < longitude < -70.
data = data[
    data['CATEGORY'].notnull()
    & (data['LATITUDE'] > 18)
    & (data['LATITUDE'] < 20)
    & (data['LONGITUDE'] > -75)
    & (data['LONGITUDE'] < -70)
]
# Print the summary again to show how many rows survived the filter.
data.info()
<class 'pandas.core.frame.DataFrame'>
Int64Index: 3569 entries, 0 to 3592
Data columns (total 10 columns):
Serial 3569 non-null int64
INCIDENT TITLE 3569 non-null object
INCIDENT DATE 3569 non-null object
LOCATION 3568 non-null object
DESCRIPTION 3569 non-null object
CATEGORY 3569 non-null object
LATITUDE 3569 non-null float64
LONGITUDE 3569 non-null float64
APPROVED 3569 non-null object
VERIFIED 3569 non-null object
dtypes: float64(2), int64(1), object(7)
memory usage: 306.7+ KB
def to_cat_list(catstr):
    """Split a comma-separated category string into a list of entries.

    Every piece is stripped of surrounding whitespace; pieces that are empty
    after stripping are dropped.
    """
    entries = []
    for piece in catstr.split(','):
        piece = piece.strip()
        if piece:
            entries.append(piece)
    return entries
def get_all_categories(cat_series):
    """Return the sorted list of distinct categories found in *cat_series*.

    Each element of *cat_series* is a comma-separated category string that is
    parsed with ``to_cat_list``. An empty input yields an empty list.
    """
    cat_sets = (set(to_cat_list(x)) for x in cat_series)
    # Union onto an empty set instance: the unbound ``set.union(*cat_sets)``
    # raises TypeError when there are no sets to unpack (empty series).
    return sorted(set().union(*cat_sets))
def get_english(cat):
    """Split a category label into ``(code, name)``.

    The code is the text before the first ``'.'``. The name is the rest of
    the label; when the rest contains a ``'|'``, only the part after the
    first bar is kept. The name is returned with surrounding whitespace
    stripped.

    Raises:
        ValueError: if *cat* contains no ``'.'``.
    """
    # Split on the first dot only, so a name that itself contains a dot
    # (e.g. an abbreviation) does not break the two-way unpacking.
    code, names = cat.split('.', 1)
    if '|' in names:
        # Split on the bare bar rather than ' | ' so a label written without
        # surrounding spaces yields a second part instead of an IndexError;
        # the final strip() removes any padding either way.
        names = names.split('|')[1]
    return code, names.strip()
# Every distinct category label that appears in the CATEGORY column.
all_cats = get_all_categories(data['CATEGORY'])
# Lookup table from category code to name, built from the (code, name) pairs
# that get_english returns for each label.
english_mapping = dict(map(get_english, all_cats))
def get_code(seq):
    """Return the text before the first '.' of every non-empty label in *seq*."""
    return [label.partition('.')[0] for label in seq if label]
# Unique category codes; they become the columns of the indicator matrix.
all_codes = get_code(all_cats)
code_index = pd.Index(np.unique(all_codes))
# One row per report (sharing data's index), one zero-filled column per code.
dummy_frame = DataFrame(np.zeros((len(data), len(code_index))),
                        index=data.index, columns=code_index)
# Set a 1 for every code each report is tagged with. ``.loc`` replaces
# ``.ix``, which was deprecated in pandas 0.20 and removed in 1.0; rows and
# columns are selected by label, which is what this loop needs.
for row, cat in zip(data.index, data.CATEGORY):
    codes = get_code(to_cat_list(cat))
    dummy_frame.loc[row, codes] = 1
# Attach the indicator columns to the reports. The 'catagory_' spelling is kept
# on purpose: the plotting code later in the file selects columns by this prefix.
data = data.join(dummy_frame.add_prefix('catagory_'))
def basic_haiti_map(ax=None, lllat=17.25, urlat=20.25, lllon=-75, urlon=-71):
    """Build a stereographic Basemap over the given box and draw its outlines.

    Args:
        ax: Axes passed through to ``Basemap`` (``None`` by default).
        lllat, urlat: Latitudes of the lower-left and upper-right corners.
        lllon, urlon: Longitudes of the lower-left and upper-right corners.

    Returns:
        The ``Basemap`` instance, with coastlines, states and countries drawn.
    """
    # The projection is centred on the midpoint of the requested box.
    center_lon = (urlon + lllon) / 2
    center_lat = (urlat + lllat) / 2
    haiti_map = Basemap(
        ax=ax, projection='stere',
        lon_0=center_lon, lat_0=center_lat,
        llcrnrlat=lllat, urcrnrlat=urlat,
        llcrnrlon=lllon, urcrnrlon=urlon,
        resolution='f',
    )
    for draw_outline in (haiti_map.drawcoastlines,
                         haiti_map.drawstates,
                         haiti_map.drawcountries):
        draw_outline()
    return haiti_map
# Bounding box shared by all four panels.
lllat = 17.25
urlat = 20.25
lllon = -75
urlon = -71
# Category codes to draw, one per panel.
to_plot = ['2a', '1', '3c', '7a']
# 2x2 grid of axes with almost no gap between panels.
fig, axes = plt.subplots(nrows=2, ncols=2, figsize=(12, 10))
fig.subplots_adjust(hspace=0.05, wspace=0.05)
for code, ax in zip(to_plot, axes.flat):
    m = basic_haiti_map(ax, lllat=lllat, urlat=urlat, lllon=lllon, urlon=urlon)
    # Reports whose indicator column for this code is set.
    cat_data = data[data['catagory_%s' % code] == 1]
    # Convert longitude / latitude into the map's plotting coordinates.
    x, y = m(np.array(cat_data['LONGITUDE']), np.array(cat_data['LATITUDE']))
    m.plot(x, y, 'k.', alpha=0.5)
    ax.set_title('%s: %s' % (code, english_mapping[code]))
# Single-panel figure zoomed in on a smaller bounding box.
fig = plt.figure()
ax = fig.add_subplot(1, 1, 1)
lllat = 18.43
urlat = 18.69
lllon = -72.57
urlon = -72.08
m = basic_haiti_map(ax, lllat=lllat, urlat=urlat, lllon=lllon, urlon=urlon)
# Raw string: the Windows path contains \p, \d, \h and \P, which are invalid
# escape sequences in a normal literal and trigger a warning on Python 3.6+
# (a SyntaxWarning from 3.12). The runtime value of the path is unchanged.
shapefile_path = r'E:\pydata-book-2nd-edition\pydata-book-2nd-edition\datasets\haiti\PortAuPrince_Roads\PortAuPrince_Roads'
# Overlay the road shapefile on the map under the name 'road'.
m.readshapefile(shapefile_path, 'road')
# Plot the reports flagged with category code '2a'.
code = '2a'
cat_data = data[data['catagory_%s' % code] == 1]
x, y = m(np.array(cat_data.LONGITUDE), np.array(cat_data.LATITUDE))
m.plot(x, y, 'k.', alpha=0.5)
ax.set_title('%s reported in Port-au-Prince' % english_mapping[code])
# Write the figure to a hard-coded local path.
plt.savefig(r'E:\pycode\data\food_shortage.png', dpi=400, bbox_inches='tight')