python数据分析二一：图形化显示海地地震危机数据_海地地震危机数据Haiti.csv-CSDN博客

本文链接：https://blog.csdn.net/qq_38788128/article/details/80817160

知识点回顾

zip()
>>> a = [1,2,3]
>>> b = [4,5,6]
>>> c = [4,5,6,7,8]
>>> zipped = list(zip(a,b))     # 打包为元组的列表（Python 3 中 zip 返回迭代器，需用 list() 展开）
>>> zipped
[(1, 4), (2, 5), (3, 6)]
>>> list(zip(a,c))              # 元素个数与最短的列表一致
[(1, 4), (2, 5), (3, 6)]
>>> list(zip(*zipped))          # 与 zip 相反，*zipped 可理解为解压，返回二维矩阵式
[(1, 2, 3), (4, 5, 6)]

add_prefix() 与 join() 配合使用：先用 add_prefix() 给列名加上前缀，再用 join() 把结果合并到原 DataFrame

  # 去重用 set.union()，排序用 sorted()，返回排序后的列表；调用 union 时若不加 * 解包会报错：descriptor 'union' requires a 'set' object but received a 'generator'

 # strip() 的功能相当于其他语言（如 Java）中的 trim()：去除字符串首尾的空白

# -*- coding: utf-8 -*-
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from datetime import datetime


'''
图形化显示海地地震危机数据
'''
# 1. Load the raw data
data = pd.read_csv('C:\\tools\\pydata-book-master\\ch08\\Haiti.csv')

# Column dtypes
print(data.dtypes)
# Serial              int64
# INCIDENT TITLE     object
# INCIDENT DATE      object
# LOCATION           object
# DESCRIPTION        object
# CATEGORY           object
# LATITUDE          float64
# LONGITUDE         float64
# APPROVED           object
# VERIFIED           object

# 2. Process the data
# First look at the raw CATEGORY values
print(data.CATEGORY.head(6))
# 0          1. Urgences | Emergency, 3. Public Health,
# 1    1. Urgences | Emergency, 2. Urgences logistiqu...
# 2    2. Urgences logistiques | Vital Lines, 8. Autr...
# 3                            1. Urgences | Emergency,
# 4                            1. Urgences | Emergency,
# 5                       5e. Communication lines down,

# Summary statistics of the numeric columns
print(data.describe())
#           Serial     LATITUDE    LONGITUDE
# count  3593.000000  3593.000000  3593.000000
# mean   2080.277484    18.611495   -72.322680
# std    1171.100360     0.738572     3.650776
# min       4.000000    18.041313   -74.452757
# 25%    1074.000000    18.524070   -72.417500
# 50%    2163.000000    18.539269   -72.335000
# 75%    3088.000000    18.561820   -72.293570
# max    4052.000000    50.226029   114.174287

# The describe() output shows outlying coordinates (e.g. latitude 50, longitude
# 114) and CATEGORY may be missing, so keep only rows inside a bounding box
# around Haiti that also carry a category.
# The upper longitude bound must be -70, not +70: the map drawn later in this
# script spans longitudes -75..-71, and with +70 any mis-geocoded point between
# -70 and 70 degrees would survive the filter.
data = data[
    (data.LATITUDE > 18) & (data.LATITUDE < 20)
    & (data.LONGITUDE > -75) & (data.LONGITUDE < -70)
    & data.CATEGORY.notnull()
]

#方法一
def to_cat_list(catstr):
    """Split a comma-separated category string into a list of clean labels.

    Whitespace around each piece is stripped, and pieces that are empty after
    stripping (e.g. produced by a trailing comma) are dropped.
    """
    labels = []
    for piece in catstr.split(','):
        piece = piece.strip()
        if piece:
            labels.append(piece)
    return labels


print(to_cat_list('a,as,ff,,'))  # expected output: ['a', 'as', 'ff']

#方法二
def get_all_categories(cat_series):
    """Return the sorted list of distinct categories found in *cat_series*.

    Each element of *cat_series* is a comma-separated category string, which
    is split with ``to_cat_list``.  An empty *cat_series* yields ``[]``.
    """
    cat_sets = (set(to_cat_list(x)) for x in cat_series)

    # The generator must be unpacked with * so every set becomes its own
    # argument.  union is called on an empty set instance rather than as the
    # unbound ``set.union``: the unbound form raises TypeError when there is
    # nothing to unpack, whereas this form just returns an empty set.
    return sorted(set().union(*cat_sets))

print(get_all_categories(['a,as,ff,,','a,c,d,f,g']))  # expected output: ['a', 'as', 'c', 'd', 'f', 'ff', 'g']

#方法三：将分类信息拆分为编码和英文名称
def get_english(cat):
    """Split a category label into its code and its English name.

    ``'1. Urgences | Emergency'`` becomes ``('1', 'Emergency')``.  When the
    label contains no ``|`` the whole text after the code is used as the name.

    Raises:
        ValueError: if *cat* contains no ``.`` separating code from name.
    """
    # Split on the first '.' only, so a name that itself contains a period
    # does not break the two-way unpacking.
    code, names = cat.split('.', 1)
    if '|' in names:
        # The English name is the part right after the first '|'.
        names = names.split('|')[1]
    # Return a (code, English name) tuple.
    return code, names.strip()

print(get_english('1. Urgences | Emergency'))  # expected output: ('1', 'Emergency')

# tuple -> dict
# NOTE(review): `truple` is not used anywhere else in the visible script.
truple=('a','b')
# dict() needs an iterable of pairs, so a single (code, name) tuple cannot be
# passed to it directly:
# print(dict(get_english('1. Urgences | Emergency')))

# Collect every distinct category label occurring in the CATEGORY column.
all_cats = get_all_categories(data.CATEGORY)

# Turn the (code, English name) pairs into a code -> name dictionary.
english_mapping = dict(map(get_english, all_cats))
print(english_mapping['2a'])

def get_code(seq):
    """Return the code (text before the first '.') of each non-empty label in *seq*."""
    codes = []
    for label in seq:
        if not label:
            # Falsy entries such as '' are skipped.
            continue
        codes.append(label.split('.')[0])
    return codes

# Extract the code part of every category label.
all_codes=get_code(all_cats)
print(all_codes)

# Build a pandas Index from the unique codes (np.unique also sorts them);
# it is used below as the column labels of the indicator frame.
code_index=pd.Index(np.unique(all_codes))
print(code_index)

# Build an all-zero indicator frame: one row per record in `data` (sharing its
# index) and one column per category code.
dummy_frame = pd.DataFrame(
    np.zeros((len(data), len(code_index))),
    index=data.index,
    columns=code_index,
)

# print(dummy_frame)
#    1   1a   1b   1c   1d    2   2a ...    7h    8   8a   8c   8d   8e   8f
# 0     0.0  0.0  0.0  0.0  0.0  0.0  0.0 ...   0.0  0.0  0.0  0.0  0.0  0.0  0.0
# 4     0.0  0.0  0.0  0.0  0.0  0.0  0.0 ...   0.0  0.0  0.0  0.0  0.0  0.0  0.0
# 5     0.0  0.0  0.0  0.0  0.0  0.0  0.0 ...   0.0  0.0  0.0  0.0  0.0  0.0  0.0


# `.ix` was removed in pandas 1.0.  The columns are string labels, so the
# integer slice was positional and `.iloc` is the direct replacement.
print(dummy_frame.iloc[:, :6])

# 3. Mark each record's categories: for every row, set the indicator columns
# matching its category codes to 1.
for row, cat in zip(data.index, data.CATEGORY):
    codes = get_code(to_cat_list(cat))
    # `row` is an index label and `codes` are column labels, so label-based
    # `.loc` replaces `.ix`, which was removed in pandas 1.0.
    dummy_frame.loc[row, codes] = 1

# Prefix the indicator columns with 'category_' and attach them to the records.
data = data.join(dummy_frame.add_prefix('category_'))
# Positional column slice (the column labels are strings), hence `.iloc`.
print(data.iloc[:, 10:15])
#     category_1  category_1a     ...       category_1c  category_1d
# 0            1.0          0.0     ...               0.0          0.0
# 4            1.0          0.0     ...               0.0          0.0
# 5            0.0          0.0     ...               0.0          0.0
# 6            0.0          0.0     ...               0.0          0.0

#from mpl_toolkits.basemap import Basemap
import mpl_toolkits as mpl

def basic_haiti_map(ax=None, lllat = 17.25,urlat = 20.25,lllon = -75,urlon=-71):
    """Create a stereographic Basemap and draw its outlines.

    Args:
        ax: matplotlib axes to draw on; forwarded to Basemap as ``ax``.
        lllat, lllon: latitude / longitude passed as ``llcrnrlat`` / ``llcrnrlon``.
        urlat, urlon: latitude / longitude passed as ``urcrnrlat`` / ``urcrnrlon``.

    Returns:
        The Basemap instance, with coastlines, states and countries drawn.
    """
    # Import the submodule explicitly: a bare `import mpl_toolkits` does not
    # load `mpl_toolkits.basemap`, so `mpl.basemap.Basemap` raises
    # AttributeError.
    from mpl_toolkits.basemap import Basemap

    m = Basemap(ax=ax, projection='stere',
                # Centre the projection on the middle of the requested box.
                lon_0=(urlon + lllon) / 2,
                lat_0=(urlat + lllat) / 2,
                llcrnrlat=lllat, urcrnrlat=urlat,
                llcrnrlon=lllon, urcrnrlon=urlon,
                resolution='f')
    m.drawcoastlines()
    m.drawstates()
    m.drawcountries()
    return m

# Draw a 2x2 grid of maps, one panel per selected category code.
fig, axes = plt.subplots(nrows=2, ncols=2, figsize=(12, 10))
fig.subplots_adjust(hspace=0.05, wspace=0.05)

to_plot = ['2a', '1', '3c', '7a']

# Map extent handed to basic_haiti_map for every panel.
lllat, urlat = 17.25, 20.25
lllon, urlon = -75, -71

for code, ax in zip(to_plot, axes.flat):
    m = basic_haiti_map(ax, lllat=lllat, urlat=urlat, lllon=lllon, urlon=urlon)
    # Keep only the records flagged with this category.
    cat_data = data[data['category_%s' % code] == 1]
    # Convert longitude/latitude to map coordinates; plain arrays (.values)
    # are passed instead of the Series themselves.
    x, y = m(cat_data.LONGITUDE.values, cat_data.LATITUDE.values)
    m.plot(x, y, 'k.', alpha=0.5)
    ax.set_title('%s:%s' % (code, english_mapping[code]))