# 1. 911 call classification case study - code example
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
from matplotlib import font_manager
# Load the 911 call records from the CSV file in the current working directory.
df = pd.read_csv(
    filepath_or_buffer="./911.csv",
)

# Font properties backed by the SimKai TrueType file in the Windows fonts
# folder. NOTE(review): presumably used for CJK labels in plots later in the
# file; that code is not visible here.
my_font = font_manager.FontProperties(
    fname="c:/Windows/Fonts/simkai.ttf",
)

# Output of print(df.info()) captured from one run:
#   <class 'pandas.core.frame.DataFrame'>
#   RangeIndex: 249737 entries, 0 to 249736
#   Data columns (total 9 columns):
#    #   Column     Non-Null Count   Dtype
#   ---  ------     --------------   -----
#    0   lat        249737 non-null  float64
#    1   lng        249737 non-null  float64
#    2   desc       249737 non-null  object
#    3   zip        219391 non-null  float64
#    4   title      249737 non-null  object
#    5   timeStamp  249737 non-null  object
#    6   twp        249644 non-null  object
#    7   addr       249737 non-null  object
#    8   e          249737 non-null  int64
#   dtypes: float64(3), int64(1), object(5)
#   memory usage: 17.1+ MB
# One-column DataFrame containing only the "timeStamp" column of df.
time_data = df["timeStamp"].to_frame()

# Output of print(time_data) captured from one run:
#                     timeStamp
#   0       2015-12-10 17:10:52
#   1       2015-12-10 17:29:21
#   2       2015-12-10 14:39:21
#   ...                     ...
#   249735  2017-09-20 19:42:05
#   249736  2017-09-20 19:42:29
#   [249737 rows x 1 columns]
# Extract the call categories.
# df["title"] is a pandas Series; splitting every title on ":" gives one list
# per row whose first element is the category (see the sample below).
tempt_list = df["title"].str.split(':').tolist()

# Sample of tempt_list captured from one run:
#   [['Traffic', ' DISABLED VEHICLE -'],
#    ['Fire', ' FIRE ALARM'],
#    ['Traffic', ' ROAD OBSTRUCTION -'],
#    ...
#    ['Fire', ' FIRE INVESTIGATION'],
#    ['EMS', ' UNKNOWN MEDICAL EMERGENCY'],
#    ['Traffic', ' ROAD OBSTRUCTION -']]

# Unique categories. A set removes the duplicates, but its iteration order for
# strings can differ from one interpreter run to the next (hash
# randomization), which made the column order of anything built from
# cate_list unstable. Sorting pins the order.
cate_list = sorted({parts[0] for parts in tempt_list})

# With the sample data: ['EMS', 'Fire', 'Traffic']
# Build an all-zero DataFrame with one row per record in df and one column
# per category in cate_list.
zero_array = pd.DataFrame(
    data=np.zeros(shape=(len(df), len(cate_list))),
    columns=cate_list,
)

# Output of print(zero_array) captured from one run:
#           EMS  Fire  Traffic
#   0       0.0   0.0      0.0
#   1       0.0   0.0      0.0
#   2       0.0   0.0      0.0
#   ...     ...   ...      ...
#   249735  0.0   0.0      0.0
#   249736  0.0   0.0      0.0
#   [249737 rows x 3 columns]
#赋值
# print(df["title"].str.contains('EMS'))
'''
0 True
1 Tru