1. 时间序列
1.1 生成时间序列
pd.date_range(start="20171230", end="20180131", freq="D")
freq="10D" 表示每10天生成一个时间点;freq="M" 表示每月生成一个时间点(取月末日期;pandas 2.2 及以上版本中该别名已改为 "ME")
1.2 重采样:将时间序列从一个频率转化为另一个频率
df.resample("M")
2. 案例:911电话
统计911紧急电话中不同类型的紧急情况的次数。
统计不同月份紧急电话次数的变化情况。
统计不同月份、不同类型紧急电话次数的变化情况。
链接:https://pan.baidu.com/s/1BKNdZ8p7SfHOmTscNQldoA
提取码:utwq
import pandas as pd
import numpy as np
def build_category_indicators(titles: pd.Series) -> pd.DataFrame:
    """Build a 0/1 indicator table of emergency categories.

    The category of a call is the text before the first ":" in its
    title (for example "EMS" in "EMS: BACK PAINS/INJURY").

    Args:
        titles: Series of call titles. Missing titles are allowed and
            produce a row of zeros.

    Returns:
        A float DataFrame with one row per title (same index as
        ``titles``) and one column per category, in sorted order.  A
        cell is 1.0 when the title belongs to that category, else 0.0.
    """
    prefixes = titles.str.split(":").str[0]
    # Sorted so the column order is reproducible between runs.
    categories = sorted(prefixes.dropna().unique())
    indicators = pd.DataFrame(
        np.zeros((len(titles), len(categories))),
        index=titles.index,
        columns=categories,
    )
    for cate in categories:
        # Compare the prefix exactly: a substring/regex search would also
        # flag titles that merely mention the category name elsewhere.
        # A single .loc call (instead of chained indexing) guarantees the
        # write lands in `indicators` itself; the plain array mask avoids
        # index alignment.
        indicators.loc[(prefixes == cate).to_numpy(), cate] = 1
    return indicators


if __name__ == "__main__":
    df = pd.read_csv("911.csv")
    zeros_df = build_category_indicators(df["title"])
    cate_list = list(zeros_df.columns)
    # Column sums of the indicator table are the number of calls per category.
    sum_ = zeros_df.sum()
    print(sum_)
import pandas as pd
from matplotlib import pyplot as plt
def count_calls_by_month(calls: pd.DataFrame) -> pd.Series:
    """Count calls per calendar month.

    Args:
        calls: DataFrame indexed by call timestamp (DatetimeIndex) with a
            "title" column.

    Returns:
        Series indexed by month-end date holding the number of non-null
        titles in each month; months without calls inside the covered
        range are present with a count of 0.
    """
    # An offset object is used instead of the "M" string alias, which is
    # deprecated since pandas 2.2 in favour of "ME"; the object form is
    # accepted on both sides of that rename.
    # Only the "title" column is resampled so the other columns are not
    # counted and then thrown away.
    return calls["title"].resample(pd.offsets.MonthEnd()).count()


if __name__ == "__main__":
    df = pd.read_csv("911.csv")
    df["timeStamp"] = pd.to_datetime(df["timeStamp"])
    df = df.set_index("timeStamp")

    count_by_month = count_calls_by_month(df)

    _x = [i.strftime("%Y%m") for i in count_by_month.index]
    _y = count_by_month.values

    # Plot against integer positions and relabel the ticks with year-month.
    plt.plot(range(len(_x)), _y)
    plt.xticks(range(len(_x)), _x, rotation=45)
    plt.show()
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
def count_calls_by_month_and_category(calls: pd.DataFrame) -> pd.DataFrame:
    """Count calls per calendar month for every emergency category.

    Args:
        calls: DataFrame indexed by call timestamp (DatetimeIndex) with a
            "title" column and a "cate" column holding the category.

    Returns:
        DataFrame indexed by month-end date with one column per category.
        All columns share the same month index (the span covered by the
        whole data set); a month in which a category has no calls is 0.
    """
    # An offset object is used instead of the "M" string alias, which is
    # deprecated since pandas 2.2 in favour of "ME".
    month_end = pd.offsets.MonthEnd()
    all_months = calls["title"].resample(month_end).count().index

    counts = {}
    for group_name, group_data in calls.groupby(by="cate"):
        per_month = group_data["title"].resample(month_end).count()
        # A category resampled on its own only spans its first..last call,
        # so pad it to the common month index to keep the curves aligned.
        counts[group_name] = per_month.reindex(all_months, fill_value=0)
    return pd.DataFrame(counts, index=all_months)


if __name__ == "__main__":
    df = pd.read_csv("911.csv")
    df["timeStamp"] = pd.to_datetime(df["timeStamp"])
    df = df.set_index("timeStamp")

    # The category is the text before the first ":" in the title.
    df["cate"] = df["title"].str.split(":").str[0].tolist()

    monthly_by_cate = count_calls_by_month_and_category(df)
    _x = [i.strftime("%Y%m") for i in monthly_by_cate.index]

    for group_name in monthly_by_cate.columns:
        _y = monthly_by_cate[group_name].values
        # Label each curve so the legend can tell the categories apart.
        plt.plot(range(len(_x)), _y, label=group_name)

    plt.xticks(range(len(_x)), _x, rotation=45)
    plt.legend(loc="best")
    plt.show()