1.pandas时间序列1
- 获取某一列的分类:三种方法
- 时间序列
构建数据
# Demo data: seven records whose 'title' has the form "<category>:<detail>".
# Column 'a' (0..6) becomes the row index.
df = pd.DataFrame(
    {
        'a': range(7),
        'Brand': range(7, 0, -1),
        'title': [
            'EMS:one',
            'EMS:one',
            'Traffic:one',
            'Fire:two',
            'Traffic:two',
            'Fire:two',
            'Traffic:two',
        ],
        'City': list('hjkllno'),
    }
).set_index(['a'])
print(df)
获取分类
# Split every title on ':' and keep the distinct prefixes as the category
# names. A set is used for de-duplication, so the order of the resulting
# list is not fixed.
temp_list = df['title'].str.split(':')
cate_list = list(set(parts[0] for parts in temp_list))
print(cate_list)
['Fire', 'Traffic', 'EMS']
构造全0数组
# Start from an all-zero indicator table: one row per record in df and one
# column per category name.
# Passing the names literally, e.g. columns=list({'EMS', 'Fire', 'Traffic'}),
# works as well.
zero_block = np.zeros((df.shape[0], len(cate_list)))
zeros_df = pd.DataFrame(zero_block, columns=cate_list)
print(zeros_df)
----------方法1:对行数进行循环-----------
# Method 1: walk the rows and flag the category each title belongs to.
# The titles are split once, before the loop, instead of once per row.
split_titles = df['title'].str.split(':')
for row, parts in enumerate(split_titles):
    # parts[0] is the text before ':', i.e. the category column to set.
    zeros_df.loc[row, parts[0]] = 1
print(zeros_df)
---------方法2:对类别进行循环--------------
# Method 2: for each category, flag every row whose title carries it.
for cate in cate_list:
    # Match on the "<category>:" prefix; a plain substring/regex search could
    # also hit the detail part after the colon.
    is_cate = df['title'].str.startswith(cate + ':').to_numpy()
    # One .loc assignment writes into zeros_df itself. The chained form
    # zeros_df[cate][mask] = 1 may write to a temporary copy instead.
    zeros_df.loc[is_cate, cate] = 1
print(zeros_df)
print(zeros_df.sum(axis=0))  # number of records in each category
------方法3:重新建立一DataFrame列实现--------------
# Method 3: store the category of every row in a new column, then group by it.
temp_list = df['title'].str.split(":").tolist()
cate_list = [parts[0] for parts in temp_list]
# A plain list is assigned by position, so the result does not depend on
# what df currently uses as its index.
df['cate'] = cate_list
print(df['cate'])
# Number of records per category.
print(df.groupby(by=['cate'])['title'].count())
完整代码:
import numpy as np
import pandas as pd

# Build the demo data: seven records whose 'title' is "<category>:<detail>".
df = pd.DataFrame({'a':range(7),'Brand':range(7,0,-1),'title':['EMS:one','EMS:one','Traffic:one','Fire:two','Traffic:two','Fire:two','Traffic:two'],'City':list('hjkllno')})
df = df.set_index(['a'])
print(df)

# Collect the distinct categories (the text before ':' in each title).
temp_list = df['title'].str.split(':')
cate_list = list(set([i[0] for i in temp_list]))
print(cate_list)

# Build an all-zero table: one row per record, one column per category.
# Passing the names literally, e.g. columns=list({'EMS', 'Fire', 'Traffic'}),
# works as well.
zeros_df = pd.DataFrame(np.zeros((df.shape[0], len(cate_list))), columns=cate_list)
print(zeros_df)

# ---------- Method 1: loop over the rows ----------
# temp_list already holds the split titles, so nothing is re-split per row.
for row, parts in enumerate(temp_list):
    zeros_df.loc[row, parts[0]] = 1
print(zeros_df)

# ---------- Method 2: loop over the categories ----------
for cate in cate_list:
    # Match on the "<category>:" prefix and write through a single .loc call.
    # The chained form zeros_df[cate][mask] = 1 may modify a temporary copy.
    is_cate = df['title'].str.startswith(cate + ':').to_numpy()
    zeros_df.loc[is_cate, cate] = 1
print(zeros_df)
print(zeros_df.sum(axis=0))  # number of records in each category

# ---------- Method 3: add a category column and group by it ----------
temp_list = df['title'].str.split(":").tolist()
cate_list = [i[0] for i in temp_list]
# A plain list is assigned by position, independent of df's index.
df['cate'] = cate_list
print(df['cate'])
print(df.groupby(by=['cate'])['title'].count())
# pd.date_range examples. `data` is reused; each call replaces the previous one.

# One stamp per day from 2019-09-30 through 2019-10-10.
data = pd.date_range(start="20190930", end="20191010", freq="D")
print(data)
# One stamp every 10 days between the two dates.
data = pd.date_range(start="20190930", end="20191030", freq="10D")
print(data)
# Ten hourly stamps. Lowercase "h" is used because the uppercase "H" alias
# is deprecated in recent pandas releases.
data = pd.date_range(start="20190930", periods=10, freq="h")
print(data)
# Ten month-start stamps, beginning with the first month start on or after
# the start date.
data = pd.date_range(start='20190930', periods=10, freq='MS')
print(data)
# Ten stamps, one at every second month start.
data = pd.date_range(start='20190930', periods=10, freq='2MS')
print(data)
2.pandas时间序列2
时间处理:
动手1:
# Build the demo data.
df = pd.DataFrame({'a':range(7),'Brand':range(7,0,-1),'title':['EMS:one','EMS:one','Traffic:one','Fire:two','Traffic:two','Fire:two','Traffic:two'],'City':list('hjkllno')})
print(df)
# Seven business-month-start dates, one per row.
index = pd.date_range(start='20190930',periods=7,freq='BMS')
df['date'] = index
# Give rows 2-4 the same date as row 1 so that one month holds several records.
df.loc[2:4,'date'] = df.loc[1,'date']
print(df)
# Make sure the column is datetime typed, then use it as the index so the
# frame can be resampled by time.
df['date'] = pd.to_datetime(df['date'])
print(df)
df = df.set_index('date',drop=True)
print(df)
# Count the records that fall in each month. A MonthEnd offset object is
# passed instead of the 'M' string alias, which recent pandas deprecates.
count_by_month = df.resample(pd.offsets.MonthEnd())['title'].count()
print(count_by_month)
# Build the demo data.
df = pd.DataFrame({'a':range(7),'Brand':range(7,0,-1),'title':['EMS:one','EMS:one','Traffic:one','Fire:two','Traffic:two','Fire:two','Traffic:two'],'City':list('hjkllno')})
print(df)
# Seven business-month-start dates, one per row.
index = pd.date_range(start='20190930',periods=7,freq='BMS')
df['date'] = index
# Give rows 2-4 the same date as row 1 so that one month holds several records.
df.loc[2:4,'date'] = df.loc[1,'date']
print(df)
# Make sure the column is datetime typed, then use it as the index so the
# frame can be resampled by time.
df['date'] = pd.to_datetime(df['date'])
print(df)
df = df.set_index('date',drop=True)
print(df)
# Count the records that fall in each month. A MonthEnd offset object is
# passed instead of the 'M' string alias, which recent pandas deprecates.
count_by_month = df.resample(pd.offsets.MonthEnd())['title'].count()
print(count_by_month)
from matplotlib import pyplot as plt

# Draw the monthly counts as a line chart.
# Tick labels keep only year-month-day; the time-of-day part is dropped.
_x = [stamp.strftime("%Y-%m-%d") for stamp in count_by_month.index]
_y = count_by_month.values
positions = range(len(_x))

plt.figure(figsize=(20, 8), dpi=80)
plt.plot(positions, _y)
# plt.bar(positions, _y) would draw the same data as bars.
plt.xticks(positions, _x)
plt.show()
动手2:对分两个组的进行计算
1.先建立“不同类型”列的cate列
# Add a column holding the category of every row.
temp_list = df['title'].str.split(":").tolist()
cate_list = [i[0] for i in temp_list]
# Assign the list directly: it is matched to the rows by position. A
# DataFrame value would be aligned on the index and would produce NaN
# when df is indexed by date.
df['cate'] = cate_list
print(cate_list)
print(df)
2.然后分出不同类型的groupby组:
# Group the rows by their 'cate' value. The result is not stored here; it is
# meant to be iterated as (group_name, group_data) pairs.
df.groupby(by='cate')
3.最后计算不同月不同类型下的数量
计算不同月的数量需要先将年月日时间序列设置为索引,然后就可以采用resample进行计算。
# Monthly record count inside one category group.
# A MonthEnd offset object is passed instead of the 'M' string alias, which
# recent pandas deprecates.
count_by_month = group_data.resample(pd.offsets.MonthEnd())['title'].count()
完整代码:
from matplotlib import pyplot as plt
import pandas as pd
import numpy as np
# Build the demo data.
df = pd.DataFrame({'a':range(7),'Brand':range(7,0,-1),'title':['EMS:one','EMS:one','Traffic:one','Fire:two','Traffic:two','Fire:two','Traffic:two'],'City':list('hjkllno')})
# Seven business-month-start dates, one per row.
index = pd.date_range(start='20190930',periods=7,freq='BMS')
df['date'] = index
# Give rows 2-4 the same date as row 1 so that one month holds several records.
df.loc[2:4,'date'] = df.loc[1,'date']
print(df)
# Add a column holding the category (the text before ':') of every row.
# The list is assigned directly, matched to the rows by position.
temp_list = df['title'].str.split(":").tolist()
cate_list = [i[0] for i in temp_list]
df['cate'] = cate_list
print(cate_list)
print(df)
# Make sure 'date' is datetime typed and use it as the index so the frame
# can be resampled by time.
df['date'] = pd.to_datetime(df['date'])
df = df.set_index('date',drop=True)
print(df)
plt.figure(figsize=(20,8),dpi=80)

# All categories share one month axis covering the whole frame. Without it,
# each group would be drawn from position 0 of its own date span and the
# same x position would mean different months for different lines.
month_end = pd.offsets.MonthEnd()  # offset object instead of the deprecated 'M' alias
all_months = df.resample(month_end)['title'].count().index
positions = range(len(all_months))

# Draw one line per category.
for group_name, group_data in df.groupby(by='cate'):
    # Monthly record count for this category; months outside the group's own
    # date span are filled with 0.
    count_by_month = (
        group_data.resample(month_end)['title'].count()
        .reindex(all_months, fill_value=0)
    )
    plt.plot(positions, count_by_month.values, label=group_name)

# Tick labels keep only year-month-day; the time-of-day part is dropped.
plt.xticks(positions, [stamp.strftime("%Y-%m-%d") for stamp in all_months], rotation=45)
plt.legend(loc = 'best')
plt.show()