整个思路比较简单:
- 根据 `dataframe.loc[]` 来框定一个子 dataframe
- 由于 `.loc[]` 的标签切片是左闭右闭的,所以如果希望某一边是开区间,就需要删除那一边端点对应的行
使用的工具函数如下:
def split_dataframe_by_index_list(dataframe: pd.DataFrame, index_list: list, left=False, right=False):
    """Split a dataframe into consecutive sub-dataframes at the given index labels.

    The first and last index labels of ``dataframe`` are added as outer
    boundaries when ``index_list`` does not already contain them. Every pair
    of neighbouring boundaries is then sliced with ``.loc``, which is closed
    on both sides, and the boundary rows are dropped again on whichever side
    is requested to be open.

    :param dataframe: the dataframe to split
    :param index_list: index labels to split at; must be in the same order as
        the dataframe's index. The caller's sequence is never modified.
    :param left: whether each piece keeps the row at its left boundary
    :param right: whether each piece keeps the row at its right boundary
    :return: a generator of sub-dataframes. When ``index_list`` is empty the
        whole dataframe is yielded as-is; otherwise pieces with fewer than
        two rows are skipped.
    """
    if len(index_list) == 0:
        yield dataframe
        return

    # Work on a copy: inserting the outer boundaries into the caller's list
    # would silently change it and alter the result of a later call.
    boundaries = list(index_list)
    first_label = dataframe.index.values[0]
    last_label = dataframe.index.values[-1]
    if first_label not in boundaries:
        boundaries.insert(0, first_label)
    if last_label not in boundaries:
        boundaries.append(last_label)

    for start, stop in zip(boundaries, boundaries[1:]):
        piece: pd.DataFrame = dataframe.loc[start:stop, :]
        # A boundary label does not have to be an actual row of the slice,
        # so a missing label is ignored instead of raising KeyError.
        if not left:
            piece = piece.drop(start, axis=0, errors="ignore")
        if not right:
            piece = piece.drop(stop, axis=0, errors="ignore")
        # Pieces with zero or one row are deliberately not yielded.
        if piece.shape[0] > 1:
            yield piece
示例代码
import pandas as pd
import numpy as np
def split_dataframe_by_index_list(dataframe: pd.DataFrame, index_list: list, left=False, right=False):
    """Split a dataframe into consecutive sub-dataframes at the given index labels.

    The first and last index labels of ``dataframe`` are added as outer
    boundaries when ``index_list`` does not already contain them. Every pair
    of neighbouring boundaries is then sliced with ``.loc``, which is closed
    on both sides, and the boundary rows are dropped again on whichever side
    is requested to be open.

    :param dataframe: the dataframe to split
    :param index_list: index labels to split at; must be in the same order as
        the dataframe's index. The caller's sequence is never modified.
    :param left: whether each piece keeps the row at its left boundary
    :param right: whether each piece keeps the row at its right boundary
    :return: a generator of sub-dataframes. When ``index_list`` is empty the
        whole dataframe is yielded as-is; otherwise pieces with fewer than
        two rows are skipped.
    """
    if len(index_list) == 0:
        yield dataframe
        return

    # Work on a copy: inserting the outer boundaries into the caller's list
    # would silently change it and alter the result of a later call.
    boundaries = list(index_list)
    first_label = dataframe.index.values[0]
    last_label = dataframe.index.values[-1]
    if first_label not in boundaries:
        boundaries.insert(0, first_label)
    if last_label not in boundaries:
        boundaries.append(last_label)

    for start, stop in zip(boundaries, boundaries[1:]):
        piece: pd.DataFrame = dataframe.loc[start:stop, :]
        # A boundary label does not have to be an actual row of the slice,
        # so a missing label is ignored instead of raising KeyError.
        if not left:
            piece = piece.drop(start, axis=0, errors="ignore")
        if not right:
            piece = piece.drop(stop, axis=0, errors="ignore")
        # Pieces with zero or one row are deliberately not yielded.
        if piece.shape[0] > 1:
            yield piece
if __name__ == '__main__':
    # Demo: split a 50-row random dataframe at four index labels.
    df = pd.DataFrame(data=np.random.randint(0, 100, size=(50, 3)))
    for sub_df in split_dataframe_by_index_list(df, [11, 22, 33, 44]):
        # Show the shape together with the first and last index label so the
        # boundaries of every piece are visible in the output.
        print(sub_df.shape, sub_df.index[0], sub_df.index[-1])
效果(每行依次为子 dataframe 的 shape、第一行的 index、最后一行的 index):
(10, 3) 1 10
(10, 3) 12 21
(10, 3) 23 32
(10, 3) 34 43
(4, 3) 45 48