Pandas 的 reset_index() 函数分为 Series 对象的 reset_index() 和 DataFrame 对象的 reset_index(),功能都是重置索引,但二者的用法有所不同。
一、Series 的 reset_index()
1. 数据准备
import pandas as pd
examinations = pd.DataFrame({
"student_id": [1, 1, 1, 2, 1, 1, 13, 13, 13, 2, 1],
"subject_name": ["Math", "Physics", "Programming", "Programming", "Physics", "Math", "Math", "Programming",
"Physics", "Math", "Math"]
})
print(examinations)
student_id subject_name
0 1 Math
1 1 Physics
2 1 Programming
3 2 Programming
4 1 Physics
5 1 Math
6 13 Math
7 13 Programming
8 13 Physics
9 2 Math
10 1 Math
2. 方法声明
def reset_index(
self,
level: IndexLabel | None = None, # 复合索引时,指定重置(从索引中移出)哪一级,默认重置所有级
*,
drop: bool = False, # 是否删除原索引列
name: Level = lib.no_default, # 重命名 Series 数值列
inplace: bool = False, # 是否在原对象上操作
allow_duplicates: bool = False, # 是否允许结果中出现重复的列名
) -> DataFrame | Series | None
3. 基本使用
# 1.将 DataFrame 聚合成 Series
res = examinations.groupby(by=["student_id", "subject_name"]).size()
print(res, type(res)) # MultiIndex(names=['student_id', 'subject_name'])
student_id subject_name
1 Math 3
Physics 2
Programming 1
2 Math 1
Programming 1
13 Math 1
Physics 1
Programming 1
dtype: int64 <class 'pandas.core.series.Series'>
# 2.重置索引,使用默认参数
print(res.reset_index(), type(res.reset_index()))
student_id subject_name 0
0 1 Math 3
1 1 Physics 2
2 1 Programming 1
3 2 Math 1
4 2 Programming 1
5 13 Math 1
6 13 Physics 1
7 13 Programming 1
<class 'pandas.core.frame.DataFrame'>
# 3.重置索引,并命名数值列
print(res.reset_index(name="attended_exams"))
student_id subject_name attended_exams
0 1 Math 3
1 1 Physics 2
2 1 Programming 1
3 2 Math 1
4 2 Programming 1
5 13 Math 1
6 13 Physics 1
7 13 Programming 1
# 4.重置索引,并删除原索引列
print(res.reset_index(drop=True), type(res.reset_index(drop=True)))
0 3
1 2
2 1
3 1
4 1
5 1
6 1
7 1
dtype: int64 <class 'pandas.core.series.Series'>
# 5.指定重置复合索引中的某一级索引
print(res.reset_index(level="student_id"))
student_id 0
subject_name
Math 1 3
Physics 1 2
Programming 1 1
Math 2 1
Programming 2 1
Math 13 1
Physics 13 1
Programming 13 1
二、DataFrame 的 reset_index()
1. 数据准备
import pandas as pd
df = pd.DataFrame(data={
"Jan": [1, 2, 3],
"Feb": [4, 5, 6],
"Mar": [7, 8, 9]
}, index=["a", "b", "c"])
print(df)
Jan Feb Mar
a 1 4 7
b 2 5 8
c 3 6 9
2. 方法声明
def reset_index(
self,
level: IndexLabel | None = None, # 复合索引时,指定重置(从索引中移出)哪一级,默认重置所有级
*,
drop: bool = False, # 是否删除原索引列
inplace: bool = False, # 是否在原对象上操作
col_level: Hashable = 0,
col_fill: Hashable = "",
allow_duplicates: bool | lib.NoDefault = lib.no_default,
names: Hashable | Sequence[Hashable] | None = None, # 重命名原索引列,默认列名为 index
) -> DataFrame | None
3. 基本使用
# 1.重置索引,使用默认参数
print(df.reset_index())
index Jan Feb Mar
0 a 1 4 7
1 b 2 5 8
2 c 3 6 9
# 2.重置索引,并删除原索引列
print(df.reset_index(drop=True))
Jan Feb Mar
0 1 4 7
1 2 5 8
2 3 6 9
# 3.重置索引,并重命名原索引列
print(df.reset_index(names="A"))
A Jan Feb Mar
0 a 1 4 7
1 b 2 5 8
2 c 3 6 9