import numpy as np
import pandas as pd
from pandas import Series,DataFrame
创建多级index的series
# Six random normal draws; with no index argument the Series gets the
# default integer index 0..5.
random_values = np.random.randn(6)
s1 = Series(random_values)
s1
0 0.001150
1 -0.019820
2 0.922708
3 0.656468
4 -0.719109
5 0.957127
dtype: float64
# Two parallel label lists (outer level, inner level) produce a two-level index.
outer_labels = ["1", "1", "1", "2", "2", "2"]
inner_labels = ["a", "b", "c", "a", "b", "c"]
s1 = Series(np.random.randn(6), index=[outer_labels, inner_labels])
s1  # s1 now has a two-level index, i.e. a two-dimensional structure, so it can be converted to a DataFrame
1 a 0.925230
b 0.766318
c 0.687740
2 a -0.622509
b -0.593884
c -0.119193
dtype: float64
# Indexing with an outer-level label returns the inner Series (a, b, c) for that label.
s1["1"]
a 0.925230
b 0.766318
c 0.687740
dtype: float64
type(s1["1"]) # this shows that s1 is effectively made up of two Series: s1["1"] and s1["2"]
pandas.core.series.Series
# Chained lookup: outer label first, then inner label, yielding a single scalar.
s1["1"]["a"]
0.92522961872552423
# Select inner label "a" across every outer label; the result is indexed by the outer labels.
s1[:,"a"]
1 0.925230
2 -0.622509
dtype: float64
多级index的series与dataframe的互换
# unstack() pivots the inner index level into columns, giving a DataFrame
# with the outer labels as rows and a/b/c as columns.
df1 = s1.unstack()
df1
| | a | b | c |
|---|---|---|---|
| 1 | 0.925230 | 0.766318 | 0.687740 |
| 2 | -0.622509 | -0.593884 | -0.119193 |
# Build the same values row by row from the two sub-Series. The rows get
# the default labels 0 and 1 instead of the original outer labels "1" and "2".
df2 = DataFrame([s1["1"],s1["2"]])
df2
| | a | b | c |
|---|---|---|---|
| 0 | 0.925230 | 0.766318 | 0.687740 |
| 1 | -0.622509 | -0.593884 | -0.119193 |
# Unstacking a DataFrame gives back a two-level Series, but with the column
# labels (a, b, c) as the outer level and the row labels as the inner level.
s2 = df1.unstack()
s2
a 1 0.925230
2 -0.622509
b 1 0.766318
2 -0.593884
c 1 0.687740
2 -0.119193
dtype: float64
# Transposing first puts the original row labels on the outer level,
# which reproduces the layout of s1.
s2 = df1.T.unstack()
s2
1 a 0.925230
b 0.766318
c 0.687740
2 a -0.622509
b -0.593884
c -0.119193
dtype: float64
创建多级index的dataframe
# 4x4 grid holding 0..15; rows and columns both get default integer labels.
grid = np.arange(16).reshape(4, 4)
df = DataFrame(grid)
df
| | 0 | 1 | 2 | 3 |
|---|---|---|---|---|
| 0 | 0 | 1 | 2 | 3 |
| 1 | 4 | 5 | 6 | 7 |
| 2 | 8 | 9 | 10 | 11 |
| 3 | 12 | 13 | 14 | 15 |
# Same 4x4 values, now with a two-level row index (outer a/b, inner "1"/"2").
row_labels = [["a", "a", "b", "b"], ["1", "2", "1", "2"]]
df = DataFrame(np.arange(16).reshape(4, 4), index=row_labels)
df
| | | 0 | 1 | 2 | 3 |
|---|---|---|---|---|---|
| a | 1 | 0 | 1 | 2 | 3 |
| | 2 | 4 | 5 | 6 | 7 |
| b | 1 | 8 | 9 | 10 | 11 |
| | 2 | 12 | 13 | 14 | 15 |
# Two-level labels on both axes: rows are (a/b, "1"/"2"),
# columns are (city code, integer).
row_labels = [["a", "a", "b", "b"], ["1", "2", "1", "2"]]
column_labels = [["BJ", "BJ", "SH", "GZ"], [8, 9, 8, 8]]
df = DataFrame(
    np.arange(16).reshape(4, 4),
    index=row_labels,
    columns=column_labels,
)
df
| | | BJ | | SH | GZ |
|---|---|---|---|---|---|
| | | 8 | 9 | 8 | 8 |
| a | 1 | 0 | 1 | 2 | 3 |
| | 2 | 4 | 5 | 6 | 7 |
| b | 1 | 8 | 9 | 10 | 11 |
| | 2 | 12 | 13 | 14 | 15 |
# Selecting a top-level column label returns every column under it,
# keyed by the second-level column labels (8 and 9).
df["BJ"]
| | | 8 | 9 |
|---|---|---|---|
| a | 1 | 0 | 1 |
| | 2 | 4 | 5 |
| b | 1 | 8 | 9 |
| | 2 | 12 | 13 |
# The sub-selection under a top-level column label is itself a DataFrame.
type(df["BJ"])
pandas.core.frame.DataFrame
# Then pick second-level column 8: a Series that still carries the two-level row index.
df["BJ"][8]
a 1 0
2 4
b 1 8
2 12
Name: 8, dtype: int32
# Then pick outer row label "a": a Series indexed by the inner row labels.
df["BJ"][8]["a"]
1 0
2 4
Name: 8, dtype: int32
df["BJ"][8]["a"]["1"] # lookup order for a multi-indexed DataFrame: level-1 column label -> level-2 column label -> level-1 index label -> level-2 index label
0
# The final step selects by position, not by label: the inner row labels are
# the strings "1" and "2", so position 1 is the row labelled "2".
# Use .iloc for this; a bare [1] on a string-labelled Series relies on pandas'
# deprecated integer positional fallback.
df["BJ"][8]["a"].iloc[1] # lookup order for a multi-indexed DataFrame: level-1 column label -> level-2 column label -> level-1 index label -> level-2 index position
4