分层索引
import pandas as pd
import numpy as np
生成数据
# Nine random values labelled by a two-level index:
# outer level is a letter ('a'..'d'), inner level is an integer (1..3).
# Not every (letter, integer) pair is present, e.g. ('b', 2) is missing.
data = pd.Series(
    np.random.randn(9),
    index=pd.MultiIndex.from_arrays(
        [list('aaabbccdd'), [1, 2, 3, 1, 3, 1, 2, 2, 3]]
    ),
)
data
a 1 0.269045
2 -0.885298
3 0.158806
b 1 0.378847
3 0.301012
c 1 -1.255018
2 -1.453824
d 2 -0.088124
3 -1.746533
dtype: float64
data.index
MultiIndex([('a', 1),
('a', 2),
('a', 3),
('b', 1),
('b', 3),
('c', 1),
('c', 2),
('d', 2),
('d', 3)],
)
部分索引（按外层索引选取数据子集）
data['b']
1 0.378847
3 0.301012
dtype: float64
data['b':'c']
b 1 0.378847
3 0.301012
c 1 -1.255018
2 -1.453824
dtype: float64
data.loc[['b','c']]
b 1 0.378847
3 0.301012
c 1 -1.255018
2 -1.453824
dtype: float64
在内部层级中进行选择
data.loc[:, 2]
a -0.885298
c -1.453824
d -0.088124
dtype: float64
使用unstack将数据重新排列
data.unstack()
| | 1 | 2 | 3 |
|---|---|---|---|
| a | 0.269045 | -0.885298 | 0.158806 |
| b | 0.378847 | NaN | 0.301012 |
| c | -1.255018 | -1.453824 | NaN |
| d | NaN | -0.088124 | -1.746533 |
unstack的反操作为stack
data.unstack().stack()
a 1 0.269045
2 -0.885298
3 0.158806
b 1 0.378847
3 0.301012
c 1 -1.255018
2 -1.453824
d 2 -0.088124
3 -1.746533
dtype: float64
DataFrame 的分层索引（行和列都可以拥有多层索引）
# 4x3 frame of the integers 0..11 with a two-level index on both axes.
# NOTE(review): 'Gerrn' looks like a typo for 'Green'; it is kept unchanged
# here because the displayed outputs in this document use the same label.
frame = pd.DataFrame(
    np.arange(12).reshape(4, 3),
    index=pd.MultiIndex.from_arrays([['a', 'a', 'b', 'b'], [1, 2, 1, 2]]),
    columns=pd.MultiIndex.from_arrays(
        [['Ohio', 'Ohio', 'Colorado'], ['Green', 'Red', 'Gerrn']]
    ),
)
frame
| | | Ohio | | Colorado |
|---|---|---|---|---|
| | | Green | Red | Gerrn |
| a | 1 | 0 | 1 | 2 |
| | 2 | 3 | 4 | 5 |
| b | 1 | 6 | 7 | 8 |
| | 2 | 9 | 10 | 11 |
添加索引名称
# Label the levels of both axes in place: rows become (key1, key2),
# columns become (state, color).
frame.index.set_names(['key1', 'key2'], inplace=True)
frame.columns.set_names(['state', 'color'], inplace=True)
frame
| state | | Ohio | | Colorado |
|---|---|---|---|---|
| color | | Green | Red | Gerrn |
| key1 | key2 | | | |
| a | 1 | 0 | 1 | 2 |
| | 2 | 3 | 4 | 5 |
| b | 1 | 6 | 7 | 8 |
| | 2 | 9 | 10 | 11 |
列索引
frame['Ohio']
| color | | Green | Red |
|---|---|---|---|
| key1 | key2 | | |
| a | 1 | 0 | 1 |
| | 2 | 3 | 4 |
| b | 1 | 6 | 7 |
| | 2 | 9 | 10 |
from pandas import MultiIndex
使用MultiIndex.from_arrays创建带有层级的列索引
MultiIndex.from_arrays([['Ohio','Ohio','Colorado'],['Green','Red','Gerrn']],names=['state','color'])
MultiIndex([( 'Ohio', 'Green'),
( 'Ohio', 'Red'),
('Colorado', 'Gerrn')],
names=['state', 'color'])