机器学习—数据科学包三
pandas索引
多级索引
# Two parallel label lists: a[0] holds the outer-level keys, a[1] the inner-level keys.
a = [
    ['a', 'a', 'a', 'b', 'b', 'c', 'c'],
    [1, 2, 3, 1, 2, 2, 3],
]
# Pair the two lists position by position into (outer, inner) tuples.
t = list(zip(a[0], a[1]))
t
output:
[('a', 1), ('a', 2), ('a', 3), ('b', 1), ('b', 2), ('c', 2), ('c', 3)]
# Build a two-level index from the (outer, inner) tuples and give each level a name.
index = pd.MultiIndex.from_tuples(
    tuples=t,
    names=['level1', 'level2'],
)
index
output:
MultiIndex(levels=[['a', 'b', 'c'], [1, 2, 3]],
           labels=[[0, 0, 0, 1, 1, 2, 2], [0, 1, 2, 0, 1, 1, 2]],
           names=['level1', 'level2'])
# Seven random floats, one for each (level1, level2) entry of the index.
s = pd.Series(
    data=np.random.rand(7),
    index=index,
)
s
output:
level1  level2
a       1         0.693942
        2         0.363457
        3         0.896145
b       1         0.542912
        2         0.803361
c       2         0.399862
        3         0.181466
dtype: float64
# Partial indexing: pick every entry whose level1 label is 'b'; the result is indexed by level2 only.
s['b']
output:
level2
1    0.542912
2    0.803361
dtype: float64
# Label slice on the outer level: both endpoints ('b' and 'c') are included and both index levels are kept.
s['b':'c']
output:
level1  level2
b       1         0.542912
        2         0.803361
c       2         0.399862
        3         0.181466
dtype: float64
# A list of outer-level labels selects those groups ('a' and 'c') and keeps both index levels.
s[['a','c']]
output:
level1  level2
a       1         0.693942
        2         0.363457
        3         0.896145
c       2         0.399862
        3         0.181466
dtype: float64
# Keep every outer-level label and only the inner-level label 2 (a label, not a position);
# the result is indexed by level1 alone.
s[:,2]
output:
level1
a    0.363457
b    0.803361
c    0.399862
dtype: float64
# 4x3 frame of random integers drawn from [1, 10), labelled with two-level
# row keys and two-level column keys (each given as a pair of parallel lists).
df = pd.DataFrame(
    data=np.random.randint(1, 10, size=(4, 3)),
    index=[['a', 'a', 'b', 'b'], [1, 2, 1, 2]],
    columns=[['one', 'one', 'two'], ['blue', 'red', 'blue']],
)
# Name the levels on both axes so they appear in the printed frame.
df.index.names = ['row-1', 'row-2']
df.columns.names = ['col-1', 'col-2']
df
output:
col-1        one      two
col-2       blue red blue
row-1 row-2
a     1        3   9    5
      2        9   6    6
b     1        1   1    7
      2        1   1    2
# Select all rows whose row-1 label is 'a'; the result is indexed by row-2 and keeps both column levels.
df.loc['a']
output:
col-1  one      two
col-2 blue red blue
row-2
1        3   9    5
2        9   6    6
# Here the pair ('a', 1) is matched against the two row levels (row-1='a', row-2=1),
# not as "row 'a', column 1": the result is that single row as a Series indexed by the column levels.
# NOTE(review): df.loc[('a', 1), :] would state this intent explicitly — confirm before changing.
df.loc['a',1]
output:
col-1  col-2
one    blue     3
       red      9
two    blue     5
Name: (a, 1), dtype: int64