import numpy as np
import pandas as pd
from pandas import Series, DataFrame
# 多级Series
s1 = Series(np.random.randn(6), index=[['1','1','1','2','2','2'],['a','b','c','a','b','c']])
s1
Out[6]:
1  a    2.901314
   b   -1.064644
   c   -0.896902
2  a    0.257884
   b    0.460760
   c   -1.207748
dtype: float64
# 下面一级也是Series
s1['1']
Out[7]:
a 2.901314
b -1.064644
c -0.896902
dtype: float64
type(s1['1'])
Out[8]: pandas.core.series.Series
# 访问
s1['1']['a']
Out[9]: 2.9013138285917472
# 获取第二级Series
s1[:,'a']
Out[10]:
1 2.901314
2 0.257884
dtype: float64
type(s1[:,'a'])
Out[11]: pandas.core.series.Series
# 与DataFrame转换
df1 = s1.unstack()
df1
Out[13]:
a b c
1 2.901314 -1.064644 -0.896902
2 0.257884 0.460760 -1.207748
df2 = DataFrame([s1['1'],s1['2']])
# DataFrame也可以由多个Series组成
df2
Out[15]:
a b c
0 2.901314 -1.064644 -0.896902
1 0.257884 0.460760 -1.207748
# DataFrame转化为Series
s2 = df1.unstack()
s2
Out[17]:
a  1    2.901314
   2    0.257884
b  1   -1.064644
   2    0.460760
c  1   -0.896902
   2   -1.207748
dtype: float64
s2 = df1.T.unstack()
s2
Out[19]:
1  a    2.901314
   b   -1.064644
   c   -0.896902
2  a    0.257884
   b    0.460760
   c   -1.207748
dtype: float64
# 多级DataFrame
df = DataFrame(np.arange(16).reshape(4,4))
df
Out[21]:
0 1 2 3
0 0 1 2 3
1 4 5 6 7
2 8 9 10 11
3 12 13 14 15
# 多级index
df = DataFrame(np.arange(16).reshape(4,4), index=[['a','a','b','b'],[1,2,1,2]])
df
Out[23]:
      0   1   2   3
a 1   0   1   2   3
  2   4   5   6   7
b 1   8   9  10  11
  2  12  13  14  15
# 多级columns
df = DataFrame(np.arange(16).reshape(4,4), index=[['a','a','b','b'],[1,2,1,2]], columns=[['BJ','BJ','SH','GZ'],[8,9,8,8]])
df
Out[26]:
     BJ      SH  GZ
      8   9   8   8
a 1   0   1   2   3
  2   4   5   6   7
b 1   8   9  10  11
  2  12  13  14  15
# 访问
df['BJ']
Out[27]:
      8   9
a 1   0   1
  2   4   5
b 1   8   9
  2  12  13
# 下一级还是DataFrame
type(df['BJ'])
Out[28]: pandas.core.frame.DataFrame
df['BJ'][8]
Out[29]:
a  1     0
   2     4
b  1     8
   2    12
Name: 8, dtype: int32
---------------------