>>> frame = DataFrame(np.random.randn(4,3),columns=list('bde'),index=['Utah','Ohio','Texas','Oregon'])
>>> frame
b d e
Utah -0.418643 -1.614332 -0.137721
Ohio 0.280040 0.788998 -0.402196
Texas -0.990565 1.650905 1.458356
Oregon -0.662952 0.054194 1.225682
>>> f = lambda x:x.max()-x.min()
>>> frame.apply(f)
b 1.270605
d 3.265237
e 1.860553
dtype: float64
>>> frame.apply(f,axis=1)
Utah 1.476611
Ohio 1.191195
Texas 2.641470
Oregon 1.888635
dtype: float64
>>> def f(x):
...     return Series([x.min(),x.max()],index=['min','max'])
...
>>> frame.apply(f)
b d e
min -0.990565 -1.614332 -0.402196
max 0.280040 1.650905 1.458356
# Sorting a DataFrame (by index and by values)
>>> frame = DataFrame(np.random.randn(3,4),index=['one','two','three'],columns=['a','b','e','d'])
>>> frame
a b e d
one 1.231774 -0.462460 0.243845 1.073060
two -0.768736 1.157504 -0.866007 -0.147753
three 0.508047 1.046841 0.916512 -0.222779
>>> frame.sort_index()
a b e d
one 1.231774 -0.462460 0.243845 1.073060
three 0.508047 1.046841 0.916512 -0.222779
two -0.768736 1.157504 -0.866007 -0.147753
>>> frame.sort_index(axis=1)
a b d e
one 1.231774 -0.462460 1.073060 0.243845
two -0.768736 1.157504 -0.147753 -0.866007
three 0.508047 1.046841 -0.222779 0.916512
>>> frame.sort_index(axis=1,ascending=False)
e d b a
one 0.243845 1.073060 -0.462460 1.231774
two -0.866007 -0.147753 1.157504 -0.768736
three 0.916512 -0.222779 1.046841 0.508047
>>> frame.sort_values(by='b')
a b e d
one 1.231774 -0.462460 0.243845 1.073060
three 0.508047 1.046841 0.916512 -0.222779
two -0.768736 1.157504 -0.866007 -0.147753
>>> frame.loc['two','a']=0.5
>>> frame.loc['three','a']=0.5
>>> frame
a b e d
one 1.231774 -0.462460 0.243845 1.073060
two 0.500000 1.157504 -0.866007 -0.147753
three 0.500000 1.046841 0.916512 -0.222779
>>> frame.sort_values(by=['a','b'])
a b e d
three 0.500000 1.046841 0.916512 -0.222779
two 0.500000 1.157504 -0.866007 -0.147753
one 1.231774 -0.462460 0.243845 1.073060
# Filtering out missing data in a DataFrame (dropna)
>>> df = DataFrame(np.random.randn(7,3))
>>> df.loc[:4,1]=np.nan
>>> df.loc[:2,2]=np.nan
>>> df
0 1 2
0 -0.555661 NaN NaN
1 0.756682 NaN NaN
2 -0.830897 NaN NaN
3 0.932049 NaN 0.787962
4 -0.591421 NaN 2.315367
5 -1.270288 0.681385 0.401054
6 2.029551 -0.254169 0.336718
>>> df.dropna(thresh=3)
0 1 2
5 -1.270288 0.681385 0.401054
6 2.029551 -0.254169 0.336718
>>> df.dropna(thresh=2)
0 1 2
3 0.932049 NaN 0.787962
4 -0.591421 NaN 2.315367
5 -1.270288 0.681385 0.401054
6 2.029551 -0.254169 0.336718
>>> df.dropna(how='all')
0 1 2
0 -0.555661 NaN NaN
1 0.756682 NaN NaN
2 -0.830897 NaN NaN
3 0.932049 NaN 0.787962
4 -0.591421 NaN 2.315367
5 -1.270288 0.681385 0.401054
6 2.029551 -0.254169 0.336718
>>> df.loc[0,0]=np.nan
>>> df
0 1 2
0 NaN NaN NaN
1 0.756682 NaN NaN
2 -0.830897 NaN NaN
3 0.932049 NaN 0.787962
4 -0.591421 NaN 2.315367
5 -1.270288 0.681385 0.401054
6 2.029551 -0.254169 0.336718
>>> df.dropna(how='all')
0 1 2
1 0.756682 NaN NaN
2 -0.830897 NaN NaN
3 0.932049 NaN 0.787962
4 -0.591421 NaN 2.315367
5 -1.270288 0.681385 0.401054
6 2.029551 -0.254169 0.336718
>>> df.loc[0:6,1]=np.nan
>>> df
0 1 2
0 NaN NaN NaN
1 0.756682 NaN NaN
2 -0.830897 NaN NaN
3 0.932049 NaN 0.787962
4 -0.591421 NaN 2.315367
5 -1.270288 NaN 0.401054
6 2.029551 NaN 0.336718
>>> df.dropna(axis=1,how='all')
0 2
0 NaN NaN
1 0.756682 NaN
2 -0.830897 NaN
3 0.932049 0.787962
4 -0.591421 2.315367
5 -1.270288 0.401054
6 2.029551 0.336718
# Filling in missing data in a DataFrame (fillna)
>>> df.fillna({1:0.5,2:-1})
0 1 2
0 NaN 0.5 -1.000000
1 0.756682 0.5 -1.000000
2 -0.830897 0.5 -1.000000
3 0.932049 0.5 0.787962
4 -0.591421 0.5 2.315367
5 -1.270288 0.5 0.401054
6 2.029551 0.5 0.336718
>>> df.fillna(0)
0 1 2
0 0.000000 0.0 0.000000
1 0.756682 0.0 0.000000
2 -0.830897 0.0 0.000000
3 0.932049 0.0 0.787962
4 -0.591421 0.0 2.315367
5 -1.270288 0.0 0.401054
6 2.029551 0.0 0.336718
>>> df.ffill()
0 1 2
0 NaN NaN NaN
1 0.756682 NaN NaN
2 -0.830897 NaN NaN
3 0.932049 NaN 0.787962
4 -0.591421 NaN 2.315367
5 -1.270288 NaN 0.401054
6 2.029551 NaN 0.336718
>>> df.loc[0,1]=0.233
>>> df
0 1 2
0 NaN 0.233 NaN
1 0.756682 NaN NaN
2 -0.830897 NaN NaN
3 0.932049 NaN 0.787962
4 -0.591421 NaN 2.315367
5 -1.270288 NaN 0.401054
6 2.029551 NaN 0.336718
>>> df.ffill()
0 1 2
0 NaN 0.233 NaN
1 0.756682 0.233 NaN
2 -0.830897 0.233 NaN
3 0.932049 0.233 0.787962
4 -0.591421 0.233 2.315367
5 -1.270288 0.233 0.401054
6 2.029551 0.233 0.336718
pandas基础知识学习(2)-数据基本操作
最新推荐文章于 2024-09-09 15:48:35 发布