pandas系列之-DataFrame(1)

常见DataFrame数据处理

	import numpy as np
	import pandas as pd

	from numpy.random import randn
	np.random.seed(101)
df = pd.DataFrame(randn(5,4),index='A B C D E'.split(),columns='W X Y Z'.split())
df
          W         X         Y         Z
A  0.907969  0.503826  0.651118 -0.319318
B -0.848077  0.605965 -2.018168  0.740122
C  0.528813 -0.589001  0.188695 -0.758872
D -0.933237  0.955057  0.190794  1.978757
E  2.605967  0.683509  0.302665  1.693723
df['W']
A    0.907969
B   -0.848077
C    0.528813
D   -0.933237
E    2.605967
Name: W, dtype: float64
df[['Y','Z']]
          Y         Z
A  0.651118 -0.319318
B -2.018168  0.740122
C  0.188695 -0.758872
D  0.190794  1.978757
E  0.302665  1.693723
df[1:3]
          W         X         Y         Z
B -0.848077  0.605965 -2.018168  0.740122
C  0.528813 -0.589001  0.188695 -0.758872
df.loc[['A','B'],['Y','Z']]
          Y         Z
A  0.651118 -0.319318
B -2.018168  0.740122
df.iloc[1]
W   -0.848077
X    0.605965
Y   -2.018168
Z    0.740122
Name: B, dtype: float64
df.iloc[1:3]
          W         X         Y         Z
B -0.848077  0.605965 -2.018168  0.740122
C  0.528813 -0.589001  0.188695 -0.758872
df>0
       W      X      Y      Z
A   True   True   True  False
B  False   True  False   True
C   True  False   True  False
D  False   True   True   True
E   True   True   True   True
type(df>0)
pandas.core.frame.DataFrame
df[df>0]
          W         X         Y         Z
A  0.907969  0.503826  0.651118       NaN
B       NaN  0.605965       NaN  0.740122
C  0.528813       NaN  0.188695       NaN
D       NaN  0.955057  0.190794  1.978757
E  2.605967  0.683509  0.302665  1.693723
df[df['W']>0]
          W         X         Y         Z
A  0.907969  0.503826  0.651118 -0.319318
C  0.528813 -0.589001  0.188695 -0.758872
E  2.605967  0.683509  0.302665  1.693723
df[df['W']>0][['Z','Y']]
          Z         Y
A -0.319318  0.651118
C -0.758872  0.188695
E  1.693723  0.302665
df[(df['W']>0) & (df['Y'] > 0.5)]
          W         X         Y         Z
A  0.907969  0.503826  0.651118 -0.319318
df.W

A    0.907969
B   -0.848077
C    0.528813
D   -0.933237
E    2.605967
Name: W, dtype: float64
df['new'] = df['W'] + df['Y']
df
          W         X         Y         Z       new
A  0.907969  0.503826  0.651118 -0.319318  1.559087
B -0.848077  0.605965 -2.018168  0.740122 -2.866245
C  0.528813 -0.589001  0.188695 -0.758872  0.717509
D -0.933237  0.955057  0.190794  1.978757 -0.742443
E  2.605967  0.683509  0.302665  1.693723  2.908633
df.drop('new',axis = 1,inplace=True)
df
          W         X         Y         Z
A  0.907969  0.503826  0.651118 -0.319318
B -0.848077  0.605965 -2.018168  0.740122
C  0.528813 -0.589001  0.188695 -0.758872
D -0.933237  0.955057  0.190794  1.978757
E  2.605967  0.683509  0.302665  1.693723
df.drop('E',axis=0,inplace = True)
df
          W         X         Y         Z
A  0.907969  0.503826  0.651118 -0.319318
B -0.848077  0.605965 -2.018168  0.740122
C  0.528813 -0.589001  0.188695 -0.758872
D -0.933237  0.955057  0.190794  1.978757
df.loc['A']
W    0.907969
X    0.503826
Y    0.651118
Z   -0.319318
Name: A, dtype: float64
df.loc[['A','B'],['W','Y']]
          W         Y
A  0.907969  0.651118
B -0.848077 -2.018168
df.reset_index()
  index         W         X         Y         Z
0     A  0.907969  0.503826  0.651118 -0.319318
1     B -0.848077  0.605965 -2.018168  0.740122
2     C  0.528813 -0.589001  0.188695 -0.758872
3     D -0.933237  0.955057  0.190794  1.978757
df.reindex()
   index new         W         X         Y         Z
0      0  SH  0.907969  0.503826  0.651118 -0.319318
1      1  BJ -0.848077  0.605965 -2.018168  0.740122
2      2  CD  0.528813 -0.589001  0.188695 -0.758872
3      3  WU -0.933237  0.955057  0.190794  1.978757
newcolume = 'SH BJ CD WU'.split()
df['new'] = newcolume
df

   index new         W         X         Y         Z
0      0  SH  0.907969  0.503826  0.651118 -0.319318
1      1  BJ -0.848077  0.605965 -2.018168  0.740122
2      2  CD  0.528813 -0.589001  0.188695 -0.758872
3      3  WU -0.933237  0.955057  0.190794  1.978757

df.set_index('new')
     index         W         X         Y         Z
new
SH       0  0.907969  0.503826  0.651118 -0.319318
BJ       1 -0.848077  0.605965 -2.018168  0.740122
CD       2  0.528813 -0.589001  0.188695 -0.758872
WU       3 -0.933237  0.955057  0.190794  1.978757
df
   index new         W         X         Y         Z
0      0  SH  0.907969  0.503826  0.651118 -0.319318
1      1  BJ -0.848077  0.605965 -2.018168  0.740122
2      2  CD  0.528813 -0.589001  0.188695 -0.758872
3      3  WU -0.933237  0.955057  0.190794  1.978757
outside = ['G1','G1','G1','G2','G2','G2']
inside = [1,2,3,1,2,3]
hier_index = list(zip(outside,inside))
hier_index = pd.MultiIndex.from_tuples(hier_index)
hier_index
MultiIndex(levels=[['G1', 'G2'], [1, 2, 3]],
           labels=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2]])
df = pd.DataFrame(np.random.randn(6,2),index=hier_index,columns=['A','B'])
df
             A         B
G1 1 -1.706086 -1.159119
   2 -0.134841  0.390528
   3  0.166905  0.184502
G2 1  0.807706  0.072960
   2  0.638787  0.329646
   3 -0.497104 -0.754070
df.loc['G1']
          A         B
1 -1.706086 -1.159119
2 -0.134841  0.390528
3  0.166905  0.184502
df.index.names
FrozenList([None, None])
df.index.names = ['Group','Num']
df
                  A         B
Group Num
G1    1   -1.706086 -1.159119
      2   -0.134841  0.390528
      3    0.166905  0.184502
G2    1    0.807706  0.072960
      2    0.638787  0.329646
      3   -0.497104 -0.754070
df.xs('G1')
            A         B
Num
1   -1.706086 -1.159119
2   -0.134841  0.390528
3    0.166905  0.184502
df.xs(['G1',1])
A   -1.706086
B   -1.159119
Name: (G1, 1), dtype: float64
df.xs(1,level='Num')
              A         B
Group
G1    -1.706086 -1.159119
G2     0.807706  0.072960

## 处理缺失数据

import numpy as np
import pandas as pd
df = pd.DataFrame({'A':[1,2,np.nan],
                  'B':[5,np.nan,np.nan],
                  'C':[1,2,3]})
df
     A    B  C
0  1.0  5.0  1
1  2.0  NaN  2
2  NaN  NaN  3
df.dropna()
     A    B  C
0  1.0  5.0  1
df.dropna(axis=1)
   C
0  1
1  2
2  3
df.dropna(thresh=2)
     A    B  C
0  1.0  5.0  1
1  2.0  NaN  2
df.fillna(value='FILL VALUE')
            A           B  C
0           1           5  1
1           2  FILL VALUE  2
2  FILL VALUE  FILL VALUE  3
df['A'].fillna(value=df['A'].mean())
0    1.0
1    2.0
2    1.5
Name: A, dtype: float64
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值