数据缺失值处理
import pandas as pd
import numpy as np
# Build a 6x9 demo frame of random integers in [3, 9), indexed by six
# consecutive days starting 2017-08-13, with columns labelled 0..8.
date1 = pd.date_range("20170813", periods=6)
df = pd.DataFrame(
    data=np.random.randint(3, 9, size=(6, 9)),
    index=date1,
    columns=np.arange(9),
)
# Integer columns cannot hold NaN. Cast the two target columns to float up
# front instead of relying on implicit upcasting during assignment, which
# recent pandas releases deprecate.
df = df.astype({5: float, 7: float})
# Knock out two cells so the missing-value helpers have something to act on.
df.iloc[4, 5] = np.nan
df.iloc[5, 7] = np.nan
# `pprint` was never imported here; the built-in print renders the frame.
print(df)
            0  1  2  3  4    5  6    7  8
2017-08-13  8  3  5  3  3  3.0  4  3.0  4
2017-08-14  8  6  8  8  8  5.0  6  5.0  6
2017-08-15  4  6  5  5  4  6.0  7  6.0  8
2017-08-16  3  3  3  3  6  7.0  4  7.0  5
2017-08-17  6  6  8  3  8  NaN  7  7.0  6
2017-08-18  4  7  4  4  3  4.0  3  NaN  6
# Keep only the rows (axis=0) that contain no missing value at all.
complete_rows = df.dropna(axis=0, how="any")
print(complete_rows)
            0  1  2  3  4    5  6    7  8
2017-08-13  8  3  5  3  3  3.0  4  3.0  4
2017-08-14  8  6  8  8  8  5.0  6  5.0  6
2017-08-15  4  6  5  5  4  6.0  7  6.0  8
2017-08-16  3  3  3  3  6  7.0  4  7.0  5
# Discard every column (axis=1) that holds at least one missing value.
complete_columns = df.dropna(axis=1, how="any")
print(complete_columns)
            0  1  2  3  4  6  8
2017-08-13  8  3  5  3  3  4  4
2017-08-14  8  6  8  8  8  6  6
2017-08-15  4  6  5  5  4  7  8
2017-08-16  3  3  3  3  6  4  5
2017-08-17  6  6  8  3  8  7  6
2017-08-18  4  7  4  4  3  3  6
# Substitute 0 for each missing entry; the result is a new frame, so df
# itself still contains its NaN cells afterwards.
filled = df.fillna(value=0)
print(filled)
            0  1  2  3  4    5  6    7  8
2017-08-13  8  3  5  3  3  3.0  4  3.0  4
2017-08-14  8  6  8  8  8  5.0  6  5.0  6
2017-08-15  4  6  5  5  4  6.0  7  6.0  8
2017-08-16  3  3  3  3  6  7.0  4  7.0  5
2017-08-17  6  6  8  3  8  0.0  7  7.0  6
2017-08-18  4  7  4  4  3  4.0  3  0.0  6
# Boolean mask with the same shape as df: True marks a missing cell.
missing_mask = df.isnull()
print(missing_mask)
                0      1      2      3      4      5      6      7      8
2017-08-13  False  False  False  False  False  False  False  False  False
2017-08-14  False  False  False  False  False  False  False  False  False
2017-08-15  False  False  False  False  False  False  False  False  False
2017-08-16  False  False  False  False  False  False  False  False  False
2017-08-17  False  False  False  False  False   True  False  False  False
2017-08-18  False  False  False  False  False  False  False   True  False
# Collapse the missing-value mask into one flag: True if any cell of df is NaN.
any_missing = np.any(df.isnull())
print(any_missing)
True