# Build a 6x4 frame holding 0..23, indexed by six consecutive days
# starting at 2013-01-01, with columns A-D.
dates = pd.date_range('20130101', periods=6)
df = pd.DataFrame(
    np.arange(24).reshape((6, 4)),
    index=dates,
    columns=['A', 'B', 'C', 'D'],
)

# Columns B and C are about to receive NaN. Writing NaN into an integer
# column relies on implicit upcasting, which recent pandas releases
# deprecate, so cast those two columns to float explicitly first.
# A and D stay integer, which keeps the printed output unchanged.
df = df.astype({'B': float, 'C': float})

# Punch two holes into the data to demonstrate missing-value handling.
df.iloc[0, 1] = np.nan  # row 2013-01-01, column B
df.iloc[1, 2] = np.nan  # row 2013-01-02, column C
print(df)  # out:
             A     B     C   D
2013-01-01   0   NaN   2.0   3
2013-01-02   4   5.0   NaN   7
2013-01-03   8   9.0  10.0  11
2013-01-04  12  13.0  14.0  15
2013-01-05  16  17.0  18.0  19
2013-01-06  20  21.0  22.0  23
dropna处理NULL数据
# Drop every row that contains at least one missing value.
# how accepts 'any' or 'all': with 'all', a row (or column) is dropped
# only when every value in it is NaN.
print(df.dropna(how='any', axis=0))  # out:
             A     B     C   D
2013-01-03   8   9.0  10.0  11
2013-01-04  12  13.0  14.0  15
2013-01-05  16  17.0  18.0  19
2013-01-06  20  21.0  22.0  23
fillna填充NULL数据
# Replace every missing value with 0 and show the result.
filled = df.fillna(value=0)
print(filled)
             A     B     C   D
2013-01-01   0   0.0   2.0   3
2013-01-02   4   5.0   0.0   7
2013-01-03   8   9.0  10.0  11
2013-01-04  12  13.0  14.0  15
2013-01-05  16  17.0  18.0  19
2013-01-06  20  21.0  22.0  23
isnull寻找NULL数据
# Build a boolean mask of the same shape as df: True where a value is missing.
missing_mask = df.isnull()
print(missing_mask)
out:
                A      B      C      D
2013-01-01  False   True  False  False
2013-01-02  False  False   True  False
2013-01-03  False  False  False  False
2013-01-04  False  False  False  False
2013-01-05  False  False  False  False
2013-01-06  False  False  False  False
Pandas文件导入、导出
# File import: load the spreadsheet 'test.xls' into a DataFrame.
# NOTE(review): reading legacy .xls files presumably needs an optional
# Excel engine (e.g. xlrd) to be installed -- confirm for your environment.
data = pd.read_excel('test.xls')
# The print call had been swallowed by the trailing comment on the line
# above, so it never ran; restore it as its own statement.
print(data)  # out:
   Student ID     name  age  gender
0           0    kelly   11  Female
1           1     lory   12  Female
2           2    dlsaj   11    Male
3           3   sddsds   11    Male
4           4     sdsd   11    Male
5           5      sds   11  Female
6           6     dsds   11  Female
7           7     sdsd   11    Male
8           8  sdsdsds   22    Male
# File export: serialize the DataFrame to a pickle file on disk.
data.to_pickle(path='student.pickle')
Pandas学习笔记(4)-Pandas处理丢失数据dates = pd.date_range('20130101',periods=6)df = pd.DataFrame(np.arange(24).reshape((6,4)),index=dates,columns=['A','B','C','D'])df.iloc[0,1] = np.nandf.iloc[1,2] = np.nanprint(df)#out: A B C D2013-01-