# 1. 删除指定的行或列
# Demo frame: the integers 0..11 laid out 3x4, rows labelled a-c, columns A-D.
t1 = pd.DataFrame(
    np.arange(12, dtype="int").reshape(3, 4),
    index=list("abc"),
    columns=list("ABCD"),
)
# drop() returns a new frame by default (inplace=False), so t1 is untouched.
# Remove the row labelled 'a'.
t1.drop(index='a')
# Remove the single column 'A'.
t1.drop(columns='A')
# Remove several columns at once by passing a list of labels.
t1.drop(columns=['A', 'B'])
# 2. 空缺值处理
# Missing-value handling demo on a 3x4 frame (rows a-c, columns A-D, values 0..11).
t1 = pd.DataFrame(np.arange(12).reshape((3, 4)).astype("int"), index=list("abc"), columns=list("ABCD"))
# An integer column cannot store NaN. Assigning NaN into it relies on pandas
# silently upcasting the column, which recent pandas versions flag as a
# deprecated incompatible-dtype assignment. Cast column "A" to float
# explicitly first; the resulting dtypes match what the implicit upcast
# produced (A becomes float64, B-D stay integer).
t1 = t1.astype({"A": "float64"})
t1.iloc[0, 0] = np.nan
# Element-wise boolean masks: True where a value is missing / present.
pd.isnull(t1)
pd.notnull(t1)
# Boolean-mask row selection: keep only the rows whose "A" value is present.
t1[pd.notnull(t1['A'])]
# Drop every row that contains at least one NaN ...
t1.dropna(axis=0, how="any", inplace=False)
# ... versus only rows in which all values are NaN (none in this frame).
t1.dropna(axis=0, how="all", inplace=False)
# Fill each NaN with the mean of its own column (mean() skips NaN by default).
t1.fillna(t1.mean())
# 3. 重复值处理
# Two-column demo frame containing repeated rows: k1 is a group label,
# k2 a value whose 4 appears three times within group 's2'.
df = pd.DataFrame(
    dict(
        k1=['s1', 's1', 's1', 's2', 's2', 's2', 's2', 's2'],
        k2=[1, 2, 3, 4, 5, 6, 4, 4],
    )
)
print(df)
# Boolean Series marking rows that repeat an earlier row (all columns compared).
df.duplicated()
# Same check, but comparing only the 'k2' column.
df.duplicated(subset='k2')
# Same check, comparing the ('k1', 'k2') pair.
df.duplicated(subset=['k1', 'k2'])
# drop_duplicates() returns a new frame by default (inplace=False) and keeps
# the first occurrence of each duplicate group.
df.drop_duplicates()
df.drop_duplicates(subset='k2')
df.drop_duplicates(subset=['k1', 'k2'])