>>> import pandas as pd
>>> import numpy as np
>>> position = pd.read_csv('test.csv', encoding='gbk')
>>> position
   studentId   name   course  score
0          1   jack  chinese   88.0
1          1   jack     math   76.0
2          1   jack  english   93.0
3          2   nacy  chinese   68.0
4          2   nacy     math    NaN
5          2   nacy  english   82.0
6          3  alice  chinese   87.0
7          3  alice     math   89.0
8          3  alice  english   74.0
9          3  alice  english   74.0
1.空值
>>> position.loc[position.name == 'jack', 'name'] = np.nan
>>> position
   studentId   name   course  score
0          1    NaN  chinese   88.0
1          1    NaN     math   76.0
2          1    NaN  english   93.0
3          2   nacy  chinese   68.0
4          2   nacy     math    NaN
5          2   nacy  english   82.0
6          3  alice  chinese   87.0
7          3  alice     math   89.0
8          3  alice  english   74.0
9          3  alice  english   74.0
1)填充
>>> position.fillna('jack')
   studentId   name   course  score
0          1   jack  chinese     88
1          1   jack     math     76
2          1   jack  english     93
3          2   nacy  chinese     68
4          2   nacy     math   jack
5          2   nacy  english     82
6          3  alice  chinese     87
7          3  alice     math     89
8          3  alice  english     74
9          3  alice  english     74
>>> position.name = position.name.fillna('jack')
>>> position
   studentId   name   course  score
0          1   jack  chinese   88.0
1          1   jack     math   76.0
2          1   jack  english   93.0
3          2   nacy  chinese   68.0
4          2   nacy     math    NaN
5          2   nacy  english   82.0
6          3  alice  chinese   87.0
7          3  alice     math   89.0
8          3  alice  english   74.0
9          3  alice  english   74.0
2)删除
>>> position.dropna()
   studentId   name   course  score
0          1   jack  chinese   88.0
1          1   jack     math   76.0
2          1   jack  english   93.0
3          2   nacy  chinese   68.0
5          2   nacy  english   82.0
6          3  alice  chinese   87.0
7          3  alice     math   89.0
8          3  alice  english   74.0
9          3  alice  english   74.0
>>> position.dropna(axis=1)
   studentId   name   course
0          1   jack  chinese
1          1   jack     math
2          1   jack  english
3          2   nacy  chinese
4          2   nacy     math
5          2   nacy  english
6          3  alice  chinese
7          3  alice     math
8          3  alice  english
9          3  alice  english
2.重复值
>>> position.duplicated()
0    False
1    False
2    False
3    False
4    False
5    False
6    False
7    False
8    False
9     True
dtype: bool
>>> position[position.duplicated()]
   studentId   name   course  score
9          3  alice  english   74.0
>>> position[~position.duplicated()]
   studentId   name   course  score
0          1   jack  chinese   88.0
1          1   jack     math   76.0
2          1   jack  english   93.0
3          2   nacy  chinese   68.0
4          2   nacy     math    NaN
5          2   nacy  english   82.0
6          3  alice  chinese   87.0
7          3  alice     math   89.0
8          3  alice  english   74.0
>>> position.drop_duplicates()
   studentId   name   course  score
0          1   jack  chinese   88.0
1          1   jack     math   76.0
2          1   jack  english   93.0
3          2   nacy  chinese   68.0
4          2   nacy     math    NaN
5          2   nacy  english   82.0
6          3  alice  chinese   87.0
7          3  alice     math   89.0
8          3  alice  english   74.0