import pandas as pd
import numpy as np
# Load the sample table (it contains missing entries) and preview its first rows.
df = pd.read_csv('data/table_missing.csv')
df.head()
School
Class
ID
Gender
Address
Height
Weight
Math
Physics
0
S_1
C_1
NaN
M
street_1
173
NaN
34.0
A+
1
S_1
C_1
NaN
F
street_2
192
NaN
32.5
B+
2
S_1
C_1
1103.0
M
street_2
186
NaN
87.2
B+
3
S_1
NaN
NaN
F
street_2
167
81.0
80.4
NaN
4
S_1
C_1
1105.0
NaN
street_4
159
64.0
84.8
A-
# Element-wise mask for the Physics column: True where the entry is missing.
df['Physics'].isna().head()
0 False
1 False
2 False
3 True
4 False
Name: Physics, dtype: bool
# Complement of isna(): True where the Physics entry is present.
df['Physics'].notna().head()
0 True
1 True
2 True
3 False
4 True
Name: Physics, dtype: bool
# The same missing-value mask, computed for every column of the frame at once.
df.isna().head()
School
Class
ID
Gender
Address
Height
Weight
Math
Physics
0
False
False
True
False
False
False
True
False
False
1
False
False
True
False
False
False
True
False
False
2
False
False
False
False
False
False
True
False
False
3
False
True
True
False
False
False
False
False
True
4
False
False
False
True
False
False
False
False
False
# Summing the boolean mask column-wise gives the number of missing entries per column.
df.isna().sum()
School 0
Class 4
ID 6
Gender 7
Address 0
Height 0
Weight 13
Math 5
Physics 4
dtype: int64
# Print a per-column summary: non-null count and dtype.
df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 35 entries, 0 to 34
Data columns (total 9 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 School 35 non-null object
1 Class 31 non-null object
2 ID 29 non-null float64
3 Gender 28 non-null object
4 Address 35 non-null object
5 Height 35 non-null int64
6 Weight 22 non-null float64
7 Math 30 non-null float64
8 Physics 31 non-null object
dtypes: float64(3), int64(1), object(5)
memory usage: 2.6+ KB
# Boolean-mask selection: keep only the rows whose Physics entry is missing.
df.loc[df['Physics'].isna()]
School
Class
ID
Gender
Address
Height
Weight
Math
Physics
3
S_1
NaN
NaN
F
street_2
167
81.0
80.4
NaN
8
S_1
C_2
1204.0
F
street_5
162
63.0
33.8
NaN
13
S_1
C_3
1304.0
NaN
street_2
195
70.0
85.2
NaN
22
S_2
C_2
2203.0
M
street_4
155
91.0
73.8
NaN
# Keep only the rows in which every column is present;
# axis=1 reduces across the columns of each row.
df.loc[df.notna().all(axis=1)]
School
Class
ID
Gender
Address
Height
Weight
Math
Physics
5
S_1
C_2
1201.0
M
street_5
159
68.0
97.0
A-
6
S_1
C_2
1202.0
F
street_4
176
94.0
63.5
B-
12
S_1
C_3
1303.0
M
street_7
188
82.0
49.7
B
17
S_2
C_1
2103.0
M
street_4
157
61.0
52.5
B-
21
S_2
C_2
2202.0
F
street_7
194
77.0
68.5
B+
25
S_2
C_3
2301.0
F
street_4
157
78.0
72.3
B+
27
S_2
C_3
2303.0
F
street_7
190
99.0
65.9
C
28
S_2
C_3
2304.0
F
street_6
164
81.0
95.5
A-
29
S_2
C_3
2305.0
M
street_4
187
73.0
48.9
B
# NaN does not compare equal to anything, not even to itself.
np.nan == np.nan
False
# NaN is not equal to zero either.
np.nan == 0
False
# NaN and None are distinct values; the `==` comparison is deliberate here,
# to show that equality tests cannot be used to detect missing data.
np.nan == None
False
# Unlike `==`, equals() reports True for this frame even though it contains NaN.
df.equals(df)
True
# np.nan is an ordinary Python float.
type(np.nan)
float
# A Series built from integers only gets an integer dtype.
pd.Series([1, 2, 3]).dtype
dtype('int64')
# Because NaN is a float, a single NaN makes the whole Series float64.
pd.Series([1, np.nan, 3]).dtype
dtype('float64')
# Forcing a bool dtype coerces NaN to True (see the output below),
# so the missing value is silently lost.
pd.Series([1, np.nan, 3], dtype='bool')
0 True
1 True
2 True
dtype: bool
s = pd. Series( [ True , False ] , dtype= 'bool' )
s[ 1 ] = np. nan
s
0 1.0
1 NaN
dtype: float64
# The ID column contains missing entries; its dtype is shown below.
df['ID'].dtype
dtype('float64')
# dtype of the numeric Math column, which also contains missing entries.
df['Math'].dtype
dtype('float64')
# dtype of the string-valued Class column, which contains missing entries.
df['Class'].dtype
dtype('O')