# Load the elements table, then report for each column whether it contains
# at least one missing value.
eles = pd.read_csv(filepath_or_buffer="/home/aistudio/data/data20507/elements.csv")
eles.isnull().any(axis=0)
atomic number False
symbol False
name False
atomic mass False
CPK False
electronic configuration False
electronegativity True
atomic radius True
ion radius True
van der Waals radius True
IE-1 True
EA True
standard state True
bonding type True
melting point True
boiling point True
density True
metal False
year discovered False
group False
period False
dtype: bool
# Fraction of missing values per column: the mean of the boolean NA mask
# equals (rows - non-NA count) / rows.
eles.isna().mean()
atomic number 0.000000
symbol 0.000000
name 0.000000
atomic mass 0.000000
CPK 0.000000
electronic configuration 0.000000
electronegativity 0.177966
atomic radius 0.398305
ion radius 0.220339
van der Waals radius 0.677966
IE-1 0.135593
EA 0.279661
standard state 0.161017
bonding type 0.169492
melting point 0.144068
boiling point 0.203390
density 0.186441
metal 0.000000
year discovered 0.000000
group 0.000000
period 0.000000
dtype: float64
# Keep only the rows that have no missing value in any column, then confirm
# that no column of the result still contains a missing value.
eles_nona = eles.dropna(axis=0, how="any")
eles_nona.isnull().any(axis=0)
atomic number False
symbol False
name False
atomic mass False
CPK False
electronic configuration False
electronegativity False
atomic radius False
ion radius False
van der Waals radius False
IE-1 False
EA False
standard state False
bonding type False
melting point False
boiling point False
density False
metal False
year discovered False
group False
period False
dtype: bool
# To meet the platform's requirements, the data file name differs slightly
# from the one used in the textbook.
persons = pd.read_csv(filepath_or_buffer="/home/aistudio/data/data20507/Person.csv")
# ① Draw 20 random rows to work with.
pdf = persons.sample(n=20)
# ② Build 'Height-na': a copy of 'Height' in which every height that is a
# multiple of 5 is replaced by NaN, simulating missing values.
heights = pdf['Height']
is_multiple_of_five = (heights % 5) == 0
pdf['Height-na'] = np.where(is_multiple_of_five, np.nan, heights)
pdf
Gender
Height
Weight
Index
Height-na
64
Male
175
135
5
NaN
225
Female
155
144
5
NaN
484
Female
188
115
4
188.0
293
Female
165
83
4
NaN
102
Male
161
155
5
161.0
282
Female
147
94
5
147.0
139
Male
159
124
5
159.0
66
Female
172
96
4
172.0
365
Male
141
80
5
141.0
397
Male
169
136
5
169.0
18
Male
144
145
5
144.0
172
Male
167
151
5
167.0
443
Male
152
146
5
152.0
358
Female
180
58
1
NaN
447
Female
176
121
4
176.0
251
Male
140
143
5
NaN
360
Female
193
61
1
193.0
346
Female
191
68
2
191.0
5
Male
189
104
3
189.0
294
Female
168
143
5
168.0
# Impute the missing heights with the mean of the observed values.
# The result is assigned back to the column instead of calling
# fillna(inplace=True) on the pdf['Height-na'] selection: the in-place form is
# chained assignment, which emits a FutureWarning in pandas 2.x and leaves
# pdf unchanged once Copy-on-Write is enabled.
pdf['Height-na'] = pdf['Height-na'].fillna(pdf['Height-na'].mean())
pdf
Gender
Height
Weight
Index
Height-na
64
Male
175
135
5
167.8
225
Female
155
144
5
167.8
484
Female
188
115
4
188.0
293
Female
165
83
4
167.8
102
Male
161
155
5
161.0
282
Female
147
94
5
147.0
139
Male
159
124
5
159.0
66
Female
172
96
4
172.0
365
Male
141
80
5
141.0
397
Male
169
136
5
169.0
18
Male
144
145
5
144.0
172
Male
167
151
5
167.0
443
Male
152
146
5
152.0
358
Female
180
58
1
167.8
447
Female
176
121
4
176.0
251
Male
140
143
5
167.8
360
Female
193
61
1
193.0
346
Female
191
68
2
191.0
5
Male
189
104
3
189.0
294
Female
168
143
5
168.0
# Summary statistics of the original 'Height' column.
pdf.loc[:, 'Height'].describe()
count 20.000000
mean 166.600000
std 16.740748
min 140.000000
25% 154.250000
50% 167.500000
75% 177.000000
max 193.000000
Name: Height, dtype: float64
# Summary statistics of the 'Height-na' column.
pdf.loc[:, 'Height-na'].describe()
count 20.000000
mean 167.800000
std 14.882699
min 141.000000
25% 160.500000
50% 167.800000
75% 173.000000
max 193.000000
Name: Height-na, dtype: float64
/home/aistudio/external-libraries/missingpy/utils.py:124: RuntimeWarning: invalid value encountered in sqrt
return distances if squared else np.sqrt(distances, out=distances)