>>>import pandas as pd
>>>import numpy as np
>>>position=pd.read_csv('test.csv',encoding='gbk')
>>>position
| studentId | name | course | score |
---|
0 | 1 | jack | chinese | 88.0 |
---|
1 | 1 | jack | math | 76.0 |
---|
2 | 1 | jack | english | 93.0 |
---|
3 | 2 | nacy | chinese | 68.0 |
---|
4 | 2 | nacy | math | NaN |
---|
5 | 2 | nacy | english | 82.0 |
---|
6 | 3 | alice | chinese | 87.0 |
---|
7 | 3 | alice | math | 89.0 |
---|
8 | 3 | alice | english | 74.0 |
---|
9 | 3 | alice | english | 74.0 |
---|
1.空值
>>>position.loc[position.name=='jack','name']=np.nan
>>>position
| studentId | name | course | score |
---|
0 | 1 | NaN | chinese | 88.0 |
---|
1 | 1 | NaN | math | 76.0 |
---|
2 | 1 | NaN | english | 93.0 |
---|
3 | 2 | nacy | chinese | 68.0 |
---|
4 | 2 | nacy | math | NaN |
---|
5 | 2 | nacy | english | 82.0 |
---|
6 | 3 | alice | chinese | 87.0 |
---|
7 | 3 | alice | math | 89.0 |
---|
8 | 3 | alice | english | 74.0 |
---|
9 | 3 | alice | english | 74.0 |
---|
1)填充
>>>position.fillna('jack')
| studentId | name | course | score |
---|
0 | 1 | jack | chinese | 88 |
---|
1 | 1 | jack | math | 76 |
---|
2 | 1 | jack | english | 93 |
---|
3 | 2 | nacy | chinese | 68 |
---|
4 | 2 | nacy | math | jack |
---|
5 | 2 | nacy | english | 82 |
---|
6 | 3 | alice | chinese | 87 |
---|
7 | 3 | alice | math | 89 |
---|
8 | 3 | alice | english | 74 |
---|
9 | 3 | alice | english | 74 |
---|
>>>position.name=position.name.fillna('jack')
>>>position
| studentId | name | course | score |
---|
0 | 1 | jack | chinese | 88.0 |
---|
1 | 1 | jack | math | 76.0 |
---|
2 | 1 | jack | english | 93.0 |
---|
3 | 2 | nacy | chinese | 68.0 |
---|
4 | 2 | nacy | math | NaN |
---|
5 | 2 | nacy | english | 82.0 |
---|
6 | 3 | alice | chinese | 87.0 |
---|
7 | 3 | alice | math | 89.0 |
---|
8 | 3 | alice | english | 74.0 |
---|
9 | 3 | alice | english | 74.0 |
---|
2)删除
>>>position.dropna()
| studentId | name | course | score |
---|
0 | 1 | jack | chinese | 88.0 |
---|
1 | 1 | jack | math | 76.0 |
---|
2 | 1 | jack | english | 93.0 |
---|
3 | 2 | nacy | chinese | 68.0 |
---|
5 | 2 | nacy | english | 82.0 |
---|
6 | 3 | alice | chinese | 87.0 |
---|
7 | 3 | alice | math | 89.0 |
---|
8 | 3 | alice | english | 74.0 |
---|
9 | 3 | alice | english | 74.0 |
---|
>>>position.dropna(axis=1)
| studentId | name | course |
---|
0 | 1 | jack | chinese |
---|
1 | 1 | jack | math |
---|
2 | 1 | jack | english |
---|
3 | 2 | nacy | chinese |
---|
4 | 2 | nacy | math |
---|
5 | 2 | nacy | english |
---|
6 | 3 | alice | chinese |
---|
7 | 3 | alice | math |
---|
8 | 3 | alice | english |
---|
9 | 3 | alice | english |
---|
2.重复值
>>>position.duplicated()
0 False
1 False
2 False
3 False
4 False
5 False
6 False
7 False
8 False
9 True
dtype: bool
>>>position[position.duplicated()]
| studentId | name | course | score |
---|
9 | 3 | alice | english | 74.0 |
---|
>>>position[~position.duplicated()]
| studentId | name | course | score |
---|
0 | 1 | jack | chinese | 88.0 |
---|
1 | 1 | jack | math | 76.0 |
---|
2 | 1 | jack | english | 93.0 |
---|
3 | 2 | nacy | chinese | 68.0 |
---|
4 | 2 | nacy | math | NaN |
---|
5 | 2 | nacy | english | 82.0 |
---|
6 | 3 | alice | chinese | 87.0 |
---|
7 | 3 | alice | math | 89.0 |
---|
8 | 3 | alice | english | 74.0 |
---|
>>>position.drop_duplicates()
| studentId | name | course | score |
---|
0 | 1 | jack | chinese | 88.0 |
---|
1 | 1 | jack | math | 76.0 |
---|
2 | 1 | jack | english | 93.0 |
---|
3 | 2 | nacy | chinese | 68.0 |
---|
4 | 2 | nacy | math | NaN |
---|
5 | 2 | nacy | english | 82.0 |
---|
6 | 3 | alice | chinese | 87.0 |
---|
7 | 3 | alice | math | 89.0 |
---|
8 | 3 | alice | english | 74.0 |
---|