import pandas as pd
# Walk-through of basic pandas data cleaning: inspecting, dropping and filling
# missing values, then detecting and removing duplicate rows.
# In a DataFrame, `index` holds the row labels and `columns` the column labels.

# Count East Asian wide characters as double width when aligning printed columns.
pd.set_option('display.unicode.east_asian_width', True)

# Raw string: the backslashes of this Windows path are literal characters.
# Without the r-prefix, '\p' and '\C' are invalid escape sequences, which only
# work by accident and raise a warning on recent Python versions.
s1 = r'H:\pythonProject\COD1.csv'
s2 = pd.read_csv(s1, index_col=0)  # use the first CSV column as the row index
print(s2)  # the data exactly as loaded

# --- Insert data ---
# Assigning through .loc to labels that do not exist yet enlarges the frame:
# a new row 's14' and/or a new column 'b7' is created, and every cell that
# was not explicitly assigned becomes NaN.
s2.loc['s14', 'b7'] = 34
print('*****---'*10)
print(s2)  # the frame after the insertion

# --- Inspect missing values with info() ---
print('*****---'*10)
# info() writes the per-column non-null counts itself and returns None, so the
# surrounding print() additionally emits a line reading "None".
print(s2.info())

# --- Test each cell for a missing value ---
print(s2.isnull())  # True where the cell is missing
print('*****---'*10)
print(s2.notnull())  # True where the cell holds a value

# --- Drop missing values ---
# dropna() returns a new frame without any row that contains a NaN; s2 itself
# is left unchanged. The result is empty when every row has at least one NaN.
s3 = s2.dropna()
print('*****---'*10)
print(s3)  # only the rows with no missing values

# --- Fill missing values with fillna() ---
s2['b7'] = s2['b7'].fillna(2)  # replace NaN in column 'b7' with 2
print('*****---'*10)
print(s2)  # the frame after filling column 'b7'

# --- Append one more row ('s15') for the duplicate handling below ---
# The list supplies one value per column, in column order.
# NOTE(review): the COD literal matches the value *displayed* for s12, but
# printed floats are rounded, so it may not compare equal to the stored
# value - confirm against the CSV if s15 is meant to count as a duplicate.
s2.loc['s15'] = [12.581818, 1.046677, 10.067138, 10.071816, 10.052377, 10.082932, 12.0]
print('*****---'*10)
print(s2)  # the frame after appending 's15'

# --- Duplicate handling ---
# duplicated() marks a row True when all of its values repeat an earlier row.
n1 = s2.duplicated()
print('*****---'*10)
print(n1)

# --- Remove duplicates judged on a chosen column only ---
# Rows whose 'COD' value already appeared earlier are dropped (the first
# occurrence is kept); a new frame is returned and s2 is not modified.
n2 = s2.drop_duplicates(['COD'])
print('*****---'*10)
print(n2)
结果为:
H:\pythonProject\venv\Scripts\python.exe H:/pythonProject/main.py
COD b1 b2 b3 b4 b5
s1 6.246465 0.033064 0.044745 0.063753 0.046467 0.061651
s2 7.300000 0.032765 0.040027 0.060715 0.047964 0.062193
s3 7.151515 0.034787 0.044034 0.068569 0.047349 0.062583
s4 5.858586 0.038918 0.054270 0.070237 0.049240 0.063075
s5 7.458586 0.037524 0.047527 0.065471 0.046837 0.060580
s6 7.458586 0.044111 0.055397 0.075133 0.052282 0.067838
s7 7.022222 0.043152 0.056629 0.072561 0.052936 0.070106
s8 7.846465 0.044698 0.061596 0.073882 0.053898 0.073508
s9 10.561616 0.042522 0.060696 0.069076 0.051668 0.080740
s10 2.828283 0.048858 0.057816 0.077516 0.056419 0.081748
s11 8.492929 0.041209 0.058360 0.070019 0.053007 0.095129
s12 12.581818 0.046677 0.067138 0.071816 0.052377 0.082932
s11 8.492929 0.041209 0.058360 0.070019 0.053007 0.095129
*****---*****---*****---*****---*****---*****---*****---*****---*****---*****---
COD b1 b2 b3 b4 b5 b7
s1 6.246465 0.033064 0.044745 0.063753 0.046467 0.061651 NaN
s2 7.300000 0.032765 0.040027 0.060715 0.047964 0.062193 NaN
s3 7.151515 0.034787 0.044034 0.068569 0.047349 0.062583 NaN
s4 5.858586 0.038918 0.054270 0.070237 0.049240 0.063075 NaN
s5 7.458586 0.037524 0.047527 0.065471 0.046837 0.060580 NaN
s6 7.458586 0.044111 0.055397 0.075133 0.052282 0.067838 NaN
s7 7.022222 0.043152 0.056629 0.072561 0.052936 0.070106 NaN
s8 7.846465 0.044698 0.061596 0.073882 0.053898 0.073508 NaN
s9 10.561616 0.042522 0.060696 0.069076 0.051668 0.080740 NaN
s10 2.828283 0.048858 0.057816 0.077516 0.056419 0.081748 NaN
s11 8.492929 0.041209 0.058360 0.070019 0.053007 0.095129 NaN
s12 12.581818 0.046677 0.067138 0.071816 0.052377 0.082932 NaN
s11 8.492929 0.041209 0.058360 0.070019 0.053007 0.095129 NaN
s14 NaN NaN NaN NaN NaN NaN 34.0
*****---*****---*****---*****---*****---*****---*****---*****---*****---*****---
<class 'pandas.core.frame.DataFrame'>
Index: 14 entries, s1 to s14
Data columns (total 7 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 COD 13 non-null float64
1 b1 13 non-null float64
2 b2 13 non-null float64
3 b3 13 non-null float64
4 b4 13 non-null float64
5 b5 13 non-null float64
6 b7 1 non-null float64
dtypes: float64(7)
memory usage: 1.4+ KB
None
COD b1 b2 b3 b4 b5 b7
s1 False False False False False False True
s2 False False False False False False True
s3 False False False False False False True
s4 False False False False False False True
s5 False False False False False False True
s6 False False False False False False True
s7 False False False False False False True
s8 False False False False False False True
s9 False False False False False False True
s10 False False False False False False True
s11 False False False False False False True
s12 False False False False False False True
s11 False False False False False False True
s14 True True True True True True False
*****---*****---*****---*****---*****---*****---*****---*****---*****---*****---
COD b1 b2 b3 b4 b5 b7
s1 True True True True True True False
s2 True True True True True True False
s3 True True True True True True False
s4 True True True True True True False
s5 True True True True True True False
s6 True True True True True True False
s7 True True True True True True False
s8 True True True True True True False
s9 True True True True True True False
s10 True True True True True True False
s11 True True True True True True False
s12 True True True True True True False
s11 True True True True True True False
s14 False False False False False False True
*****---*****---*****---*****---*****---*****---*****---*****---*****---*****---
Empty DataFrame
Columns: [COD, b1, b2, b3, b4, b5, b7]
Index: []
*****---*****---*****---*****---*****---*****---*****---*****---*****---*****---
COD b1 b2 b3 b4 b5 b7
s1 6.246465 0.033064 0.044745 0.063753 0.046467 0.061651 2.0
s2 7.300000 0.032765 0.040027 0.060715 0.047964 0.062193 2.0
s3 7.151515 0.034787 0.044034 0.068569 0.047349 0.062583 2.0
s4 5.858586 0.038918 0.054270 0.070237 0.049240 0.063075 2.0
s5 7.458586 0.037524 0.047527 0.065471 0.046837 0.060580 2.0
s6 7.458586 0.044111 0.055397 0.075133 0.052282 0.067838 2.0
s7 7.022222 0.043152 0.056629 0.072561 0.052936 0.070106 2.0
s8 7.846465 0.044698 0.061596 0.073882 0.053898 0.073508 2.0
s9 10.561616 0.042522 0.060696 0.069076 0.051668 0.080740 2.0
s10 2.828283 0.048858 0.057816 0.077516 0.056419 0.081748 2.0
s11 8.492929 0.041209 0.058360 0.070019 0.053007 0.095129 2.0
s12 12.581818 0.046677 0.067138 0.071816 0.052377 0.082932 2.0
s11 8.492929 0.041209 0.058360 0.070019 0.053007 0.095129 2.0
s14 NaN NaN NaN NaN NaN NaN 34.0
*****---*****---*****---*****---*****---*****---*****---*****---*****---*****---
COD b1 b2 b3 b4 b5 b7
s1 6.246465 0.033064 0.044745 0.063753 0.046467 0.061651 2.0
s2 7.300000 0.032765 0.040027 0.060715 0.047964 0.062193 2.0
s3 7.151515 0.034787 0.044034 0.068569 0.047349 0.062583 2.0
s4 5.858586 0.038918 0.054270 0.070237 0.049240 0.063075 2.0
s5 7.458586 0.037524 0.047527 0.065471 0.046837 0.060580 2.0
s6 7.458586 0.044111 0.055397 0.075133 0.052282 0.067838 2.0
s7 7.022222 0.043152 0.056629 0.072561 0.052936 0.070106 2.0
s8 7.846465 0.044698 0.061596 0.073882 0.053898 0.073508 2.0
s9 10.561616 0.042522 0.060696 0.069076 0.051668 0.080740 2.0
s10 2.828283 0.048858 0.057816 0.077516 0.056419 0.081748 2.0
s11 8.492929 0.041209 0.058360 0.070019 0.053007 0.095129 2.0
s12 12.581818 0.046677 0.067138 0.071816 0.052377 0.082932 2.0
s11 8.492929 0.041209 0.058360 0.070019 0.053007 0.095129 2.0
s14 NaN NaN NaN NaN NaN NaN 34.0
s15 12.581818 1.046677 10.067138 10.071816 10.052377 10.082932 12.0
*****---*****---*****---*****---*****---*****---*****---*****---*****---*****---
s1 False
s2 False
s3 False
s4 False
s5 False
s6 False
s7 False
s8 False
s9 False
s10 False
s11 False
s12 False
s11 True
s14 False
s15 False
dtype: bool
*****---*****---*****---*****---*****---*****---*****---*****---*****---*****---
COD b1 b2 b3 b4 b5 b7
s1 6.246465 0.033064 0.044745 0.063753 0.046467 0.061651 2.0
s2 7.300000 0.032765 0.040027 0.060715 0.047964 0.062193 2.0
s3 7.151515 0.034787 0.044034 0.068569 0.047349 0.062583 2.0
s4 5.858586 0.038918 0.054270 0.070237 0.049240 0.063075 2.0
s5 7.458586 0.037524 0.047527 0.065471 0.046837 0.060580 2.0
s7 7.022222 0.043152 0.056629 0.072561 0.052936 0.070106 2.0
s8 7.846465 0.044698 0.061596 0.073882 0.053898 0.073508 2.0
s9 10.561616 0.042522 0.060696 0.069076 0.051668 0.080740 2.0
s10 2.828283 0.048858 0.057816 0.077516 0.056419 0.081748 2.0
s11 8.492929 0.041209 0.058360 0.070019 0.053007 0.095129 2.0
s12 12.581818 0.046677 0.067138 0.071816 0.052377 0.082932 2.0
s14 NaN NaN NaN NaN NaN NaN 34.0
s15 12.581818 1.046677 10.067138 10.071816 10.052377 10.082932 12.0
进程已结束,退出代码为 0