- from pandas import Series,DataFrame
import pandas as pd
import numpy as np
- df1.isnull()
- df1.notnull()
- df1.isnull().sum()
- df1.isnull().sum().sum()
- df1.info() 按照字段统计
- df1.dropna()
df2.iloc[2,:]=np.nan
df2[3]=np.nan
df2
- df2.dropna(how='all')
- df2.fillna({1:6,3:0},inplace=True)
- df2.fillna(method='ffill') 向前填充(用前一个有效值填充缺失值)
- df2[0] = df2[0].fillna(df2[0].mean())
- df2.fillna?
- df1.replace(['',2001],['不详',2002])
- df1.replace({'':'不详',2001:2002})
- df1.duplicated() 查看重复
- df1.drop_duplicates(['sex','year'],keep='last') 删除重复项
# Sample gradebook: one row per student, with the student's name and math score.
data = dict(
    name=['张三', '李四', '王五', '小明'],
    math=[79, 52, 63, 92],
)
df2 = DataFrame(data=data)
df2  # bare expression: displays the frame in an interactive session
def f(x):
    """Return the grade label for a numeric score.

    Bands (lower bound inclusive):
        x >= 90        -> '优秀'
        70 <= x < 90   -> '良好'
        60 <= x < 70   -> '合格'
        otherwise      -> '不合格'

    Args:
        x: The score to classify; any value comparable with numbers.

    Returns:
        One of the four label strings listed above.
    """
    # The top band must be inclusive: the next band stops just short of 90,
    # so a strict ``x > 90`` would leave a score of exactly 90 unmatched and
    # let it fall through to the failing label.
    if x >= 90:
        return '优秀'
    if x >= 70:
        return '良好'
    if x >= 60:
        return '合格'
    return '不合格'
# Grade every math score by passing f to Series.map, which calls it on each
# element; the resulting labels are stored in a new 'class' column.
df2['class'] = df2['math'].map(f)
df2  # bare expression: displays the frame in an interactive session
创建一个函数,然后用map方法逐个处理
- df3 = DataFrame(np.arange(10),columns=['X'])
df3['Y']= 2*df3['X'] + 0.5
df3.iloc[9,1] = 185
df3
df3.plot(kind='scatter',x='X',y='Y')
检测异常值
- lambda表达式即匿名函数,通常用在需要一个函数、但又不想费神为它命名的场合。
- dummies = df2['朝向'].apply(lambda x:Series(x.split('/')).value_counts())
dummies
dummies = dummies.fillna(0).astype(int)
dummies
- price = DataFrame({
'fruit':['apple','banana','orange'],
'price':[23,32,45]
})
amount = DataFrame({
'fruit':['apple','banana','apple','apple','banana','pear'],
'amount':[5,3,6,3,5,7]
})
pd.merge(amount,price) 按同名列合并,默认取交集(inner)
pd.merge(amount,price,on='fruit')
pd.merge(amount,price,left_on='fruit',right_on='fruit')
pd.merge(amount,price,how='outer') 并集
pd.merge(amount,price,how='left')
- left = DataFrame({
'key1':['one','one','two'],
'key2':['a','b','a'],
'val1':['2','3','4']
})
right = DataFrame({
'key1':['one','one','two','two'],
'key2':['a','a','a','b'],
'val2':['5','6','7','8']
})
pd.merge(left,right,on=['key1','key2'],how='outer') 多键连接,传入list
pd.merge(left,right,on='key1')
pd.merge(left,right,on='key1',suffixes=('_left','_right'))
- pd.merge(left2,right2,left_on='key',right_index=True) #索引作为连接键
- left3.join(right3,how='outer') #join方法快速连接