# Two small example frames: they share only column "B" and index label 1,
# so aligning them yields the index 0..2 and the columns A, B, C.
df1 = pd.DataFrame(data=dict(A=[1, 1], B=[4, 4]))
df2 = pd.DataFrame(data=dict(B=[3, 3], C=[1, 1]), index=[1, 2])
df1
Out[74]:
   A  B
0  1  4
1  1  4
df2
Out[75]:
   B  C
1  3  1
2  3  1
def take_smaller(x, y):
    """Return whichever of the two columns has the smaller sum.

    Intended as the ``func`` argument of ``DataFrame.combine``, which the
    calls in this file pass it to.

    Args:
        x: Column taken from the calling frame.
        y: Column taken from the other frame.

    Returns:
        ``x`` when ``x.sum()`` is strictly smaller than ``y.sum()``,
        otherwise ``y`` (so ``y`` also wins ties).
    """
    return x if x.sum() < y.sum() else y
"""对于 df 中不存在的列,结果为 NaN"""
# df1 has no column C, so C is NaN in the result.
df1.combine(other=df2, func=take_smaller, overwrite=False)
Out[78]:
     A    B    C
0  1.0  NaN  NaN
1  1.0  3.0  NaN
2  NaN  3.0  NaN
# df2 has no column A, so A is NaN in the result.
df2.combine(df1, take_smaller, overwrite=False)
Out[79]:
     A    B    C
0  NaN  NaN  NaN
1  NaN  3.0  1.0
2  NaN  3.0  1.0
"""
参数 overwrite:默认为 True
当 df 中存在、other 中不存在时,
可以通过该参数确定是否使用 other 的值填充 df
"""
# df2 has no column A, so A is all NaN in the result.
df1.combine(df2, take_smaller)
Out[80]:
A B C
0 NaN NaN NaN
1 NaN 3.0 NaN
2 NaN 3.0 NaN
# df1 has no column C, so C is all NaN in the result.
df2.combine(df1, take_smaller)
Out[81]:
A B C
0 NaN NaN NaN
1 NaN 3.0 NaN
2 NaN 3.0 NaN
"""
参数 fill_value:默认为 None
在把各列传给 func 之前,用该值填充两个 DataFrame 中的缺失值;
因 overwrite=False 而原样保留的列不会被填充
"""
# Column A is kept from df1 as-is because of overwrite=False, so its NaN
# is not replaced by 999.
df1.combine(df2, take_smaller, overwrite=False, fill_value=999)
Out[93]:
     A      B      C
0  1.0  999.0  999.0
1  1.0    3.0    1.0
2  NaN    3.0    1.0
# Column C is kept from df2 as-is because of overwrite=False, so its NaN
# is not replaced by 999.
df2.combine(df1, take_smaller, overwrite=False, fill_value=999)
Out[90]:
       A      B    C
0    1.0  999.0  NaN
1    1.0    3.0  1.0
2  999.0    3.0  1.0
#################################################################################
# With the default overwrite=True no column is exempt: every missing value
# is filled with 999 before take_smaller compares the columns.
df1.combine(df2, take_smaller, fill_value=999)
Out[94]:
       A      B      C
0    1.0  999.0  999.0
1    1.0    3.0    1.0
2  999.0    3.0    1.0
# With the default overwrite=True no column is exempt: every missing value
# is filled with 999 before take_smaller compares the columns.
df2.combine(df1, take_smaller, fill_value=999)
Out[96]:
       A      B      C
0    1.0  999.0  999.0
1    1.0    3.0    1.0
2  999.0    3.0    1.0