left = pd.DataFrame({'key':['K0','K1','K2','K3'],'A':['A0','A1','A2','A3'],'B':['B0','B1','B2','B3']})
right = pd.DataFrame({'key':['K0','K1','K2','K3'],'C':['C0','C1','C2','C3'],'D':['D0','D1','D2','D3']})print(left)print(right)
A B key
0 A0 B0 K0
1 A1 B1 K1
2 A2 B2 K2
3 A3 B3 K3
C D key
0 C0 D0 K0
1 C1 D1 K1
2 C2 D2 K2
3 C3 D3 K3
res = pd.merge(left,right,on='key')#依据两个表都有的key将其进行合并
res
A
B
key
C
D
0
A0
B0
K0
C0
D0
1
A1
B1
K1
C1
D1
2
A2
B2
K2
C2
D2
3
A3
B3
K3
C3
D3
left = pd.DataFrame({'key1':['K0','K0','K1','K2'],'key2':['K0','K1','K0','K1'],'A':['A0','A1','A2','A3'],'B':['B0','B1','B2','B3']})
right = pd.DataFrame({'key1':['K0','K1','K1','K3'],'key2':['K0','K0','K0','K0'],'C':['C0','C1','C2','C3'],'D':['D0','D1','D2','D3']})print(left)print(right)
#how = ['left','right','inner','outer']
res = pd.merge(left,right,on=['key1','key2'],how='outer')#how默认inner outer是双方若不一致就记为NAN尽量将所有的值都合并进来
res
A
B
key1
key2
C
D
0
A0
B0
K0
K0
C0
D0
1
A1
B1
K0
K1
NaN
NaN
2
A2
B2
K1
K0
C1
D1
3
A2
B2
K1
K0
C2
D2
4
A3
B3
K2
K1
NaN
NaN
5
NaN
NaN
K3
K0
C3
D3
#how = ['left','right','inner','outer']
res = pd.merge(left,right,on=['key1','key2'],how='inner')#how默认inner inner双方若不一致就删除不显示
res
A
B
key1
key2
C
D
0
A0
B0
K0
K0
C0
D0
1
A2
B2
K1
K0
C1
D1
2
A2
B2
K1
K0
C2
D2
#how = ['left','right','inner','outer'] left 只考虑左边的dataframe
res = pd.merge(left,right,on=['key1','key2'],how='left')#how默认inner 根据left左边的进行合并
res
A
B
key1
key2
C
D
0
A0
B0
K0
K0
C0
D0
1
A1
B1
K0
K1
NaN
NaN
2
A2
B2
K1
K0
C1
D1
3
A2
B2
K1
K0
C2
D2
4
A3
B3
K2
K1
NaN
NaN
#how = ['left','right','inner','outer'] indicator 描述数据来源,是左边,右边还是都有
res = pd.merge(left,right,on=['key1','key2'],how='outer',indicator=True)#显示merge信息
res
A
B
key1
key2
C
D
_merge
0
A0
B0
K0
K0
C0
D0
both
1
A1
B1
K0
K1
NaN
NaN
left_only
2
A2
B2
K1
K0
C1
D1
both
3
A2
B2
K1
K0
C2
D2
both
4
A3
B3
K2
K1
NaN
NaN
left_only
5
NaN
NaN
K3
K0
C3
D3
right_only
#how = ['left','right','inner','outer']
res = pd.merge(left,right,on=['key1','key2'],how='outer',indicator='indicator_column')#显示merge信息
res
A
B
key1
key2
C
D
indicator_column
0
A0
B0
K0
K0
C0
D0
both
1
A1
B1
K0
K1
NaN
NaN
left_only
2
A2
B2
K1
K0
C1
D1
both
3
A2
B2
K1
K0
C2
D2
both
4
A3
B3
K2
K1
NaN
NaN
left_only
5
NaN
NaN
K3
K0
C3
D3
right_only
left = pd.DataFrame({'A':['A0','A1','A2'],'B':['B0','B1','B2']},
index =['K0','K1','K2'])
right = pd.DataFrame({'C':['C0','C2','C3'],'D':['D0','D2','D3']},
index=['K0','K2','K3'])print(left)print(right)
A B
K0 A0 B0
K1 A1 B1
K2 A2 B2
C D
K0 C0 D0
K2 C2 D2
K3 C3 D3
res = pd.merge(left,right,left_index=True,right_index=True,how='outer')
res
A
B
C
D
K0
A0
B0
C0
D0
K1
A1
B1
NaN
NaN
K2
A2
B2
C2
D2
K3
NaN
NaN
C3
D3
boys = pd.DataFrame({'k':['K0','K1','K2'],'age':[1,2,3]})
girls = pd.DataFrame({'k':['K0','K0','K3'],'age':[4,5,6]})print(boys)print(girls)
age k
0 1 K0
1 2 K1
2 3 K2
age k
0 4 K0
1 5 K0
2 6 K3
res = pd.merge(boys,girls,on='k',suffixes=['_boy','_girl'],how='outer')
res