# 实例2.1:加入索引left_index, right_index
# join()将两个不同索引列组合
left = pd.DataFrame({'A': [ 'A1', 'A2'], 'B': [ 'B1', 'B2']}, index=['K1', 'K2'])
right = pd.DataFrame({'C': [ 'C2', 'C3'], 'D': [ 'D2', 'D3']}, index=['K2', 'K3'])
result1=result = left.join(right) #等效下面
result1=pd.merge(left, right, left_index=True, right_index=True, how='left')
result2=left.join(right, how='outer') #等效下面
result2=pd.merge(left, right, left_index=True, right_index=True, how='outer')
result3=left.join(right, how='inner') #等效下面
result3=pd.merge(left, right, left_index=True, right_index=True, how='inner')
# left #result1:how=left
A B C D
A B K1 A1 B1 NaN NaN
K1 A1 B1 K2 A2 B2 C2 D2
K2 A2 B2 #result2:how='outer'
A B C D
right K1 A1 B1 NaN NaN
K2 A2 B2 C2 D2
C D K3 NaN NaN C3 D3
K2 C2 D2 #result3: how='inner'
K3 C3 D3 A B C D
K2 A2 B2 C2 D2
============================================================
# 实例2.2:连接索引上的键列left_on='key', right_index=True
left = pd.DataFrame({'A': ['A0', 'A1', 'A2', 'A3'], 'B': ['B0', 'B1', 'B2', 'B3'],'key': ['K0', 'K1', 'K0', 'K1']})
right = pd.DataFrame({'C': ['C0', 'C1'], 'D': ['D0', 'D1']}, index=['K0', 'K1'])
left.join(right, on='key')
pd.merge(left, right, left_on='key', right_index=True,how='left', sort=False)
left
A B key
0 A0 B0 K0 A B key C D
1 A1 B1 K1 0 A0 B0 K0 C0 D0
2 A2 B2 K0 1 A1 B1 K1 C1 D1
3 A3 B3 K1 2 A2 B2 K0 C0 D0
right 3 A3 B3 K1 C1 D1
C D
K0 C0 D0
K1 C1 D1
============================================================
# 实例2.3:要连接多个键,传递的DataFrame必须具有MultiIndex:
left = pd.DataFrame({'key1': ['K0', 'K0', 'K1', 'K2'], 'key2': ['K0', 'K1', 'K0', 'K1'],
'A': ['A0', 'A1', 'A2', 'A3'],'B': ['B0', 'B1', 'B2', 'B3']})
index = pd.MultiIndex.from_tuples([('K0', 'K0'), ('K1', 'K0'), ('K2', 'K0'), ('K2', 'K1')])
right = pd.DataFrame({'C': ['C0', 'C1', 'C2', 'C3'], 'D': ['D0', 'D1', 'D2', 'D3']}, index=index)
result = left.join(right, on=['key1', 'key2'])#等价下面
result = pd.merge(left, right, left_on=['key1','key2'], right_index=True,how='left', sort=False)
# left right result
key1 key2 A B C D A B key1 key2 C D
0 K0 K0 A0 B0 K0 K0 C0 D0 0 A0 B0 K0 K0 C0 D0
1 K0 K1 A1 B1 K1 K0 C1 D1 1 A1 B1 K0 K1 NaN NaN
2 K1 K0 A2 B2 K2 K0 C2 D2 2 A2 B2 K1 K0 C1 D1
3 K2 K1 A3 B3 K1 C3 D3 3 A3 B3 K2 K1 C3 D3
result = left.join(right, on=['key1', 'key2'], how='inner')#等价下面
result = pd.merge(left, right, left_on=['key1','key2'],right_index=True,how='inner', sort=False)
# left right result
key1 key2 A B C D A B key1 key2 C D
0 K0 K0 A0 B0 K0 K0 C0 D0 0 A0 B0 K0 K0 C0 D0
1 K0 K1 A1 B1 K1 K0 C1 D1 2 A2 B2 K1 K0 C1 D1
2 K1 K0 A2 B2 K2 K0 C2 D2 3 A3 B3 K2 K1 C3 D3
3 K2 K1 A3 B3 K1 C3 D3
============================================================
# 实例2.4:将单个索引连接到多索引
left = pd.DataFrame({'A': ['A0', 'A1', 'A2'],'B': ['B0', 'B1', 'B2']},
index=pd.Index(['K0', 'K1', 'K2'], name='key'))
index = pd.MultiIndex.from_tuples([('K0', 'Y0'), ('K1', 'Y1'),('K2', 'Y2'), ('K2', 'Y3')],names=['key', 'Y'])
right = pd.DataFrame({'C': ['C0', 'C1', 'C2', 'C3'], 'D': ['D0', 'D1', 'D2', 'D3']},index=index)
result = left.join(right, how='inner') #等价下面
result = pd.merge(left, right, left_index=True,right_index=True,
how='inner', sort=False) #运行时间最短
result =pd.merge(left.reset_index(), right.reset_index(),on=['key'],
how='inner').set_index(['key','Y']) #内存效率更高/更快
# left right result
A B C D A B C D
key key Y key Y
K0 A0 B0 K0 Y0 C0 D0 K0 Y0 A0 B0 C0 D0
K1 A1 B1 K1 Y1 C1 D1 K1 Y1 A1 B1 C1 D1
K2 A2 B2 K2 Y2 C2 D2 K2 Y2 A2 B2 C2 D2
Y3 C3 D3 Y3 A2 B2 C3 D3
============================================================
# 实例2.5:加入两个多索引
index = pd.MultiIndex.from_tuples([('K0', 'X0'), ('K0', 'X1'), ('K1', 'X2')],names=['key', 'X'])
left = pd.DataFrame({'A': ['A0', 'A1', 'A2'], 'B': ['B0', 'B1', 'B2']},index=index)
result = pd.merge(left.reset_index(), right.reset_index(),
on=['key'], how='inner').set_index(['key','X','Y'])
result1= pd.merge(left, right, on=['key'], how='inner')#注意和上面索引的区别
# left right result result1
key X key Y A B C D A B C D
A B C D
K0 X0 A0 B0 K0 Y0 C0 D0 key X Y key
X1 A1 B1 K1 Y1 C1 D1 K0 X0 Y0 A0 B0 C0 D0 K0 A0 B0 C0 D0
K1 X2 A2 B2 K2 Y2 C2 D2 X1 Y0 A1 B1 C0 D0 K0 A1 B1 C0 D0
Y3 C3 D3 K1 X2 Y1 A2 B2 C1 D1 K1 A2 B2 C1 D1