pandas27 merge(数据库风格合并left_index, right_index参数实例)( tcy)

# 实例2.1:加入索引left_index, right_index
# join()将两个不同索引列组合
    
left = pd.DataFrame({'A': [ 'A1', 'A2'],  'B': [ 'B1', 'B2']}, index=['K1', 'K2'])
right = pd.DataFrame({'C': [ 'C2', 'C3'], 'D': [ 'D2', 'D3']}, index=['K2', 'K3'])
    
result1=result = left.join(right)       #等效下面
result1=pd.merge(left, right, left_index=True, right_index=True, how='left')
result2=left.join(right, how='outer')   #等效下面
result2=pd.merge(left, right, left_index=True, right_index=True, how='outer')
result3=left.join(right, how='inner')   #等效下面
result3=pd.merge(left, right, left_index=True, right_index=True, how='inner')
    
# left           #result1:how=left
                     A    B   C     D
    A    B       K1  A1  B1   NaN NaN
K1  A1  B1       K2  A2  B2   C2   D2
K2  A2  B2     #result2:how='outer'
                       A    B    C    D
right            K1   A1   B1  NaN  NaN
                 K2   A2   B2   C2   D2
    C    D       K3  NaN  NaN   C3   D3
K2  C2  D2      #result3: how='inner'
K3  C3  D3             A    B   C     D
                 K2   A2   B2   C2   D2

    
============================================================
# 实例2.2:连接索引上的键列left_on='key', right_index=True
left = pd.DataFrame({'A': ['A0', 'A1', 'A2', 'A3'], 'B': ['B0', 'B1', 'B2', 'B3'],'key': ['K0', 'K1', 'K0', 'K1']})
right = pd.DataFrame({'C': ['C0', 'C1'], 'D': ['D0', 'D1']}, index=['K0', 'K1'])
    
left.join(right, on='key')
pd.merge(left, right, left_on='key', right_index=True,how='left', sort=False)
    
left
    A   B  key
0  A0  B0  K0         A    B key   C   D
1  A1  B1  K1      0  A0  B0  K0  C0  D0
2  A2  B2  K0      1  A1  B1  K1  C1  D1
3  A3  B3  K1      2  A2  B2  K0  C0  D0
right              3  A3  B3  K1  C1  D1
    C    D
K0  C0  D0
K1  C1  D1
    
============================================================
# 实例2.3:要连接多个键,传递的DataFrame必须具有MultiIndex:

left = pd.DataFrame({'key1': ['K0', 'K0', 'K1', 'K2'], 'key2': ['K0', 'K1', 'K0', 'K1'],
                                  'A': ['A0', 'A1', 'A2', 'A3'],'B': ['B0', 'B1', 'B2', 'B3']})
index = pd.MultiIndex.from_tuples([('K0', 'K0'), ('K1', 'K0'), ('K2', 'K0'), ('K2', 'K1')])
right = pd.DataFrame({'C': ['C0', 'C1', 'C2', 'C3'], 'D': ['D0', 'D1', 'D2', 'D3']}, index=index)
    
result = left.join(right, on=['key1', 'key2'])#等价下面
result = pd.merge(left, right, left_on=['key1','key2'], right_index=True,how='left', sort=False)
    
# left                   right                     result
  key1 key2   A   B              C   D          A   B key1 key2    C   D
0   K0   K0  A0  B0      K0 K0  C0  D0      0  A0  B0   K0   K0   C0   D0
1   K0   K1  A1  B1      K1 K0  C1  D1      1  A1  B1   K0   K1  NaN   NaN
2   K1   K0  A2  B2      K2 K0  C2  D2      2  A2  B2   K1   K0   C1   D1
3   K2   K1  A3  B3         K1  C3  D3      3  A3  B3   K2   K1   C3   D3
    
result = left.join(right, on=['key1', 'key2'], how='inner')#等价下面
result = pd.merge(left, right, left_on=['key1','key2'],right_index=True,how='inner', sort=False)
    
# left                   right              result
  key1 key2   A   B              C   D         A   B key1 key2   C   D
0   K0   K0  A0  B0      K0 K0  C0  D0      0  A0  B0   K0   K0  C0  D0
1   K0   K1  A1  B1      K1 K0  C1  D1      2  A2  B2   K1   K0  C1  D1
2   K1   K0  A2  B2      K2 K0  C2  D2      3  A3  B3   K2   K1  C3  D3
3   K2   K1  A3  B3         K1  C3  D3
    
============================================================
# 实例2.4:将单个索引连接到多索引
left = pd.DataFrame({'A': ['A0', 'A1', 'A2'],'B': ['B0', 'B1', 'B2']},
                    index=pd.Index(['K0', 'K1', 'K2'], name='key'))
index = pd.MultiIndex.from_tuples([('K0', 'Y0'), ('K1', 'Y1'),('K2', 'Y2'), ('K2', 'Y3')],names=['key', 'Y'])
right = pd.DataFrame({'C': ['C0', 'C1', 'C2', 'C3'], 'D': ['D0', 'D1', 'D2', 'D3']},index=index)
    
result = left.join(right, how='inner')                            #等价下面
result = pd.merge(left, right, left_index=True,right_index=True,
                             how='inner', sort=False)             #运行时间最短
result =pd.merge(left.reset_index(), right.reset_index(),on=['key'],
                            how='inner').set_index(['key','Y'])   #内存效率更高/更快
    
# left           right               result
      A   B               C   D              A   B   C   D
key              key Y               key Y
K0   A0  B0      K0  Y0  C0  D0      K0  Y0  A0  B0  C0  D0
K1   A1  B1      K1  Y1  C1  D1      K1  Y1  A1  B1  C1  D1
K2   A2  B2      K2  Y2  C2  D2      K2  Y2  A2  B2  C2  D2
                     Y3  C3  D3          Y3  A2  B2  C3  D3
    
============================================================
# 实例2.5:加入两个多索引
index = pd.MultiIndex.from_tuples([('K0', 'X0'), ('K0', 'X1'), ('K1', 'X2')],names=['key', 'X'])
left = pd.DataFrame({'A': ['A0', 'A1', 'A2'], 'B': ['B0', 'B1', 'B2']},index=index)
    
result = pd.merge(left.reset_index(), right.reset_index(),
                            on=['key'], how='inner').set_index(['key','X','Y'])
result1= pd.merge(left, right, on=['key'], how='inner')#注意和上面索引的区别
    
# left              right               result                        result1
                                              
key X               key Y                          A    B   C   D          A   B   C   D        
        A    B               C   D
K0  X0  A0  B0      K0  Y0  C0  D0      key X  Y                      key
    X1  A1  B1      K1  Y1  C1  D1      K0  X0 Y0  A0  B0  C0  D0     K0   A0  B0  C0  D0
K1  X2  A2  B2      K2  Y2  C2  D2          X1 Y0  A1  B1  C0  D0     K0   A1  B1  C0  D0
                        Y3  C3  D3      K1  X2 Y1  A2  B2  C1  D1     K1   A2  B2  C1  D1

 

评论 2
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值