关于Pandas的基本用法_06

import numpy as np
import pandas as pd
# Two small frames that share one join column, `key`, with identical values.
left = pd.DataFrame({
    'key': [f'k{i}' for i in range(4)],
    'A': [f'A{i}' for i in range(4)],
    'B': [f'B{i}' for i in range(4)],
})
right = pd.DataFrame({
    'key': [f'k{i}' for i in range(4)],
    'C': [f'C{i}' for i in range(4)],
    'D': [f'D{i}' for i in range(4)],
})
# Merge the two frames by matching rows on the `key` column.
res = left.merge(right, on='key')
# Frames with two key columns; the `key2` values differ in the second row
# (left has 'k0', right has 'k1'), so that row has no match across frames.
left = pd.DataFrame(dict(
    key1=['k0', 'k1', 'k2', 'k3'],
    key2=['k0', 'k0', 'k1', 'k2'],
    A=['A0', 'A1', 'A2', 'A3'],
    B=['B0', 'B1', 'B2', 'B3'],
))
right = pd.DataFrame(dict(
    key1=['k0', 'k1', 'k2', 'k3'],
    key2=['k0', 'k1', 'k1', 'k2'],
    C=['C0', 'C1', 'C2', 'C3'],
    D=['D0', 'D1', 'D2', 'D3'],
))
# Merge on both key columns; with no `how` given, pandas performs an inner
# join, so only rows whose (key1, key2) pair exists in both frames survive.
join_columns = ['key1', 'key2']
res = left.merge(right, on=join_columns)
for frame in (left, right, res):
    print(frame)
'''
  key1 key2   A   B
0   k0   k0  A0  B0
1   k1   k0  A1  B1
2   k2   k1  A2  B2
3   k3   k2  A3  B3
  key1 key2   C   D
0   k0   k0  C0  D0
1   k1   k1  C1  D1
2   k2   k1  C2  D2
3   k3   k2  C3  D3
  key1 key2   A   B   C   D
0   k0   k0  A0  B0  C0  D0
1   k2   k1  A2  B2  C2  D2
2   k3   k2  A3  B3  C3  D3

'''
# Outer join on both key columns: rows without a partner in the other frame
# are kept, and the columns they lack are filled with NaN.
res = left.merge(right, how='outer', on=['key1', 'key2'])
print(res)
'''
  key1 key2    A    B    C    D
0   k0   k0   A0   B0   C0   D0
1   k1   k0   A1   B1  NaN  NaN
2   k2   k1   A2   B2   C2   D2
3   k3   k2   A3   B3   C3   D3
4   k1   k1  NaN  NaN   C1   D1

Process finished with exit code 0
'''
# Right join on both key columns: every row of `right` is kept, and the
# left-hand columns are NaN where `left` has no matching key pair.
res = left.merge(right, how='right', on=['key1', 'key2'])
print(res)
'''
   key1 key2   A   B
 0   k0   k0  A0  B0
 1   k1   k0  A1  B1
 2   k2   k1  A2  B2
 3   k3   k2  A3  B3
   key1 key2   C   D
 0   k0   k0  C0  D0
 1   k1   k1  C1  D1
 2   k2   k1  C2  D2
 3   k3   k2  C3  D3
  key1 key2    A    B   C   D
0   k0   k0   A0   B0  C0  D0
1   k1   k1  NaN  NaN  C1  D1
2   k2   k1   A2   B2  C2  D2
3   k3   k2   A3   B3  C3  D3
'''
# The `how` parameter selects the join type (e.g. 'inner', 'outer', 'right').
# `indicator=True` (the default is False) appends a `_merge` column telling
# where each row came from: 'both', 'left_only' or 'right_only'.
# No `on` is given here, so the merge uses the columns common to both frames.
res = left.merge(right, indicator=True, how='outer')
print(res)
'''
  key1 key2    A    B    C    D      _merge
0   k0   k0   A0   B0   C0   D0        both
1   k1   k0   A1   B1  NaN  NaN   left_only
2   k2   k1   A2   B2   C2   D2        both
3   k3   k2   A3   B3   C3   D3        both
4   k1   k1  NaN  NaN   C1   D1  right_only
'''
# Passing a string as `indicator` uses that string as the name of the
# row-source column instead of the default `_merge`.
res = pd.merge(left, right, how="outer", indicator='indictaor_column')
# Print the result so the output recorded in the string below is actually
# produced; previously `res` was overwritten later without ever being shown.
print(res)
'''
  key1 key2    A    B    C    D indictaor_column
0   k0   k0   A0   B0   C0   D0             both
1   k1   k0   A1   B1  NaN  NaN        left_only
2   k2   k1   A2   B2   C2   D2             both
3   k3   k2   A3   B3   C3   D3             both
4   k1   k1  NaN  NaN   C1   D1       right_only
'''
# Frames without a key column: the row labels k0..k3 live in the index.
left = pd.DataFrame(
    {
        'A': [f'A{i}' for i in range(4)],
        'B': [f'B{i}' for i in range(4)],
    },
    index=[f'k{i}' for i in range(4)],
)
right = pd.DataFrame(
    {
        'C': [f'C{i}' for i in range(4)],
        'D': [f'D{i}' for i in range(4)],
    },
    index=[f'k{i}' for i in range(4)],
)
for frame in (left, right):
    print(frame)
# Join on the index of both frames rather than on a column.
res = left.merge(right, how='outer', left_index=True, right_index=True)
print(res)
'''
     A   B
k0  A0  B0
k1  A1  B1
k2  A2  B2
k3  A3  B3
     C   D
k0  C0  D0
k1  C1  D1
k2  C2  D2
k3  C3  D3
     A   B   C   D
k0  A0  B0  C0  D0
k1  A1  B1  C1  D1
k2  A2  B2  C2  D2
k3  A3  B3  C3  D3'''
# The example above merges on the index instead of on a key column.
# Both frames carry an `Age` column, so the merged result needs distinct names.
boys = pd.DataFrame(dict(
    key=['k0', 'k1', 'k2', 'k3'],
    Age=list(range(1, 5)),
))
girls = pd.DataFrame(dict(
    key=['k0', 'k1', 'k2', 'k3'],
    Age=list(range(5, 9)),
))
# `suffixes` is appended to the overlapping column names of the left and
# right frame respectively, giving `Age_boy` and `Age_girl`.
res = boys.merge(girls, how='inner', on='key', suffixes=('_boy', '_girl'))
print(res)
'''
  key  Age_boy  Age_girl
0  k0        1         5
1  k1        2         6
2  k2        3         7
3  k3        4         8
'''

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值