Pandas中的merge语法_12

在这里插入图片描述

import numpy as np
import pandas as pd

"""
本节主要介绍pandas中的merge语法
使用的是知名的电影数据集,ratings.dat,users.dat,movies.dat
"""
# Load the three '::'-delimited data files. A multi-character separator is
# not supported by the default C parser, hence engine='python'. The files
# carry no header row, so the column names are supplied explicitly.
# NOTE(review): the introductory note in this file calls the ratings file
# 'ratings.dat' while the path below is './rating.dat' -- confirm which name
# actually exists on disk.
df_rating = pd.read_csv(
    './rating.dat',
    sep='::',
    engine='python',
    names=['UserID', 'MovieID', 'Rating', 'Timestamp'],
)

df_users = pd.read_csv(
    './users.dat',
    sep='::',
    engine='python',
    names=['UserID', 'Gender', 'Age', 'Occupation', 'Zip-code'],
)

df_movies = pd.read_csv(
    './movies.dat',
    sep='::',
    engine='python',
    names=['MovieID', 'Title', 'Genres'],
)

# Attach the user attributes to every rating (inner join on UserID).
df_rating_users = pd.merge(df_rating, df_users, on='UserID', how='inner')

# Attach the movie attributes as well (inner join on MovieID).
# The original code passed right_on='MovidID', a typo that raises KeyError
# because df_movies has no such column.
df_rating_users_movies = pd.merge(
    df_rating_users,
    df_movies,
    on='MovieID',
    how='inner',
)

# Understanding how row counts line up in a merge. Three cases:
#   one_to_one
#   one_to_many
#   many_to_many

# 1. one_to_one: each 'sno' value occurs once in the left frame and once in
#    the right frame.
left = pd.DataFrame(
    data=dict(
        sno=[1, 2, 3, 4],
        name=['name_a', 'name_b', 'name_c', 'name_d'],
    )
)

right = pd.DataFrame(
    data=dict(
        sno=[2, 3, 4, 5],
        age=['11', '12', '13', '14'],
    )
)

# Default join type (inner) on the shared 'sno' column.
one_to_one = left.merge(right, on='sno')

# 2. one_to_many: 'sno' values occur once in the left frame, while some of
#    them occur twice in the right frame.
left = pd.DataFrame(
    data=dict(
        sno=[1, 2, 3, 4],
        name=['name_a', 'name_b', 'name_c', 'name_d'],
    )
)

right = pd.DataFrame(
    data=dict(
        sno=[2, 2, 3, 3, 4, 5],
        age=['11', '12', '15', '12', '13', '14'],
    )
)

# Default join type (inner) on the shared 'sno' column.
one_to_many = left.merge(right, on='sno')

# 3. many_to_many: some 'sno' values are repeated in both frames, so each
#    matching left row is paired with each matching right row.
left = pd.DataFrame(
    data=dict(
        sno=[1, 2, 2, 3, 3, 4],
        name=['name_a', 'name_b', 'name_b1', 'name_c', 'name_c1', 'name_d'],
    )
)

right = pd.DataFrame(
    data=dict(
        sno=[2, 2, 3, 3, 4, 5],
        age=['11', '12', '15', '12', '13', '14'],
    )
)

# Default join type (inner) on the shared 'sno' column.
many_to_many = left.merge(right, on='sno')

# Understanding left join, right join, inner join and outer join.
# No `on=` is given below, so pandas joins on the columns the two frames
# have in common -- here that is only 'key'.
left = pd.DataFrame(
    data=dict(
        key=['k0', 'k1', 'k2', 'k3', 'k4'],
        A=['A0', 'A1', 'A2', 'A3', 'A4'],
        B=['B0', 'B1', 'B2', 'B3', 'B4'],
    )
)

right = pd.DataFrame(
    data=dict(
        key=['k0', 'k1', 'k2', 'k7', 'k8'],
        C=['C0', 'C1', 'C2', 'C3', 'C4'],
        D=['D0', 'D1', 'D2', 'D3', 'D4'],
    )
)

# Inner join: a key appears in the result only if both sides have it.
inner_join = left.merge(right, how='inner')

# Left join: every left row is kept; unmatched right columns become null.
left_join = left.merge(right, how='left')

# Right join: every right row is kept; unmatched left columns become null.
right_join = left.merge(right, how='right')

# Outer join: rows from both sides are kept; whatever cannot be matched
# is filled with null.
outer_join = left.merge(right, how='outer')



# What happens when a non-key column has the same name in both frames:
# here both frames carry a column called 'A' besides the join key.
left = pd.DataFrame(
    data=dict(
        key=['k0', 'k1', 'k2', 'k3', 'k4'],
        A=['A0', 'A1', 'A2', 'A3', 'A4'],
        B=['B0', 'B1', 'B2', 'B3', 'B4'],
    )
)

right = pd.DataFrame(
    data=dict(
        key=['k0', 'k1', 'k2', 'k7', 'k8'],
        A=['C10', 'C11', 'C12', 'C13', 'C14'],
        D=['D0', 'D1', 'D2', 'D3', 'D4'],
    )
)

# Without `suffixes`, pandas disambiguates the clashing column with its
# default suffixes.
value_1 = left.merge(right, on='key')

# With explicit suffixes the clashing columns are named A_left / A_right.
value_2 = left.merge(right, on='key', suffixes=('_left', '_right'))

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值