import pandas as pd
'''
pd.merge(
    left, right, how='inner', on=None, left_on=None, right_on=None,
    left_index=False, right_index=False, sort=False,
    suffixes=('_x', '_y'), copy=True, indicator=False,
    validate=None
)
left, right: the two DataFrames to merge (the left and the right data set).
how: the join type, e.g. 'inner' or 'outer'.
on: the column name(s) to join on; they must exist in both data sets.
left_on: the join column(s) taken from the left data set.
right_on: the join column(s) taken from the right data set.
suffixes: when both data sets contain a column with the same name, these
    suffixes are appended to tell the left and right copies apart.
'''
# Inner join on a single shared column: only the keys present in BOTH frames
# (1 and 2) survive; key 3 exists only in df2 and is dropped.
df1 = pd.DataFrame({'x': [1, 2], 'y': [1, 2], 'key': [1, 2]})
df2 = pd.DataFrame({'a': [2, 3, 4], 'b': [3, 2, 4], 'key': [1, 2, 3]})
inner_joined = pd.merge(df1, df2, on='key')
print(inner_joined)
# Expected output:
#    x  y  key  a  b
# 0  1  1    1  2  3
# 1  2  2    2  3  2
# Outer join: every key from either frame is kept. Key 3 has no match in df1,
# so its 'x' and 'y' cells are filled with NaN (which turns those columns
# into floats).
df1 = pd.DataFrame({'x': [1, 2], 'y': [1, 2], 'key': [1, 2]})
df2 = pd.DataFrame({'a': [2, 3, 4], 'b': [3, 2, 4], 'key': [1, 2, 3]})
outer_joined = pd.merge(df1, df2, how='outer', on='key')
print(outer_joined)
# Expected output:
#      x    y  key  a  b
# 0  1.0  1.0    1  2  3
# 1  2.0  2.0    2  3  2
# 2  NaN  NaN    3  4  4
# Inner join on several columns at once: a row matches only when BOTH
# 'key1' and 'key2' agree between the two frames.
df3 = pd.DataFrame({'x': [1, 2], 'y': [1, 2], 'key1': [1, 2], 'key2': [1, 2]})
df4 = pd.DataFrame(
    {'a': [2, 3, 4], 'b': [3, 2, 4], 'key1': [1, 2, 3], 'key2': [1, 2, 3]}
)
multi_key_joined = pd.merge(df3, df4, how='inner', on=['key1', 'key2'])
print(multi_key_joined)
# Expected output:
#    x  y  key1  key2  a  b
# 0  1  1     1     1  2  3
# 1  2  2     2     2  3  2
# Outer join with indicator=True: pandas appends a '_merge' column telling
# where each row came from ('both', 'left_only' or 'right_only').
# x == 4 exists only in df6, so its 'y' is NaN and '_merge' is 'right_only'.
df5 = pd.DataFrame({'x': [1, 2], 'y': [1, 2]})
df6 = pd.DataFrame({'x': [1, 2, 4], 'b': [3, 2, 4]})
flagged_join = pd.merge(df5, df6, how='outer', on='x', indicator=True)
print(flagged_join)
# Expected output:
#    x    y  b      _merge
# 0  1  1.0  3        both
# 1  2  2.0  2        both
# 2  4  NaN  4  right_only