import pandas as pd
# Left table: seven rows; keys 'a' and 'b' repeat, 'c' has no partner in df2.
df1 = pd.DataFrame({
    'data1': range(7),
    'key': ['b', 'b', 'a', 'c', 'a', 'a', 'b'],
})
# Right table: one row per key; 'd' has no partner in df1.
df2 = pd.DataFrame({
    'data2': range(3),
    'key': ['a', 'b', 'd'],
})
df1
Out[36]:
data1 key
0 0 b
1 1 b
2 2 a
3 3 c
4 4 a
5 5 a
6 6 b
df2
Out[37]:
data2 key
0 0 a
1 1 b
2 2 d
内连接(默认 how='inner'):按两表共有的列 key 取交集合并;只有当同一个键在两表中都重复出现时,该键对应的行才会做笛卡尔乘积
# Inner join (the default) on the only column both frames share, 'key'.
a = df1.merge(df2, how='inner', on='key')
print(a)
# NOTE: left_on=/right_on= are for key columns with different names in each
# frame; df1 and df2 have no 'key1'/'key2' columns, so that form does not apply here.
data1 key data2
0 0 b 1
1 1 b 1
2 6 b 1
3 2 a 0
4 4 a 0
5 5 a 0
外连接(how='outer'):两个表中的键全部保留,另一侧没有匹配的位置用 NaN 填充
# Outer join on 'key': keep every key from both frames; unmatched cells become NaN.
c = df1.merge(df2, how='outer', on='key')
print(c)
data1 key data2
0 0.0 b 1.0
1 1.0 b 1.0
2 6.0 b 1.0
3 2.0 a 0.0
4 4.0 a 0.0
5 5.0 a 0.0
6 3.0 c NaN
7 NaN d 2.0
# df5 is displayed and merged below but was never defined in this file;
# this definition is reconstructed from the Out[40] display that follows.
df5 = pd.DataFrame({'data1': range(6), 'key': list('bbacab')})
df5
Out[40]:
data1 key
0 0 b
1 1 b
2 2 a
3 3 c
4 4 a
5 5 b
# df6 is displayed and merged below but was never defined in this file;
# this definition is reconstructed from the Out[41] display that follows.
df6 = pd.DataFrame({'data2': range(5), 'key': list('ababd')})
df6
Out[41]:
data2 key
0 0 a
1 1 b
2 2 a
3 3 b
4 4 d
左连接(how='left'):以左表 df5 的键为基准对齐,右表中没有匹配的键用 NaN 填充;同一个键在两表中都重复时,对应的行做笛卡尔乘积
# Left join on 'key': every df5 row is kept; keys absent from df6 get NaN in data2.
d = df5.merge(df6, how='left', on='key')
print(d)
data1 key data2
0 0 b 1.0
1 0 b 3.0
2 1 b 1.0
3 1 b 3.0
4 2 a 0.0
5 2 a 2.0
6 3 c NaN
7 4 a 0.0
8 4 a 2.0
9 5 b 1.0
10 5 b 3.0
# Default inner join on the shared 'key' column; keys repeated in both frames
# yield one output row per (left row, right row) pairing.
e = df5.merge(df6, how='inner', on='key')
print(e)
data1 key data2
0 0 b 1
1 0 b 3
2 1 b 1
3 1 b 3
4 5 b 1
5 5 b 3
6 2 a 0
7 2 a 2
8 4 a 0
9 4 a 2