合并数据

1、concat合并数据
import numpy as np
import pandas as pd

df1 = pd.DataFrame(np.ones((3,4))*1, columns=['A','B','C','D'])
df2 = pd.DataFrame(np.ones((3,4))*2, columns=['A','B','C','D'])
df3 = pd.DataFrame(np.ones((3,4))*3, columns=['A','B','C','D'])
print(df1)
print(df2)
print(df3)
     A    B    C    D
0  1.0  1.0  1.0  1.0
1  1.0  1.0  1.0  1.0
2  1.0  1.0  1.0  1.0
     A    B    C    D
0  2.0  2.0  2.0  2.0
1  2.0  2.0  2.0  2.0
2  2.0  2.0  2.0  2.0
     A    B    C    D
0  3.0  3.0  3.0  3.0
1  3.0  3.0  3.0  3.0
2  3.0  3.0  3.0  3.0
pd.concat([df1,df2,df3], axis=0, ignore_index=True)  #注意添加[],axis=0表示竖向,忽略原索引,会产生新索引
.dataframe thead tr:only-child th { text-align: right; } .dataframe thead th { text-align: left; } .dataframe tbody tr th { vertical-align: top; }
ABCD
01.01.01.01.0
11.01.01.01.0
21.01.01.01.0
32.02.02.02.0
42.02.02.02.0
52.02.02.02.0
63.03.03.03.0
73.03.03.03.0
83.03.03.03.0
df4 = pd.DataFrame(np.ones((3,4))*1, columns=['A','B','C','D'])
df5 = pd.DataFrame(np.ones((3,4))*2, columns=['B','C','D','E'])
print(df4)
print(df5)
     A    B    C    D
0  1.0  1.0  1.0  1.0
1  1.0  1.0  1.0  1.0
2  1.0  1.0  1.0  1.0
     B    C    D    E
0  2.0  2.0  2.0  2.0
1  2.0  2.0  2.0  2.0
2  2.0  2.0  2.0  2.0
pd.concat([df4,df5], join='outer', ignore_index=True)  #join的inner参数取交集,outer取并集
.dataframe thead tr:only-child th { text-align: right; } .dataframe thead th { text-align: left; } .dataframe tbody tr th { vertical-align: top; }
ABCDE
01.01.01.01.0NaN
11.01.01.01.0NaN
21.01.01.01.0NaN
3NaN2.02.02.02.0
4NaN2.02.02.02.0
5NaN2.02.02.02.0
df6 = pd.DataFrame(np.ones((3,4))*1, columns=['A','B','C','D'], index=[1,2,3])
df7 = pd.DataFrame(np.ones((3,4))*2, columns=['B','C','D','E'], index=[2,3,4])
pd.concat([df6,df7], axis=1, join_axes=[df6.index])  #若不加join_axes,行索引会有1,2,3,4
.dataframe thead tr:only-child th { text-align: right; } .dataframe thead th { text-align: left; } .dataframe tbody tr th { vertical-align: top; }
ABCDBCDE
11.01.01.01.0NaNNaNNaNNaN
21.01.01.01.02.02.02.02.0
31.01.01.01.02.02.02.02.0

2、append合并数据

df8 = pd.DataFrame(np.ones((3,4))*1, columns=['A','B','C','D'])
s1 = pd.Series([1,2,3,4], index=['A','B','C','D'])
df8.append(s1, ignore_index=True)   #相当于在一个表里添加一条记录
.dataframe thead tr:only-child th { text-align: right; } .dataframe thead th { text-align: left; } .dataframe tbody tr th { vertical-align: top; }
ABCD
01.01.01.01.0
11.01.01.01.0
21.01.01.01.0
31.02.03.04.0
df9 = pd.DataFrame(np.ones((3,4))*1, columns=['A','B','C','D'])
df10 = pd.DataFrame(np.ones((3,4))*2, columns=['A','B','C','D'])
df9.append(df10, ignore_index=True)  #合并两个表结构相同的表
.dataframe thead tr:only-child th { text-align: right; } .dataframe thead th { text-align: left; } .dataframe tbody tr th { vertical-align: top; }
ABCD
01.01.01.01.0
11.01.01.01.0
21.01.01.01.0
32.02.02.02.0
42.02.02.02.0
52.02.02.02.0

3、merge合并数据(更复杂)

left = pd.DataFrame({'key1':['K0','K0','K1','K2'],
                    'key2':['K0','K1','K0','K1'],
                    'A':['A0','A1','A2','A3'],
                    'B':['B0','B1','B2','B3']})
right = pd.DataFrame({'key1':['K0','K1','K1','K2'],
                    'key2':['K0','K0','K0','K0'],
                    'C':['C0','C1','C2','C3'],
                    'D':['D0','D1','D2','D3']})
print(left)
print(right)
    A   B key1 key2
0  A0  B0   K0   K0
1  A1  B1   K0   K1
2  A2  B2   K1   K0
3  A3  B3   K2   K1
    C   D key1 key2
0  C0  D0   K0   K0
1  C1  D1   K1   K0
2  C2  D2   K1   K0
3  C3  D3   K2   K0
pd.merge(left, right, on=['key1','key2'], how='inner')  #取key1, key2组合的交集,outer取并集,left基于left数据的的key来合并,right同理
.dataframe thead tr:only-child th { text-align: right; } .dataframe thead th { text-align: left; } .dataframe tbody tr th { vertical-align: top; }
ABkey1key2CD
0A0B0K0K0C0D0
1A2B2K1K0C1D1
2A2B2K1K0C2D2
#有时DataFrame中的连接键位于其索引中,可以传入left_index=True或right_index=True, 表明索引应该被用作连接键。
left1 = pd.DataFrame({'key':['a','a','b','c'],
         'v1':range(4)})
right1 = pd.DataFrame({'v2':[9,8]}, index=['a','b'])
print(left1)
print(right1)
  key  v1
0   a   0
1   a   1
2   b   2
3   c   3
   v2
a   9
b   8
pd.merge(left1, right1, left_on='key', right_index=True) #表示合并两个数据,连接键以left1的key为主;但right1的索引也应该被作为连接键。
.dataframe thead tr:only-child th { text-align: right; } .dataframe thead th { text-align: left; } .dataframe tbody tr th { vertical-align: top; }
keyv1v2
0a09
1a19
2b28
#连接键重名的处理方式 
boys = pd.DataFrame({'k':['K0','K1','K2'],'age':[1,2,3]})
girls = pd.DataFrame({'k':['K0','K0','K3'],'age':[4,5,6]})
pd.merge(boys, girls, on='k', suffixes=['_boys','_girls'], how='outer')
.dataframe thead tr:only-child th { text-align: right; } .dataframe thead th { text-align: left; } .dataframe tbody tr th { vertical-align: top; }
age_boyskage_girls
01.0K04.0
11.0K05.0
22.0K1NaN
33.0K2NaN
4NaNK36.0
  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值