# -*- coding: utf-8 -*-
import pandas as pd
from pandas import Series,DataFrame
import numpy as np
# Section: concatenation along an axis (NumPy warm-up).
# The bare string below is a no-op expression statement used as a heading.
'''
轴向连接
'''
arr = np.arange(12).reshape((3, 4))
print(arr)

# axis=1 glues the two copies side by side: 3 rows x 8 columns.
print(np.concatenate((arr, arr), axis=1))
# [[ 0  1  2  3  0  1  2  3]
#  [ 4  5  6  7  4  5  6  7]
#  [ 8  9 10 11  8  9 10 11]]

# axis=0 (NumPy's default) stacks the copies vertically: 6 rows x 4 columns.
print(np.concatenate((arr, arr), axis=0))
# [[ 0  1  2  3]
#  [ 4  5  6  7]
#  [ 8  9 10 11]
#  [ 0  1  2  3]
#  [ 4  5  6  7]
#  [ 8  9 10 11]]
# Concatenating Series along an axis.
s1 = Series([1, 2], index=['a', 'b'])
s2 = Series([2, 3], index=['c', 'd'])
s3 = Series([4, 5], index=['e', 'f'])

# Default axis=0: the Series are chained end to end into one longer Series.
print(pd.concat([s1, s2, s3]))
# a    1
# b    2
# c    2
# d    3
# e    4
# f    5
# dtype: int64

# axis=1: every Series becomes a column; labels a Series lacks show up as NaN.
print(pd.concat([s1, s2, s3], axis=1))
#      0    1    2
# a  1.0  NaN  NaN
# b  2.0  NaN  NaN
# c  NaN  2.0  NaN
# d  NaN  3.0  NaN
# e  NaN  NaN  4.0
# f  NaN  NaN  5.0

# s4 shares the labels 'a' and 'b' with s1, which makes the join type visible.
s4 = pd.concat([s1 * 5, s3])
print(s4)
# a     5
# b    10
# e     4
# f     5
# dtype: int64

# Default join: the result index is the union of both indexes.
print(pd.concat([s1, s4], axis=1))
#      0   1
# a  1.0   5
# b  2.0  10
# e  NaN   4
# f  NaN   5

# join='inner': only the labels present in both inputs (the intersection) remain.
print(pd.concat([s1, s4], axis=1, join='inner'))
#    0   1
# a  1   5
# b  2  10

# keys= adds an outer index level that records which input each row came from.
result = pd.concat([s1, s1, s3], keys=['one', 'two', 'three'])
print(result)
# one    a    1
#        b    2
# two    a    1
#        b    2
# three  e    4
#        f    5
# dtype: int64

# unstack() moves the inner index level into the columns.
print(result.unstack())
#          a    b    e    f
# one    1.0  2.0  NaN  NaN
# two    1.0  2.0  NaN  NaN
# three  NaN  NaN  4.0  5.0

# Heading (no-op bare string): when concatenating along axis=1 the keys
# become the column labels.
'''
合并以列开头
'''
print(pd.concat([s1, s2, s3], axis=1, keys=['one', 'two', 'three']))
#    one  two  three
# a  1.0  NaN    NaN
# b  2.0  NaN    NaN
# c  NaN  2.0    NaN
# d  NaN  3.0    NaN
# e  NaN  NaN    4.0
# f  NaN  NaN    5.0
# Section: concatenating DataFrames (heading kept as a no-op bare string).
'''
DataFrame合并
'''
df1 = DataFrame(
    np.arange(6).reshape(3, 2),
    index=['a', 'b', 'c'],
    columns=['one', 'two'],
)
print(df1)
#    one  two
# a    0    1
# b    2    3
# c    4    5

df2 = DataFrame(
    np.arange(5, 9).reshape(2, 2),
    index=['a', 'c'],
    columns=['three', 'fore'],
)

# Default axis=0: rows are stacked and the columns are unioned, so nothing
# lines up and every row is half NaN.
print(pd.concat([df1, df2]))
# Output recorded in the original notes (an older pandas that sorted the
# unioned column labels alphabetically):
#    fore  one  three  two
# a   NaN  0.0    NaN  1.0
# b   NaN  2.0    NaN  3.0
# c   NaN  4.0    NaN  5.0
# a   6.0  NaN    5.0  NaN
# c   8.0  NaN    7.0  NaN
# pandas >= 1.0 no longer sorts by default, so there the columns appear in
# the order one, two, three, fore.

# axis=1: the frames are placed side by side, aligned on the row index.
print(pd.concat([df1, df2], axis=1))
#    one  two  three  fore
# a    0    1    5.0   6.0
# b    2    3    NaN   NaN
# c    4    5    7.0   8.0

# keys= adds an outer column level identifying the source frame.
print(pd.concat([df1, df2], axis=1, keys=['level1', 'level2']))
#   level1     level2
#      one two  three fore
# a      0   1    5.0  6.0
# b      2   3    NaN  NaN
# c      4   5    7.0  8.0

# names= labels the levels of the resulting hierarchical column index.
print(
    pd.concat(
        [df1, df2],
        axis=1,
        keys=['level1', 'level2'],
        names=['upper', 'lower'],
    )
)
# upper level1     level2
# lower    one two  three fore
# a          0   1    5.0  6.0
# b          2   3    NaN  NaN
# c          4   5    7.0  8.0
# Handling the row index: both frames carry a default integer index, so a
# plain concat repeats the labels 0 and 1.
# The numbers shown below are one random sample; they differ on every run.
df1 = DataFrame(np.random.randn(3, 4), columns=['a', 'b', 'c', 'd'])
df2 = DataFrame(np.random.randn(2, 3), columns=['b', 'd', 'a'])

print(pd.concat([df1, df2]))
#           a         b         c         d
# 0 -0.333004 -0.897424 -1.217519  0.490918
# 1  1.021084 -0.425976 -0.486051  0.431409
# 2  0.380350 -1.534331  0.577973 -0.023094
# 0  0.529469 -1.242712       NaN  0.246182
# 1  0.359058 -2.106655       NaN -0.730740

# ignore_index=True drops the original row labels and renumbers from 0.
print(pd.concat([df1, df2], ignore_index=True))
#           a         b         c         d
# 0 -0.333004 -0.897424 -1.217519  0.490918
# 1  1.021084 -0.425976 -0.486051  0.431409
# 2  0.380350 -1.534331  0.577973 -0.023094
# 3  0.529469 -1.242712       NaN  0.246182
# 4  0.359058 -2.106655       NaN -0.730740
# Python data analysis, part 13: axis-wise concatenation of pandas matrices (concat explained)
# Latest recommended article published on 2023-02-13 07:00:00