import numpy as np
import pandas as pd
import matplotlib as mpl
# First sample frame: 4 rows (a-d) x 3 columns (A-C) of standard-normal draws.
df1 = pd.DataFrame(
    np.random.randn(4, 3),
    index=list("abcd"),
    columns=list("ABC"),
)
df1
A B C a 0.283456 0.605428 0.145223 b 1.782473 -1.043145 0.844106 c 0.236397 2.063387 -0.573410 d 0.204641 0.561990 1.709260
# Second sample frame: 3 rows (a, b, d) x 3 columns (C-E) of standard-normal
# draws.
df2 = pd.DataFrame(
    np.random.randn(3, 3),
    index=list("abd"),
    columns=list("CDE"),
)
df2
C D E a 0.044766 -1.580057 -0.512200 b -1.187690 0.870686 -1.442228 d 0.390094 0.222911 -0.807610
pd. concat(
objs: Union[ Iterable[ ~ FrameOrSeries] , Mapping[ collections. abc. Hashable, ~ FrameOrSeries] ] ,
axis= 0 ,
join= 'outer' ,
ignore_index: bool = False ,
keys= None ,
levels= None ,
names= None ,
verify_integrity: bool = False ,
sort: bool = False ,
copy: bool = True ,
)
# Stack the two frames row-wise. With the default outer join the result keeps
# the union of the columns and fills the missing cells with NaN.
pd.concat([df1, df2], axis=0)
A B C D E a 0.283456 0.605428 0.145223 NaN NaN b 1.782473 -1.043145 0.844106 NaN NaN c 0.236397 2.063387 -0.573410 NaN NaN d 0.204641 0.561990 1.709260 NaN NaN a NaN NaN 0.044766 -1.580057 -0.512200 b NaN NaN -1.187690 0.870686 -1.442228 d NaN NaN 0.390094 0.222911 -0.807610
# Same row-wise stack, but `keys` adds an outer index level ("X" / "Y") that
# records which input frame each row came from.
pd.concat([df1, df2], axis=0, keys=["X", "Y"])
A B C D E X a 0.283456 0.605428 0.145223 NaN NaN b 1.782473 -1.043145 0.844106 NaN NaN c 0.236397 2.063387 -0.573410 NaN NaN d 0.204641 0.561990 1.709260 NaN NaN Y a NaN NaN 0.044766 -1.580057 -0.512200 b NaN NaN -1.187690 0.870686 -1.442228 d NaN NaN 0.390094 0.222911 -0.807610
# Row-wise stack that discards the original row labels and renumbers the
# result 0..n-1.
pd.concat([df1, df2], axis=0, ignore_index=True)
A B C D E 0 0.283456 0.605428 0.145223 NaN NaN 1 1.782473 -1.043145 0.844106 NaN NaN 2 0.236397 2.063387 -0.573410 NaN NaN 3 0.204641 0.561990 1.709260 NaN NaN 4 NaN NaN 0.044766 -1.580057 -0.512200 5 NaN NaN -1.187690 0.870686 -1.442228 6 NaN NaN 0.390094 0.222911 -0.807610
# Row-wise stack with an inner join on the columns: only columns present in
# both frames survive.
pd.concat([df1, df2], axis=0, join="inner")
C a 0.145223 b 0.844106 c -0.573410 d 1.709260 a 0.044766 b -1.187690 d 0.390094
# Place the frames side by side, aligning on the row index. Row labels missing
# from one frame get NaN in that frame's columns.
pd.concat([df1, df2], axis=1)
A B C C D E a 0.283456 0.605428 0.145223 0.044766 -1.580057 -0.512200 b 1.782473 -1.043145 0.844106 -1.187690 0.870686 -1.442228 c 0.236397 2.063387 -0.573410 NaN NaN NaN d 0.204641 0.561990 1.709260 0.390094 0.222911 -0.807610
# Side-by-side concatenation restricted to the row labels the two frames have
# in common.
pd.concat([df1, df2], axis=1, join="inner")
A B C C D E a 0.283456 0.605428 0.145223 0.044766 -1.580057 -0.512200 b 1.782473 -1.043145 0.844106 -1.187690 0.870686 -1.442228 d 0.204641 0.561990 1.709260 0.390094 0.222911 -0.807610
# Conform df2 to df1's row index first, so the side-by-side result has exactly
# df1's rows.
pd.concat([df1, df2.reindex(df1.index)], axis=1)
A B C C D E a 0.283456 0.605428 0.145223 0.044766 -1.580057 -0.512200 b 1.782473 -1.043145 0.844106 -1.187690 0.870686 -1.442228 c 0.236397 2.063387 -0.573410 NaN NaN NaN d 0.204641 0.561990 1.709260 0.390094 0.222911 -0.807610
# Append df2's rows below df1's. DataFrame.append was deprecated in pandas 1.4
# and removed in 2.0, so use the equivalent pd.concat call, which works on
# every pandas version.
pd.concat([df1, df2])
A B C D E a 0.283456 0.605428 0.145223 NaN NaN b 1.782473 -1.043145 0.844106 NaN NaN c 0.236397 2.063387 -0.573410 NaN NaN d 0.204641 0.561990 1.709260 NaN NaN a NaN NaN 0.044766 -1.580057 -0.512200 b NaN NaN -1.187690 0.870686 -1.442228 d NaN NaN 0.390094 0.222911 -0.807610
pd. merge(
left,
right,
how= "inner" ,
on= None ,
left_on= None ,
right_on= None ,
left_index= False ,
right_index= False ,
sort= False ,
suffixes= ( "_x" , "_y" ) ,
copy= True ,
indicator= False ,
validate= None ,
)
# Left-hand table for the merge examples: two key columns plus payload
# columns A and B.
left = pd.DataFrame(
    {
        "key1": ["K0", "K0", "K1", "K2"],
        "key2": ["K0", "K1", "K0", "K1"],
        "A": ["A0", "A1", "A2", "A3"],
        "B": ["B0", "B1", "B2", "B3"],
    }
)
left
key1 key2 A B 0 K0 K0 A0 B0 1 K0 K1 A1 B1 2 K1 K0 A2 B2 3 K2 K1 A3 B3
# Right-hand table for the merge examples: the same two key columns plus
# payload columns C and D.
right = pd.DataFrame(
    {
        "key1": ["K0", "K1", "K1", "K2"],
        "key2": ["K0", "K0", "K0", "K0"],
        "C": ["C0", "C1", "C2", "C3"],
        "D": ["D0", "D1", "D2", "D3"],
    }
)
right
key1 key2 C D 0 K0 K0 C0 D0 1 K1 K0 C1 D1 2 K1 K0 C2 D2 3 K2 K0 C3 D3
# Default (inner) join on both key columns: keeps only the key pairs that
# occur in both tables.
pd.merge(left, right, on=["key1", "key2"])
key1 key2 A B C D 0 K0 K0 A0 B0 C0 D0 1 K1 K0 A2 B2 C1 D1 2 K1 K0 A2 B2 C2 D2
# Outer join: keeps every key pair found in either table and fills the
# unmatched side with NaN.
pd.merge(left, right, on=["key1", "key2"], how="outer")
key1 key2 A B C D 0 K0 K0 A0 B0 C0 D0 1 K0 K1 A1 B1 NaN NaN 2 K1 K0 A2 B2 C1 D1 3 K1 K0 A2 B2 C2 D2 4 K2 K1 A3 B3 NaN NaN 5 K2 K0 NaN NaN C3 D3
# Left join: keeps every row of `left`; C and D are NaN where `right` has no
# matching key pair.
pd.merge(left, right, on=["key1", "key2"], how="left")
key1 key2 A B C D 0 K0 K0 A0 B0 C0 D0 1 K0 K1 A1 B1 NaN NaN 2 K1 K0 A2 B2 C1 D1 3 K1 K0 A2 B2 C2 D2 4 K2 K1 A3 B3 NaN NaN
# Right join: keeps every row of `right`; A and B are NaN where `left` has no
# matching key pair.
pd.merge(left, right, on=["key1", "key2"], how="right")
key1 key2 A B C D 0 K0 K0 A0 B0 C0 D0 1 K1 K0 A2 B2 C1 D1 2 K1 K0 A2 B2 C2 D2 3 K2 K0 NaN NaN C3 D3
# Duplicate join keys: every value of "B" is 2 in both tables, so the merge
# pairs each left row with each right row (2 x 3 = 6 rows). The clashing "A"
# columns receive the default "_x" / "_y" suffixes.
left = pd.DataFrame({"A": [1, 2], "B": [2, 2]})
right = pd.DataFrame({"A": [4, 5, 6], "B": [2, 2, 2]})
result = pd.merge(left, right, on="B", how="outer")
print(left, right, result, sep="\n")
A B
0 1 2
1 2 2
A B
0 4 2
1 5 2
2 6 2
A_x B A_y
0 1 2 4
1 1 2 5
2 1 2 6
3 2 2 4
4 2 2 5
5 2 2 6
# Rebind df1/df2 to small tables sharing "col1", then outer-merge them with
# indicator=True, which adds a "_merge" column reporting whether each row came
# from the left table only, the right table only, or both.
df1 = pd.DataFrame({"col1": [0, 1], "col_left": ["a", "b"]})
df2 = pd.DataFrame({"col1": [1, 2, 2], "col_right": [2, 2, 2]})
pd.merge(df1, df2, on="col1", how="outer", indicator=True)
col1 col_left col_right _merge 0 0 a NaN left_only 1 1 b 2.0 both 2 2 NaN 2.0 right_only 3 2 NaN 2.0 right_only