import pandas as pd
pd. __version__
'0.24.2'
arr = [ 1 , 2 , 3 , 4 ]
s1 = pd. Series( arr)
s1
0 1
1 2
2 3
3 4
dtype: int64
import numpy as np
n = np. random. randn( 5 )
index = [ 'a' , 'b' , 'c' , 'd' , 'e' ]
s2 = pd. Series( n, index= index)
s2
a -0.583111
b -0.466115
c 0.542662
d -0.745683
e -0.529050
dtype: float64
dic = {
'a' : 1 , 'b' : 2 , 'c' : 3 , 'd' : 4 , 'e' : 5
}
s3 = pd. Series( dic)
s3
a 1
b 2
c 3
d 4
e 5
dtype: int64
print ( s1)
s1. index= [ 'A' , 'B' , 'C' , 'D' ]
s1
0 1
1 2
2 3
3 4
dtype: int64
A 1
B 2
C 3
D 4
dtype: int64
# Stack s1 underneath s3 into one Series.  pd.concat replaces Series.append,
# which was deprecated in pandas 1.4 and removed in pandas 2.0; the result
# (values, index labels, order) is the same.
s4 = pd.concat([s3, s1])
s4
a 1
b 2
c 3
d 4
e 5
A 1
B 2
C 3
D 4
dtype: int64
print ( s4)
s4 = s4. drop( 'e' )
s4
a 1
b 2
c 3
d 4
e 5
A 1
B 2
C 3
D 4
dtype: int64
a 1
b 2
c 3
d 4
A 1
B 2
C 3
D 4
dtype: int64
s4[ 'A' ] = 100
s4
a 1
b 2
c 3
d 4
A 100
B 2
C 3
D 4
dtype: int64
s4[ 'B' ]
2
s4[ : 3 ]
a 1
b 2
c 3
dtype: int64
s4. add( s3)
A NaN
B NaN
C NaN
D NaN
a 2.0
b 4.0
c 6.0
d 8.0
e NaN
dtype: float64
s4. sub( s3)
A NaN
B NaN
C NaN
D NaN
a 0.0
b 0.0
c 0.0
d 0.0
e NaN
dtype: float64
s4. mul( s3)
A NaN
B NaN
C NaN
D NaN
a 1.0
b 4.0
c 9.0
d 16.0
e NaN
dtype: float64
s4. div( s3)
A NaN
B NaN
C NaN
D NaN
a 1.0
b 1.0
c 1.0
d 1.0
e NaN
dtype: float64
print ( s4)
s4. median( )
a 1
b 2
c 3
d 4
A 100
B 2
C 3
D 4
dtype: int64
3.0
s4. sum ( )
119
print ( s4. max ( ) )
s4. min ( )
100
1
dates = pd. date_range( 'today' , periods= 6 )
numbers = np. random. randn( 6 , 4 )
columns = [ 'A' , 'B' , 'C' , 'D' ]
df1 = pd. DataFrame( numbers, index= dates, columns= columns)
df1
A B C D 2019-07-16 09:59:10.131414 1.536536 -1.598355 -2.354828 -1.151150 2019-07-17 09:59:10.131414 0.758288 0.143739 -0.389704 0.369642 2019-07-18 09:59:10.131414 -0.612505 0.752261 0.243023 -0.110990 2019-07-19 09:59:10.131414 0.130843 1.308658 0.765599 0.892070 2019-07-20 09:59:10.131414 1.220489 -0.415430 -0.878169 -0.215298 2019-07-21 09:59:10.131414 -0.098756 -2.210043 0.376714 0.521180
data = {
'animal' : [ 'cat' , 'cat' , 'snake' , 'dog' , 'dog' , 'cat' , 'snake' , 'cat' , 'dog' , 'dog' ] ,
'age' : [ 2.5 , 3 , 0.5 , np. nan, 5 , 2 , 4.5 , np. nan, 7 , 3 ] ,
'visits' : [ 1 , 3 , 2 , 3 , 2 , 3 , 1 , 1 , 2 , 1 ] ,
'priority' : [ 'yes' , 'yes' , 'no' , 'yes' , 'no' , 'no' , 'no' , 'yes' , 'no' , 'no' ]
}
labels = [ 'a' , 'b' , 'c' , 'd' , 'e' , 'f' , 'g' , 'h' , 'i' , 'j' ]
df2 = pd. DataFrame( data, index= labels)
df2
animal age visits priority a cat 2.5 1 yes b cat 3.0 3 yes c snake 0.5 2 no d dog NaN 3 yes e dog 5.0 2 no f cat 2.0 3 no g snake 4.5 1 no h cat NaN 1 yes i dog 7.0 2 no j dog 3.0 1 no
df2. dtypes
animal object
age float64
visits int64
priority object
dtype: object
df2. head( )
animal age visits priority a cat 2.5 1 yes b cat 3.0 3 yes c snake 0.5 2 no d dog NaN 3 yes e dog 5.0 2 no
df2. tail( 3 )
animal age visits priority h cat NaN 1 yes i dog 7.0 2 no j dog 3.0 1 no
df2. index
Index(['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j'], dtype='object')
df2. columns
Index(['animal', 'age', 'visits', 'priority'], dtype='object')
df2. values
array([['cat', 2.5, 1, 'yes'],
['cat', 3.0, 3, 'yes'],
['snake', 0.5, 2, 'no'],
['dog', nan, 3, 'yes'],
['dog', 5.0, 2, 'no'],
['cat', 2.0, 3, 'no'],
['snake', 4.5, 1, 'no'],
['cat', nan, 1, 'yes'],
['dog', 7.0, 2, 'no'],
['dog', 3.0, 1, 'no']], dtype=object)
df2. describe( )
age visits count 8.000000 10.000000 mean 3.437500 1.900000 std 2.007797 0.875595 min 0.500000 1.000000 25% 2.375000 1.000000 50% 3.000000 2.000000 75% 4.625000 2.750000 max 7.000000 3.000000
df2. T
a b c d e f g h i j animal cat cat snake dog dog cat snake cat dog dog age 2.5 3 0.5 NaN 5 2 4.5 NaN 7 3 visits 1 3 2 3 2 3 1 1 2 1 priority yes yes no yes no no no yes no no
df2. sort_values( by= 'age' , ascending= True )
animal age visits priority c snake 0.5 2 no f cat 2.0 3 no a cat 2.5 1 yes b cat 3.0 3 yes j dog 3.0 1 no g snake 4.5 1 no e dog 5.0 2 no i dog 7.0 2 no d dog NaN 3 yes h cat NaN 1 yes
df2[ 1 : 3 ]
animal age visits priority b cat 3.0 3 yes c snake 0.5 2 no
df2[ 'age' ]
a 2.5
b 3.0
c 0.5
d NaN
e 5.0
f 2.0
g 4.5
h NaN
i 7.0
j 3.0
Name: age, dtype: float64
df2. age
a 2.5
b 3.0
c 0.5
d NaN
e 5.0
f 2.0
g 4.5
h NaN
i 7.0
j 3.0
Name: age, dtype: float64
df2[ [ 'age' , 'animal' ] ]
age animal a 2.5 cat b 3.0 cat c 0.5 snake d NaN dog e 5.0 dog f 2.0 cat g 4.5 snake h NaN cat i 7.0 dog j 3.0 dog
df2. iloc[ 1 : 3 ]
animal age visits priority b cat 3.0 3 yes c snake 0.5 2 no
df2. loc[ 'c' , 'age' ]
0.5
df2. loc[ : 'f' , [ 'age' , 'animal' ] ]
age animal a 2.5 cat b 3.0 cat c 0.5 snake d NaN dog e 5.0 dog f 2.0 cat
df3 = df2. copy( )
df3
animal age visits priority a cat 2.5 1 yes b cat 3.0 3 yes c snake 0.5 2 no d dog NaN 3 yes e dog 5.0 2 no f cat 2.0 3 no g snake 4.5 1 no h cat NaN 1 yes i dog 7.0 2 no j dog 3.0 1 no
df3. isnull( )
animal age visits priority a False False False False b False False False False c False False False False d False True False False e False False False False f False False False False g False False False False h False True False False i False False False False j False False False False
num= pd. Series( [ 1 , 2 , 3 , 4 , 5 , 6 , 7 , 8 , 9 , 10 ] , index= df3. index)
df3[ 'No.' ] = num
df3
animal age visits priority No. a cat 2.5 1 yes 1 b cat 3.0 3 yes 2 c snake 0.5 2 no 3 d dog NaN 3 yes 4 e dog 5.0 2 no 5 f cat 2.0 3 no 6 g snake 4.5 1 no 7 h cat NaN 1 yes 8 i dog 7.0 2 no 9 j dog 3.0 1 no 10
df3. loc[ 'f' , 'age' ] = 1.5
df3
animal age visits priority No. a cat 2.5 1 yes 1 b cat 3.0 3 yes 2 c snake 0.5 2 no 3 d dog NaN 3 yes 4 e dog 5.0 2 no 5 f cat 1.5 3 no 6 g snake 4.5 1 no 7 h cat NaN 1 yes 8 i dog 7.0 2 no 9 j dog 3.0 1 no 10
# Column means of the numeric columns only.  The object columns ('animal',
# 'priority') have no mean; asking for numeric_only explicitly avoids the
# TypeError raised by pandas >= 2.0 while giving the same result on old versions.
df3.mean(numeric_only=True)
age 3.375
visits 1.900
No. 5.500
dtype: float64
df3[ 'visits' ] . sum ( )
19
string = pd. Series( [
'A' , 'B' , 'asDS' , np. nan
] )
print ( string)
string. str . lower( )
0 A
1 B
2 asDS
3 NaN
dtype: object
0 a
1 b
2 asds
3 NaN
dtype: object
string. str . upper( )
0 A
1 B
2 ASDS
3 NaN
dtype: object
df4 = df3. copy( )
print ( df4)
df4. fillna( value= 3 )
animal age visits priority No.
a cat 2.5 1 yes 1
b cat 3.0 3 yes 2
c snake 0.5 2 no 3
d dog NaN 3 yes 4
e dog 5.0 2 no 5
f cat 1.5 3 no 6
g snake 4.5 1 no 7
h cat NaN 1 yes 8
i dog 7.0 2 no 9
j dog 3.0 1 no 10
animal age visits priority No. a cat 2.5 1 yes 1 b cat 3.0 3 yes 2 c snake 0.5 2 no 3 d dog 3.0 3 yes 4 e dog 5.0 2 no 5 f cat 1.5 3 no 6 g snake 4.5 1 no 7 h cat 3.0 1 yes 8 i dog 7.0 2 no 9 j dog 3.0 1 no 10
df5 = df3. copy( )
print ( df5)
df5. dropna( how= 'any' )
animal age visits priority No.
a cat 2.5 1 yes 1
b cat 3.0 3 yes 2
c snake 0.5 2 no 3
d dog NaN 3 yes 4
e dog 5.0 2 no 5
f cat 1.5 3 no 6
g snake 4.5 1 no 7
h cat NaN 1 yes 8
i dog 7.0 2 no 9
j dog 3.0 1 no 10
animal age visits priority No. a cat 2.5 1 yes 1 b cat 3.0 3 yes 2 c snake 0.5 2 no 3 e dog 5.0 2 no 5 f cat 1.5 3 no 6 g snake 4.5 1 no 7 i dog 7.0 2 no 9 j dog 3.0 1 no 10
# Two small frames that share a 'key' column but list the keys in a
# different row order.
l = pd.DataFrame(dict(key=['foo1', 'foo2'], one=[1, 2]))
r = pd.DataFrame(dict(key=['foo2', 'foo1'], two=[4, 5]))
print(l, r, sep='\n')
# Join the two frames on their common 'key' column (default inner join).
l.merge(r, on='key')
key one
0 foo1 1
1 foo2 2
key two
0 foo2 4
1 foo1 5
dti = pd. date_range( start= '20190101' , end= '20191231' , freq= "D" )
s = pd. Series( np. random. rand( len ( dti) ) , index= dti)
s
2019-01-01 0.037638
2019-01-02 0.146835
2019-01-03 0.630011
2019-01-04 0.225352
2019-01-05 0.549422
2019-01-06 0.136173
2019-01-07 0.976075
2019-01-08 0.581866
2019-01-09 0.667477
2019-01-10 0.616454
2019-01-11 0.625050
2019-01-12 0.131415
2019-01-13 0.558883
2019-01-14 0.749271
2019-01-15 0.676446
2019-01-16 0.084104
2019-01-17 0.073056
2019-01-18 0.232186
2019-01-19 0.213357
2019-01-20 0.457664
2019-01-21 0.538337
2019-01-22 0.728427
2019-01-23 0.899302
2019-01-24 0.850609
2019-01-25 0.716502
2019-01-26 0.319339
2019-01-27 0.577455
2019-01-28 0.126991
2019-01-29 0.527439
2019-01-30 0.551891
...
2019-12-02 0.246504
2019-12-03 0.117549
2019-12-04 0.223977
2019-12-05 0.407109
2019-12-06 0.953820
2019-12-07 0.583962
2019-12-08 0.014735
2019-12-09 0.009190
2019-12-10 0.496330
2019-12-11 0.191981
2019-12-12 0.002935
2019-12-13 0.530197
2019-12-14 0.328830
2019-12-15 0.081638
2019-12-16 0.922251
2019-12-17 0.332389
2019-12-18 0.076567
2019-12-19 0.906216
2019-12-20 0.481311
2019-12-21 0.080405
2019-12-22 0.291532
2019-12-23 0.933424
2019-12-24 0.439771
2019-12-25 0.738565
2019-12-26 0.215401
2019-12-27 0.849687
2019-12-28 0.861060
2019-12-29 0.831074
2019-12-30 0.944307
2019-12-31 0.245717
Freq: D, Length: 365, dtype: float64
# Sum of the values that fall on a Wednesday: DatetimeIndex.weekday numbers
# the days from Monday=0, so 2 selects Wednesdays.
s[ s. index. weekday == 2 ] . sum ( )
28.54901665149845
# Average of the daily values within each calendar month; each result row is
# labelled with the last day of its month.
# NOTE(review): newer pandas releases reportedly prefer the 'ME' alias for
# month-end frequency -- confirm against the installed version before changing.
s. resample( 'M' ) . mean( )
2019-01-31 0.471694
2019-02-28 0.586159
2019-03-31 0.515226
2019-04-30 0.530170
2019-05-31 0.481162
2019-06-30 0.545577
2019-07-31 0.547692
2019-08-31 0.495158
2019-09-30 0.547109
2019-10-31 0.544706
2019-11-30 0.456312
2019-12-31 0.434590
Freq: M, dtype: float64
data = { 'animal' : [ 'cat' , 'cat' , 'snake' , 'dog' , 'dog' , 'cat' , 'snake' , 'cat' , 'dog' , 'dog' ] ,
'age' : [ 2.5 , 3 , 0.5 , np. nan, 5 , 2 , 4.5 , np. nan, 7 , 3 ] ,
'visits' : [ 1 , 3 , 2 , 3 , 2 , 3 , 1 , 1 , 2 , 1 ] ,
'priority' : [ 'yes' , 'yes' , 'no' , 'yes' , 'no' , 'no' , 'no' , 'yes' , 'no' , 'no' ] }
labels = [ 'a' , 'b' , 'c' , 'd' , 'e' , 'f' , 'g' , 'h' , 'i' , 'j' ]
df = pd. DataFrame( data, index= labels)
df[ df[ 'age' ] > 3 ]
animal age visits priority e dog 5.0 2 no g snake 4.5 1 no i dog 7.0 2 no
df = pd. DataFrame( data, index= labels)
df[ ( df[ 'animal' ] == 'cat' ) & ( df[ 'age' ] < 3 ) ]
animal age visits priority a cat 2.5 1 yes f cat 2.0 3 no
df3[ df3[ 'animal' ] . isin( [ 'cat' , 'dog' ] ) ]
animal age visits priority No. a cat 2.5 1 yes 1 b cat 3.0 3 yes 2 d dog NaN 3 yes 4 e dog 5.0 2 no 5 f cat 1.5 3 no 6 h cat NaN 1 yes 8 i dog 7.0 2 no 9 j dog 3.0 1 no 10
# Pick the rows at positions 3, 4 and 8 and two columns by label.  The labels
# are looked up through df's own index (not df2's), so the selection does not
# depend on another frame happening to carry identical labels.
df.loc[df.index[[3, 4, 8]], ['animal', 'age']]
animal age d dog NaN e dog 5.0 i dog 7.0
df. sort_values( by= [ 'age' , 'visits' ] , ascending= [ False , True ] )
animal age visits priority i dog 7.0 2 no e dog 5.0 2 no g snake 4.5 1 no b cat 3.0 3 yes j dog 3.0 1 no a cat 2.5 1 yes f cat 2.0 3 no c snake 0.5 2 no d dog NaN 3 yes h cat NaN 1 yes
# Per-animal totals of the numeric columns.  numeric_only=True keeps the
# 'priority' strings out of the aggregation; newer pandas would otherwise
# "sum" (concatenate) them instead of dropping the column.
df4.groupby(by='animal').sum(numeric_only=True)
age visits No. animal cat 7.0 8 17 dog 15.0 8 28 snake 5.0 3 10
df = pd. DataFrame( { 'From_To' : [ 'LoNDon_paris' , 'MAdrid_miLAN' , 'londON_StockhOlm' ,
'Budapest_PaRis' , 'Brussels_londOn' ] ,
'FlightNumber' : [ 10045 , np. nan, 10065 , np. nan, 10085 ] ,
'RecentDelays' : [ [ 23 , 47 ] , [ ] , [ 24 , 43 , 87 ] , [ 13 ] , [ 67 , 32 ] ] ,
'Airline' : [ 'KLM(!)' , '<Air France> (12)' , '(British Airways. )' ,
'12. Air France' , '"Swiss Air"' ] } )
df
From_To FlightNumber RecentDelays Airline 0 LoNDon_paris 10045.0 [23, 47] KLM(!) 1 MAdrid_miLAN NaN [] <Air France> (12) 2 londON_StockhOlm 10065.0 [24, 43, 87] (British Airways. ) 3 Budapest_PaRis NaN [13] 12. Air France 4 Brussels_londOn 10085.0 [67, 32] "Swiss Air"
df[ 'FlightNumber' ] = df[ 'FlightNumber' ] . interpolate( ) . astype( int )
df
From_To FlightNumber RecentDelays Airline 0 LoNDon_paris 10045 [23, 47] KLM(!) 1 MAdrid_miLAN 10055 [] <Air France> (12) 2 londON_StockhOlm 10065 [24, 43, 87] (British Airways. ) 3 Budapest_PaRis 10075 [13] 12. Air France 4 Brussels_londOn 10085 [67, 32] "Swiss Air"
temp = df. From_To. str . split( '_' , expand= True )
temp. columns = [ 'From' , 'To' ]
temp
From To 0 LoNDon paris 1 MAdrid miLAN 2 londON StockhOlm 3 Budapest PaRis 4 Brussels londOn
temp[ 'From' ] = temp[ 'From' ] . str . capitalize( )
temp[ 'To' ] = temp[ 'To' ] . str . capitalize( )
temp
From To 0 London Paris 1 Madrid Milan 2 London Stockholm 3 Budapest Paris 4 Brussels London
df = df. drop( 'From_To' , axis= 1 )
df = df. join( temp)
df
FlightNumber RecentDelays Airline From To 0 10045 [23, 47] KLM(!) London Paris 1 10055 [] <Air France> (12) Madrid Milan 2 10065 [24, 43, 87] (British Airways. ) London Stockholm 3 10075 [13] 12. Air France Budapest Paris 4 10085 [67, 32] "Swiss Air" Brussels London
# Keep only the first run of letters/whitespace found in each airline label
# (this drops the surrounding punctuation and digits), then trim the spaces
# left at either end.
# The pattern is a raw string: '\s' inside a normal literal is an invalid
# escape sequence and triggers a warning on current Python versions.
df['Airline'] = (
    df['Airline']
    .str.extract(r'([a-zA-Z\s]+)', expand=False)
    .str.strip()
)
df
FlightNumber RecentDelays Airline From To 0 10045 [23, 47] KLM London Paris 1 10055 [] Air France Madrid Milan 2 10065 [24, 43, 87] British Airways London Stockholm 3 10075 [13] Air France Budapest Paris 4 10085 [67, 32] Swiss Air Brussels London
# Spread each RecentDelays list over its own columns (delay_1, delay_2, ...);
# rows with fewer entries are padded with NaN.  Building the frame directly
# from the list of lists avoids creating one Series per row, which is what
# apply(pd.Series) does.
delays = pd.DataFrame(df['RecentDelays'].tolist(), index=df.index)
delays.columns = [
    'delay_{}'.format(position)
    for position in range(1, len(delays.columns) + 1)
]
# Swap the original list column for the expanded delay columns.
df = df.drop('RecentDelays', axis=1).join(delays)
df
FlightNumber Airline From To delay_1 delay_2 delay_3 0 10045 KLM London Paris 23.0 47.0 NaN 1 10055 Air France Madrid Milan NaN NaN NaN 2 10065 British Airways London Stockholm 24.0 43.0 87.0 3 10075 Air France Budapest Paris 13.0 NaN NaN 4 10085 Swiss Air Brussels London 67.0 32.0 NaN
df = pd.DataFrame({
    'name': ['Alice', 'Bob', 'Candy', 'Dany', 'Ella', 'Frank', 'Grace', 'Jenny'],
    'grades': [58, 83, 79, 65, 93, 45, 61, 88],
})


def choice(x):
    """Return 1 when the grade ``x`` is strictly greater than 60, otherwise 0."""
    return 1 if x > 60 else 0


# Replace every grade by its 0/1 flag.  Series.map applies ``choice`` element
# by element and keeps the frame's index, so no lambda wrapper or rebuilt
# Series is needed.
df['grades'] = df['grades'].map(choice)
df
name grades 0 Alice 0 1 Bob 1 2 Candy 1 3 Dany 1 4 Ella 1 5 Frank 0 6 Grace 1 7 Jenny 1
df = pd. DataFrame( { 'A' : [ 1 , 2 , 3 , 4 , 5 , 4 , 4 , 57 , 8 ] } )
# Keep only the rows whose 'A' value differs from the row directly above,
# i.e. collapse runs of consecutive duplicates.  shift() moves the column
# down by one row, so each value is compared with its predecessor; the first
# row is compared with NaN and is therefore always kept.
df. loc[ df[ 'A' ] . shift( ) != df[ 'A' ] ]
def normalization(df):
    """Min-max scale every column of ``df``.

    Each value is mapped to ``(value - column_min) / (column_max - column_min)``,
    so the smallest value of a column becomes 0 and the largest becomes 1.
    A column whose values are all equal has a zero range and therefore
    comes out as NaN (0 / 0).

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to scale; the arithmetic assumes numeric columns.

    Returns
    -------
    pandas.DataFrame
        A new frame with the same index and columns; ``df`` is not modified.
    """
    # Column minima are needed for both the shift and the range, so compute
    # them once.
    column_min = df.min()
    numerator = df.sub(column_min)
    denominator = df.max().sub(column_min)
    return numerator.div(denominator)
df = pd. DataFrame( np. random. random( size= ( 5 , 3 ) ) )
print ( df)
normalization( df)
0 1 2
0 0.920675 0.181496 0.408179
1 0.016837 0.740842 0.239625
2 0.577404 0.503003 0.077401
3 0.502584 0.262550 0.000848
4 0.817712 0.774605 0.073925
0 1 2 0 1.000000 0.000000 1.000000 1 0.000000 0.943074 0.586199 2 0.620207 0.542072 0.187938 3 0.537427 0.136659 0.000000 4 0.886083 1.000000 0.179404
% matplotlib inline
ts = pd. Series( np. random. randn( 100 ) , index= pd. date_range( 'today' , periods= 100 ) )
ts = ts. cumsum( )
print ( ts)
ts. plot( )
2019-07-16 11:14:32.969237 -0.160527
2019-07-17 11:14:32.969237 -0.413502
2019-07-18 11:14:32.969237 0.494939
2019-07-19 11:14:32.969237 -0.178343
2019-07-20 11:14:32.969237 -1.279842
2019-07-21 11:14:32.969237 -0.538981
2019-07-22 11:14:32.969237 -1.952703
2019-07-23 11:14:32.969237 -2.350831
2019-07-24 11:14:32.969237 -2.652419
2019-07-25 11:14:32.969237 -4.976856
2019-07-26 11:14:32.969237 -5.596993
2019-07-27 11:14:32.969237 -4.880697
2019-07-28 11:14:32.969237 -5.918225
2019-07-29 11:14:32.969237 -4.720213
2019-07-30 11:14:32.969237 -4.056208
2019-07-31 11:14:32.969237 -3.526640
2019-08-01 11:14:32.969237 -2.295520
2019-08-02 11:14:32.969237 -0.381850
2019-08-03 11:14:32.969237 -0.077956
2019-08-04 11:14:32.969237 0.441831
2019-08-05 11:14:32.969237 -1.624691
2019-08-06 11:14:32.969237 -1.084316
2019-08-07 11:14:32.969237 -2.134124
2019-08-08 11:14:32.969237 -1.477398
2019-08-09 11:14:32.969237 -2.299194
2019-08-10 11:14:32.969237 -2.501663
2019-08-11 11:14:32.969237 -3.190793
2019-08-12 11:14:32.969237 -4.237049
2019-08-13 11:14:32.969237 -4.477230
2019-08-14 11:14:32.969237 -4.171017
...
2019-09-24 11:14:32.969237 -13.569730
2019-09-25 11:14:32.969237 -14.627188
2019-09-26 11:14:32.969237 -15.461638
2019-09-27 11:14:32.969237 -16.121560
2019-09-28 11:14:32.969237 -16.569511
2019-09-29 11:14:32.969237 -17.900842
2019-09-30 11:14:32.969237 -19.194001
2019-10-01 11:14:32.969237 -17.979293
2019-10-02 11:14:32.969237 -18.645903
2019-10-03 11:14:32.969237 -19.241367
2019-10-04 11:14:32.969237 -19.211365
2019-10-05 11:14:32.969237 -18.088419
2019-10-06 11:14:32.969237 -17.767976
2019-10-07 11:14:32.969237 -16.273883
2019-10-08 11:14:32.969237 -16.751812
2019-10-09 11:14:32.969237 -16.460468
2019-10-10 11:14:32.969237 -15.534514
2019-10-11 11:14:32.969237 -16.029253
2019-10-12 11:14:32.969237 -16.629995
2019-10-13 11:14:32.969237 -17.181734
2019-10-14 11:14:32.969237 -16.139546
2019-10-15 11:14:32.969237 -16.249424
2019-10-16 11:14:32.969237 -14.797719
2019-10-17 11:14:32.969237 -17.198546
2019-10-18 11:14:32.969237 -18.193887
2019-10-19 11:14:32.969237 -18.175841
2019-10-20 11:14:32.969237 -18.039003
2019-10-21 11:14:32.969237 -17.884838
2019-10-22 11:14:32.969237 -18.985760
2019-10-23 11:14:32.969237 -18.987684
Freq: D, Length: 100, dtype: float64
<matplotlib.axes._subplots.AxesSubplot at 0x1bc512b29b0>
df = pd. DataFrame( np. random. randn( 100 , 4 ) , index= ts. index, columns= [ 'A' , 'B' , 'C' , 'D' ] )
df = df. cumsum( )
print ( df)
df. plot( )
A B C D
2019-07-16 11:14:32.969237 -2.311551 -2.601142 0.852766 0.766899
2019-07-17 11:14:32.969237 -0.879667 -4.293468 -0.039314 0.822882
2019-07-18 11:14:32.969237 -1.249910 -5.562160 -0.456214 0.720813
2019-07-19 11:14:32.969237 -0.567523 -5.869549 -1.250540 1.204854
2019-07-20 11:14:32.969237 0.000393 -3.939871 -1.824283 1.377918
2019-07-21 11:14:32.969237 -1.957763 -4.426390 -1.644319 0.411990
2019-07-22 11:14:32.969237 -1.863936 -5.952407 -0.678510 0.882874
2019-07-23 11:14:32.969237 -2.047160 -6.771213 1.407736 1.757021
2019-07-24 11:14:32.969237 -2.230326 -6.520421 3.122783 2.976079
2019-07-25 11:14:32.969237 -3.833992 -6.785455 2.087702 4.075022
2019-07-26 11:14:32.969237 -4.315307 -8.567182 2.688330 5.365991
2019-07-27 11:14:32.969237 -5.248594 -8.344775 3.382635 4.214969
2019-07-28 11:14:32.969237 -5.054369 -7.385112 3.765415 5.066637
2019-07-29 11:14:32.969237 -2.931733 -7.085015 3.746368 5.756438
2019-07-30 11:14:32.969237 -4.190044 -7.517056 3.133894 8.217903
2019-07-31 11:14:32.969237 -3.139043 -8.779127 2.402586 7.860025
2019-08-01 11:14:32.969237 -1.870986 -8.921735 2.442751 7.956824
2019-08-02 11:14:32.969237 -1.947051 -9.726026 2.805189 8.730009
2019-08-03 11:14:32.969237 -2.468689 -7.685965 2.295436 6.795688
2019-08-04 11:14:32.969237 -2.138392 -7.481845 3.769528 7.018816
2019-08-05 11:14:32.969237 -1.521903 -5.906005 2.340666 7.280866
2019-08-06 11:14:32.969237 -0.851497 -5.947501 4.279168 6.229589
2019-08-07 11:14:32.969237 -0.745985 -6.307143 5.847261 5.630705
2019-08-08 11:14:32.969237 -0.459598 -5.138792 4.995194 5.647915
2019-08-09 11:14:32.969237 -0.324185 -5.226607 3.466786 4.292591
2019-08-10 11:14:32.969237 -0.352415 -5.121374 3.401821 4.966165
2019-08-11 11:14:32.969237 1.123371 -4.678556 2.997400 4.730402
2019-08-12 11:14:32.969237 1.621475 -4.918931 1.978229 5.891817
2019-08-13 11:14:32.969237 0.528799 -4.923886 1.741921 4.091429
2019-08-14 11:14:32.969237 0.234260 -6.577139 3.515839 3.965522
... ... ... ... ...
2019-09-24 11:14:32.969237 2.098830 11.540368 -2.760031 2.017074
2019-09-25 11:14:32.969237 1.917497 11.425361 -2.360769 1.540259
2019-09-26 11:14:32.969237 1.586440 11.089945 -2.934906 2.016988
2019-09-27 11:14:32.969237 2.426352 11.000135 -4.160570 1.678462
2019-09-28 11:14:32.969237 2.590117 11.409677 -5.102951 3.123796
2019-09-29 11:14:32.969237 2.586017 11.673688 -5.936028 2.159731
2019-09-30 11:14:32.969237 5.012078 12.535448 -6.913949 4.082058
2019-10-01 11:14:32.969237 3.529943 14.612272 -6.541449 3.130429
2019-10-02 11:14:32.969237 3.376133 12.740237 -7.041879 3.058573
2019-10-03 11:14:32.969237 3.536676 13.233300 -6.775922 3.562460
2019-10-04 11:14:32.969237 5.075667 13.630937 -6.409229 3.404647
2019-10-05 11:14:32.969237 4.633807 14.011680 -7.359063 2.555063
2019-10-06 11:14:32.969237 4.108268 14.233577 -8.319235 1.782257
2019-10-07 11:14:32.969237 5.389960 15.049002 -7.592306 3.064996
2019-10-08 11:14:32.969237 4.904890 15.129739 -7.845749 2.197024
2019-10-09 11:14:32.969237 2.894357 14.053121 -7.560088 2.127322
2019-10-10 11:14:32.969237 2.432563 13.678098 -7.010267 2.536035
2019-10-11 11:14:32.969237 1.493160 13.263020 -7.262265 2.954692
2019-10-12 11:14:32.969237 2.477873 14.443603 -7.815188 2.420356
2019-10-13 11:14:32.969237 1.914146 14.476938 -6.850849 2.985317
2019-10-14 11:14:32.969237 1.944343 13.532021 -7.611172 4.754920
2019-10-15 11:14:32.969237 2.379594 13.908116 -8.503684 5.217389
2019-10-16 11:14:32.969237 1.479926 13.646017 -7.861792 4.769845
2019-10-17 11:14:32.969237 3.376088 12.470308 -7.902426 4.735779
2019-10-18 11:14:32.969237 3.847433 12.177020 -6.719579 3.123475
2019-10-19 11:14:32.969237 3.904511 12.261467 -6.016796 3.419390
2019-10-20 11:14:32.969237 3.188237 14.305071 -6.137896 2.905813
2019-10-21 11:14:32.969237 4.006034 13.981431 -6.034235 2.483323
2019-10-22 11:14:32.969237 4.187015 14.311562 -6.466325 0.531675
2019-10-23 11:14:32.969237 4.928834 14.064165 -6.435447 -0.506871
[100 rows x 4 columns]
<matplotlib.axes._subplots.AxesSubplot at 0x1bc5148c748>
df = pd. DataFrame( { "revenue" : [ 57 , 68 , 63 , 71 , 72 , 90 , 80 , 62 , 59 , 51 , 47 , 52 ] ,
"advertising" : [ 2.1 , 1.9 , 2.7 , 3.0 , 3.6 , 3.2 , 2.7 , 2.4 , 1.8 , 1.6 , 1.3 , 1.9 ] ,
"month" : range ( 12 )
} )
ax = df. plot. bar( 'month' , 'revenue' , color= 'yellow' )
df. plot( 'month' , 'advertising' , secondary_y= True , ax= ax)
<matplotlib.axes._subplots.AxesSubplot at 0x1bc52686eb8>