from pandas import Series, DataFrame
import pandas as pd
import numpy as np
obj= pd. Series( [ 4 , 7 , - 5 , 3 ] )
obj
0 4
1 7
2 -5
3 3
dtype: int64
print ( type ( obj. values) )
print ( type ( obj. index) )
<class 'numpy.ndarray'>
<class 'pandas.core.indexes.range.RangeIndex'>
obj2= pd. Series( [ 4 , 7 , - 5 , 3 ] , index= [ "b" , "c" , "v" , "f" ] )
obj2
b 4
c 7
v -5
f 3
dtype: int64
obj2. index
Index(['b', 'c', 'v', 'f'], dtype='object')
print ( obj2[ "v" ] )
print ( obj2[ [ "c" , "v" , "f" ] ] )
-5
c 7
v -5
f 3
dtype: int64
obj2. index[ 3 ]
'f'
obj2* 2
b 8
c 14
v -10
f 6
dtype: int64
np. exp( obj2)
b 54.598150
c 1096.633158
v 0.006738
f 20.085537
dtype: float64
"b" in obj2
True
"r" in obj2
False
sdata= { "ohio" : 35000 , "texas" : 71000 , "oregon" : 16000 , "utah" : 5000 }
obj3= pd. Series( sdata)
obj3
ohio 35000
texas 71000
oregon 16000
utah 5000
dtype: int64
states= [ "california" , "ohio" , "michegan" , "utah" ]
obj4= pd. Series( sdata, index= states)
obj4
california NaN
ohio 35000.0
michegan NaN
utah 5000.0
dtype: float64
pd. isnull( obj4)
california True
ohio False
michegan True
utah False
dtype: bool
pd. notnull( obj4)
california False
ohio True
michegan False
utah True
dtype: bool
obj4. isnull( )
california True
ohio False
michegan True
utah False
dtype: bool
obj3
ohio 35000
texas 71000
oregon 16000
utah 5000
dtype: int64
obj4
california NaN
ohio 35000.0
michegan NaN
utah 5000.0
dtype: float64
obj3+ obj4
california NaN
michegan NaN
ohio 70000.0
oregon NaN
texas NaN
utah 10000.0
dtype: float64
obj4. name= "population"
obj4. index. name= "state"
obj4
state
california NaN
ohio 35000.0
michegan NaN
utah 5000.0
Name: population, dtype: float64
obj. index= [ "bob" , "steve" , "jeff" , 'ryan' ]
obj
bob 4
steve 7
jeff -5
ryan 3
dtype: int64
# Column-oriented sample data: each keyword becomes one DataFrame column,
# and all three lists have the same length (six rows).
data = dict(
    state=['Ohio', 'Ohio', 'Ohio', 'Nevada', 'Nevada', 'Nevada'],
    year=[2000, 2001, 2002, 2001, 2002, 2003],
    pop=[1.5, 1.7, 3.6, 2.4, 2.9, 3.2],
)
# No index is passed, so pandas assigns the default 0..5 integer index.
frame = pd.DataFrame(data)
frame
state year pop 0 Ohio 2000 1.5 1 Ohio 2001 1.7 2 Ohio 2002 3.6 3 Nevada 2001 2.4 4 Nevada 2002 2.9 5 Nevada 2003 3.2
print ( frame. head( ) )
state year pop
0 Ohio 2000 1.5
1 Ohio 2001 1.7
2 Ohio 2002 3.6
3 Nevada 2001 2.4
4 Nevada 2002 2.9
pd. DataFrame( data, columns= [ "year" , "state" , "pop" ] )
year state pop 0 2000 Ohio 1.5 1 2001 Ohio 1.7 2 2002 Ohio 3.6 3 2001 Nevada 2.4 4 2002 Nevada 2.9 5 2003 Nevada 3.2
frame2= pd. DataFrame( data, columns= [ "year" , 'state' , 'pop' , 'debt' ] , index= [ 'one' , 'two' , 'three' , 'four' , 'five' , 'six' ] )
print ( frame2)
year state pop debt
one 2000 Ohio 1.5 NaN
two 2001 Ohio 1.7 NaN
three 2002 Ohio 3.6 NaN
four 2001 Nevada 2.4 NaN
five 2002 Nevada 2.9 NaN
six 2003 Nevada 3.2 NaN
frame2. columns
Index(['year', 'state', 'pop', 'debt'], dtype='object')
frame2[ 'state' ]
one Ohio
two Ohio
three Ohio
four Nevada
five Nevada
six Nevada
Name: state, dtype: object
frame2. year
one 2000
two 2001
three 2002
four 2001
five 2002
six 2003
Name: year, dtype: int64
frame2. loc[ 'three' ]
year 2002
state Ohio
pop 3.6
debt NaN
Name: three, dtype: object
frame2[ 'debt' ] = 16.5
frame2
year state pop debt one 2000 Ohio 1.5 16.5 two 2001 Ohio 1.7 16.5 three 2002 Ohio 3.6 16.5 four 2001 Nevada 2.4 16.5 five 2002 Nevada 2.9 16.5 six 2003 Nevada 3.2 16.5
# Overwrite the 'debt' column with one float per row (0.0 .. 5.0).
# The float literal must be written `6.` with no space: `6 .` is a SyntaxError.
frame2['debt'] = np.arange(6.)
frame2
year state pop debt one 2000 Ohio 1.5 0.0 two 2001 Ohio 1.7 1.0 three 2002 Ohio 3.6 2.0 four 2001 Nevada 2.4 3.0 five 2002 Nevada 2.9 4.0 six 2003 Nevada 3.2 5.0
val= pd. Series( [ - 1.2 , - 1.5 , 3 ] , index= [ 'one' , 'two' , 'five' ] )
frame2[ 'debt' ] = val
frame2
year state pop debt one 2000 Ohio 1.5 -1.2 two 2001 Ohio 1.7 -1.5 three 2002 Ohio 3.6 NaN four 2001 Nevada 2.4 NaN five 2002 Nevada 2.9 3.0 six 2003 Nevada 3.2 NaN
frame2[ 'eastern' ] = frame2. state== 'Ohio'
frame2
year state pop debt eastern one 2000 Ohio 1.5 -1.2 True two 2001 Ohio 1.7 -1.5 True three 2002 Ohio 3.6 NaN True four 2001 Nevada 2.4 NaN False five 2002 Nevada 2.9 3.0 False six 2003 Nevada 3.2 NaN False
del frame2[ 'eastern' ]
frame2. columns
Index(['year', 'state', 'pop', 'debt'], dtype='object')
pop= { 'Neveda' : { 2001 : 2.4 , 2002 : 2.9 } ,
'Ohio' : { 2000 : 1.5 , 2001 : 1.7 , 2002 : 3.6 } }
frame3= pd. DataFrame( pop)
frame3
Neveda Ohio 2000 NaN 1.5 2001 2.4 1.7 2002 2.9 3.6
frame3. T
2000 2001 2002 Neveda NaN 2.4 2.9 Ohio 1.5 1.7 3.6
pd. DataFrame( pop, index= [ 2001 , 2002 , 2003 ] )
Neveda Ohio 2001 2.4 1.7 2002 2.9 3.6 2003 NaN NaN
pdata= { 'Ohio' : frame3[ 'Ohio' ] [ : - 1 ] ,
'Neveda' : frame3[ 'Neveda' ] [ : 3 ] }
pd. DataFrame( pdata)
Ohio Neveda 2000 1.5 NaN 2001 1.7 2.4 2002 NaN 2.9
frame3. index. name= 'year' ; frame3. columns. name= 'state'
frame3
state Neveda Ohio year 2000 NaN 1.5 2001 2.4 1.7 2002 2.9 3.6
frame3. values
array([[nan, 1.5],
[2.4, 1.7],
[2.9, 3.6]])
frame2. values
array([[2000, 'Ohio', 1.5, -1.2],
[2001, 'Ohio', 1.7, -1.5],
[2002, 'Ohio', 3.6, nan],
[2001, 'Nevada', 2.4, nan],
[2002, 'Nevada', 2.9, 3.0],
[2003, 'Nevada', 3.2, nan]], dtype=object)
obj= pd. Series( range ( 3 ) , index= [ 'a' , 'b' , 'c' ] )
index= obj. index
values= obj. values
obj
a 0
b 1
c 2
dtype: int64
index[ 1 : ]
Index(['b', 'c'], dtype='object')
index[ 1 ] = "w"
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-116-287780bd69fa> in <module>
----> 1 index[1]="w"
/anaconda3/lib/python3.7/site-packages/pandas/core/indexes/base.py in __setitem__(self, key, value)
3936
3937 def __setitem__(self, key, value):
-> 3938 raise TypeError("Index does not support mutable operations")
3939
3940 def __getitem__(self, key):
TypeError: Index does not support mutable operations
labels= pd. Index( np. arange( 3 ) )
labels
Int64Index([0, 1, 2], dtype='int64')
obj2= pd. Series( [ 1.5 , - 2.5 , 0 ] , index= labels)
obj2
0 1.5
1 -2.5
2 0.0
dtype: float64
obj2. index is labels
True
frame3
state Neveda Ohio year 2000 NaN 1.5 2001 2.4 1.7 2002 2.9 3.6
frame3. columns
Index(['Neveda', 'Ohio'], dtype='object', name='state')
'Ohio' in frame3. columns
True
2003 in frame3. index
False
dup_labels= pd. Index( [ 'foo' , 'foo' , 'bar' , 'bar' ] )
dup_labels
Index(['foo', 'foo', 'bar', 'bar'], dtype='object')
# Essential functionality (基本功能): reindexing (重新索引)
obj= pd. Series( [ 4.5 , 7.2 , - 5.3 , 3.6 ] , index= [ 'd' , 'b' , 'a' , 'c' ] )
obj
d 4.5
b 7.2
a -5.3
c 3.6
dtype: float64
obj2= obj. reindex( [ 'a' , 'b' , 'c' , 'd' , 'e' ] )
obj2
a -5.3
b 7.2
c 3.6
d 4.5
e NaN
dtype: float64
obj3= pd. Series( [ 'blue' , 'purple' , 'yellow' ] , index= [ 0 , 2 , 4 ] )
obj3
0 blue
2 purple
4 yellow
dtype: object
obj3. reindex( range ( 6 ) , method= 'ffill' )
0 blue
1 blue
2 purple
3 purple
4 yellow
5 yellow
dtype: object
frame= pd. DataFrame( np. arange( 9 ) . reshape( ( 3 , 3 ) ) ,
index= [ 'a' , 'c' , 'd' ] ,
columns= [ 'Ohio' , 'Texas' , 'California' ] )
frame
Ohio Texas California a 0 1 2 c 3 4 5 d 6 7 8
frame2= frame. reindex( [ 'a' , 'b' , 'c' , 'd' ] )
frame2
Ohio Texas California a 0.0 1.0 2.0 b NaN NaN NaN c 3.0 4.0 5.0 d 6.0 7.0 8.0
states= [ 'Texas' , 'Utah' , 'California' ]
frame. reindex( columns= states)
Texas Utah California a 1 NaN 2 c 4 NaN 5 d 7 NaN 8
# Float Series 0.0 .. 4.0 labelled 'a' .. 'e'.
# `5.` must be a single token; the split form `5 .` does not parse.
obj = pd.Series(np.arange(5.), index=['a', 'b', 'c', 'd', 'e'])
obj
a 0.0
b 1.0
c 2.0
d 3.0
e 4.0
dtype: float64
new_obj= obj. drop( 'c' )
new_obj
a 0.0
b 1.0
d 3.0
e 4.0
dtype: float64
obj. drop( [ 'd' , 'c' ] )
a 0.0
b 1.0
e 4.0
dtype: float64
data= pd. DataFrame( np. arange( 16 ) . reshape( 4 , 4 ) ,
index= [ 'ohio' , 'Colorado' , 'Utah' , 'Newyork' ] ,
columns= [ 'one' , 'two' , 'three' , 'four' ] )
data
one two three four ohio 0 1 2 3 Colorado 4 5 6 7 Utah 8 9 10 11 Newyork 12 13 14 15
data. drop( [ 'Colorado' , 'ohio' ] )
one two three four Utah 8 9 10 11 Newyork 12 13 14 15
data. drop( 'three' , axis= 1 )
one two four ohio 0 1 3 Colorado 4 5 7 Utah 8 9 11 Newyork 12 13 15
data. drop( [ 'two' , 'four' ] , axis= 1 )
one three ohio 0 2 Colorado 4 6 Utah 8 10 Newyork 12 14
obj. drop( 'c' , inplace= True )
obj
a 0.0
b 1.0
d 3.0
e 4.0
dtype: float64
# Float Series 0.0 .. 3.0 labelled 'a' .. 'd', used by the indexing examples.
# `4.` must be a single token; the split form `4 .` does not parse.
obj = pd.Series(np.arange(4.), index=['a', 'b', 'c', 'd'])
obj
a 0.0
b 1.0
c 2.0
d 3.0
dtype: float64
obj[ 'b' ]
1.0
# Positional lookup of the second element. Use .iloc explicitly: an integer
# key passed to [] on a string-labelled Series only works through a
# deprecated positional fallback.
obj.iloc[1]
1.0
obj[ 2 : 4 ]
c 2.0
d 3.0
dtype: float64
# Positional selection of the 2nd and 4th elements. .iloc makes the intent
# explicit instead of relying on []'s integer-to-position fallback.
obj.iloc[[1, 3]]
b 1.0
d 3.0
dtype: float64
obj[ [ 'b' , 'a' , 'd' ] ]
b 1.0
a 0.0
d 3.0
dtype: float64
obj[ obj< 2 ]
a 0.0
b 1.0
dtype: float64
obj[ 'b' : 'c' ]
b 1.0
c 2.0
dtype: float64
obj[ "b" : 'c' ] = 5
obj
a 0.0
b 5.0
c 5.0
d 3.0
dtype: float64
data= pd. DataFrame( np. arange( 16 ) . reshape( 4 , 4 ) ,
index= [ 'Ohio' , 'Colorado' , 'Utah' , 'California' ] ,
columns= [ 'one' , 'two' , 'three' , 'four' ] )
data
one two three four Ohio 0 1 2 3 Colorado 4 5 6 7 Utah 8 9 10 11 California 12 13 14 15
data[ 'two' ]
Ohio 1
Colorado 5
Utah 9
California 13
Name: two, dtype: int64
data[ [ 'three' , 'one' ] ]
three one Ohio 2 0 Colorado 6 4 Utah 10 8 California 14 12
data[ 'two' ]
Ohio 1
Colorado 5
Utah 9
California 13
Name: two, dtype: int64
data[ data[ 'three' ] > 5 ]
one two three four Colorado 4 5 6 7 Utah 8 9 10 11 California 12 13 14 15
data< 5
one two three four Ohio True True True True Colorado True False False False Utah False False False False California False False False False
data[ data< 5 ] = 0
data
one two three four Ohio 0 0 0 0 Colorado 0 5 6 7 Utah 8 9 10 11 California 12 13 14 15
data. loc[ 'Colorado' , [ 'one' , 'two' ] ]
one 0
two 5
Name: Colorado, dtype: int64
data. iloc[ 2 , [ 3 , 0 , 1 ] ]
four 11
one 8
two 9
Name: Utah, dtype: int64
data. iloc[ 2 ]
one 8
two 9
three 10
four 11
Name: Utah, dtype: int64
data. iloc[ [ 1 , 2 ] , [ 3 , 0 , 1 ] ]
four one two Colorado 7 0 5 Utah 11 8 9
data. loc[ : 'Utah' , 'two' ]
Ohio 0
Colorado 5
Utah 9
Name: two, dtype: int64
data. iloc[ : , : 3 ] [ data. three> 5 ]
one two three Colorado 0 5 6 Utah 8 9 10 California 12 13 14
# Float Series 0.0 .. 2.0 with the default integer index (0, 1, 2).
# `3.` must be a single token; the split form `3 .` does not parse.
ser = pd.Series(np.arange(3.))
ser
0 0.0
1 1.0
2 2.0
dtype: float64
# Same values as a 0..2 float range, but with string labels.
# `3.` must be a single token; the split form `3 .` does not parse.
ser2 = pd.Series(np.arange(3.), index=['a', 'b', 'c'])
# Last element by position. .iloc is unambiguous, whereas ser2[-1] depends on
# the deprecated integer-to-position fallback of [] on a non-integer index.
ser2.iloc[-1]
2.0
ser[ : 1 ]
0 0.0
dtype: float64
ser. loc[ : 1 ]
0 0.0
1 1.0
dtype: float64
ser. iloc[ : 1 ]
0 0.0
dtype: float64
# Two float Series whose labels only partly overlap: 'a', 'c' and 'e' occur
# in both, 'd' only in s1, 'f' and 'g' only in s2.
s1 = pd.Series([7.3, -2.5, 3.4, 1.5], index=list('acde'))
s2 = pd.Series([-2.1, 3.6, -1.5, 4, 3.1], index=list('acefg'))
s1
a 7.3
c -2.5
d 3.4
e 1.5
dtype: float64
s2
a -2.1
c 3.6
e -1.5
f 4.0
g 3.1
dtype: float64
s1+ s2
a 5.2
c 1.1
d NaN
e 0.0
f NaN
g NaN
dtype: float64
# Two float frames with partly overlapping row and column labels.
# The float literals `9.` / `12.` must be single tokens (`9 .` does not parse).
# NOTE: the label 'Colorato' is kept verbatim because the recorded outputs
# that follow use this spelling.
df1 = pd.DataFrame(
    np.arange(9.).reshape(3, 3),
    columns=list('bcd'),
    index=['Ohio', 'Texas', 'Colorato'],
)
df2 = pd.DataFrame(
    np.arange(12.).reshape(4, 3),
    columns=list('bde'),
    index=['Utah', 'Ohio', 'Texas', 'Oregon'],
)
df1
b c d Ohio 0.0 1.0 2.0 Texas 3.0 4.0 5.0 Colorato 6.0 7.0 8.0
df2
b d e Utah 0.0 1.0 2.0 Ohio 3.0 4.0 5.0 Texas 6.0 7.0 8.0 Oregon 9.0 10.0 11.0
df1+ df2
b c d e Colorato NaN NaN NaN NaN Ohio 3.0 NaN 6.0 NaN Oregon NaN NaN NaN NaN Texas 9.0 NaN 12.0 NaN Utah NaN NaN NaN NaN
df1= pd. DataFrame( { 'A' : [ 1 , 2 ] } )
df2= pd. DataFrame( { 'B' : [ 3 , 4 ] } )
df1
df2
df1- df2
# A 3x4 and a 4x5 float frame on the default integer index; df2 has one extra
# row (3) and one extra column ('e').
# The float literals `12.` / `20.` must be single tokens (`12 .` does not parse).
df1 = pd.DataFrame(np.arange(12.).reshape(3, 4), columns=list('abcd'))
df2 = pd.DataFrame(np.arange(20.).reshape(4, 5), columns=list('abcde'))
# Plant a missing value in the bottom-right cell of df2.
df2.loc[3, 'e'] = np.nan
df1
a b c d 0 0.0 1.0 2.0 3.0 1 4.0 5.0 6.0 7.0 2 8.0 9.0 10.0 11.0
df2
a b c d e 0 0.0 1.0 2.0 3.0 4.0 1 5.0 6.0 7.0 8.0 9.0 2 10.0 11.0 12.0 13.0 14.0 3 15.0 16.0 17.0 18.0 NaN
df1+ df2
a b c d e 0 0.0 2.0 4.0 6.0 NaN 1 9.0 11.0 13.0 15.0 NaN 2 18.0 20.0 22.0 24.0 NaN 3 NaN NaN NaN NaN NaN
df1. add( df2, fill_value= 0 )
a b c d e 0 0.0 2.0 4.0 6.0 4.0 1 9.0 11.0 13.0 15.0 9.0 2 18.0 20.0 22.0 24.0 14.0 3 15.0 16.0 17.0 18.0 NaN
1 / df1
a b c d 0 inf 1.000000 0.500000 0.333333 1 0.250 0.200000 0.166667 0.142857 2 0.125 0.111111 0.100000 0.090909
df1. rdiv( 1 )
a b c d 0 inf 1.000000 0.500000 0.333333 1 0.250 0.200000 0.166667 0.142857 2 0.125 0.111111 0.100000 0.090909
df1. reindex( columns= df2. columns, fill_value= 0 )
a b c d e 0 0.0 1.0 2.0 3.0 0 1 4.0 5.0 6.0 7.0 0 2 8.0 9.0 10.0 11.0 0
# 3x4 float array holding 0.0 .. 11.0 in row-major order.
# `12.` must be a single token; the split form `12 .` does not parse.
arr = np.arange(12.).reshape(3, 4)
arr
array([[ 0., 1., 2., 3.],
[ 4., 5., 6., 7.],
[ 8., 9., 10., 11.]])
arr[ 0 ]
array([0., 1., 2., 3.])
arr- arr[ 0 ]
array([[0., 0., 0., 0.],
[4., 4., 4., 4.],
[8., 8., 8., 8.]])
# 4x3 float frame (0.0 .. 11.0) with state names as row labels.
# `12.` must be a single token; the split form `12 .` does not parse.
frame = pd.DataFrame(
    np.arange(12.).reshape(4, 3),
    columns=list('bde'),
    index=['Utah', 'Ohio', 'Texas', 'Oregon'],
)
# First row as a Series: its index is the frame's columns, its name is 'Utah'.
series = frame.iloc[0]
frame
b d e Utah 0.0 1.0 2.0 Ohio 3.0 4.0 5.0 Texas 6.0 7.0 8.0 Oregon 9.0 10.0 11.0
series
b 0.0
d 1.0
e 2.0
Name: Utah, dtype: float64
frame- series
b d e Utah 0.0 0.0 0.0 Ohio 3.0 3.0 3.0 Texas 6.0 6.0 6.0 Oregon 9.0 9.0 9.0
series2= pd. Series( range ( 3 ) , index= [ 'b' , 'e' , 'f' ] )
frame+ series2
b d e f
Utah 0.0 NaN 3.0 NaN
Ohio 3.0 NaN 6.0 NaN
Texas 6.0 NaN 9.0 NaN
Oregon 9.0 NaN 12.0 NaN
series3= frame[ 'd' ]
frame
series3
Utah 1.0
Ohio 4.0
Texas 7.0
Oregon 10.0
Name: d, dtype: float64
frame. sub( series3)
Ohio Oregon Texas Utah b d e
Utah NaN NaN NaN NaN NaN NaN NaN
Ohio NaN NaN NaN NaN NaN NaN NaN
Texas NaN NaN NaN NaN NaN NaN NaN
Oregon NaN NaN NaN NaN NaN NaN NaN
frame= pd. DataFrame( np. random. randn( 4 , 3 ) ,
columns= list ( 'bde' ) ,
index= [ 'Utah' , 'Ohio' , 'Texas' , 'Oregon' ] )
frame
b d e Utah -0.674038 1.405883 1.123189 Ohio -0.901923 1.277413 -0.979557 Texas 0.063148 -2.040145 0.155796 Oregon -1.667468 -0.353109 -0.616387
np. abs ( frame)
b d e Utah 0.674038 1.405883 1.123189 Ohio 0.901923 1.277413 0.979557 Texas 0.063148 2.040145 0.155796 Oregon 1.667468 0.353109 0.616387
def f(x):
    """Return the spread of ``x``: its maximum minus its minimum.

    ``x`` only needs to provide ``max()`` and ``min()`` methods; when passed
    to ``DataFrame.apply`` it receives one column (or row) at a time.
    """
    return x.max() - x.min()
frame. apply ( f)
b 1.730616
d 3.446028
e 2.102747
dtype: float64
frame. apply ( f, axis= 'columns' )
Utah 2.079921
Ohio 2.256970
Texas 2.195941
Oregon 1.314358
dtype: float64
def f(x):
    """Return the minimum and maximum of ``x`` as a two-element Series.

    The result is labelled "min" and "max", so applying this function to a
    DataFrame yields one "min" row and one "max" row.
    """
    return pd.Series([x.min(), x.max()], index=["min", "max"])
frame. apply ( f)
b d e min -1.667468 -2.040145 -0.979557 max 0.063148 1.405883 1.123189
def format(x):
    """Render the number ``x`` as a string with exactly two decimal places.

    NOTE: this name shadows the builtin ``format``. It is kept unchanged
    because the following cells pass ``format`` to ``applymap`` and ``map``.
    """
    return '%.2f' % x
frame. applymap( format )
b d e
Utah -0.67 1.41 1.12
Ohio -0.90 1.28 -0.98
Texas 0.06 -2.04 0.16
Oregon -1.67 -0.35 -0.62
frame[ 'e' ] . map ( format )
Utah 1.12
Ohio -0.98
Texas 0.16
Oregon -0.62
Name: e, dtype: object
obj= pd. Series( range ( 4 ) , index= [ 'd' , 'a' , 'b' , 'c' ] )
obj. sort_index( )
a 1
b 2
c 3
d 0
dtype: int64
frame= pd. DataFrame( np. arange( 8 ) . reshape( 2 , 4 ) ,
index= [ 'three' , 'one' ] ,
columns= [ 'd' , 'a' , 'b' , 'c' ] )
frame. sort_index( )
frame. sort_index( axis= 1 )
frame. sort_index( axis= 1 , ascending= False )
obj= pd. Series( [ 4 , 7 , - 3 , 2 ] )
obj. sort_values( )
2 -3
3 2
0 4
1 7
dtype: int64
obj= pd. Series( [ 4 , np. nan, 7 , np. nan, - 3 , 2 ] )
obj. sort_values( )
4 -3.0
5 2.0
0 4.0
2 7.0
1 NaN
3 NaN
dtype: float64
frame= pd. DataFrame( { 'a' : [ 4 , 7 , - 3 , 2 ] , 'b' : [ 0 , 1 , 0 , 1 ] } )
frame
frame. sort_values( by= 'b' )
frame. sort_values( by= [ 'a' , 'b' ] )
obj= pd. Series( [ 7 , - 5 , 7 , 4 , 2 , 0 , 4 ] )
obj. rank( )
0 6.5
1 1.0
2 6.5
3 4.5
4 3.0
5 2.0
6 4.5
dtype: float64
obj. rank( method= 'first' )
0 6.0
1 1.0
2 7.0
3 4.0
4 3.0
5 2.0
6 5.0
dtype: float64
obj. rank( ascending= False , method= 'max' )
0 2.0
1 7.0
2 2.0
3 4.0
4 5.0
5 6.0
6 4.0
dtype: float64
frame = pd. DataFrame( { 'b' : [ 4.3 , 7 , - 3 , 2 ] , 'a' : [ 0 , 1 , 0 , 1 ] , 'c' : [ - 2 , 5 , 8 , - 2.5 ] } )
frame
b a c 0 4.3 0 -2.0 1 7.0 1 5.0 2 -3.0 0 8.0 3 2.0 1 -2.5
frame. rank( axis= 'columns' )
b a c 0 3.0 2.0 1.0 1 3.0 1.0 2.0 2 1.0 2.0 3.0 3 3.0 2.0 1.0
obj = pd. Series( range ( 5 ) , index= [ 'a' , 'a' , 'b' , 'b' , 'c' ] )
obj
a 0
a 1
b 2
b 3
c 4
dtype: int64
obj. index. is_unique
False
obj[ 'a' ]
a 0
a 1
dtype: int64
obj[ 'c' ]
4
df= pd. DataFrame( np. random. randn( 4 , 3 ) , index= [ 'a' , 'a' , 'b' , 'b' ] )
df
0 1 2 a 1.095240 0.137070 0.533132 a 0.470992 -0.038642 -0.118522 b 0.509320 -0.095165 1.565080 b 1.551403 -0.028062 0.090268
df. loc[ 'b' ]
0 1 2 b 0.509320 -0.095165 1.565080 b 1.551403 -0.028062 0.090268
# Small float frame containing missing values: row 'c' is entirely NaN and
# row 'a' is missing its 'two' entry.
df = pd.DataFrame(
    {
        'one': [1.4, 7.1, np.nan, 0.75],
        'two': [np.nan, -4.5, np.nan, -1.3],
    },
    index=['a', 'b', 'c', 'd'],
)
df
one two a 1.40 NaN b 7.10 -4.5 c NaN NaN d 0.75 -1.3
df. sum ( )
one 9.25
two -5.80
dtype: float64
df. sum ( axis= 1 )
a 1.40
b 2.60
c 0.00
d -0.55
dtype: float64
df. mean( axis= 'columns' , skipna= False )
a NaN
b 1.300
c NaN
d -0.275
dtype: float64
df. idxmin( )
one d
two b
dtype: object
df. cumsum( )
one two a 1.40 NaN b 8.50 -4.5 c NaN NaN d 9.25 -5.8
obj= pd. Series( [ 'a' , 'a' , 'b' , 'c' ] * 4 )
obj. describe( )
count 16
unique 3
top a
freq 8
dtype: object
import pandas_datareader. data as web
# Fetch data for each of the four tickers with pandas-datareader's
# get_data_yahoo; all_data maps ticker symbol -> the object it returns.
# NOTE(review): this presumably needs network access to Yahoo Finance —
# confirm the endpoint still works before relying on this cell.
all_data = { ticker: web. get_data_yahoo( ticker)
for ticker in [ 'AAPL' , 'IBM' , 'MSFT' , 'GOOG' ] }
# One column per ticker, taken from each download's 'Adj Close' entry.
price = pd. DataFrame( { ticker: data[ 'Adj Close' ]
for ticker, data in all_data. items( ) } )
# Same layout as price, built from each download's 'Volume' entry.
volume = pd. DataFrame( { ticker: data[ 'Volume' ]
for ticker, data in all_data. items( ) } )
# Row-over-row percentage change of price, computed per column.
returns = price. pct_change( )
returns. tail( )
AAPL IBM MSFT GOOG Date 2019-08-13 0.042348 0.012452 0.020694 0.019205 2019-08-14 -0.029765 -0.033434 -0.030114 -0.027546 2019-08-15 -0.004981 0.005105 -0.002239 0.002551 2019-08-16 0.023595 0.013948 0.018327 0.008858 2019-08-19 0.026344 0.016709 0.012047 0.022393
returns[ 'MSFT' ] . corr( returns[ 'IBM' ] )
0.4905235623531012
returns[ 'MSFT' ] . cov( returns[ 'IBM' ] )
8.766298066095883e-05
returns. MSFT. corr( returns. IBM)
0.4905235623531012
returns. corr( )
AAPL IBM MSFT GOOG AAPL 1.000000 0.384193 0.455895 0.461466 IBM 0.384193 1.000000 0.490524 0.404765 MSFT 0.455895 0.490524 1.000000 0.537158 GOOG 0.461466 0.404765 0.537158 1.000000
returns. cov( )
AAPL IBM MSFT GOOG AAPL 0.000267 0.000078 0.000108 0.000117 IBM 0.000078 0.000153 0.000088 0.000078 MSFT 0.000108 0.000088 0.000209 0.000121 GOOG 0.000117 0.000078 0.000121 0.000242
returns. corrwith( returns. IBM)
AAPL 0.384193
IBM 1.000000
MSFT 0.490524
GOOG 0.404765
dtype: float64
returns. corrwith( volume)
AAPL -0.062747
IBM -0.152642
MSFT -0.090553
GOOG -0.019246
dtype: float64
obj = pd. Series( [ 'c' , 'a' , 'd' , 'a' , 'a' , 'b' , 'b' , 'c' ,
'c' ] )
uniques= obj. unique( )
uniques
array(['c', 'a', 'd', 'b'], dtype=object)
obj. value_counts( )
a 3
c 3
b 2
d 1
dtype: int64
# Count occurrences in a raw array without sorting by frequency.
# Wrap the array in a Series and call the method: the top-level
# pd.value_counts function is deprecated in recent pandas releases.
pd.Series(obj.values).value_counts(sort=False)
c 3
d 1
b 2
a 3
dtype: int64
obj
0 c
1 a
2 d
3 a
4 a
5 b
6 b
7 c
8 c
dtype: object
mask= obj. isin( [ 'b' , 'c' ] )
mask
0 True
1 False
2 False
3 False
4 False
5 True
6 True
7 True
8 True
dtype: bool
obj[ mask]
0 c
5 b
6 b
7 c
8 c
dtype: object
to_match = pd. Series( [ 'c' , 'r' , 'b' , 'b' , 'd' , 'a' ] )
unique_vals = pd. Series( [ 'c' , 'b' , 'a' ] )
pd. Index( unique_vals) . get_indexer( to_match)
array([ 0, -1, 1, 1, -1, 2])
# Survey-style frame: three integer answer columns (Qu1..Qu3), five rows,
# written here row by row.
data = pd.DataFrame(
    [
        [1, 2, 1],
        [3, 3, 5],
        [4, 1, 2],
        [3, 2, 4],
        [4, 3, 4],
    ],
    columns=['Qu1', 'Qu2', 'Qu3'],
)
data
Qu1 Qu2 Qu3 0 1 2 1 1 3 3 5 2 4 1 2 3 3 2 4 4 4 3 4
# Per-column histogram: count each distinct value in every column, then
# replace the NaN left for values a column never contains with 0.
# pd.Series.value_counts is used because the top-level pd.value_counts
# function is deprecated in recent pandas releases.
result = data.apply(pd.Series.value_counts).fillna(0)
result
Qu1 Qu2 Qu3 1 1.0 1.0 1.0 2 0.0 2.0 1.0 3 2.0 2.0 0.0 4 2.0 0.0 2.0 5 0.0 0.0 1.0