import pandas as pd
import numpy as np
from pandas import Series, DataFrame
obj = pd. Series( [ 4 , 7 , - 5 , 3 ] )
obj
0 4
1 7
2 -5
3 3
dtype: int64
obj. values
array([ 4, 7, -5, 3], dtype=int64)
obj. index
RangeIndex(start=0, stop=4, step=1)
obj2 = pd. Series( [ 4 , 7 , - 5 , 3 ] , index = [ 'd' , 'b' , 'a' , 'c' ] )
obj2
d 4
b 7
a -5
c 3
dtype: int64
obj2. index
Index(['d', 'b', 'a', 'c'], dtype='object')
obj2[ 'a' ]
-5
obj2[ 'd' ] = 6
obj2[ [ 'c' , 'a' , 'd' ] ]
c 3
a -5
d 6
dtype: int64
obj2[ obj2> 0 ]
d 6
b 7
c 3
dtype: int64
obj2* 2
d 12
b 14
a -10
c 6
dtype: int64
np. exp( obj2)
d 403.428793
b 1096.633158
a 0.006738
c 20.085537
dtype: float64
'b' in obj2
True
'r' in obj2
False
sdata = { 'Ohio' : 35000 , 'Texas' : 71000 , 'Oregon' : 16000 , 'Utah' : 5000 }
obj3 = pd. Series( sdata)
obj3
Ohio 35000
Texas 71000
Oregon 16000
Utah 5000
dtype: int64
states = [ 'California' , 'Ohio' , 'Oregon' , 'Texas' ]
obj4 = pd. Series( sdata, index= states)
obj4
California NaN
Ohio 35000.0
Oregon 16000.0
Texas 71000.0
dtype: float64
pd. isnull( obj4)
California True
Ohio False
Oregon False
Texas False
dtype: bool
pd. notnull( obj4)
California False
Ohio True
Oregon True
Texas True
dtype: bool
obj4. isnull( )
California True
Ohio False
Oregon False
Texas False
dtype: bool
obj3
Ohio 35000
Texas 71000
Oregon 16000
Utah 5000
dtype: int64
obj4
California NaN
Ohio 35000.0
Oregon 16000.0
Texas 71000.0
dtype: float64
obj3 + obj4
California NaN
Ohio 70000.0
Oregon 32000.0
Texas 142000.0
Utah NaN
dtype: float64
obj4. name = 'population'
obj4. index. name = 'state'
obj4
state
California NaN
Ohio 35000.0
Oregon 16000.0
Texas 71000.0
Name: population, dtype: float64
obj
0 4
1 7
2 -5
3 3
dtype: int64
obj. index = [ 'Bob' , 'Steve' , 'Jeff' , 'Ryan' ]
obj
Bob 4
Steve 7
Jeff -5
Ryan 3
dtype: int64
# Column-oriented sample data: one equal-length list per column.
data = {
    'state': ['Ohio', 'Ohio', 'Ohio', 'Nevada', 'Nevada', 'Nevada'],
    'year': [2000, 2001, 2002, 2001, 2002, 2003],
    'pop': [1.5, 1.7, 3.6, 2.4, 2.9, 3.2],
}
# Each dict key becomes a column of the resulting frame.
frame = pd.DataFrame(data)
frame
    state  year  pop
0    Ohio  2000  1.5
1    Ohio  2001  1.7
2    Ohio  2002  3.6
3  Nevada  2001  2.4
4  Nevada  2002  2.9
5  Nevada  2003  3.2
frame. head( )
state year pop 0 Ohio 2000 1.5 1 Ohio 2001 1.7 2 Ohio 2002 3.6 3 Nevada 2001 2.4 4 Nevada 2002 2.9
pd. DataFrame( data, columns= [ 'year' , 'state' , 'pop' ] )
year state pop 0 2000 Ohio 1.5 1 2001 Ohio 1.7 2 2002 Ohio 3.6 3 2001 Nevada 2.4 4 2002 Nevada 2.9 5 2003 Nevada 3.2
frame2 = pd. DataFrame( data, columns= [ 'year' , 'state' , 'pop' , 'debt' ] ,
index= [ 'one' , 'two' , 'three' , 'four' , 'five' , 'six' ] )
frame2
       year   state  pop debt
one    2000    Ohio  1.5  NaN
two    2001    Ohio  1.7  NaN
three  2002    Ohio  3.6  NaN
four   2001  Nevada  2.4  NaN
five   2002  Nevada  2.9  NaN
six    2003  Nevada  3.2  NaN
frame2. columns
Index(['year', 'state', 'pop', 'debt'], dtype='object')
frame2[ 'state' ]
one Ohio
two Ohio
three Ohio
four Nevada
five Nevada
six Nevada
Name: state, dtype: object
frame2. year
one 2000
two 2001
three 2002
four 2001
five 2002
six 2003
Name: year, dtype: int64
frame2. loc[ 'three' ]
year 2002
state Ohio
pop 3.6
debt NaN
Name: three, dtype: object
frame2[ 'debt' ] = 16.5
frame2
year state pop debt one 2000 Ohio 1.5 16.5 two 2001 Ohio 1.7 16.5 three 2002 Ohio 3.6 16.5 four 2001 Nevada 2.4 16.5 five 2002 Nevada 2.9 16.5 six 2003 Nevada 3.2 16.5
# Fill the column with the floats 0.0-5.0; the float literal must be
# written `6.` (with a space, `6 .` does not parse).
frame2['debt'] = np.arange(6.)
frame2
year state pop debt one 2000 Ohio 1.5 0.0 two 2001 Ohio 1.7 1.0 three 2002 Ohio 3.6 2.0 four 2001 Nevada 2.4 3.0 five 2002 Nevada 2.9 4.0 six 2003 Nevada 3.2 5.0
val = pd. Series( [ - 1.2 , - 1.5 , - 1.7 ] , index = [ 'two' , 'four' , 'five' ] )
frame2[ 'debt' ] = val
frame2
year state pop debt one 2000 Ohio 1.5 NaN two 2001 Ohio 1.7 -1.2 three 2002 Ohio 3.6 NaN four 2001 Nevada 2.4 -1.5 five 2002 Nevada 2.9 -1.7 six 2003 Nevada 3.2 NaN
frame2[ 'eastern' ] = frame2. state == 'Ohio'
frame2
year state pop debt eastern one 2000 Ohio 1.5 NaN True two 2001 Ohio 1.7 -1.2 True three 2002 Ohio 3.6 NaN True four 2001 Nevada 2.4 -1.5 False five 2002 Nevada 2.9 -1.7 False six 2003 Nevada 3.2 NaN False
del frame2[ 'eastern' ]
frame2
year state pop debt one 2000 Ohio 1.5 NaN two 2001 Ohio 1.7 -1.2 three 2002 Ohio 3.6 NaN four 2001 Nevada 2.4 -1.5 five 2002 Nevada 2.9 -1.7 six 2003 Nevada 3.2 NaN
# Nested dict: outer keys become the columns, inner keys the row labels.
pop = {
    'Nevada': {2001: 2.4, 2002: 2.9},
    'Ohio': {2000: 1.5, 2001: 1.7, 2002: 3.6},
}
frame3 = pd.DataFrame(data=pop)
frame3
      Nevada  Ohio
2000     NaN   1.5
2001     2.4   1.7
2002     2.9   3.6
frame3. T
2000 2001 2002 Nevada NaN 2.4 2.9 Ohio 1.5 1.7 3.6
# Name both axes; the names are shown when the frame is displayed.
frame3.index.name = 'year'
frame3.columns.name = 'state'
frame3
state Nevada Ohio year 2000 NaN 1.5 2001 2.4 1.7 2002 2.9 3.6
frame3. values
array([[nan, 1.5],
[2.4, 1.7],
[2.9, 3.6]])
frame2. values
array([[2000, 'Ohio', 1.5, nan],
[2001, 'Ohio', 1.7, -1.2],
[2002, 'Ohio', 3.6, nan],
[2001, 'Nevada', 2.4, -1.5],
[2002, 'Nevada', 2.9, -1.7],
[2003, 'Nevada', 3.2, nan]], dtype=object)
obj = pd. Series( range ( 3 ) , index= [ 'a' , 'b' , 'c' ] )
index = obj. index
index
Index(['a', 'b', 'c'], dtype='object')
index[ 1 : ]
Index(['b', 'c'], dtype='object')
labels = pd. Index( np. arange( 3 ) )
labels
Int64Index([0, 1, 2], dtype='int64')
obj2 = pd. Series( [ 1.5 , - 2.5 , 0 ] , index= labels)
obj2
0 1.5
1 -2.5
2 0.0
dtype: float64
obj2. index is labels
True
frame3
Nevada Ohio 2000 NaN 1.5 2001 2.4 1.7 2002 2.9 3.6
frame3. columns
Index(['Nevada', 'Ohio'], dtype='object')
'Ohio' in frame3. columns
True
dup_labels = pd. Index( [ 'foo' , 'foo' , 'bar' , 'bar' ] )
dup_labels
Index(['foo', 'foo', 'bar', 'bar'], dtype='object')
obj = pd. Series( [ 4.5 , 7.2 , - 5.3 , 3.6 ] , index= [ 'd' , 'b' , 'a' , 'c' ] )
obj
d 4.5
b 7.2
a -5.3
c 3.6
dtype: float64
obj2 = obj. reindex( [ 'a' , 'b' , 'c' , 'd' , 'e' ] )
obj2
a -5.3
b 7.2
c 3.6
d 4.5
e NaN
dtype: float64
obj3 = pd. Series( [ 'blue' , 'purple' , 'yellow' ] , index= [ 0 , 2 , 4 ] )
obj3
0 blue
2 purple
4 yellow
dtype: object
obj3. reindex( range ( 6 ) , method= 'ffill' )
0 blue
1 blue
2 purple
3 purple
4 yellow
5 yellow
dtype: object
# 3x3 frame holding 0..8; row label 'b' is left out of the index.
frame = pd.DataFrame(
    data=np.arange(9).reshape(3, 3),
    index=['a', 'c', 'd'],
    columns=['Ohio', 'Texas', 'California'],
)
frame
Ohio Texas California a 0 1 2 c 3 4 5 d 6 7 8
frame2 = frame. reindex( [ 'a' , 'b' , 'c' , 'd' ] )
frame2
Ohio Texas California a 0.0 1.0 2.0 b NaN NaN NaN c 3.0 4.0 5.0 d 6.0 7.0 8.0
states = [ 'Texas' , 'Utah' , 'California' ]
frame. reindex( columns= states)
Texas Utah California a 1 NaN 2 c 4 NaN 5 d 7 NaN 8
# Float series 0.0-4.0 labelled 'a'..'e'; the literal must be `5.`
# (with a space, `5 .` does not parse).
obj = pd.Series(np.arange(5.), index=['a', 'b', 'c', 'd', 'e'])
obj
a 0.0
b 1.0
c 2.0
d 3.0
e 4.0
dtype: float64
new_obj = obj. drop( 'c' )
new_obj
a 0.0
b 1.0
d 3.0
e 4.0
dtype: float64
obj. drop( [ 'd' , 'c' ] )
a 0.0
b 1.0
e 4.0
dtype: float64
# 4x4 frame holding 0..15, rows labelled by state and columns by ordinal.
data = pd.DataFrame(
    data=np.arange(16).reshape(4, 4),
    index=['Ohio', 'Colorado', 'Utah', 'New York'],
    columns=['one', 'two', 'three', 'four'],
)
data
one two three four Ohio 0 1 2 3 Colorado 4 5 6 7 Utah 8 9 10 11 New York 12 13 14 15
data. drop( [ 'Colorado' , 'Ohio' ] )
one two three four Utah 8 9 10 11 New York 12 13 14 15
data. drop( 'two' , axis= 1 )
one three four Ohio 0 2 3 Colorado 4 6 7 Utah 8 10 11 New York 12 14 15
data. drop( [ 'two' , 'four' ] , axis = 'columns' )
one three Ohio 0 2 Colorado 4 6 Utah 8 10 New York 12 14
obj
a 0.0
b 1.0
c 2.0
d 3.0
e 4.0
dtype: float64
obj. drop( 'c' , inplace= True )
obj
a 0.0
b 1.0
d 3.0
e 4.0
dtype: float64
obj
a 0.0
b 1.0
d 3.0
e 4.0
dtype: float64
obj = pd. Series( np. arange( 4 ) , index= [ 'a' , 'b' , 'c' , 'd' ] )
obj
a 0
b 1
c 2
d 3
dtype: int32
obj[ 'b' ]
1
obj[ 1 ]
1
obj[ 2 : 4 ]
c 2
d 3
dtype: int32
obj[ [ 'b' , 'a' , 'd' ] ]
b 1
a 0
d 3
dtype: int32
obj[ [ 1 , 3 ] ]
b 1
d 3
dtype: int32
obj[ obj< 2 ]
a 0
b 1
dtype: int32
obj[ 'b' : 'c' ]
b 1
c 2
dtype: int32
obj[ 'b' : 'c' ] = 5
obj
a 0
b 5
c 5
d 3
dtype: int32
# Rebuild the 4x4 frame of 0..15 used for the selection examples below.
data = pd.DataFrame(
    data=np.arange(16).reshape(4, 4),
    index=['Ohio', 'Colorado', 'Utah', 'New York'],
    columns=['one', 'two', 'three', 'four'],
)
data
one two three four Ohio 0 1 2 3 Colorado 4 5 6 7 Utah 8 9 10 11 New York 12 13 14 15
data[ 'two' ]
Ohio 1
Colorado 5
Utah 9
New York 13
Name: two, dtype: int32
data[ [ 'three' , 'one' ] ]
three one Ohio 2 0 Colorado 6 4 Utah 10 8 New York 14 12
data[ : 2 ]
one two three four Ohio 0 1 2 3 Colorado 4 5 6 7
data[ data[ 'three' ] > 5 ]
one two three four Colorado 4 5 6 7 Utah 8 9 10 11 New York 12 13 14 15
data< 5
one two three four Ohio True True True True Colorado True False False False Utah False False False False New York False False False False
data[ data< 5 ] = 0
data
one two three four Ohio 0 0 0 0 Colorado 0 5 6 7 Utah 8 9 10 11 New York 12 13 14 15
data. loc[ 'Colorado' , [ 'two' , 'three' ] ]
two 5
three 6
Name: Colorado, dtype: int32
data. iloc[ 2 , [ 3 , 0 , 1 ] ]
four 11
one 8
two 9
Name: Utah, dtype: int32
data. iloc[ 2 ]
one 8
two 9
three 10
four 11
Name: Utah, dtype: int32
data. iloc[ [ 1 , 2 ] , [ 3 , 0 , 1 ] ]
four one two Colorado 7 0 5 Utah 11 8 9
data. loc[ : 'Utah' , 'two' ]
Ohio 0
Colorado 5
Utah 9
Name: two, dtype: int32
data. iloc[ : , : 3 ] [ data. three> 5 ]
one two three Colorado 0 5 6 Utah 8 9 10 New York 12 13 14
ser = pd. Series( np. arange( 3 ) )
ser
0 0
1 1
2 2
dtype: int32
ser[ - 1 ]
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
<ipython-input-73-44969a759c20> in <module>()
----> 1 ser[-1]
C:\Anaconda\lib\site-packages\pandas\core\series.py in __getitem__(self, key)
765 key = com._apply_if_callable(key, self)
766 try:
--> 767 result = self.index.get_value(self, key)
768
769 if not is_scalar(result):
C:\Anaconda\lib\site-packages\pandas\core\indexes\base.py in get_value(self, series, key)
3116 try:
3117 return self._engine.get_value(s, k,
-> 3118 tz=getattr(series.dtype, 'tz', None))
3119 except KeyError as e1:
3120 if len(self) > 0 and self.inferred_type in ['integer', 'boolean']:
pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_value()
pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_value()
pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas\_libs\hashtable_class_helper.pxi in pandas._libs.hashtable.Int64HashTable.get_item()
pandas\_libs\hashtable_class_helper.pxi in pandas._libs.hashtable.Int64HashTable.get_item()
KeyError: -1
# Float series 0.0-2.0 with string labels; the literal must be `3.`
# (with a space, `3 .` does not parse).
ser2 = pd.Series(np.arange(3.), index=['a', 'b', 'c'])
ser2[ - 1 ]
2.0
ser[ : 1 ]
0 0
dtype: int32
ser. loc[ : 1 ]
0 0
1 1
dtype: int32
# Two series whose indexes only partly overlap ('a', 'c' and 'e' are shared).
s1 = pd.Series(
    [7.3, -2.5, 3.4, 1.5],
    index=['a', 'c', 'd', 'e'],
)
s2 = pd.Series(
    [-2.1, 3.6, -1.5, 4, 3.1],
    index=['a', 'c', 'e', 'f', 'g'],
)
s1
a 7.3
c -2.5
d 3.4
e 1.5
dtype: float64
s2
a -2.1
c 3.6
e -1.5
f 4.0
g 3.1
dtype: float64
s1 + s2
a 5.2
c 1.1
d NaN
e 0.0
f NaN
g NaN
dtype: float64
# Two float frames that share only some row and column labels.
# The float literals must be `9.` / `12.` (`9 .` does not parse).
df1 = pd.DataFrame(
    np.arange(9.).reshape((3, 3)),
    columns=list('bcd'),
    index=['Ohio', 'Texas', 'Colorado'],
)
df2 = pd.DataFrame(
    np.arange(12.).reshape((4, 3)),
    columns=list('bde'),
    index=['Utah', 'Ohio', 'Texas', 'Oregon'],
)
df1
b c d Ohio 0.0 1.0 2.0 Texas 3.0 4.0 5.0 Colorado 6.0 7.0 8.0
df2
b d e Utah 0.0 1.0 2.0 Ohio 3.0 4.0 5.0 Texas 6.0 7.0 8.0 Oregon 9.0 10.0 11.0
df1 + df2
            b   c     d   e
Colorado  NaN NaN   NaN NaN
Ohio      3.0 NaN   6.0 NaN
Oregon    NaN NaN   NaN NaN
Texas     9.0 NaN  12.0 NaN
Utah      NaN NaN   NaN NaN
# Two float frames of different shapes with default integer row labels.
# The float literals must be `12.` / `20.` (`12 .` does not parse).
df1 = pd.DataFrame(
    np.arange(12.).reshape((3, 4)),
    columns=list('abcd'),
)
df2 = pd.DataFrame(
    np.arange(20.).reshape((4, 5)),
    columns=list('abcde'),
)
# Plant a single missing value in df2.
df2.loc[1, 'b'] = np.nan
df1
a b c d 0 0.0 1.0 2.0 3.0 1 4.0 5.0 6.0 7.0 2 8.0 9.0 10.0 11.0
df2
a b c d e 0 0.0 1.0 2.0 3.0 4.0 1 5.0 NaN 7.0 8.0 9.0 2 10.0 11.0 12.0 13.0 14.0 3 15.0 16.0 17.0 18.0 19.0
df1 + df2
a b c d e 0 0.0 2.0 4.0 6.0 NaN 1 9.0 NaN 13.0 15.0 NaN 2 18.0 20.0 22.0 24.0 NaN 3 NaN NaN NaN NaN NaN
df1. add( df2, fill_value= 0 )
a b c d e 0 0.0 2.0 4.0 6.0 4.0 1 9.0 5.0 13.0 15.0 9.0 2 18.0 20.0 22.0 24.0 14.0 3 15.0 16.0 17.0 18.0 19.0
arr = np. arange( 12 ) . reshape( 3 , 4 )
arr
array([[ 0, 1, 2, 3],
[ 4, 5, 6, 7],
[ 8, 9, 10, 11]])
arr[ 0 ]
array([0, 1, 2, 3])
arr - arr[ 0 ]
array([[0, 0, 0, 0],
[4, 4, 4, 4],
[8, 8, 8, 8]])
# 4x3 float frame; the float literal must be `12.` (`12 .` does not parse).
frame = pd.DataFrame(
    np.arange(12.).reshape((4, 3)),
    columns=list('bde'),
    index=['Utah', 'Ohio', 'Texas', 'Oregon'],
)
# First row of the frame as a Series labelled by the column names.
series = frame.iloc[0]
frame
b d e Utah 0.0 1.0 2.0 Ohio 3.0 4.0 5.0 Texas 6.0 7.0 8.0 Oregon 9.0 10.0 11.0
series
b 0.0
d 1.0
e 2.0
Name: Utah, dtype: float64
frame - series
b d e Utah 0.0 0.0 0.0 Ohio 3.0 3.0 3.0 Texas 6.0 6.0 6.0 Oregon 9.0 9.0 9.0
series2 = pd. Series( range ( 3 ) , index= [ 'b' , 'e' , 'f' ] )
frame + series2
b d e f Utah 0.0 NaN 3.0 NaN Ohio 3.0 NaN 6.0 NaN Texas 6.0 NaN 9.0 NaN Oregon 9.0 NaN 12.0 NaN
series3 = frame[ 'd' ]
frame
b d e Utah 0.0 1.0 2.0 Ohio 3.0 4.0 5.0 Texas 6.0 7.0 8.0 Oregon 9.0 10.0 11.0
series3
Utah 1.0
Ohio 4.0
Texas 7.0
Oregon 10.0
Name: d, dtype: float64
frame. sub( series3, axis= 'index' )
b d e Utah -1.0 0.0 1.0 Ohio -1.0 0.0 1.0 Texas -1.0 0.0 1.0 Oregon -1.0 0.0 1.0
# 4x3 frame of random normal draws (values differ on every run).
frame = pd.DataFrame(
    data=np.random.randn(4, 3),
    columns=list('bde'),
    index=['Utah', 'Ohio', 'Texas', 'Oregon'],
)
frame
b d e Utah -0.951265 -0.498273 -0.388690 Ohio 1.988546 0.370789 -0.488038 Texas 0.692938 -0.160944 0.654771 Oregon -1.314237 1.163286 -1.687210
np. abs ( frame)
b d e Utah 0.951265 0.498273 0.388690 Ohio 1.988546 0.370789 0.488038 Texas 0.692938 0.160944 0.654771 Oregon 1.314237 1.163286 1.687210
def f(x):
    """Return the spread of *x*: its maximum minus its minimum."""
    return x.max() - x.min()
frame. apply ( f)
b 3.302783
d 1.661559
e 2.341980
dtype: float64
frame. apply ( f, axis= 'columns' )
Utah 0.562574
Ohio 2.476585
Texas 0.853882
Oregon 2.850495
dtype: float64
def f(x):
    """Return the minimum and maximum of *x* as a Series.

    The result is labelled 'min' and 'max', in that order.
    """
    return pd.Series([x.min(), x.max()], index=['min', 'max'])
frame. apply ( f)
b d e min -1.314237 -0.498273 -1.687210 max 1.988546 1.163286 0.654771
# NOTE: this name shadows the builtin `format`; it is kept because the
# following statements pass `format` to applymap/map.
def format(x):
    """Render *x* as a string with two digits after the decimal point."""
    return '%.2f' % x
frame. applymap( format )
b d e Utah -0.95 -0.50 -0.39 Ohio 1.99 0.37 -0.49 Texas 0.69 -0.16 0.65 Oregon -1.31 1.16 -1.69
frame[ 'e' ] . map ( format )
Utah -0.39
Ohio -0.49
Texas 0.65
Oregon -1.69
Name: e, dtype: object
obj = pd. Series( range ( 4 ) , index= [ 'd' , 'a' , 'b' , 'c' ] )
obj. sort_index( )
a 1
b 2
c 3
d 0
dtype: int64
# 2x4 frame of 0..7 whose row and column labels are deliberately unsorted.
frame = pd.DataFrame(
    data=np.arange(8).reshape(2, 4),
    index=['three', 'one'],
    columns=['d', 'a', 'b', 'c'],
)
frame. sort_index( )
frame. sort_index( axis= 1 , ascending= False )
obj = pd. Series( [ 4 , 7 , - 3 , 2 ] )
obj. sort_values( )
2 -3
3 2
0 4
1 7
dtype: int64
obj = pd. Series( [ 4 , np. nan, 7 , np. nan, - 3 , 2 ] )
obj. sort_values( )
4 -3.0
5 2.0
0 4.0
2 7.0
1 NaN
3 NaN
dtype: float64
frame = pd. DataFrame( { 'b' : [ 4 , 7 , - 3 , 2 ] , 'a' : [ 0 , 1 , 0 , 1 ] } )
frame
frame. sort_values( by= 'b' )
frame. sort_values( by= [ 'a' , 'b' ] )
obj= pd. Series( [ 7 , - 5 , 7 , 4 , 2 , 0 , 4 ] )
obj. rank( )
0 6.5
1 1.0
2 6.5
3 4.5
4 3.0
5 2.0
6 4.5
dtype: float64
obj. rank( method= 'first' )
0 6.0
1 1.0
2 7.0
3 4.0
4 3.0
5 2.0
6 5.0
dtype: float64
obj. rank( ascending= False , method= 'max' )
0 2.0
1 7.0
2 2.0
3 4.0
4 5.0
5 6.0
6 4.0
dtype: float64
# Three numeric columns ('b', 'a', 'c') with four rows each.
frame = pd.DataFrame(
    {
        'b': [4.3, 7, -3, 2],
        'a': [0, 1, 0, 1],
        'c': [-2, 5, 8, -2.5],
    }
)
frame
b a c 0 4.3 0 -2.0 1 7.0 1 5.0 2 -3.0 0 8.0 3 2.0 1 -2.5
frame. rank( axis= 'columns' )
b a c 0 3.0 2.0 1.0 1 3.0 1.0 2.0 2 1.0 2.0 3.0 3 3.0 2.0 1.0
obj = pd. Series( range ( 5 ) , index= [ 'a' , 'a' , 'b' , 'b' , 'c' ] )
obj
a 0
a 1
b 2
b 3
c 4
dtype: int64
obj. index. is_unique
False
obj[ 'a' ]
a 0
a 1
dtype: int64
obj[ 'b' ]
b 2
b 3
dtype: int64
df = pd. DataFrame( np. random. randn( 4 , 3 ) , index= [ 'a' , 'a' , 'b' , 'b' ] )
df
0 1 2 a 1.265240 0.407293 -0.652129 a 0.268019 -1.423912 1.297783 b 0.797760 -0.353663 1.323543 b 0.961888 0.227132 1.843558
df. loc[ 'b' ]
0 1 2 b 0.797760 -0.353663 1.323543 b 0.961888 0.227132 1.843558
# Small 4x2 frame containing three missing values.
df = pd.DataFrame(
    [
        [1.4, np.nan],
        [7.1, -4.5],
        [np.nan, np.nan],
        [0.75, -1.3],
    ],
    index=['a', 'b', 'c', 'd'],
    columns=['one', 'two'],
)
df
one two a 1.40 NaN b 7.10 -4.5 c NaN NaN d 0.75 -1.3
df. sum ( )
one 9.25
two -5.80
dtype: float64
df. sum ( axis= 1 )
a 1.40
b 2.60
c 0.00
d -0.55
dtype: float64
df. mean( axis= 'columns' , skipna= False )
a NaN
b 1.300
c NaN
d -0.275
dtype: float64
df. idxmax( )
one b
two d
dtype: object
df. cumsum( )
one two a 1.40 NaN b 8.50 -4.5 c NaN NaN d 9.25 -5.8
df. describe( )
one two count 3.000000 2.000000 mean 3.083333 -2.900000 std 3.493685 2.262742 min 0.750000 -4.500000 25% 1.075000 -3.700000 50% 1.400000 -2.900000 75% 4.250000 -2.100000 max 7.100000 -1.300000
obj = pd. Series( [ 'a' , 'a' , 'b' , 'c' ] * 4 )
obj. describe( )
count 16
unique 3
top a
freq 8
dtype: object
import pandas_datareader. data as web
# Fetch one result per ticker via pandas_datareader, keyed by ticker.
all_data = {
    symbol: web.get_data_yahoo(symbol)
    for symbol in ['AAPL', 'IBM', 'MSFT', 'GOOG']
}
# One column per ticker, taken from that ticker's 'Adj Close' column.
price = pd.DataFrame(
    {symbol: quotes['Adj Close'] for symbol, quotes in all_data.items()}
)
# Same layout, built from each ticker's 'Volume' column.
volume = pd.DataFrame(
    {symbol: quotes['Volume'] for symbol, quotes in all_data.items()}
)
# Row-over-row percent change of the prices.
returns = price.pct_change()
returns. tail( )
AAPL IBM MSFT GOOG Date 2018-10-18 -0.023374 -0.026110 -0.019962 -0.024846 2018-10-19 0.015230 -0.011107 0.001475 0.007804 2018-10-22 0.006110 0.007126 0.008927 0.004287 2018-10-23 0.009427 0.009152 -0.013956 0.002297 2018-10-24 -0.034302 -0.030486 -0.053469 -0.048003
returns[ 'MSFT' ] . corr( returns[ 'IBM' ] )
0.4746674318628231
returns[ "MSFT" ] . cov( returns[ "IBM" ] )
8.150193655338736e-05
returns. MSFT. corr( returns. IBM)
0.4746674318628231
returns. corr( )
AAPL IBM MSFT GOOG AAPL 1.000000 0.364434 0.421984 0.438015 IBM 0.364434 1.000000 0.474667 0.398449 MSFT 0.421984 0.474667 1.000000 0.516364 GOOG 0.438015 0.398449 0.516364 1.000000
returns. cov( )
AAPL IBM MSFT GOOG AAPL 0.000252 0.000070 0.000095 0.000106 IBM 0.000070 0.000146 0.000082 0.000073 MSFT 0.000095 0.000082 0.000202 0.000112 GOOG 0.000106 0.000073 0.000112 0.000232
returns. corrwith( returns. IBM)
AAPL 0.364434
IBM 1.000000
MSFT 0.474667
GOOG 0.398449
dtype: float64
returns. corrwith( volume)
AAPL -0.065065
IBM -0.173822
MSFT -0.088563
GOOG -0.016396
dtype: float64
obj = pd. Series( [ 'c' , 'a' , 'd' , 'a' , 'a' , 'b' , 'b' , 'c' , 'c' ] )
uniques = obj. unique( )
uniques
array(['c', 'a', 'd', 'b'], dtype=object)
obj. value_counts( )
c 3
a 3
b 2
d 1
dtype: int64
obj
0 c
1 a
2 d
3 a
4 a
5 b
6 b
7 c
8 c
dtype: object
mask = obj. isin( [ 'b' , 'c' ] )
mask
0 True
1 False
2 False
3 False
4 False
5 True
6 True
7 True
8 True
dtype: bool
obj[ mask]
0 c
5 b
6 b
7 c
8 c
dtype: object
to_match = pd. Series( [ 'c' , 'a' , 'b' , 'b' , 'c' , 'a' ] )
unique_values = pd. Series( [ 'c' , 'b' , 'a' ] )
pd. Index( unique_values) . get_indexer( to_match)
array([0, 2, 1, 1, 0, 2], dtype=int64)