# Pandas
# Series
import pandas as pd
import numpy as np
# A Series built from a plain list; np.nan stands in for a missing value.
# NOTE: this object is rebound just below without ever being displayed.
s = pd.Series(data=[1, 3, 5, np.nan, 6, 8])

# A Series built from a NumPy array of strings; no index is supplied here.
data = np.array(['a', 'b', 'c', 'd'])
s = pd.Series(data=data)
print(s)

# A Series built from a dict.
data1 = {'a': 0., 'b': 1., 'c': 2.}
s = pd.Series(data=data1)
print(s)
# DataFrame
import pandas as pd
import numpy as np
# Step 1: create DataFrames from a NumPy array and from a dict of columns.
print("*********步骤1*********\n")

# Seven dates starting at 2020-01-01, used as the row index below.
dates = pd.date_range(start='20200101', periods=7)
print(dates)
print("--" * 16)

# 7x4 frame of random samples, rows labelled by `dates`, columns A-D.
df = pd.DataFrame(
    data=np.random.randn(7, 4),
    index=dates,
    columns=list('ABCD'),
)
print(df)

# Frame built from a dict mixing scalars ('A', 'B', 'F') with length-4
# array-likes; 'C' and 'D' are given explicit dtypes, 'E' is categorical.
df1 = pd.DataFrame(
    data={
        'A': 1.,
        'B': pd.Timestamp('20200102'),
        'C': pd.Series(1, index=list(range(4)), dtype='float32'),
        'D': np.array([3] * 4, dtype='int32'),
        'E': pd.Categorical(["test", "train", "test", "train"]),
        'F': 'foo',
    }
)
print(df1)
# Step 2: inspect a DataFrame (head/tail, index, columns, values, describe).
print("*********步骤2*********\n")

# 6x5 frame holding the integers 0..29, rows labelled a-f, columns A-E.
data2 = np.arange(30).reshape(6, 5)
df2 = pd.DataFrame(
    data2,
    # Fixed: the second label used to be 'b ' (stray trailing space), which
    # made df2.loc['b'] raise KeyError and broke the a-f label sequence.
    index=['a', 'b', 'c', 'd', 'e', 'f'],
    columns=['A', 'B', 'C', 'D', 'E'],
)
print(df2)
print("--" * 10)
print(df2.head())
print("--" * 10)
print(df2.tail(3))

print("index is :")
print(df2.index)
print("columns is :")
print(df2.columns)
print("values is :")
print(df2.values)

# Label-based slice with a step of 2 over the rows, restricted to column 'A'.
print(df2.loc['a':'f':2, 'A'])
print(df2.describe())
# Step 3: drop a row / a column, stack the two results, then reset the index.
print("*********步骤3*********\n")

data3 = np.arange(30).reshape(6, 5)
df3 = pd.DataFrame(
    data3,
    # Fixed: the third label used to be ' c' (stray leading space).
    index=['a', 'b', 'c', 'd', 'e', 'f'],
    columns=['A', 'B', 'C', 'D', 'E'],
)

# drop() returns new frames; df3 itself is left untouched.
a = df3.drop(['a'], axis=0)  # without row 'a'
b = df3.drop(['A'], axis=1)  # without column 'A'
print('-------原始数据df-----')
print(df3)
print('-------删除行---------')
print(a)
print(' -------删除列---------')
print(b)

# DataFrame.append was removed in pandas 2.0; pd.concat([b, a]) is the
# equivalent call: rows of `a` are stacked under `b`, and cells with no
# source column (column 'A' for the rows coming from `b`) become NaN.
c = pd.concat([b, a])
# Printed again to show that `b` is unchanged by the concatenation.
print(b)
print('------合并后产生的新数据------')
print(c)

# Moves the row labels of `b` into a regular column named 'index', in place.
b.reset_index(inplace=True)
print(b)
# Step 4: iterate over a DataFrame column by column.
print("*********步骤4*********\n")

# Fixed: the first column label used to be 'col1 ' (stray trailing space).
df4 = pd.DataFrame(np.random.randn(4, 3), columns=['col1', 'col2', 'col3'])
print("df4:", df4)

# DataFrame.iteritems was removed in pandas 2.0; items() is the replacement
# and yields one (column name, column Series) tuple per column.
# The loop body is now indented; it was flush-left, which is a syntax error.
i = 1
for s in df4.items():
    print("第%d列数据%s" % (i, s))
    i += 1
# Step 5: statistical helpers - percent change, covariance and ranking.
print("*********步骤5*********\n")

# pct_change() on a Series, then on every column of a random 5x2 frame.
s = pd.Series(data=[1, 2, 3, 4, 5, 4])
print(s.pct_change())
df5 = pd.DataFrame(data=np.random.randn(5, 2))
print(df5.pct_change())

# Covariance between two independent random Series of length 10.
s1 = pd.Series(data=np.random.randn(10))
s2 = pd.Series(data=np.random.randn(10))
print("s1:", s1)
print("s2:", s2)
print("协方差:", s1.cov(s2))

# Covariance between two columns of a frame, then across all columns at once.
frame = pd.DataFrame(
    data=np.random.randn(10, 5),
    columns=['a', 'b', 'c', 'd', 'e'],
)
print(frame.loc[:, 'a'].cov(frame.loc[:, 'b']))
print(frame.cov())

# Copy the value at 'b' into 'd' so that rank() has a tie to deal with.
s3 = pd.Series(data=np.random.randn(5), index=list('abcde'))
s3.loc['d'] = s3.loc['b']
print(s3.rank())
# Step 6: missing data - create gaps with reindex(), then detect, aggregate,
# fill and drop them.
print("*********步骤6*********\n")

# Reindexing onto a-h adds rows 'b', 'd' and 'g', which have no source data
# and are therefore filled with NaN.
df6 = pd.DataFrame(
    np.random.randn(5, 3),
    index=['a', 'c', 'e', 'f', 'h'],
    columns=['one', 'two', 'three'],
)
df6 = df6.reindex(['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h'])
print(df6)

# isnull() flags the NaN entries of a column.
df7 = pd.DataFrame(
    np.random.randn(5, 3),
    index=['a', 'c', 'e', 'f', 'h'],
    columns=['one', 'two', 'three'],
)
df7 = df7.reindex(['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h'])
print(df7['one'].isnull())

# Summing a column that contains NaN entries.
# Fixed: the third column label used to be ' three' (stray leading space),
# unlike every other frame in this section.
df8 = pd.DataFrame(
    np.random.randn(5, 3),
    index=['a', 'c', 'e', 'f', 'h'],
    columns=['one', 'two', 'three'],
)
df8 = df8.reindex(['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h'])
print(df8)
print(df8['one'].sum())

# fillna(0) returns a copy with every NaN replaced by 0. Row 'e' is not in
# the new index and is dropped; row 'b' is new and starts out as NaN.
df9 = pd.DataFrame(
    np.random.randn(3, 3),
    index=['a', 'c', 'e'],
    columns=['one', 'two', 'three'],
)
df9 = df9.reindex(['a', 'b', 'c'])
print(df9)
print("NaN replaced with '0':")
print(df9.fillna(0))

# dropna() returns a copy without the rows that contain NaN.
# Fixed: the second index label used to be 'c ' (stray trailing space), so
# reindex() found no match for 'c', replaced that row with NaN and dropna()
# then discarded it.
df10 = pd.DataFrame(
    np.random.randn(5, 3),
    index=['a', 'c', 'e', 'f', 'h'],
    columns=['one', 'two', 'three'],
)
df10 = df10.reindex(['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h'])
print(df10)
print("---" * 10)
print(df10.dropna())