Pandas常用操作
import pandas as pd
import numpy as np
1.读取csv文件
# Examples of pd.read_csv options; each line is an independent variant that
# loads 'test.csv' into `csv_file`.

# Default settings: the first line of the file is used as the column names.
csv_file = pd. read_csv( 'test.csv' )
# header=None: the file has no header row; the first line is read as data and
# the columns are numbered 0, 1, 2, ...
csv_file = pd. read_csv( 'test.csv' , header= None )
# names=[...]: supply the column names explicitly.
csv_file = pd. read_csv( 'test.csv' , names= [ 'a' , 'b' , 'c' , 'd' , 'e' ] )
# index_col=0: use the first column as the row index.
csv_file = pd. read_csv( 'test.csv' , index_col= 0 )
# usecols=[...]: read only the columns at these positions.
csv_file = pd. read_csv( 'test.csv' , usecols= [ 0 , 1 , 3 ] )
# dtype={...}: force the dtype of individual columns, keyed by column name.
csv_file = pd. read_csv( 'test.csv' , dtype= { 'col_name1' : object , 'col_name2' : np. float64} )
# sep='\t': fields are separated by tabs instead of commas.
csv_file = pd. read_csv( 'test.csv' , sep= '\t' )
# na_values='str': additionally treat the literal text 'str' as a missing value.
csv_file = pd. read_csv( 'test.csv' , na_values= 'str' )
2.写csv文件
# Examples of DataFrame.to_csv options; each line is an independent variant
# that writes `csv_file` to 'result.csv'.

# Default settings: both the row index and the header row are written.
csv_file. to_csv( 'result.csv' )
# index=False: do not write the row index.
csv_file. to_csv( 'result.csv' , index= False )
# columns=[...]: write only the listed columns.
# NOTE(review): `columns` takes column labels, not positions; integer labels
# like these presumably exist only when the frame was read with header=None —
# confirm against the actual data.
csv_file. to_csv( 'result.csv' , columns= [ 0 , 1 , 3 ] )
# header=None: intended to omit the header row.
# NOTE(review): the documented values for `header` are a bool or a list of
# names; header=False is the documented way to omit the header — confirm that
# None behaves the same on the pandas version in use.
csv_file. to_csv( 'result.csv' , header= None )
# sep='\t': separate fields with tabs instead of commas.
csv_file. to_csv( 'result.csv' , sep= '\t' )
# na_rep='str': write the literal text 'str' in place of missing values.
csv_file. to_csv( 'result.csv' , na_rep= 'str' )
3.DataFrame与Numpy格式的转换
# Convert between a DataFrame and a NumPy array.
# `df_values` must already be a DataFrame when this snippet starts.

# DataFrame -> NumPy array of the underlying values.
np_values = df_values.values
# NumPy array -> DataFrame with default integer row and column labels.
df_values = pd.DataFrame(np_values)
# Same, with explicit column labels (one label per array column; 5 here).
df_values = pd.DataFrame(np_values, columns=[1, 2, 3, 4, 5])
# Same, with explicit row labels (one label per array row; 100 here).
df_values = pd.DataFrame(np_values, index=list(range(100)))
4.DataFrame数据的创建
# Ways to build a DataFrame. Each example rebinds `data` and `df`, so only the
# last frame survives; run them one at a time when experimenting.

# Empty frame with (empty) column labels, row labels and data given explicitly.
df = pd.DataFrame(columns=[], index=[], data=[])

# From a dict of equal-length lists: keys become column names and the rows get
# a default 0..n-1 index.
data = {
    '水果': ['苹果', '梨', '草莓'],
    '数量': [3, 2, 5],
    '价格': [10, 9, 8],
}
df = pd.DataFrame(data)

# From a dict of dicts: outer keys become column names, inner keys become the
# row index.
data = {
    '数量': {'苹果': 3, '梨': 2, '草莓': 5},
    '价格': {'苹果': 10, '梨': 9, '草莓': 8},
}
df = pd.DataFrame(data)

# From a dict of Series: keys become column names and the Series are aligned
# on their index (the default 0..n-1 here).
data = {
    '水果': pd.Series(['苹果', '梨', '草莓']),
    '数量': pd.Series([3, 2, 5]),
    '价格': pd.Series([10, 9, 8]),
}
df = pd.DataFrame(data)
5.DataFrame数据的统计性描述
# Descriptive statistics and metadata for `df_values`.

# Summary statistics (count, mean, std, min, quartiles, max) for the numeric
# columns.
df_values.describe()
# Summary statistics restricted to object-dtype (e.g. string) columns.
df_values.describe(include=['object'])
# Summary statistics for every column regardless of dtype. 'all' must be
# passed as `include=`; positionally it would be taken as `percentiles`.
df_values.describe(include='all')
# Print a concise summary: index range, columns, non-null counts and dtypes.
df_values.info()
# dtype of each column, as a Series indexed by column name.
df_values.dtypes
6.DataFrame数据的查看
# Quick ways to inspect `df_values`.

# First rows of the frame (5 by default).
df_values. head( )
# First 10 rows.
df_values. head( n= 10 )
# Last rows of the frame (5 by default).
df_values. tail( )
# Column labels.
df_values. columns
# Row labels.
df_values. index
7.DataFrame的切片操作
# Selecting and assigning data in `df_values`.
# `index`, 'column_name', 'a', 'b', 'c' and 'A' are placeholders for real row
# and column labels.

# Select one column by label.
df_values['column_name']
# Select one column by position, by looking its label up in this frame's own
# `columns`.
df_values[df_values.columns[index]]
# Select one row by row label.
df_values.loc[index]
# Select specific rows and specific columns by label.
df_values.loc[[index], ['a', 'b']]
# Select specific rows and a label range of columns; label slices include
# both endpoints.
df_values.loc[[index], 'a':'b']
# Select by integer position: first 10 rows and first 10 columns.
df_values.iloc[0:10, 0:10]
# Select specific row and column positions.
df_values.iloc[[0, 5, 10], [1, 8, 10]]
# Boolean filtering: keep the rows where column A is positive.
df_values[df_values.A > 0]
# Boolean filtering: keep the rows whose A value is one of the listed values.
df_values[df_values['A'].isin(['one', 'two'])]
# Assign a whole column from an array with one entry per row.
df_values['A'] = np.array([1] * len(df_values))
# Assign to several columns at once.
# NOTE(review): `[]` looks like a placeholder for the values to assign;
# replace it with data matching the selected shape before running.
df_values.loc[:, ['a', 'c']] = []
8.相关的操作(排序、合并)
# Sorting and combining frames. Every call below returns a new object, so
# assign the result to keep it.

# Sort the columns (axis=1) by label, in descending order.
df_values.sort_index(axis=1, ascending=False)
# Sort the rows by the values of one column, in ascending order.
df_values.sort_values(by='column_Name', ascending=True)
# Stack frames vertically (rows of df2 after rows of df1); add further frames
# to the list as needed.
pd.concat([df1, df2], axis=0)
# Place frames side by side, aligned on the row index; add further frames to
# the list as needed.
pd.concat([df1, df2], axis=1)
# Append the rows of df1 to df_values and renumber the index from 0.
# DataFrame.append was removed in pandas 2.0; pd.concat is the replacement.
pd.concat([df_values, df1], ignore_index=True)