import pandas as pd
import numpy as np
# --- Section: creating DataFrames -------------------------------------------
# This section is taken from a notebook session. Bare expressions such as
# `type(t1)` or `t1` display a value in a notebook but are no-ops when the file
# runs as a script. The output captured in the notebook is kept below each
# statement as "# Out:" comments so that the file stays valid Python.
print("*" * 25 + "dataframe创建" + "*" * 25)
# Out: *************************dataframe创建*************************

# Build a frame from a 3x4 NumPy array without explicit labels.
pd.DataFrame(np.arange(12).reshape(3, 4))
# Same data, with explicit row labels (a..c) and column labels (D..G).
pd.DataFrame(np.arange(12).reshape(3, 4), index=list("abc"), columns=list("DEFG"))

# Build a frame from a dict of lists: each key becomes a column.
d1 = {"name": ["n1", "n2"], "age": [20, 30], "tel": [111, 222]}
t1 = pd.DataFrame(d1)
type(t1)
# Out: pandas.core.frame.DataFrame
t1
# Out:
#   name  age  tel
# 0   n1   20  111
# 1   n2   30  222

# Build a frame from a list of dicts: each dict becomes a row. The second
# record has no "age" key, so that cell is NaN and the column shows floats.
d2 = [{"name": "n1", "age": 20, "tel": 111}, {"name": "n2", "tel": 222}]
t2 = pd.DataFrame(d2)
type(t2)
# Out: pandas.core.frame.DataFrame
t2
# Out:
#   name   age  tel
# 0   n1  20.0  111
# 1   n2   NaN  222
# --- Section: inspecting a DataFrame ----------------------------------------
# Operates on `t2`, the frame built from a list of dicts earlier in the file.
# Bare expressions display a value in a notebook but are no-ops in a script;
# the output captured in the notebook is kept as "# Out:" comments so that the
# file stays valid Python.
print("*" * 25 + "dataframe操作" + "*" * 25)
# Out: *************************dataframe操作*************************

# Row labels.
t2.index
# Out: RangeIndex(start=0, stop=2, step=1)

# Column labels.
t2.columns
# Out: Index(['name', 'age', 'tel'], dtype='object')

# Underlying data as a NumPy array (object dtype because columns are mixed).
t2.values
# Out:
# array([['n1', 20.0, 111],
#        ['n2', nan, 222]], dtype=object)

# (rows, columns).
t2.shape
# Out: (2, 3)

# Per-column dtypes.
t2.dtypes
# Out:
# name     object
# age     float64
# tel       int64
# dtype: object

# Number of dimensions.
t2.ndim
# Out: 2

# First row / last row (no output was captured for these in the notebook).
t2.head(1)
t2.tail(1)

# Summary of index, columns, non-null counts, dtypes and memory usage.
t2.info()
# Out:
# <class 'pandas.core.frame.DataFrame'>
# RangeIndex: 2 entries, 0 to 1
# Data columns (total 3 columns):
#  #   Column  Non-Null Count  Dtype
# ---  ------  --------------  -----
#  0   name    2 non-null      object
#  1   age     1 non-null      float64
#  2   tel     2 non-null      int64
# dtypes: float64(1), int64(1), object(1)
# memory usage: 176.0+ bytes

# Summary statistics; only the numeric columns (age, tel) appear in the output.
t2.describe()
# Out:
#         age         tel
# count   1.0    2.000000
# mean   20.0  166.500000
# std     NaN   78.488853
# min    20.0  111.000000
# 25%    20.0  138.750000
# 50%    20.0  166.500000
# 75%    20.0  194.250000
# max    20.0  222.000000
# --- Section: loading and sorting real data ---------------------------------
# The output captured in the notebook is kept as "# Out:" comments so that the
# file stays valid Python.
print("*" * 25 + "dataframe使用" + "*" * 25)
# Out: *************************dataframe使用*************************

# Load the dog-name counts from a CSV file in the working directory.
df = pd.read_csv("./dogNames2.csv")

# Sort by the count column, largest first. The result is not assigned here;
# in a notebook it is only displayed.
df.sort_values(by="Count_AnimalName", ascending=False)
# Out:
#       Row_Labels  Count_AnimalName
# 1156       BELLA              1195
# 9140         MAX              1153
# 2660     CHARLIE               856
# 3251        COCO               852
# 12368      ROCKY               823
# ...          ...               ...
# 6884        J-LO                 1
# 6888       JOANN                 1
# 6890        JOAO                 1
# 6891     JOAQUIN                 1
# 16219      39743                 1
#
# 16220 rows × 2 columns
# --- Section: indexing, part 1 (positional row slices, column by name) ------
# Uses `df`, the frame loaded from the CSV earlier in the file. The output
# captured in the notebook is kept as "# Out:" comments so that the file stays
# valid Python.
print("*" * 25 + "dataframe索引" + "*" * 25)
# Out: *************************dataframe索引*************************

# Keep the sorted frame this time so it can be sliced below.
df_sorted = df.sort_values(by="Count_AnimalName", ascending=False)

# A plain slice inside [] selects rows by position: the first five rows.
df_sorted[:5]
# Out:
#       Row_Labels  Count_AnimalName
# 1156       BELLA              1195
# 9140         MAX              1153
# 2660     CHARLIE               856
# 3251        COCO               852
# 12368      ROCKY               823

# A string inside [] selects a column: the names of the first five rows.
df_sorted[:5]["Row_Labels"]
# Out:
# 1156       BELLA
# 9140         MAX
# 2660     CHARLIE
# 3251        COCO
# 12368      ROCKY
# Name: Row_Labels, dtype: object

# Only the first row.
df_sorted[:1]
# Out:
#      Row_Labels  Count_AnimalName
# 1156      BELLA              1195
# --- Section: indexing, part 2 (label-based .loc, position-based .iloc) -----
# Bare expressions display a value in a notebook but are no-ops in a script;
# the output captured in the notebook is kept as "# Out:" comments so that the
# file stays valid Python. Statements without an "# Out:" comment had no
# captured output.
t3 = pd.DataFrame(np.arange(12).reshape(3, 4), index=list("abc"), columns=list("WXYZ"))
t3

# .loc selects by label: a single cell (row "a", column "Z").
t3.loc["a", "Z"]
# Out: 3
type(t3.loc["a", "Z"])
# Out: numpy.int64

# A whole row by label.
t3.loc["a"]
# Out:
# W    0
# X    1
# Y    2
# Z    3
# Name: a, dtype: int64

# A whole column by label.
t3.loc[:, "Z"]
# Out:
# a     3
# b     7
# c    11
# Name: Z, dtype: int64

# Several rows / several columns by label.
t3.loc[["a", "c"]]
t3.loc[:, ["X", "Z"]]

# .iloc selects by integer position: the second row.
t3.iloc[1]
# Out:
# W    4
# X    5
# Y    6
# Z    7
# Name: b, dtype: int64

# The third column.
t3.iloc[:, 2]
# Out:
# a     2
# b     6
# c    10
# Name: Y, dtype: int64

# Rows 0 and 2 with columns 2 and 1 (in that order), then a positional slice.
t3.iloc[[0, 2], [2, 1]]
t3.iloc[1:, :2]

# Assigning through .iloc modifies t3 in place: rows from position 1 onward,
# first two columns, are all set to 11.
t3.iloc[1:, :2] = 11
t3
# Out:
#     W   X   Y   Z
# a   0   1   2   3
# b  11  11   6   7
# c  11  11  10  11
字符串方法: