第1章 pandas基础
import pandas as pd; import numpy as np; from pandas import Series, DataFrame
1、读取写入文件
# Load the sample tables from three on-disk formats.
data = pd.read_csv("table.csv")      # comma-separated text
data1 = pd.read_table("table.txt")   # read_table defaults to a tab delimiter
data2 = pd.read_excel("table.xlsx")  # Excel workbook
data.head()  # preview the leading rows
  School Class    ID Gender   Address  Height  Weight  Math Physics
0    S_1   C_1  1101      M  street_1     173      63  34.0      A+
1    S_1   C_1  1102      F  street_2     192      73  32.5      B+
2    S_1   C_1  1103      M  street_2     186      82  87.2      B+
3    S_1   C_1  1104      F  street_2     167      81  80.4      B-
4    S_1   C_1  1105      F  street_4     159      64  84.8      B+
data1. head( )
   col1 col2  col3    col4
0     2    a   1.4   apple
1     3    b   3.4  banana
2     6    c   2.5  orange
3     5    d   3.2   lemon
data2. head( )
  School Class    ID Gender   Address  Height  Weight  Math Physics
0    S_1   C_1  1101      M  street_1     173      63  34.0      A+
1    S_1   C_1  1102      F  street_2     192      73  32.5      B+
2    S_1   C_1  1103      M  street_2     186      82  87.2      B+
3    S_1   C_1  1104      F  street_2     167      81  80.4      B-
4    S_1   C_1  1105      F  street_4     159      64  84.8      B+
# Persist the frames back to disk; both writers include the row index by default.
data.to_csv("new_data.csv")
data2.to_excel("new_data2.xlsx")
2、Series
# Five standard-normal draws labelled "a".."e"; the Series also carries a name.
s = pd.Series(
    np.random.randn(5),
    index=["a", "b", "c", "d", "e"],
    name="Series",
    dtype="float",
)
s
a -0.104532
b -1.099814
c -0.143590
d -1.507353
e -0.400417
Name: Series, dtype: float64
s. values
array([-0.10453234, -1.09981446, -0.14358968, -1.50735287, -0.40041748])
s. name
'Series'
s. index
Index(['a', 'b', 'c', 'd', 'e'], dtype='object')
s. dtype
dtype('float64')
s[ "b" ]
-1.0998144639653273
s. mean( )
-0.6511413659907033
# Iterating a Series yields its values (not its index labels); list() collects
# them directly, so the identity comprehension is unnecessary.
list(s)
[-0.1045323399024992,
-1.0998144639653273,
-0.1435896809218303,
-1.507352865614497,
-0.4004174795493625]
3、DataFrame
# Build a small frame from a dict of equal-length columns; the rows are
# labelled with the Chinese numerals one through five.
df = pd.DataFrame(
    {
        "col1": list("abcde"),
        "col2": range(5, 10),
        "col3": [1.3, 2.5, 3.6, 4.4, 5.8],
    },
    index=list("一二三四五"),
)
df
   col1  col2  col3
一    a     5   1.3
二    b     6   2.5
三    c     7   3.6
四    d     8   4.4
五    e     9   5.8
df[ "col1" ]
一 a
二 b
三 c
四 d
五 e
Name: col1, dtype: object
type ( df)
pandas.core.frame.DataFrame
type ( df[ "col1" ] )
pandas.core.series.Series
# rename returns a new frame in which only the labels listed in each mapping
# change; df itself keeps its original labels.
df.rename(index={"一": "one"}, columns={"col1": "New_col1"})
    New_col1  col2  col3
one        a     5   1.3
二         b     6   2.5
三         c     7   3.6
四         d     8   4.4
五         e     9   5.8
df. index, df. columns, df. values
(Index(['一', '二', '三', '四', '五'], dtype='object'),
Index(['col1', 'col2', 'col3'], dtype='object'),
array([['a', 5, 1.3],
['b', 6, 2.5],
['c', 7, 3.6],
['d', 8, 4.4],
['e', 9, 5.8]], dtype=object))
df. shape
(5, 3)
df. info( )
<class 'pandas.core.frame.DataFrame'>
Index: 5 entries, 一 to 五
Data columns (total 3 columns):
 #   Column  Non-Null Count  Dtype
---  ------  --------------  -----
 0   col1    5 non-null      object
 1   col2    5 non-null      int64
 2   col3    5 non-null      float64
dtypes: float64(1), int64(1), object(1)
memory usage: 320.0+ bytes
df. describe( ) . T
      count  mean       std  min  25%  50%  75%  max
col2    5.0  7.00  1.581139  5.0  6.0  7.0  8.0  9.0
col3    5.0  3.52  1.728294  1.3  2.5  3.6  4.4  5.8
# Restrict the reduction to numeric columns explicitly: "col1" holds strings,
# and pandas >= 2.0 raises TypeError instead of silently skipping such columns.
df.mean(numeric_only=True)
col2 7.00
col3 3.52
dtype: float64
# Same values, differently ordered indexes: arithmetic pairs rows by index
# label, not by position.
df1 = pd.DataFrame({"A": [1, 2, 3]}, index=[1, 2, 3])
df2 = pd.DataFrame({"A": [1, 2, 3]}, index=[3, 1, 2])
df1 - df2  # column A: label 1 -> -1, label 2 -> -1, label 3 -> 2
# drop returns a new frame without row "五" and column "col1"; df is not modified.
df.drop(index="五", columns="col1")
   col2  col3
一     5   1.3
二     6   2.5
三     7   3.6
四     8   4.4
# Assigning a list to an existing column label overwrites that column in place.
df["col1"] = [1, 2, 3, 4, 5]
df
   col1  col2  col3
一     1     5   1.3
二     2     6   2.5
三     3     7   3.6
四     4     8   4.4
五     5     9   5.8
# del removes the column from df itself (in place), unlike drop above.
del df["col1"]
df
   col2  col3
一     5   1.3
二     6   2.5
三     7   3.6
四     8   4.4
五     9   5.8
df[ "col1" ] = [ 1 , 2 ,