In [3]: import pandas as pd
In [4]: df = pd.read_csv('ex1.csv')  # read_csv默认以逗号作为分隔符来解析文件
In [5]: df
Out[5]:
a b c d message
0 1 2 3 4 hello
1 5 6 7 8 world
2 9 10 11 12 foo
In [6]: pd.read_table('ex1.csv', sep=',')  # read_table的默认分隔符是制表符'\t',读取逗号分隔的文件时需要用sep=','指定
Out[6]:
a b c d message
0 1 2 3 4 hello
1 5 6 7 8 world
2 9 10 11 12 foo
In [13]: pd.read_csv('ex2.csv',header=None)  # header=None表示文件没有标题行,pandas会自动分配整数列名(0, 1, 2, ...)
Out[13]:
0 1 2 3 4
0 1 2 3 4 hello
1 5 6 7 8 world
2 9 10 11 12 foo
In [14]: pd.read_csv('ex2.csv',names=['a','b','c','d','message'])  # 用names参数自行指定列名
Out[14]:
a b c d message
0 1 2 3 4 hello
1 5 6 7 8 world
2 9 10 11 12 foo
In [15]: pd.read_csv('ex2.csv',names=['a','b','c','d','message'],index_col='message')  # index_col将指定的列用作行索引
Out[15]:
a b c d
message
hello 1 2 3 4
world 5 6 7 8
foo 9 10 11 12
In [16]: pd.read_csv('csv_mindex.csv')
Out[16]:
key1 key2 value1 value2
0 one a 1 2
1 one b 3 4
2 one c 5 6
3 one d 7 8
4 two a 9 10
5 two b 11 12
6 two c 13 14
7 two d 15 16
In [17]: pd.read_csv('csv_mindex.csv',index_col=['key1','key2'])  # 给index_col传入多个列名组成的列表,得到层次化索引
Out[17]:
value1 value2
key1 key2
one a 1 2
b 3 4
c 5 6
d 7 8
two a 9 10
b 11 12
c 13 14
d 15 16
In [18]: list(open('ex3.txt'))
Out[18]:
[' A B C\n',
'aaa -0.264438 -1.026059 -0.619500\n',
'bbb 0.927272 0.302904 -0.032399\n',
'ccc -0.264273 -0.386314 -0.217601\n',
'ddd -0.871858 -0.348382 1.100491\n']
In [19]: pd.read_table('ex3.txt', sep='\s+')  # 字段之间由数量不定的空白符分隔时,可以用正则表达式'\s+'作为分隔符(建议写成原始字符串r'\s+')
Out[19]:
A B C
aaa -0.264438 -1.026059 -0.619500
bbb 0.927272 0.302904 -0.032399
ccc -0.264273 -0.386314 -0.217601
ddd -0.871858 -0.348382 1.100491
In [26]: pd.read_csv('ex4.csv',skiprows=[0,2,3])  # skiprows可以跳过指定的行(行号从0开始)
Out[26]:
a b c d message
0 1 2 3 4 hello
1 5 6 7 8 world
2 9 10 11 12 foo
In [27]: result = pd.read_csv('ex5.csv')  # pandas会自动识别常见的缺失值标记,如NA、NULL、NaN和空字符串
In [28]: result
Out[28]:
something a b c d message
0 one 1 2 3.0 4 NaN
1 two 5 6 NaN 8 world
2 three 9 10 11.0 12 foo
In [29]: pd.isnull(result)
Out[29]:
something a b c d message
0 False False False False False True
1 False False False True False False
2 False False False False False False
In [30]: sentinels = {'message': ['foo','NA'], 'something': [ 'two']}
In [31]: pd.read_csv('ex5.csv', na_values = sentinels)  # na_values可以按列指定要当作NaN处理的字符串(字典的键为列名)
Out[31]:
something a b c d message
0 one 1 2 3.0 4 NaN
1 NaN 5 6 NaN 8 world
2 three 9 10 11.0 12 NaN