In [92]: dt = pd.read_table('aaa.txt',sep='|',header=None)
In [93]: dt
Out[93]:
0 1 2 3 4
0 2016-06-28 13:15:20 aaa bbb 111 lsq
1 2016-06-28 13:15:41 aaa bbb 222 ljk
2 2016-06-28 13:15:45 aaa bbb 333 llj
3 2016-06-28 13:15:20 aaa bbb 111 lsq
4 2016-06-28 13:15:41 aaa bbb 222 ljk
5 2016-06-28 13:15:45 aaa bbb 333 llj
6 2016-06-28 13:15:20 aaa bbb 111 lsq
7 2016-06-28 13:15:41 aaa bbb 222 ljk
8 2016-06-28 13:15:45 aaa bbb 333 llj
9 2016-06-28 13:15:20 aaa bbb 111 lsq
10 2016-06-28 13:15:41 aaa bbb 222 ljk
11 2016-06-28 13:15:45 aaa bbb 333 llj
In [94]: dt.columns
Out[94]: Int64Index([0, 1, 2, 3, 4], dtype='int64')
#取其中的几列,生成新的DataFrame
In [95]: dt1 = dt[[0,2]]
In [96]: dt1
Out[96]:
0 2
0 2016-06-28 13:15:20 bbb
1 2016-06-28 13:15:41 bbb
2 2016-06-28 13:15:45 bbb
3 2016-06-28 13:15:20 bbb
4 2016-06-28 13:15:41 bbb
5 2016-06-28 13:15:45 bbb
6 2016-06-28 13:15:20 bbb
7 2016-06-28 13:15:41 bbb
8 2016-06-28 13:15:45 bbb
9 2016-06-28 13:15:20 bbb
10 2016-06-28 13:15:41 bbb
11 2016-06-28 13:15:45 bbb
#重命名列
In [97]: names = ['create_time','url']
In [98]: dt1.columns = names
In [99]: dt1
Out[99]:
create_time url
0 2016-06-28 13:15:20 bbb
1 2016-06-28 13:15:41 bbb
2 2016-06-28 13:15:45 bbb
3 2016-06-28 13:15:20 bbb
4 2016-06-28 13:15:41 bbb
5 2016-06-28 13:15:45 bbb
6 2016-06-28 13:15:20 bbb
7 2016-06-28 13:15:41 bbb
8 2016-06-28 13:15:45 bbb
9 2016-06-28 13:15:20 bbb
10 2016-06-28 13:15:41 bbb
11 2016-06-28 13:15:45 bbb
In [93]: dt
Out[93]:
0 1 2 3 4
0 2016-06-28 13:15:20 aaa bbb 111 lsq
1 2016-06-28 13:15:41 aaa bbb 222 ljk
2 2016-06-28 13:15:45 aaa bbb 333 llj
3 2016-06-28 13:15:20 aaa bbb 111 lsq
4 2016-06-28 13:15:41 aaa bbb 222 ljk
5 2016-06-28 13:15:45 aaa bbb 333 llj
6 2016-06-28 13:15:20 aaa bbb 111 lsq
7 2016-06-28 13:15:41 aaa bbb 222 ljk
8 2016-06-28 13:15:45 aaa bbb 333 llj
9 2016-06-28 13:15:20 aaa bbb 111 lsq
10 2016-06-28 13:15:41 aaa bbb 222 ljk
11 2016-06-28 13:15:45 aaa bbb 333 llj
In [94]: dt.columns
Out[94]: Int64Index([0, 1, 2, 3, 4], dtype='int64')
#取其中的几列,生成新的DataFrame
In [95]: dt1 = dt[[0,2]]
In [96]: dt1
Out[96]:
0 2
0 2016-06-28 13:15:20 bbb
1 2016-06-28 13:15:41 bbb
2 2016-06-28 13:15:45 bbb
3 2016-06-28 13:15:20 bbb
4 2016-06-28 13:15:41 bbb
5 2016-06-28 13:15:45 bbb
6 2016-06-28 13:15:20 bbb
7 2016-06-28 13:15:41 bbb
8 2016-06-28 13:15:45 bbb
9 2016-06-28 13:15:20 bbb
10 2016-06-28 13:15:41 bbb
11 2016-06-28 13:15:45 bbb
#重命名列
In [97]: names = ['create_time','url']
In [98]: dt1.columns = names
In [99]: dt1
Out[99]:
create_time url
0 2016-06-28 13:15:20 bbb
1 2016-06-28 13:15:41 bbb
2 2016-06-28 13:15:45 bbb
3 2016-06-28 13:15:20 bbb
4 2016-06-28 13:15:41 bbb
5 2016-06-28 13:15:45 bbb
6 2016-06-28 13:15:20 bbb
7 2016-06-28 13:15:41 bbb
8 2016-06-28 13:15:45 bbb
9 2016-06-28 13:15:20 bbb
10 2016-06-28 13:15:41 bbb
11 2016-06-28 13:15:45 bbb
来自“ITPUB博客”，链接：http://blog.itpub.net/10972173/viewspace-2121246/ ，如需转载，请注明出处，否则将追究法律责任。
转载于:http://blog.itpub.net/10972173/viewspace-2121246/