# Introduction to pandas basics
# How numpy and pandas differ:
# pandas is like a dictionary-style numpy
import pandas as pd
import numpy as np
# Walk-through of the basic pandas containers. Every result is printed with a
# single-argument print(...) call, which is valid on Python 3 and prints the
# same text on Python 2 (where the parentheses are just grouping).

# A Series is a one-dimensional labelled array; np.nan marks a missing value.
s = pd.Series([1, 3, 4, np.nan, 44, 1])
print(s)

# Six consecutive dates starting at 2016-01-01, used below as a row index.
dates = pd.date_range('20160101', periods=6)
print(dates)

# 6x4 frame of random normal values with explicit row and column labels.
df = pd.DataFrame(
    np.random.randn(6, 4),
    index=dates,
    columns=['a', 'b', 'c', 'd'],
)
print(df)

# Without explicit labels, rows and columns are numbered from 0.
df1 = pd.DataFrame(np.arange(12).reshape((3, 4)))
print(df1)

# Build a frame from a dict: each key becomes a column. The scalar entries
# ('A', 'B', 'F') are repeated to match the four-element columns.
df2 = pd.DataFrame({
    'A': 1,
    'B': pd.Timestamp('20130102'),
    'C': pd.Series(1, index=list(range(4)), dtype='float32'),
    'D': np.array([3] * 4, dtype='int32'),
    'E': pd.Categorical(['test', 'train', 'test', 'train']),
    'F': 'foo',
})
print(df2)

# Inspect the frame's metadata and raw contents.
print(df2.dtypes)      # dtype of each column
print(df2.index)       # row labels
print(df2.columns)     # column labels
print(df2.values)      # underlying data as a numpy array
print(df2.describe())  # summary statistics
print(df2.T)           # transpose: rows become columns

# Sorting: by column labels (axis=1), by row labels (axis=0), then by the
# values in column 'E' -- all in descending order.
print(df2.sort_index(axis=1, ascending=False))
print(df2.sort_index(axis=0, ascending=False))
print(df2.sort_values(by='E', ascending=False))
0 1.0
1 3.0
2 4.0
3 NaN
4 44.0
5 1.0
dtype: float64
DatetimeIndex(['2016-01-01', '2016-01-02', '2016-01-03', '2016-01-04',
'2016-01-05', '2016-01-06'],
dtype='datetime64[ns]', freq='D')
a b c d
2016-01-01 0.263569 -0.687714 -0.723049 1.115857
2016-01-02 -1.239271 -0.090627 0.652509 -1.693457
2016-01-03 0.278214 -0.338247 0.873545 -0.259281
2016-01-04 0.351920 0.003695 -0.259181 -1.924558
2016-01-05 -0.343248 -0.168607 -0.811324 -1.111886
2016-01-06 -1.224475 0.166382 -0.972534 -0.160138
0 1 2 3
0 0 1 2 3
1 4 5 6 7
2 8 9 10 11
A B C D E F
0 1 2013-01-02 1.0 3 test foo
1 1 2013-01-02 1.0 3 train foo
2 1 2013-01-02 1.0 3 test foo
3 1 2013-01-02 1.0 3 train foo
A int64
B datetime64[ns]
C float32
D int32
E category
F object
dtype: object
Int64Index([0, 1, 2, 3], dtype='int64')
Index([u'A', u'B', u'C', u'D', u'E', u'F'], dtype='object')
[[1 Timestamp('2013-01-02 00:00:00') 1.0 3 'test' 'foo']
[1 Timestamp('2013-01-02 00:00:00') 1.0 3 'train' 'foo']
[1 Timestamp('2013-01-02 00:00:00') 1.0 3 'test' 'foo']
[1 Timestamp('2013-01-02 00:00:00') 1.0 3 'train' 'foo']]
A C D
count 4.0 4.0 4.0
mean 1.0 1.0 3.0
std 0.0 0.0 0.0
min 1.0 1.0 3.0
25% 1.0 1.0 3.0
50% 1.0 1.0 3.0
75% 1.0 1.0 3.0
max 1.0 1.0 3.0
0 1 2 \
A 1 1 1
B 2013-01-02 00:00:00 2013-01-02 00:00:00 2013-01-02 00:00:00
C 1 1 1
D 3 3 3
E test train test
F foo foo foo
3
A 1
B 2013-01-02 00:00:00
C