import pandas as pd
import numpy as np
1、生成Series数据表
# Build a Series from a plain list; the NaN entry makes pandas store the
# values as float64.
s = pd.Series(data=[1, 3, 6, np.nan, 44, 1])
print(s)
0 1.0
1 3.0
2 6.0
3 NaN
4 44.0
5 1.0
dtype: float64
2、生成DataFrame数据表
# Six consecutive daily timestamps starting 2016-01-01, used as the row index.
datas = pd.date_range(start='20160101', periods=6)
columns = list('abcd')
# 6x4 frame of standard-normal samples, so the printed values differ per run.
df = pd.DataFrame(data=np.random.randn(6, 4), index=datas, columns=columns)
print(df)
| | a | b | c | d |
---|---|---|---|---|
2016-01-01 | 0.101034 | 0.326522 | -0.333575 | 1.238334 |
2016-01-02 | 1.035345 | -0.326224 | 1.028198 | 1.502042 |
2016-01-03 | -0.516077 | 1.278102 | 0.569306 | -0.565420 |
2016-01-04 | 1.585359 | -0.369289 | -0.225413 | -0.057557 |
2016-01-05 | -1.180227 | -0.252925 | -0.175618 | -0.781248 |
2016-01-06 | 0.365030 | -2.304684 | -0.155198 | -0.421975 |
# Build a frame from a dict of columns: the scalar entries ('A', 'B', 'E')
# are repeated for every row of the length-4 index supplied by the Series
# in 'C'.
df2 = pd.DataFrame(
    data={
        'A': 1,
        'B': pd.Timestamp('20130102'),
        'C': pd.Series(1, index=list(range(4)), dtype='float32'),
        'D': np.full(4, 3, dtype='int32'),
        'E': 'foo',
    }
)
print(df2)
| | A | B | C | D | E |
---|---|---|---|---|---|
0 | 1 | 2013-01-02 | 1.0 | 3 | foo |
1 | 1 | 2013-01-02 | 1.0 | 3 | foo |
2 | 1 | 2013-01-02 | 1.0 | 3 | foo |
3 | 1 | 2013-01-02 | 1.0 | 3 | foo |
# Show the dtype of each column of df2.
print(df2.dtypes)
A int64
B datetime64[ns]
C float32
D int32
E object
dtype: object
# Show the row index of df2.
print(df2.index)
Int64Index([0, 1, 2, 3], dtype='int64')
# Show the column labels of df2.
print(df2.columns)
Index(['A', 'B', 'C', 'D', 'E'], dtype='object')
# Show the data held in df2 as a NumPy array (without index or column labels).
print(df2.values)
array([[1, Timestamp('2013-01-02 00:00:00'), 1.0, 3, 'foo'],
[1, Timestamp('2013-01-02 00:00:00'), 1.0, 3, 'foo'],
[1, Timestamp('2013-01-02 00:00:00'), 1.0, 3, 'foo'],
[1, Timestamp('2013-01-02 00:00:00'), 1.0, 3, 'foo']], dtype=object)
# Print summary statistics (count, mean, std, min, quartiles, max) for the
# numeric columns of df2.
print(df2.describe())
| | A | C | D |
---|---|---|---|
count | 4.0 | 4.0 | 4.0 |
mean | 1.0 | 1.0 | 3.0 |
std | 0.0 | 0.0 | 0.0 |
min | 1.0 | 1.0 | 3.0 |
25% | 1.0 | 1.0 | 3.0 |
50% | 1.0 | 1.0 | 3.0 |
75% | 1.0 | 1.0 | 3.0 |
max | 1.0 | 1.0 | 3.0 |