1、dataframe的创建
- df_1 = pd.DataFrame({'A': [0, 1, 2], 'B': [3, 4, 5]})
- df_2 = pd.DataFrame([[0, 1, 2], [3, 4, 5]], columns=['A', 'B', 'C'], index=[1, 2]),columns 是列标签,index 是行标签
import numpy as np
# 5x5 integer array used to demonstrate NumPy indexing and reductions.
price = np.array([
    [3, 4, 5, 6, 7],
    [5, 6, 5, 4, 3],
    [4, 4, 5, 4, 3],
    [5, 5, 6, 7, 4],
    [5, 6, 7, 5, 4],
])
print(price)  # the whole 2-D array
print(price[0, 2])  # first row, third element
print(price[1, :])  # every element of the second row
print(price[1, 1:3])  # second through third element of the second row
print(price[:, 1])  # every element of the second column
# Mean of the column in which the first row reaches its maximum.
print(price[:, price[0, :].argmax()].mean())
# Iterating a 2-D array yields its rows, so no row count is hard-coded.
for row in price:
    print(row.mean())  # mean of each row, one per line
print(price.mean(axis=0))  # mean of each column
print(price.mean(axis=1))  # mean of each row
for row in price:
    print(row.std())  # standard deviation of each row, one per line
[[3 4 5 6 7]
[5 6 5 4 3]
[4 4 5 4 3]
[5 5 6 7 4]
[5 6 7 5 4]]
5
[5 6 5 4 3]
[6 5]
[4 6 4 5 6]
4.2
5.0
4.6
4.0
5.4
5.4
[4.4 5. 5.6 5.2 4.2]
[5. 4.6 4. 5.4 5.4]
1.4142135623730951
1.0198039027185568
0.6324555320336759
1.019803902718557
1.019803902718557
import numpy as np
# Same table as before, now with a header row and a label column.
price = np.array(
    [
        ["date", "s1", "s2", "s3", "s4", "s5"],
        ["a", 3, 4, 5, 6, 7],
        ["b", 5, 6, 5, 4, 3],
        ["c", 4, 4, 5, 4, 3],
        ["d", 5, 5, 6, 7, 4],
        ["e", 5, 6, 7, 5, 4],
    ]
)
# An ndarray holds a single element type, so mixing str and int makes
# NumPy convert every entry to a string.
# Inspect the resulting dtype (the recorded run reported '<U4', a
# string type of maximum length 4). Once the values are strings,
# numeric reductions such as mean() no longer apply, which is why a
# DataFrame is introduced next.
price.dtype
dtype('<U4')
import pandas as pd
# DataFrame with one string label column ('s') and five integer columns.
price = pd.DataFrame({
    's': ['s1', 's2', 's3', 's4', 's5'],
    'a': [3, 4, 5, 6, 7],
    'b': [5, 6, 5, 4, 3],
    'c': [4, 4, 5, 4, 3],
    'd': [5, 5, 6, 7, 4],
    'e': [5, 6, 7, 5, 4],
})
print(price)  # with a DataFrame the numeric columns support math again
# numeric_only=True keeps the string column 's' out of the reduction.
# pandas >= 2.0 no longer drops non-numeric columns silently and raises
# TypeError without it.
# Standard deviation per column: pandas applies Bessel's correction
# (ddof=1) by default, so the values differ from NumPy's.
print(price.std(numeric_only=True))
# ddof=0 switches the correction off; the values then match NumPy's.
print(price.std(ddof=0, numeric_only=True))
s a b c d e
0 s1 3 5 4 5 5
1 s2 4 6 4 5 6
2 s3 5 5 5 6 7
3 s4 6 4 4 7 5
4 s5 7 3 3 4 4
a 1.581139
b 1.140175
c 0.707107
d 1.140175
e 1.140175
dtype: float64
a 1.414214
b 1.019804
c 0.632456
d 1.019804
e 1.019804
dtype: float64
2、读取dataframe数据单元
import pandas as pd
# Five integer columns, with the labels s1..s5 as the row index
# instead of a regular column.
price = pd.DataFrame(
    {
        "a": [3, 4, 5, 6, 7],
        "b": [5, 6, 5, 4, 3],
        "c": [4, 4, 5, 4, 3],
        "d": [5, 5, 6, 7, 4],
        "e": [5, 6, 7, 5, 4],
    },
    index=["s1", "s2", "s3", "s4", "s5"],
)
print(price)  # the full table
print(price.loc["s1"])  # the row labelled 's1'
type(price.loc["s1"])  # bare expression: the value is not printed by a script
price["d"]  # column 'd' selected by label; also a bare expression
print(price.values)  # the data as a NumPy ndarray
print(price.values.mean())  # mean over every cell of the table
a b c d e
s1 3 5 4 5 5
s2 4 6 4 5 6
s3 5 5 5 6 7
s4 6 4 4 7 5
s5 7 3 3 4 4
a 3
b 5
c 4
d 5
e 5
Name: s1, dtype: int64
[[3 5 4 5 5]
[4 6 4 5 6]
[5 5 5 6 7]
[6 4 4 7 5]
[7 3 3 4 4]]
4.88