DataFrame

1、dataframe的创建

  • df_1 = pd.DataFrame({'A': [0, 1, 2], 'B': [3, 4, 5]}) 
  • df_2 = pd.DataFrame([[0, 1, 2], [3, 4, 5]], columns=['A', 'B', 'C'], index=[1, 2]),其中 columns 是列标签,index 是行标签
import numpy as np
# 5x5 grid of sample values used to demonstrate NumPy indexing and reductions.
price = np.array([
    [3, 4, 5, 6, 7],
    [5, 6, 5, 4, 3],
    [4, 4, 5, 4, 3],
    [5, 5, 6, 7, 4],
    [5, 6, 7, 5, 4],
])

print(price)  # the whole 2-D array
print(price[0, 2])  # first row, third element
print(price[1, :])  # every element of the second row
print(price[1, 1:3])  # second through third elements of the second row
print(price[:, 1])  # every element of the second column

# Mean of the column that holds the largest value of the first row.
peak_col = price[0, :].argmax()
print(price[:, peak_col].mean())

# Mean of each row, one value per line.
for row in price:
    print(row.mean())

print(price.mean(axis=0))  # axis=0: one mean per column
print(price.mean(axis=1))  # axis=1: one mean per row

# Standard deviation of each row, one value per line.
for row in price:
    print(row.std())
[[3 4 5 6 7]
 [5 6 5 4 3]
 [4 4 5 4 3]
 [5 5 6 7 4]
 [5 6 7 5 4]]
5
[5 6 5 4 3]
[6 5]
[4 6 4 5 6]
4.2
5.0
4.6
4.0
5.4
5.4
[4.4 5.  5.6 5.2 4.2]
[5.  4.6 4.  5.4 5.4]
1.4142135623730951
1.0198039027185568
0.6324555320336759
1.019803902718557
1.019803902718557
import numpy as np
# The same table, now with a header row and a label column mixed in.
header = ['date', 's1', 's2', 's3', 's4', 's5']
records = [
    ['a', 3, 4, 5, 6, 7],
    ['b', 5, 6, 5, 4, 3],
    ['c', 4, 4, 5, 4, 3],
    ['d', 5, 5, 6, 7, 4],
    ['e', 5, 6, 7, 5, 4],
]
price = np.array([header] + records)
# An ndarray holds a single element type, so every value above is converted
# to a string.
price.dtype  # a string dtype; the recorded run reports a maximum length of 4
# Once the values are strings, numeric reductions such as mean() can no
# longer be used, which is why a DataFrame is needed.
dtype('<U4')
import pandas as pd
# Table with a string label column 's' next to the numeric columns 'a'..'e'.
price = pd.DataFrame({
    's': ['s1', 's2', 's3', 's4', 's5'],
    'a': [3, 4, 5, 6, 7],
    'b': [5, 6, 5, 4, 3],
    'c': [4, 4, 5, 4, 3],
    'd': [5, 5, 6, 7, 4],
    'e': [5, 6, 7, 5, 4],
})
print(price)  # with a DataFrame the numeric columns support arithmetic

# numeric_only=True keeps the string column 's' out of the reduction.
# Without it, pandas 2.x raises TypeError instead of silently skipping 's'.
# pandas applies Bessel's correction by default (ddof=1), so these values
# differ from NumPy's std().
sample_std = price.std(numeric_only=True)
print(sample_std)

# ddof=0 turns Bessel's correction off; the values then match NumPy's std().
population_std = price.std(ddof=0, numeric_only=True)
print(population_std)
    s  a  b  c  d  e
0  s1  3  5  4  5  5
1  s2  4  6  4  5  6
2  s3  5  5  5  6  7
3  s4  6  4  4  7  5
4  s5  7  3  3  4  4
a    1.581139
b    1.140175
c    0.707107
d    1.140175
e    1.140175
dtype: float64
a    1.414214
b    1.019804
c    0.632456
d    1.019804
e    1.019804
dtype: float64

 2、读取dataframe数据单元

import pandas as pd
# Numeric table whose rows are labelled s1..s5 and whose columns are a..e.
row_labels = ['s1', 's2', 's3', 's4', 's5']
column_data = {
    'a': [3, 4, 5, 6, 7],
    'b': [5, 6, 5, 4, 3],
    'c': [4, 4, 5, 4, 3],
    'd': [5, 5, 6, 7, 4],
    'e': [5, 6, 7, 5, 4],
}
price = pd.DataFrame(column_data, index=row_labels)

print(price)
first_row = price.loc['s1']  # select one row by its index label
print(first_row)
type(first_row)  # bare expression: evaluated, but prints nothing in a script
price['d']  # select one column by name; likewise not printed
cells = price.values  # the frame's data as a NumPy ndarray
print(cells)
print(cells.mean())  # mean over every cell of the table
    a  b  c  d  e
s1  3  5  4  5  5
s2  4  6  4  5  6
s3  5  5  5  6  7
s4  6  4  4  7  5
s5  7  3  3  4  4
a    3
b    5
c    4
d    5
e    5
Name: s1, dtype: int64
[[3 5 4 5 5]
 [4 6 4 5 6]
 [5 5 5 6 7]
 [6 4 4 7 5]
 [7 3 3 4 4]]
4.88
©️2020 CSDN 皮肤主题: 大白 设计师:CSDN官方博客 返回首页