# DataFrame 的创建方法
# Two ways to construct a DataFrame.
# The import lives here because this snippet runs before any other import
# of pandas in the file; without it `pd` is undefined at this point.
import pandas as pd

# 1) From a dict: each key becomes a column label and each list supplies
#    that column's values. No index is given, so rows are labelled 0..n-1.
df_1 = pd.DataFrame({'A': [0, 1, 2], 'B': [3, 4, 5]})
print(df_1)

# 2) From a 2-D list (one inner list per row), with explicit column labels
#    and an explicit row index.
df_2 = pd.DataFrame(
    [[0, 1, 2], [3, 4, 5]],
    columns=['A', 'B', 'C'],
    index=[1, 2],
)
print(df_2)
# 常用统计函数
# mean、sum、max、min ...
import pandas as pd
# Sample table: a string date-label column ('data') plus five numeric
# series s1..s5, one row per date.
arr = pd.DataFrame({
    'data': ['05-21', '05-22', '05-23', '05-24', '05-25', '05-26', '05-27', '05-28', '05-29', '05-30'],
    's1': [27.93, 58.08, 38.67, 45.83, 70.26, 46.61, 49.73, 34.02, 56.64, 57.28],
    's2': [28.18, 50.61, 31.73, 31.48, 55.96, 22.73, 40.47, 42.02, 31.39, 64.21],
    's3': [29.39, 51.62, 57.91, 45.94, 53.81, 45.77, 69.13, 28.75, 43.43, 55.79],
    's4': [40.52, 48.55, 59.23, 71.21, 58.48, 63.63, 55.16, 34.90, 54.65, 68.03],
    's5': [26.26, 54.03, 49.08, 46.53, 43.23, 56.79, 58.71, 26.43, 44.97, 54.16],
})
print(arr)

# Every reduction below passes numeric_only=True so that only s1..s5 take
# part. The 'data' column holds strings; pandas 2.x no longer drops such
# columns silently, so without the flag mean() and the row-wise reductions
# raise TypeError and the column-wise sum() concatenates the date strings.

# Mean of each column.
col_mean = arr.mean(numeric_only=True)
print(col_mean)
# Mean of each row.
row_mean = arr.mean(axis=1, numeric_only=True)
print(row_mean)

# Sum of each column.
col_sum = arr.sum(numeric_only=True)
print(col_sum)
# Sum of each row.
row_sum = arr.sum(axis=1, numeric_only=True)
print(row_sum)

# Maximum of each column.
col_max = arr.max(numeric_only=True)
print(col_max)
# Maximum of each row.
row_max = arr.max(axis=1, numeric_only=True)
print(row_max)

# Minimum of each column.
col_min = arr.min(numeric_only=True)
print(col_min)
# Minimum of each row.
row_min = arr.min(axis=1, numeric_only=True)
print(row_min)
# std(ddof=0)
# DataFrame的std方法中,ddof默认值为1,计算的是考虑了贝塞尔校正(除以 n-1)之后的样本标准差;ddof=0 时除以 n,得到总体标准差。
import numpy as np
import pandas as pd
# Raw scores as a 2-D ndarray: one row per series (s1..s5).
npArr = np.array([
    [27.93, 58.08, 38.67, 45.83, 70.26, 46.61, 49.73, 34.02, 56.64, 57.28],
    [28.18, 50.61, 31.73, 31.48, 55.96, 22.73, 40.47, 42.02, 31.39, 64.21],
    [29.39, 51.62, 57.91, 45.94, 53.81, 45.77, 69.13, 28.75, 43.43, 55.79],
    [40.52, 48.55, 59.23, 71.21, 58.48, 63.63, 55.16, 34.90, 54.65, 68.03],
    [26.26, 54.03, 49.08, 46.53, 43.23, 56.79, 58.71, 26.43, 44.97, 54.16],
])

# The same scores as a DataFrame: a 'data' date-label column followed by
# one column per series, filled from the rows of npArr.
arr = pd.DataFrame({
    'data': ['05-21', '05-22', '05-23', '05-24', '05-25', '05-26', '05-27', '05-28', '05-29', '05-30'],
    **dict(zip(['s1', 's2', 's3', 's4', 's5'], npArr.tolist())),
})

# Disabled: standard deviation of the tables above (per column for the
# DataFrame, per row for the ndarray).
# print(arr.std())
# print(npArr.std(axis=1))

# The same eleven values once as an ndarray and once as a one-column DataFrame.
a = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0])
b = pd.DataFrame([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0])

# Sum of squared deviations from the mean, shared by both manual formulas.
sum_sq_dev = ((a - np.mean(a)) ** 2).sum()

# NumPy's std() matches the formula that divides by n.
print(a.std())
print(np.sqrt(sum_sq_dev / (a.size)))

# pandas' std() matches the formula that divides by n - 1: its ddof defaults
# to 1, i.e. Bessel's correction is applied.
print(b.std())
print(np.sqrt(sum_sq_dev / (a.size - 1)))

# With ddof=0 pandas skips Bessel's correction and divides by n, so the
# result matches NumPy's default above.
print(b.std(ddof=0))