import numpy as np
import pandas as pd
# Demonstration of DataFrame.mean() and its `axis` / `skipna` parameters.
#
# key1 and key2 are numeric columns that each contain one missing value
# (np.nan); key3 mixes integers and strings, so it is stored as dtype object.
df = pd.DataFrame(
    {
        'key1': [4, 5, 3, np.nan, 2],
        'key2': [1, 2, np.nan, 4, 5],
        'key3': [1, 2, 3, 'j', 'k'],
    },
    index=['a', 'b', 'c', 'd', 'e'],
)
print(df)
print(df['key1'].dtype, df['key2'].dtype, df['key3'].dtype)
print('-----')

# .mean() computes the arithmetic mean of each column; NaN entries are
# skipped by default. Only numeric columns can be averaged, so
# numeric_only=True is passed explicitly: pandas 2.0+ no longer drops
# non-numeric columns silently and would raise TypeError on key3.
m1 = df.mean(numeric_only=True)
print(m1, type(m1))
# A single column can be aggregated on its own by selecting it first.
print('单独统计一列:', df['key2'].mean())
print('-----')

# axis: the default (0) aggregates down each column; axis=1 aggregates
# across each row instead, giving one mean per index label.
m2 = df.mean(axis=1, numeric_only=True)
print(m2)
print('-----')

# skipna: whether NaN values are ignored (default True). With skipna=False,
# any column that contains a NaN yields NaN as its result.
m3 = df.mean(skipna=False, numeric_only=True)
print(m3)
print('-----')
   key1  key2 key3
a   4.0   1.0    1
b   5.0   2.0    2
c   3.0   NaN    3
d   NaN   4.0    j
e   2.0   5.0    k
float64 float64 object
-----
key1    3.5
key2    3.0
dtype: float64 <class 'pandas.core.series.Series'>
单独统计一列: 3.0
-----
a 2.5
b 3.5
c 3.0
d 4.0
e 3.5
dtype: float64
-----
key1 NaN
key2 NaN
dtype: float64
-----