Datawhale Task03 统计相关 打卡

统计相关

1.次序统计

1.1计算最小值

import numpy as np
# 5x5 integer matrix holding the values 11..35, five consecutive values per row.
x = np.array([[11, 12, 13, 14, 15],
              [16, 17, 18, 19, 20],
              [21, 22, 23, 24, 25],
              [26, 27, 28, 29, 30],
              [31, 32, 33, 34, 35]])
# With no axis given, np.amin returns the minimum over the whole array.
y = np.amin(x)
print(y)
11
y = np.amin(x,axis = 0)  # axis=0 reduces across the rows: one minimum per column
print(y)
[11 12 13 14 15]
y = np.amin(x,axis = 1) # axis=1 reduces across the columns: one minimum per row
print(y)
[11 16 21 26 31]

1.2计算最大值

import numpy as np
# Same 5x5 matrix of the values 11..35 as in the minimum example.
x = np.array([[11, 12, 13, 14, 15],
              [16, 17, 18, 19, 20],
              [21, 22, 23, 24, 25],
              [26, 27, 28, 29, 30],
              [31, 32, 33, 34, 35]])
# With no axis given, np.amax returns the maximum over the whole array.
y = np.amax(x)
print(y)
35
y = np.amax(x,axis = 0) # axis=0 reduces across the rows: one maximum per column
print(y)
[31 32 33 34 35]
y = np.amax(x,axis = 1) # axis=1 reduces across the columns: one maximum per row
print(y)
[15 20 25 30 35]

1.3计算极差


[[ 0  8  3  3  3]
 [ 7 16  0 10  9]
 [19 11 18  3  6]
 [ 5 16  8  6  1]]
# Build the 4x5 sample matrix here. Without this, x would still be the 5x5
# matrix from the previous section and the printed ranges would not match.
np.random.seed(2020)
x = np.random.randint(0,20,size = [4,5])
print(np.ptp(x))  # np.ptp = peak to peak: max - min over all elements
print(np.ptp(x,axis = 1))  # range of each row
print(np.ptp(x,axis = 0))  # range of each column
19
[ 8 16 16 15]
[19  8 18  7  8]

1.4计算分位数

# A quantile (also called a quantile point) is a cut point that divides the
# range of a random variable's probability distribution into equal parts.
np.random.seed(2020)  # fixed seed so the sample matrix is reproducible
x = np.random.randint(0,20,size = [4,5])
print(x)
print(np.percentile(x,[25,50]))  # 25th and 50th percentiles of all elements
print(np.percentile(x,[25,50],axis = 0))  # the same percentiles for each column
print(np.percentile(x,[25,50],axis = 1))  # the same percentiles for each row
[[ 0  8  3  3  3]
 [ 7 16  0 10  9]
 [19 11 18  3  6]
 [ 5 16  8  6  1]]
[3.  6.5]
[[ 3.75 10.25  2.25  3.    2.5 ]
 [ 6.   13.5   5.5   4.5   4.5 ]]
[[ 3.  7.  6.  5.]
 [ 3.  9. 11.  6.]]

2.均值与方差

2.1计算中位数

# Two ways to get the median: the 50th percentile (as above) or np.median directly.
np.random.seed(2020)  
x = np.random.randint(0,20,size = [4,5])
print(x)
print(np.percentile(x,50))
print(np.median(x))
[[ 0  8  3  3  3]
 [ 7 16  0 10  9]
 [19 11 18  3  6]
 [ 5 16  8  6  1]]
6.5
6.5
# Same comparison along axis=0: the median of each column, computed both ways.
print(np.percentile(x,50,axis = 0))
print(np.median(x,axis = 0))
[ 6.  13.5  5.5  4.5  4.5]
[ 6.  13.5  5.5  4.5  4.5]

2.2计算平均值

import numpy as np

# 5x5 matrix of the values 11..35.
x = np.array([[11, 12, 13, 14, 15],
              [16, 17, 18, 19, 20],
              [21, 22, 23, 24, 25],
              [26, 27, 28, 29, 30],
              [31, 32, 33, 34, 35]])
# Arithmetic mean over all elements.
y = np.mean(x)
y
23.0
y = np.mean(x,axis = 1)  # axis=1: mean of each row
y
array([13., 18., 23., 28., 33.])
print(y)
[13. 18. 23. 28. 33.]

2.3计算加权平均值

# Values to be averaged: 5x5 matrix of 11..35.
x = np.array([[11, 12, 13, 14, 15],
              [16, 17, 18, 19, 20],
              [21, 22, 23, 24, 25],
              [26, 27, 28, 29, 30],
              [31, 32, 33, 34, 35]])
# Weight matrix with the same 5x5 shape, holding 1..25 row by row.
y = np.arange(1,26).reshape([5,5])
print(y)
[[ 1  2  3  4  5]
 [ 6  7  8  9 10]
 [11 12 13 14 15]
 [16 17 18 19 20]
 [21 22 23 24 25]]
# Weighted mean over all elements: sum(x * y) / sum(y).
z = np.average(x,weights = y)
print(z)
27.0

2.4计算方差

x = np.array([[11, 12, 13, 14, 15],
              [16, 17, 18, 19, 20],
              [21, 22, 23, 24, 25],
              [26, 27, 28, 29, 30],
              [31, 32, 33, 34, 35]])
# Population variance via NumPy (divides by n).
y = np.var(x)
print(y)
52.0
# Same value computed by hand: the mean of the squared deviations from the mean.
y = np.mean((np.mean(x)-x)**2)
print(y)
52.0
y = np.var(x,ddof = 1) # ddof ("delta degrees of freedom") = 1 replaces the divisor n with n-1, giving the unbiased estimate.
print(y)
54.166666666666664
# Unbiased variance by hand: the SUM (not the mean) of the squared deviations
# divided by n-1. Using np.mean here would divide by n a second time.
y = np.sum((np.mean(x)-x)**2)/(x.size-1)
print(y)
54.166666666666664

2.5计算标准差

x = np.array([[11, 12, 13, 14, 15],
              [16, 17, 18, 19, 20],
              [21, 22, 23, 24, 25],
              [26, 27, 28, 29, 30],
              [31, 32, 33, 34, 35]])
# Standard deviation over all elements via NumPy (default ddof, so it divides by n).
y = np.std(x)
y
7.211102550927978
# Equivalent: the square root of the variance.
y = np.sqrt(np.var(x))
y
7.211102550927978

3.相关

3.1计算协方差矩阵

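样本协方差的定义为 $\mathrm{Cov}(X,Y)=\frac{1}{n-1}\sum_{i=1}^{n}(x_i-\bar{x})(y_i-\bar{y})$,其中 $\bar{x}$、$\bar{y}$ 分别为两组样本的均值。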

# Covariance, in probability theory and statistics, measures the joint
# variability ("overall error") of two variables. Variance is the special case
# of covariance in which the two variables are the same.
import numpy as np  
x = [1,2,3,4,6]
y = [0,2,5,6,7]
print(np.cov(x))  # unbiased variance of x (divides by n-1)
print(np.var(x))  # population variance of x (divides by n), for comparison
print(np.cov(y))  # unbiased variance of y
print(np.cov(x,y)) # 2x2 covariance matrix of x and y; see https://blog.csdn.net/xueluowutong/article/details/85334256
3.7
2.96
8.5
[[3.7  5.25]
 [5.25 8.5 ]]
print(x,y)  # show the two input lists
[1, 2, 3, 4, 6] [0, 2, 5, 6, 7]

3.2计算相关系数

import numpy as np
np.random.seed(2020)
# Draw a 2x4 array of integers in [0, 20) and unpack its two rows into x and y.
x,y = np.random.randint(0,20,size = (2,4))
print(x)
print(y)
[0 8 3 3]
[ 3  7 16  0]
# 2x2 matrix of Pearson correlation coefficients between x and y.
z = np.corrcoef(x,y)
z
array([[1.        , 0.18793271],
       [0.18793271, 1.        ]])

4.直方图

import numpy as np

# Sample values and the increasing bin edges they are sorted into.
x = np.array([0.2, 6.4, 3.0, 1.6])
bins = np.array([0.0, 1.0, 2.5, 4.0, 10.0])
# For each value, np.digitize returns the index i with bins[i-1] <= value < bins[i].
inds = np.digitize(x, bins)
print(inds)
# Print the half-open interval that contains each sample value.
for n, idx in enumerate(inds):
    print(bins[idx - 1], '<=', x[n], '<', bins[idx])
[1 4 3 2]
0.0 <= 0.2 < 1.0
4.0 <= 6.4 < 10.0
2.5 <= 3.0 < 4.0
1.0 <= 1.6 < 2.5

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值