本文对同一个总体分别抽取样本量为5、10、15、20、25、30的样本(每种样本量重复抽样30次),对得到的样本均值做核密度估计并画图,验证了四种抽样方法下样本均值的一致性(相合性)。
其中只有简单随机抽样(放回和不放回)和分层抽样写出了具体的抽样过程;整群抽样部分因为水平不够,是另外写了一段手动抽出来的(此处应有链接)。
等距抽样是同学抽的,具体做法不清楚。
import xlrd
import random
import numpy as np
import matplotlib.pyplot as plt
import seaborn
import math
import xlwt
# Load the data
# Output workbook created with xlwt; nothing is written to it in this section.
workbook = xlwt.Workbook(encoding='utf-8', style_compression=0)
worksheet1 = workbook.add_sheet('test_sheet', cell_overwrite_ok=True)

# NOTE(review): recent xlrd releases (2.0+) reportedly no longer read .xlsx
# files -- confirm the installed xlrd version supports this workbook.
data = xlrd.open_workbook("问卷数据1.xlsx")
table = data.sheets()[0]

# Columns 0, 1 and 5 of the first sheet hold sex, height and scores.
sex = table.col_values(0)
height = table.col_values(1)
scores = table.col_values(5)
# Drop the first entry of every column (presumably the header row -- verify
# against the spreadsheet).
del sex[0], height[0], scores[0]
heightsp = []

# Initial settings
# One colour per slot index 0..5, from light to dark.
colors = dict(enumerate((
    '#D3EEF5',
    '#A6DDEA',
    '#7ACBE0',
    '#227A8F',
    '#165160',
    '#062329',
)))
# Six independent empty buckets each for sample means and standard deviations.
means = [[] for _ in range(6)]
stds = [[] for _ in range(6)]

# Population mean of height
height_array = np.array(height)
height_mean = height_array.mean()
1 简单随机抽样
1.1 不放回
def sampling_1m(n, t, times=30):
    """Draw repeated simple random samples of heights and plot their means.

    Each repetition draws ``n`` values from the module-level ``height`` list
    with ``random.sample`` (sampling without replacement) and appends the
    sample mean to ``means[t]``. Once all repetitions are done, a kernel
    density estimate of ``means[t]`` is drawn with seaborn.

    Args:
        n: Number of values drawn in each sample; also shown in the legend.
        t: Index of the bucket in ``means`` and of the colour in ``colors``.
        times: Number of samples to draw.
    """
    sample_means = means[t]
    for _ in range(times):
        drawn = np.array(random.sample(height, n))
        sample_means.append(drawn.mean())
    # One density curve per call, drawn from every mean stored in this bucket.
    seaborn.kdeplot(sample_means, label='n = %s' % (n), color=colors[t])
# Start from six empty buckets so that earlier runs do not leak into the plot.
means = [[] for _ in range(6)]
# Sample sizes 5, 10 and 15 go to buckets (and colours) 0, 1 and 2.
sampling_1m(5, 0)
sampling_1m(10, 1)
sampling_1m(15, 2)
sampling_1m(