python按比例抽样质检_python 实现样本抽样

单纯随机抽样

def __randomSampling(self, df_index, scale):
    """Draw a simple random sample of index entries without replacement.

    Args:
        df_index: Index-like collection of candidates (for example
            ``DataFrame.index``).
        scale: Fraction of ``df_index`` to draw; the sample size is
            ``int(len(df_index) * scale)``.

    Returns:
        A NumPy array with the selected entries, or ``None`` when the draw
        fails (for example when the requested sample is larger than the
        population). The error is printed in that case.
    """
    try:
        sample_size = int(len(df_index) * scale)
        # np.random.choice is uniform by default, so no explicit ``p`` vector
        # is needed. Dropping it avoids building an O(n) list and lets an
        # empty index yield an empty sample instead of failing on 1 / 0.
        return np.random.choice(df_index, sample_size, replace=False)
    except Exception as e:
        # Best-effort contract kept from the original: report and return None.
        print(e)
        return None

def RandomSampling(self, scale):
    """Split ``self.df`` into a simple random sample and the remaining rows.

    Args:
        scale: Fraction of rows to draw; forwarded to the private
            random-sampling helper together with ``self.df.index``.

    Returns:
        A ``(df_choice, df_not_choice)`` tuple: the sampled rows, and the
        rows of ``self.df`` whose index labels were not sampled.

    Raises:
        ValueError: If the helper returned ``None`` (the draw failed).
    """
    df_choice_index = self.__randomSampling(self.df.index, scale)
    if df_choice_index is None:
        raise ValueError("sampling failed: no index entries were selected")
    # The helper returns entries of ``self.df.index`` (labels), so select by
    # label. ``.iloc`` only matches when the index is the default 0..n-1.
    df_choice = self.df.loc[df_choice_index, :]
    # A boolean mask is inverted with ``~``; unary minus on a boolean NumPy
    # array raises TypeError.
    df_not_choice = self.df.loc[~self.df.index.isin(df_choice.index)]
    return (df_choice, df_not_choice)

重复随机抽样

def __repetitionRandomSampling(self, df_index, scale):
    """Draw a random sample of index entries with replacement.

    Args:
        df_index: Index object supporting positional array indexing (for
            example ``DataFrame.index``).
        scale: Fraction of ``df_index`` to draw; the sample size is
            ``int(len(df_index) * scale)``.

    Returns:
        The selected entries (duplicates possible), obtained by indexing
        ``df_index`` with random positions, or ``None`` when the draw fails.
        The error is printed in that case.
    """
    try:
        population = len(df_index)
        sample_size = int(population * scale)
        if sample_size == 0:
            # Nothing to draw; also avoids calling randint on an empty range.
            return df_index[:0]
        # randint's upper bound is exclusive, so it must be ``population``
        # (not ``population - 1``) for the last entry to be selectable.
        positions = np.random.randint(0, population, size=sample_size)
        return df_index[positions]
    except Exception as e:
        # Best-effort contract kept from the original: report and return None.
        print(e)
        return None

def RepetitionRandomSampling(self, scale):
    """Split ``self.df`` into a with-replacement random sample and the rest.

    Args:
        scale: Fraction of rows to draw; forwarded to the private
            repetition-sampling helper together with ``self.df.index``.

    Returns:
        A ``(df_choice, df_not_choice)`` tuple: the sampled rows (a row
        appears once per time it was drawn), and the rows of ``self.df``
        whose index labels were never drawn.

    Raises:
        ValueError: If the helper returned ``None`` (the draw failed).
    """
    df_choice_index = self.__repetitionRandomSampling(self.df.index, scale)
    if df_choice_index is None:
        raise ValueError("sampling failed: no index entries were selected")
    # The helper returns entries of ``self.df.index`` (labels), so select by
    # label. ``.iloc`` only matches when the index is the default 0..n-1.
    df_choice = self.df.loc[df_choice_index, :]
    # A boolean mask is inverted with ``~``; unary minus on a boolean NumPy
    # array raises TypeError.
    df_not_choice = self.df.loc[~self.df.index.isin(df_choice.index)]
    return (df_choice, df_not_choice)

系统抽样

def __systematicSampling(self, df_index, scale):
    """Pick evenly spaced index entries (systematic sampling).

    Starting from the first entry, every ``1 / scale``-th position is taken
    until ``int(len(df_index) * scale)`` entries have been collected.

    Args:
        df_index: Sequence of candidates supporting ``len`` and integer
            indexing (for example ``DataFrame.index``).
        scale: Fraction of ``df_index`` to draw.

    Returns:
        A list of the selected entries (empty when the sample size is not
        positive), or ``None`` when selection fails. The error is printed in
        that case.
    """
    try:
        population = len(df_index)
        sample_size = int(population * scale)
        if sample_size <= 0:
            # Checked before computing the step so scale == 0 gives an empty
            # sample instead of failing on 1 / 0.
            return []
        step = 1 / scale
        # The modulo keeps the position in range should rounding push it
        # past the end of the index.
        return [
            df_index[int(i * step) % population]
            for i in range(sample_size)
        ]
    except Exception as e:
        # Best-effort contract kept from the original: report and return None.
        print(e)
        return None

def SystematicSampling(self, scale):
    """Split ``self.df`` into a systematic sample and the remaining rows.

    Args:
        scale: Fraction of rows to draw; forwarded to the private
            systematic-sampling helper together with ``self.df.index``.

    Returns:
        A ``(df_choice, df_not_choice)`` tuple: the sampled rows, and the
        rows of ``self.df`` whose index labels were not sampled.

    Raises:
        ValueError: If the helper returned ``None`` (selection failed).
    """
    df_choice_index = self.__systematicSampling(self.df.index, scale)
    if df_choice_index is None:
        raise ValueError("sampling failed: no index entries were selected")
    # The helper returns entries of ``self.df.index`` (labels), so select by
    # label. ``.iloc`` only matches when the index is the default 0..n-1.
    df_choice = self.df.loc[df_choice_index, :]
    # A boolean mask is inverted with ``~``; unary minus on a boolean NumPy
    # array raises TypeError.
    df_not_choice = self.df.loc[~self.df.index.isin(df_choice.index)]
    return (df_choice, df_not_choice)

分层抽样

先按对观察指标影响较大的某种特征，将总体划分为若干个类别（层），再从每一层内按上述某种抽样方法抽取一定比例的观察单位，最后将各层抽出的单位合并组成样本。

def StratifiedSampling(self, sampling_type, scale):
    """Draw a stratified sample from ``self.df``.

    Rows are grouped by the distinct values of ``self.df_col[0]``; the
    chosen sampling method is applied to each group separately, and the
    per-group samples are concatenated.

    Args:
        sampling_type: Sampling method applied inside each stratum. Only
            ``'rs'`` (simple random), ``'rrs'`` (random with replacement)
            and ``'ss'`` (systematic) are supported.
        scale: Fraction of each stratum to draw; documented range is (0, 1).

    Returns:
        A ``(df_choice, df_not_choice)`` tuple: the sampled rows, and the
        rows of ``self.df`` whose index labels were not sampled.

    Raises:
        ValueError: If ``sampling_type`` is not supported, or if the
            sampling helper returned ``None`` for a stratum.
    """
    # Local import so this method is self-contained; needed for pd.concat
    # and pd.unique below.
    import pandas as pd

    samplers = {
        'rs': self.__randomSampling,
        'rrs': self.__repetitionRandomSampling,
        'ss': self.__systematicSampling,
    }
    # Validate once up front rather than on every loop iteration.
    # ValueError subclasses Exception, so existing handlers still catch it.
    if sampling_type not in samplers:
        raise ValueError('不支持的随机类型。')
    sampler = samplers[sampling_type]

    strata = self.df_col[0]
    sampled_parts = []
    # pd.unique keeps first-appearance order, so the output row order is
    # deterministic (a set has arbitrary order).
    for value in pd.unique(strata.values):
        stratum_index = self.df_col[strata == value].index
        chosen_index = sampler(stratum_index, scale)
        if chosen_index is None:
            raise ValueError(
                "sampling failed for stratum {!r}".format(value))
        # The samplers return index labels, so select by label with ``.loc``.
        sampled_parts.append(self.df.loc[chosen_index])

    if sampled_parts:
        # DataFrame.append was removed in pandas 2.0; concat replaces it.
        df_choice = pd.concat(sampled_parts)
    else:
        # No strata at all: return an empty frame with the same columns.
        df_choice = self.df.iloc[0:0]

    # A boolean mask is inverted with ``~``; unary minus on a boolean NumPy
    # array raises TypeError.
    df_not_choice = self.df.loc[~self.df.index.isin(df_choice.index)]
    return (df_choice, df_not_choice)

  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值