从N个元素中随机抽取k个元素,但是N不定(元素以流的形式依次到来,事先不知道总数)。要求每个元素被抽中的概率都是k/N。解答:先选中前k个元素;从第k+1个到最后一个元素,以k/i (i = k+1, k+2, ..., N)的概率选中第i个元素,并用它随机替换一个原来已选中的元素。
'''
Reservoir Sampling implementation
'''
from random import Random
def RandomSelect(knum, rand=None):
    '''Coroutine that keeps a uniform random sample of the items sent to it.

    Implements reservoir sampling (Algorithm R): the first ``knum`` items
    fill the reservoir; afterwards the i-th item (1-based) replaces a
    uniformly chosen reservoir slot with probability ``knum / i``.  Once the
    stream ends, every item seen has the same probability ``knum / N`` of
    being in the reservoir, without N having to be known in advance.

    Parameters:
        knum: size of the sample to keep.  If it is zero or negative the
            reservoir simply stays empty.
        rand: object providing ``randint(a, b)`` with inclusive bounds
            (for example a ``random.Random`` instance).  A fresh
            ``Random()`` is created when omitted.

    Protocol:
        Prime the generator with ``next(gen)``, then feed items with
        ``gen.send(item)``.  Every ``send`` yields ``None``.  Sending the
        sentinel value ``-1`` ends the stream: the reservoir is printed and
        the generator finishes, so that final ``send`` raises
        ``StopIteration``.  As a consequence ``-1`` itself can never be
        sampled.

    Example::

        gen = RandomSelect(3)
        next(gen)                      # advance to the first yield
        try:
            for v in [1, 2, 3, 4, 5, 6, 7, 8, -1]:
                gen.send(v)
        except StopIteration:
            pass                       # sentinel reached, sample printed
    '''
    if rand is None:
        rand = Random()
    reservoir = []
    seen = 0  # number of items consumed before the current one
    while True:
        item = yield
        if item == -1:
            # Sentinel: stop consuming and report the sample.
            break
        if len(reservoir) < knum:
            # Fill phase: the first knum items are always kept.
            reservoir.append(item)
        else:
            # randint is inclusive, so slot is uniform over seen + 1 values.
            # It lands inside the reservoir with probability
            # knum / (seen + 1), and that same draw picks the victim slot.
            slot = rand.randint(0, seen)
            if slot < knum:
                reservoir[slot] = item
        seen += 1
    # Parenthesised form prints identically on Python 2 and Python 3.
    print(reservoir)
if __name__ == '__main__':
    # Demo: stream eight values through the sampler; the trailing -1 is the
    # end-of-stream sentinel that makes the sampler print its reservoir.
    myList = [1, 2, 3, 4, 5, 6, 7, 8, -1]
    cr = RandomSelect(3)
    # Advance to the first yield, otherwise send() cannot be called.
    # The next() builtin works on both Python 2.6+ and Python 3.
    next(cr)
    try:
        for val in myList:
            cr.send(val)
    except StopIteration:
        # Expected: the sampler finishes when it receives the sentinel.
        pass
    finally:
        # Explicitly finalize the generator (a no-op if already finished).
        cr.close()