用 NumPy 实现数值稳定的 softmax(先减去最大值,避免 exp 溢出):
def npsoftmax(x, axis=0):
    """Compute a numerically stable softmax of ``x`` along ``axis``.

    Args:
        x: Array-like of scores; anything ``np.asarray`` accepts
            (``np.ndarray``, nested lists, tuples).
        axis: Axis along which the outputs are normalised so that they
            sum to 1. Defaults to 0.

    Returns:
        An ``np.ndarray`` with the same shape as ``x`` whose entries along
        ``axis`` are non-negative and sum to 1.
    """
    # Accept plain Python sequences as well as ndarrays.
    x = np.asarray(x)
    # Subtracting the per-axis maximum does not change the result
    # mathematically, but keeps np.exp from overflowing on large scores.
    shifted = x - x.max(axis=axis, keepdims=True)
    exps = np.exp(shifted)
    return exps / exps.sum(axis=axis, keepdims=True)
然后根据 softmax 输出的概率进行加权抽样。方法一:计算概率的前缀和,再用一个 [0, 1) 上的均匀随机数定位所在区间:
def pdsample(self, action):
    """Draw one index at random, weighted by the probabilities in ``action``.

    A uniform random number in [0, 1) is located inside the running
    (cumulative) sum of the probabilities; the first index whose cumulative
    sum exceeds the random number is selected.

    Args:
        action: Non-empty array-like of probabilities, expected to sum
            to 1 (e.g. a softmax output). Multi-dimensional input is
            flattened by ``np.cumsum``.

    Returns:
        int: The sampled index in ``range(len(action))``. If the
        probabilities sum to slightly less than 1 and the random number
        falls beyond the last cumulative value, the last index is returned.

    Raises:
        ValueError: If ``action`` is empty.
    """
    cumulative = np.cumsum(np.asarray(action, dtype=float))
    count = cumulative.size
    if count == 0:
        raise ValueError("action must contain at least one probability")

    # Scalar draw; np.random.rand(1) would return a shape-(1,) array.
    rd = np.random.rand()

    # The cumulative sums are non-decreasing, so a binary search finds the
    # first index i with rd < cumulative[i] (side="right" keeps ties strict,
    # exactly like a linear scan testing `rd < cumulative[i]`).
    idx = int(np.searchsorted(cumulative, rd, side="right"))

    # Guard against rounding error leaving cumulative[-1] below rd.
    return min(idx, count - 1)
一个优化:前缀和是单调不减的,因此可以用二分查找(例如 `bisect.bisect_right` 或 `np.searchsorted`)代替线性扫描来定位随机数所在的区间,把查找复杂度从 O(n) 降到 O(log n)。
方法二:直接用 `np.random.choice` 按给定概率抽样:
# Weighted sampling delegated to np.random.choice.
# NOTE(review): `probs` is indexed with shape[1] and flattened with ravel(),
# so it is presumably a (1, n_actions) array — TODO confirm against the
# definition of self.acts_prob. self.sess looks like a session-style runner
# fed with {self.s: s}; verify against the enclosing class.
probs = self.sess.run(self.acts_prob, {self.s: s}) # get the probabilities of all actions
# Pick one index from 0 .. probs.shape[1]-1, weighted by the flattened probabilities.
return np.random.choice(np.arange(probs.shape[1]), p=probs.ravel()) # returns a single integer index