# 强化学习 (reinforcement learning): epsilon-greedy n-armed bandit demo

import numpy as np
from scipy import stats
import random
import matplotlib.pyplot as plt

# Bandit configuration.
n = 10  # number of arms

# Hidden per-arm success probability. reward() compares this value against
# random.random(), so it has to lie in [0, 1): draw it from the uniform
# distribution (rand). The standard normal (randn) produces negative and >1
# values, which turn an arm into "never pays" or "always pays the maximum".
arms = np.random.rand(n)

# Exploration threshold: the main loop exploits when random.random() > eps
# and explores otherwise.
eps = 0.1

def reward(prob, trials=10):
    """Return the number of successes in ``trials`` independent draws.

    Each draw succeeds when ``random.random() < prob``.

    Args:
        prob: Success threshold for a single draw. Because
            ``random.random()`` lies in [0, 1), a value <= 0 never succeeds
            and a value >= 1 always succeeds.
        trials: Number of draws to make. Defaults to 10.

    Returns:
        An int between 0 and ``trials`` inclusive.
    """
    # Summing booleans counts the True outcomes; exactly one random number is
    # consumed per trial.
    return sum(random.random() < prob for _ in range(trials))

# Play history: one row per play, as [arm index, reward]. It is seeded with a
# single zero-reward record for a random arm. np.random.randint excludes its
# upper bound, so the bound must be n (not n + 1) to stay within the valid
# arm indices 0 .. n-1.
av = np.array([np.random.randint(0, n), 0]).reshape(1, 2)

def bestArm(a):
    """Return the arm with the highest mean observed reward.

    Args:
        a: 2-D array of play records, one row per play, where column 0 is
            the arm index and column 1 is the reward received.

    Returns:
        The index of the arm whose mean reward is largest. Ties go to the
        arm that appears first in ``a``. Returns 0 when no arm has a mean
        reward above 0, including when ``a`` has no rows.
    """
    played = a[:, 0]   # arm index of every recorded play
    rewards = a[:, 1]  # reward of every recorded play
    bestLot = 0
    bestMean = 0
    seen = set()

    # Walk the history in order so ties keep first-seen precedence, but only
    # evaluate each distinct arm once.
    for arm in map(int, played):
        if arm in seen:
            continue
        seen.add(arm)
        # Select rows by the arm column; comparing against the first row
        # (a[0, :]) would pick unrelated rows or index out of range.
        avg = rewards[played == arm].mean()
        if bestMean < avg:
            bestMean = avg
            bestLot = arm

    return bestLot
    
if __name__ == "__main__":
    # Epsilon-greedy simulation: play 500 rounds, appending each play to the
    # history `av`, and plot the running mean reward after every round.
    plt.xlabel("Plays")
    plt.ylabel("Avg Reward")

    for i in range(500):
        if random.random() > eps:
            # Exploit: play the arm with the best observed mean so far.
            choice = bestArm(av)
        else:
            # Explore: play a uniformly random arm.
            choice = np.random.randint(len(arms))

        # Record the play as [arm index, reward] (same for both branches).
        thisAv = np.array([[choice, reward(arms[choice])]])
        av = np.concatenate((av, thisAv), axis=0)

        # Fraction of plays so far that used the truly best arm. The arm
        # index lives in column 0, so compare av[:, 0], not the first row.
        # Kept as a diagnostic; it is not plotted.
        pCorrect = np.mean(av[:, 0] == np.argmax(arms))

        runningMean = np.mean(av[:, 1])
        plt.scatter(i, runningMean)

    # Without this, a plain script run builds the figure and exits without
    # ever displaying it.
    plt.show()


加强学习


评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值