from numpy import random
import matplotlib.pyplot as plt
K = [1, 2, 3, 4, 5]  # labels of the five bandit arms
R = {1: 2, 2: 3, 3: 5, 4: 1, 5: 9}  # payout of each arm (alternative reward model)
prob = {1: 0.6, 2: 0.5, 3: 0.2, 4: 0.7, 5: 0.05}  # payout probability of each arm
T = 2000  # number of pulls per experiment


def run_epsilon_greedy(epsilon, means, steps=T, arms=None):
    """Simulate one epsilon-greedy run on a Gaussian multi-armed bandit.

    On every step a uniformly random arm is pulled with probability
    ``epsilon``; otherwise the arm with the highest estimated value is pulled
    (ties go to the arm listed first).  The reward of an arm is drawn from a
    normal distribution with that arm's mean and unit standard deviation.

    Args:
        epsilon: Exploration probability in ``[0, 1]``.
        means: Reward mean of each arm, in the same order as ``arms``.
        steps: Number of pulls to simulate.
        arms: Arm labels; defaults to the module-level ``K``.

    Returns:
        A ``(total_reward, history)`` tuple.  ``history[0]`` is a ``0``
        placeholder and ``history[i]`` (``i >= 1``) is the average reward
        per pull after ``i`` pulls, so ``len(history) == steps + 1``.
    """
    if arms is None:
        arms = K
    mean_of = dict(zip(arms, means))

    # The estimates must live OUTSIDE the step loop: re-creating them on
    # every step discards everything learned, and the greedy choice then
    # degenerates into always pulling the first arm.
    Q = dict.fromkeys(arms, 0.0)  # estimated value of each arm
    count = dict.fromkeys(arms, 0)  # number of times each arm was pulled

    total = 0.0
    history = [0]
    for step in range(steps):
        if random.random() < epsilon:
            k = random.choice(arms)  # explore
        else:
            k = max(Q, key=Q.get)  # exploit the current best estimate
        # NOTE: alternative Bernoulli-style reward using R and prob:
        # v = random.choice([R[k], 0], p=[prob[k], 1 - prob[k]])
        v = random.normal(mean_of[k], 1)
        total += v
        history.append(total / (step + 1))
        # Incremental update of the running mean reward of arm k.
        count[k] += 1
        Q[k] += (v - Q[k]) / count[k]
    return total, history


def main():
    """Run the experiment for three epsilon values and plot the average reward."""
    # Each arm's true reward mean is itself drawn from a standard normal;
    # the same means are shared by all three runs.
    avg = [random.normal(0, 1) for _ in K]

    curves = []
    for eplison in (0, 0.01, 0.1):
        r, history = run_epsilon_greedy(eplison, avg)
        print("end the reword is {}".format(r))
        curves.append((eplison, history))

    X = range(T + 1)
    for eplison, history in curves:
        # Skip the placeholder at index 0.
        plt.plot(X[1:], history[1:], label="epsilon = {}".format(eplison))
    plt.legend()
    plt.show()  # must be called; a bare `plt.show` displays nothing


if __name__ == "__main__":
    main()