import copy
def update(Q, R, yn):
    """Return a new Q table after one sweep of updates, printing it first.

    Every entry ``(i, j)`` whose reward ``R[i][j]`` is not ``-1`` is replaced
    by ``R[i][j] + yn * max(Q[j])``; entries whose reward is ``-1`` keep
    their previous value. All right-hand sides read from the *old* table
    ``Q``, so the order in which entries are visited does not matter.

    Args:
        Q: Current table as a list of rows (list of lists of numbers).
            It is not modified.
        R: Reward table indexed the same way as ``Q``; ``-1`` marks an
            entry that must be skipped.
        yn: Multiplier applied to the best value found in row ``j`` of
            ``Q`` (the discount factor).

    Returns:
        A new list of lists holding the updated table.
    """
    # Work on a copy so that every update in this sweep sees the old values.
    Q1 = copy.deepcopy(Q)
    for i, row in enumerate(Q1):
        for j in range(len(row)):
            if R[i][j] == -1:
                # -1 is the sentinel for "no update for this entry".
                continue
            # Column index j doubles as the row index of the follow-up
            # state, hence the lookup of row j in the old table.
            row[j] = R[i][j] + yn * max(Q[j])
    LL_print(Q1)
    return Q1
def LL_print(LL):
    """Print each element of ``LL`` on its own line.

    Args:
        LL: An iterable of rows (in this file, a list of lists). Each row
            is printed with ``print`` using its default representation.
    """
    for row in LL:
        print(row)
def dis(Q, Q1):
    """Return the sum of absolute element-wise differences of two tables.

    Args:
        Q: First table (list of lists of numbers); its shape drives the
            iteration.
        Q1: Second table; it must provide an entry for every position
            present in ``Q``.

    Returns:
        The sum of ``abs(Q[i][j] - Q1[i][j])`` over all positions of ``Q``
        (``0`` for an empty table).

    Raises:
        IndexError: If ``Q1`` lacks a position that exists in ``Q``.
    """
    total = 0
    for i, row in enumerate(Q):
        for j, value in enumerate(row):
            total += abs(value - Q1[i][j])
    return total
def Q_find(yn, R, tol=1e-5):
    """Iterate ``update`` until the table stops changing, then rescale it.

    Starting from an all-zero ``len(R) x len(R)`` table, ``update`` is
    applied repeatedly until the total absolute change between two
    successive tables (as measured by ``dis``) drops below ``tol``. The
    converged table is then scaled so that its largest entry becomes 100,
    rounded to integers, printed and returned. Progress is printed along
    the way.

    Args:
        yn: Discount factor forwarded to ``update``.
        R: Reward table (list of rows); the Q table is built with
            ``len(R)`` rows and ``len(R)`` columns.
        tol: Convergence threshold on the summed absolute change between
            two successive tables. Defaults to ``1e-5``.

    Returns:
        The rescaled, rounded table as a list of lists of integers. An
        empty ``R`` yields ``[]``. If the largest converged value is 0 the
        table cannot be rescaled and is returned rounded but unscaled.
    """
    if not R:
        # No states at all: nothing to iterate, and max() below would fail.
        return []

    # Initialisation: start from an all-zero table.
    n_states = len(R)
    Q = [[0] * n_states for _ in range(n_states)]

    # Iteration: sweep until two successive tables are (almost) identical.
    flag = 0
    while True:
        Q1 = update(Q, R, yn)
        converged = dis(Q, Q1) < tol
        Q = Q1
        if converged:
            break
        flag += 1
        print("\n第", flag, "次更新")
    print()

    num_max = max(max(row) for row in Q)
    if num_max == 0:
        # Scaling by the maximum would divide by zero; keep values as-is.
        Q = [[round(value) for value in row] for row in Q]
    else:
        Q = [[round(value / num_max * 100) for value in row] for row in Q]
    LL_print(Q)
    return Q
if __name__ == "__main__":
    # Reward discount factor and reward matrix.
    # An entry of -1 in R is skipped by update(); other entries are the
    # reward used for that (row, column) position.
    yn = 0.8
    R = [
        [-1, -1, -1, -1, 0, -1],
        [-1, -1, -1, 0, -1, 100],
        [-1, -1, -1, 0, -1, -1],
        [-1, 0, 0, -1, 0, -1],
        [0, -1, -1, 0, -1, 100],
        [-1, 0, -1, -1, 0, 100],
    ]
    Q = Q_find(yn, R)