import random
import time
from testyuanyang import YuanYangEnv
class DP_Policy_Iter:
    """Policy-iteration agent state for a ``YuanYangEnv``-style environment.

    The constructor copies the state/action sets and discount factor from the
    environment, allocates a zeroed value table and builds a random initial
    policy.
    """

    def __init__(self, yuanyang):
        """Set up the value table and a random initial policy.

        Args:
            yuanyang: Environment object. It must provide the attributes
                ``states``, ``actions`` and ``gamma`` and the methods
                ``state_to_position(state)``, ``collide(position)`` and
                ``find(position)``.
        """
        self.states = yuanyang.states
        self.actions = yuanyang.actions
        # Value function: one entry per state plus one extra trailing slot
        # (NOTE(review): purpose of the extra slot is not visible here --
        # presumably states are 0-based indices with one spare; confirm).
        self.v = [0.0 for _ in range(len(self.states) + 1)]
        # Policy: maps a state to the action chosen for it; starts empty.
        self.pi = dict()
        self.gamma = yuanyang.gamma
        self.yuanyang = yuanyang

        # Initialise the policy with a uniformly random action per state.
        for state in self.states:
            # Both checks are always evaluated, each on its own freshly
            # computed position, because the helpers are opaque from here.
            flag1 = yuanyang.collide(yuanyang.state_to_position(state))
            flag2 = yuanyang.find(yuanyang.state_to_position(state))
            # States flagged by either check get no policy entry
            # (presumably obstacle and goal cells -- confirm against the env).
            if flag1 == 1 or flag2 == 1:
                continue
            self.pi[state] = self.actions[
                int(random.random() * len(self.actions))
            ]
def policy_evaluate(self):
#策略评估计算值函数
for i in range(100):
delta=0.0
for state in self.states:
flag1=0
11-02
3113
![](https://csdnimg.cn/release/blogv2/dist/pc/img/readCountWhite.png)
“相关推荐”对你有帮助么?
-
非常没帮助
-
没帮助
-
一般
-
有帮助
-
非常有帮助
提交