解决关于《【莫烦Python】强化学习 Reinforcement Learning 5.Policy Gradients》因为版本产生的bug。
版本
原始版本:
Tensorflow: 1.0
gym: 0.8.0
原因:下载不到Tensorflow: 1.0版本
我的版本:
tensorflow-cpu: 1.15.0
gym: 0.26.2
Error
TypeError: tuple indices must be integers or slices, not tuple
Traceback (most recent call last):
File "E:\pythonProject\Reinforcement-learning-with-tensorflow-master-origin\contents\6_OpenAI_gym\run_CartPole.py", line 37, in <module>
action = RL.choose_action(observation)
File "E:\pythonProject\Reinforcement-learning-with-tensorflow-master-origin\contents\6_OpenAI_gym\RL_brain.py", line 125, in choose_action
observation = observation[np.newaxis, :]
TypeError: tuple indices must be integers or slices, not tuple
修改RL_brain.py
def choose_action(self, observation):
# to have batch dimension when feed into tf placeholder
observation = np.array(observation, dtype=object) # add a new code
observation = observation[np.newaxis, :]
（根本原因：gym 0.26 起 env.reset() 返回的是 (obs, info) 元组，而不再是单个观测数组，所以这里拿到的 observation 是 tuple）
ValueError: too many values to unpack (expected 4)
Traceback (most recent call last):
File "E:\pythonProject\Reinforcement-learning-with-tensorflow-master-origin\contents\6_OpenAI_gym\run_CartPole.py", line 39, in <module>
observation_, reward, done, info = env.step(action)
ValueError: too many values to unpack (expected 4)
修改run_CartPole.py
while True:
env.render()
action = RL.choose_action(observation)
# observation_, reward, done, info = env.step(action) # delete the original code
observation_, reward, terminated, truncated, info = env.step(action) # add a new code: gym 0.26 的 step 返回 5 个值
done = terminated or truncated  # 注意：若写成 done, info, _ 接收，done 只包含 terminated，会漏掉 truncated（如 500 步截断），且 info 实际接到的是 truncated
ValueError: could not broadcast input array from shape (8,) into shape (10,)
Traceback (most recent call last):
File "E:\pythonProject\Reinforcement-learning-with-tensorflow-master-origin\contents\6_OpenAI_gym\run_CartPole.py", line 48, in <module>
RL.store_transition(observation, action, reward, observation_)
File "E:\pythonProject\Reinforcement-learning-with-tensorflow-master-origin\contents\6_OpenAI_gym\RL_brain.py", line 119, in store_transition
self.memory[index, :] = transition
ValueError: could not broadcast input array from shape (8,) into shape (10,)
修改RL_brain.py
def store_transition(self, s, a, r, s_):
    """Store one (s, a, r, s_) transition into the circular replay memory.

    Handles gym >= 0.26, where env.reset() returns an (obs, info) tuple:
    if ``s`` is a tuple, only its first element (the observation) is stored.
    """
    # Lazily create the write counter on the first call.
    if not hasattr(self, 'memory_counter'):
        self.memory_counter = 0
    # Unwrap (obs, info) tuples from the new gym reset() API.
    state = s[0] if isinstance(s, tuple) else s
    transition = np.hstack((state, [a, r], s_))
    # Overwrite the oldest slot (ring-buffer behavior).
    slot = self.memory_counter % self.memory_size
    self.memory[slot, :] = transition
    self.memory_counter += 1
ValueError: setting an array element with a sequence.
Traceback (most recent call last):
File "E:\pythonProject\Reinforcement-learning-with-tensorflow-master-origin\contents\6_OpenAI_gym\run_CartPole.py", line 37, in <module>
action = RL.choose_action(observation)
File "E:\pythonProject\Reinforcement-learning-with-tensorflow-master-origin\contents\6_OpenAI_gym\RL_brain.py", line 135, in choose_action
actions_value = self.sess.run(self.q_eval, feed_dict={
self.s: observation})
File "E:\ProgramData\anaconda3\envs\tempEnv\lib\site-packages\tensorflow_core\python\client\session.py", line 956, in run
episode: 55 ep_r: 17.48 epsilon: 0.27
run_metadata_ptr)
File "E:\ProgramData\anaconda3\envs\tempEnv\lib\site-packages\tensorflow_core\python\client\session.py", line 1149, in _run
np_val = np.asarray(subfeed_val, dtype=subfeed_dtype)
ValueError: setting an array element with a sequence.
修改RL_brain.py
def choose_action(self, observation):
# to have batch dimension when feed into tf placeholder
observation = np.array(observation, dtype=object)
observation = observation[np.newaxis, :]
if np.random.uniform() < self