DQN agent trained on the CartPole environment from OpenAI Gym, using keras-rl.
Tested with tensorflow 1.13.1 and keras 2.2.4.
"""Train a DQN agent on OpenAI Gym's CartPole-v0 using keras-rl.

Builds a small fully-connected Q-network, trains it for 10k steps with an
epsilon-greedy policy, renders 5 evaluation episodes, and plots the
per-episode training reward.

Tested with tensorflow 1.13.1 and keras 2.2.4.
"""
import numpy as np
import gym
import matplotlib.pyplot as plt

from keras.models import Sequential
from keras.layers import Dense, Activation, Flatten
from keras.optimizers import Adam

from rl.agents.dqn import DQNAgent
from rl.policy import EpsGreedyQPolicy
from rl.memory import SequentialMemory

# --- Environment setup -------------------------------------------------
ENV_NAME = 'CartPole-v0'
env = gym.make(ENV_NAME)
nb_actions = env.action_space.n  # number of discrete actions (2 for CartPole)

# --- Q-network ---------------------------------------------------------
# Input shape is (window_length,) + observation shape; window_length=1 is
# configured on the SequentialMemory below, so the two must stay in sync.
model = Sequential()
model.add(Flatten(input_shape=(1,) + env.observation_space.shape))
model.add(Dense(16))
model.add(Activation('relu'))
model.add(Dense(16))
model.add(Activation('relu'))
model.add(Dense(16))
model.add(Activation('relu'))
model.add(Dense(nb_actions))
model.add(Activation('linear'))  # raw Q-values, one per action
# summary() prints the table itself and returns None, so wrapping it in
# print(...) would emit a stray "None" line.
model.summary()

# --- Agent configuration -----------------------------------------------
memory = SequentialMemory(limit=50000, window_length=1)
policy = EpsGreedyQPolicy()
dqn = DQNAgent(model=model, nb_actions=nb_actions, memory=memory,
               nb_steps_warmup=10, target_model_update=1e-2, policy=policy)
dqn.compile(Adam(lr=1e-3), metrics=['mae'])

# --- Train and evaluate ------------------------------------------------
# fit() returns a keras-rl History whose 'episode_reward' entry we plot below.
H = dqn.fit(env, nb_steps=10000, visualize=False, verbose=2)

# Render a few greedy evaluation episodes.
dqn.test(env, nb_episodes=5, visualize=True)

# Plot per-episode training reward (this is what matplotlib was imported for;
# the original script imported plt and kept H but never used either).
plt.plot(H.history['episode_reward'])
plt.xlabel('episode')
plt.ylabel('episode reward')
plt.title('DQN training reward on %s' % ENV_NAME)
plt.show()

# Release the render window and environment resources.
env.close()
References: keras-rl DQN example and documentation.