Q-learning 算法
- import numpy as np
- import gym
- import random
- if __name__ == '__main__':
- env = gym.make("FrozenLake-v0")
- env.render()
- action_size = env.action_space.n
- print("Action size ", action_size)
- state_size = env.observation_space.n
- print("State size ", state_size)
- qtable = np.zeros((state_size, action_size))
- print(qtable)
- total_episodes = 10000 # Total episodes
- learning_rate = 0.8 # Learning rate
- max_steps = 99 # Max steps per episode
- gamma = 0.95 # Discounting rate
- # Exploration parameters
- epsilon = 1.0 # Exploration rate
- max_epsilon = 1.0 # Exploration probability at start
- min_epsilon = 0.01 # Minimum exploration probability
- decay_rate = 0.001 # Exponential decay rate for exploration prob
- # List of rewards
- rewards = []
- # Step 2: repeat for every episode (total_episodes in all), or until learning is stopped
- for episode in range(total_episodes):
- # Reset the environment
- state = env.reset()
- step = 0
- done = False
- total_rewards = 0