#### Q-learning algorithm — from "Deep Reinforcement Learning: Principles and Practice", pp. 115-119
# coding: utf-8
import gym
import numpy as np
import sys
import time
import pandas as pd
import matplotlib
from collections import defaultdict, namedtuple
from matplotlib import pyplot as plt
# Create the classic CartPole-v0 balancing environment from OpenAI Gym.
# NOTE(review): uses the legacy gym API (pre-gymnasium) — reset()/step()
# return shapes differ in newer versions; confirm the installed gym version.
env = gym.make ("CartPole-v0")
# NOTE(review): this block was scraped from a web page — the original
# indentation has been stripped (method body sits at column 0), so the file
# is NOT valid Python as-is, and __init__ is truncated at the dangling
# "self." line below. Code is left byte-identical pending recovery of the
# original source (book pp. 115-119); only comments were added/translated.
class QLearning ():
# Tabular Q-learning agent for CartPole-v0 with a discretized state space.
#
# Parameters:
#   env          - gym environment (CartPole-v0 expected: Box(4) observation,
#                  Discrete(2) action space)
#   num_episodes - number of training episodes
#   discount     - discount factor gamma (default 1.0 = undiscounted)
#   alpha        - learning rate for the temporal-difference update
#   epsilon      - exploration rate for the epsilon-greedy policy
#   n_bins       - number of buckets per state dimension when discretizing
def __init__(self, env, num_episodes, discount=1.0, alpha=0.5, epsilon=0.1, n_bins=10):
self.nA = env.action_space.n # number of discrete actions, read directly from the gym CartPole environment (no custom action/state definitions needed)
print("动作数",self.nA)
self.nS = env.observation_space.shape[0] # number of state dimensions (4 for CartPole)
print("observation_space\n",env.observation_space.shape,"\n",env.observation_space)
print ("状态数", self.nS)
self.env = env
self.num_episodes = num_episodes # number of training episodes
self.discount = discount
self.alpha = alpha # learning rate for the temporal-difference error
self.epsilon = epsilon # epsilon for the epsilon-greedy policy
# Initialize Q(s; a)
# defaultdict lazily creates a zero action-value vector for each new
# (discretized) state key on first access.
self.Q = defaultdict (lambda: np.zeros (self.nA))
print ("初始化动作值函数Q", self.Q)
# Keeps track of useful statistics
record = namedtuple ("Record", ["episode_lengths", "episode_rewards"])
self.rec = record (episode_lengths=np.zeros (num_episodes),
episode_rewards=np.zeros (num_episodes))
print ("记录record\n", self.rec)
# Discretize the continuous observation space into buckets so a tabular
# Q-function can be used. pd.cut(..., retbins=True)[1] returns the
# n_bins+1 bin edges over the given interval.
self.cart_position_bins = pd.cut ([-2.4, 2.4], bins=n_bins, retbins=True)[1] # cart position: split the continuous interval [-2.4, 2.4] into n_bins equal sub-intervals
print ("cart_position_bins\n", self.cart_position_bins)
self.pole_angle_bins = pd.cut ([-2, 2], bins=n_bins, retbins=True)[1] # pole angle
print("pole_angle_bins\n",self.pole_angle_bins)
# NOTE(review): line truncated by the scrape — presumably the bin edges
# for cart velocity and pole angular velocity were assigned here (the
# remaining two CartPole state dimensions) — TODO recover from the book.
self.
# Q-learning algorithm
# (scraped page metadata: latest recommended article published 2024-03-13 20:31:50)