Q-learning Algorithm

#### Q-learning algorithm, from 《深度强化学习原理与实践》 (Deep Reinforcement Learning: Principles and Practice), pp. 115-119
# coding: utf-8

import gym
import numpy as np
import sys
import time
import pandas as pd
import matplotlib
from collections import defaultdict, namedtuple
from matplotlib import pyplot as plt

env = gym.make("CartPole-v0")

class QLearning():
    def __init__(self, env, num_episodes, discount=1.0, alpha=0.5, epsilon=0.1, n_bins=10):
        self.nA = env.action_space.n    # number of actions; CartPole is a built-in gym environment, so actions and states are read from it directly rather than defined by hand
        print("number of actions", self.nA)
        self.nS = env.observation_space.shape[0]  # dimension of the observation (state) vector, 4 for CartPole
        print("observation_space\n", env.observation_space.shape, "\n", env.observation_space)
        print("state dimension", self.nS)
        self.env = env
        self.num_episodes = num_episodes    # number of training episodes
        self.discount = discount            # discount factor
        self.alpha = alpha                  # learning rate applied to the temporal-difference (TD) error
        self.epsilon = epsilon              # exploration rate of the epsilon-greedy policy
        # Initialize Q(s, a)
        self.Q = defaultdict(lambda: np.zeros(self.nA))
        print("initial action-value function Q", self.Q)

        # Keeps track of useful statistics
        record = namedtuple("Record", ["episode_lengths", "episode_rewards"])
        self.rec = record(episode_lengths=np.zeros(num_episodes),
                          episode_rewards=np.zeros(num_episodes))
        print("record\n", self.rec)
        # Discretize (bin) the continuous observations to shrink the state space
        self.cart_position_bins = pd.cut([-2.4, 2.4], bins=n_bins, retbins=True)[1]  # cart position: split the interval [-2.4, 2.4] into n_bins equal-width sub-intervals (10 by default)
        print("cart_position_bins\n", self.cart_position_bins)
        self.pole_angle_bins = pd.cut([-2, 2], bins=n_bins, retbins=True)[1]         # pole angle
        print("pole_angle_bins\n", self.pole_angle_bins)
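The listing above breaks off inside __init__, so the rest of the pipeline is sketched rather than quoted. First, a minimal standalone sketch (not the book's code) of turning a continuous CartPole observation into a discrete state tuple with np.digitize, using bin edges built the same way as in __init__; the helper name discretize and the two velocity ranges are assumptions made here for illustration.

# A minimal sketch (assumed helper, not from the book): discretize one CartPole observation
import numpy as np
import pandas as pd

n_bins = 10
# Bin edges built the same way as in QLearning.__init__; the two velocity ranges are assumptions
cart_position_bins = pd.cut([-2.4, 2.4], bins=n_bins, retbins=True)[1]
cart_velocity_bins = pd.cut([-1.0, 1.0], bins=n_bins, retbins=True)[1]    # assumed range
pole_angle_bins    = pd.cut([-2.0, 2.0], bins=n_bins, retbins=True)[1]
pole_velocity_bins = pd.cut([-3.5, 3.5], bins=n_bins, retbins=True)[1]    # assumed range

def discretize(observation):
    """Map one observation [position, velocity, angle, angular velocity] to a tuple of bin indices."""
    position, velocity, angle, angular_velocity = observation
    return (int(np.digitize(position, cart_position_bins)),
            int(np.digitize(velocity, cart_velocity_bins)),
            int(np.digitize(angle, pole_angle_bins)),
            int(np.digitize(angular_velocity, pole_velocity_bins)))

print(discretize([0.02, 0.1, -0.03, 0.2]))   # e.g. (6, 6, 5, 6)

The resulting tuples are hashable, so they can be used directly as keys of the defaultdict Q-table created above.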
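With a discrete state available, the remaining pieces of tabular Q-learning are the epsilon-greedy action choice and the one-step update Q(s,a) <- Q(s,a) + alpha * (r + discount * max_a' Q(s',a') - Q(s,a)). The sketch below shows both under the same alpha/discount/epsilon names as the constructor; the function names are assumptions, not the book's.

# A minimal sketch (assumed helpers, not the book's code): epsilon-greedy action selection
# and the one-step Q-learning update
import numpy as np
from collections import defaultdict

def epsilon_greedy_action(Q, state, nA, epsilon):
    """With probability epsilon pick a random action, otherwise exploit argmax_a Q(state, a)."""
    if np.random.rand() < epsilon:
        return np.random.randint(nA)
    return int(np.argmax(Q[state]))

def q_update(Q, state, action, reward, next_state, done, alpha=0.5, discount=1.0):
    """Q(s,a) <- Q(s,a) + alpha * (reward + discount * max_a' Q(s',a') - Q(s,a));
    no bootstrap term when the episode has ended."""
    best_next = 0.0 if done else np.max(Q[next_state])
    td_error = reward + discount * best_next - Q[state][action]
    Q[state][action] += alpha * td_error

# Tiny usage example with a 2-action Q-table keyed by discretized state tuples
Q = defaultdict(lambda: np.zeros(2))
s, s_next = (6, 6, 5, 6), (6, 6, 5, 7)
a = epsilon_greedy_action(Q, s, nA=2, epsilon=0.1)
q_update(Q, s, a, reward=1.0, next_state=s_next, done=False)
print(Q[s])    # the chosen action's value moves toward the TD target

In a full training loop these two calls would sit between env.step(action) and the bookkeeping that fills self.rec.episode_lengths and self.rec.episode_rewards.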