Machine Learning: Q-learning

Adapted from the 莫烦Python Q-learning tutorial.
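
This example is the classic 1D treasure hunt: an agent 'o' starts at the left end of a short line of cells and must walk to the treasure 'T' at the right end. Reaching 'T' gives a reward of 1 and ends the episode; every other move gives 0. The script keeps a table Q[state][action] and nudges it toward a target value after every step. As a reference sketch, written with the same names the code below uses, the two targets and the shared update are:

    # Q-learning (off-policy): bootstrap from the best next action
    q_reward = R + GAMMA * max_Value(q_table[S_])
    # Sarsa (on-policy): bootstrap from the next action chosen by the ε-greedy policy
    q_reward = R + GAMMA * q_table[S_][Anum_]
    # shared update toward the target
    q_table[S][Anum] = (1 - ALPHA) * q_table[S][Anum] + ALPHA * q_reward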

import numpy as np
import time

np.random.seed(2)  # reproducible


N_STATES = 6   # the length of the 1 dimensional world
ACTIONS = ['left', 'right']     # available actions
EPSILON = 0.9  # greedy policy: probability of choosing the greedy action
ALPHA = 0.1     # learning rate
GAMMA = 0.9    # discount factor
MAX_EPISODES = 53   # maximum episodes
FRESH_TIME = 0.05    # refresh time for one move


def build_q_table(n_states, actions):
    # one row per state, one column per action, all initialised to zero
    table = [[0 for _ in range(len(actions))] for _ in range(n_states)]
    print(table)    # show the initial table
    return table

def list_Zero(lis):
    # return True if every entry in lis is zero
    for li in lis:
        if li != 0:
            return False
    return True

def max_Index(lis):
    # index of the (first) largest value in lis
    mmax = -999999
    index = 0
    for j in range(len(lis)):
        if lis[j] > mmax:
            mmax = lis[j]
            index = j
    return index

def max_Value(lis):
    # largest value in lis
    mmax = -999999
    for j in range(len(lis)):
        if lis[j] > mmax:
            mmax = lis[j]
    return mmax
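
# Side note: since numpy is already imported, these helpers behave like the
# following one-liners for the lists used here (non-empty, values well above
# -999999): max_Index(lis) == int(np.argmax(lis)), max_Value(lis) == max(lis),
# and list_Zero(lis) == all(li == 0 for li in lis).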


def choose_action(state, q_table):
    # This is how an action is chosen (ε-greedy)
    state_actions = q_table[state]
    # print(state_actions)
    if (np.random.uniform() > EPSILON) or list_Zero(state_actions):  # act non-greedily, or this state's actions have no value yet
        action_num = np.random.randint(0, len(ACTIONS))
        action_name = ACTIONS[action_num]
        # print(action_name)
    else:   # act greedily
        action_num = max_Index(state_actions)
        action_name = ACTIONS[action_num]

    return action_name, action_num
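
# With EPSILON = 0.9 the agent exploits (takes the best-known action) about 90%
# of the time and explores (takes a random action) about 10% of the time; it
# also acts randomly while all Q-values of the current state are still zero.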


def get_feedback(S, A):
    # This is how the agent interacts with the environment
    if A == 'right':    # move right
        if S == N_STATES - 2:   # next cell is the treasure: terminate
            S_ = 'terminal'
            R = 1
        else:
            S_ = S + 1
            R = 0
    else:   # move left
        R = 0
        if S == 0:
            S_ = S  # reach the wall
        else:
            S_ = S - 1
    return S_, R
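
# With N_STATES = 6 the agent occupies cells 0..4 and cell 5 holds the treasure:
# moving right from cell 4 ends the episode with reward 1, every other move gives
# reward 0, and moving left from cell 0 leaves the agent where it is.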


def update(S, episode, step_counter):
    # This is how the environment is rendered
    env_list = ['-'] * (N_STATES - 1) + ['T']   # '-----T' is our environment
    if S == 'terminal':
        interaction = 'Episode %s: total_steps = %s' % (episode + 1, step_counter)
        print('\r{}'.format(interaction))
        time.sleep(1)
        print('\r                                ', end='')
    else:
        env_list[S] = 'o'
        interaction = ''.join(env_list)
        print('\r{}'.format(interaction), end='')
        time.sleep(FRESH_TIME)


def RL(method):
    # main part of RL loop
    q_table = build_q_table(N_STATES, ACTIONS)
    print(q_table)
    for episode in range(MAX_EPISODES):
        step_counter = 0
        S = 0
        is_terminated = False
        update(S, episode, step_counter)

        while not is_terminated:
            A, Anum = choose_action(S, q_table)
            S_, R = get_feedback(S, A)  # take action & get next state and reward

            if S_ != 'terminal':
                if method == 'Qlearning':
                    # Q-learning target: bootstrap from the best action in the next state
                    q_reward = R + GAMMA * max_Value(q_table[S_])
                elif method == 'Sarsa':
                    # Sarsa target: bootstrap from the action the ε-greedy policy picks next
                    # (note: A_ is re-sampled on the next loop iteration rather than reused,
                    # a small simplification of textbook Sarsa)
                    A_, Anum_ = choose_action(S_, q_table)
                    q_reward = R + GAMMA * q_table[S_][Anum_]

            else:
                q_reward = R     # next state is terminal
                is_terminated = True    # terminate this episode

            q_table[S][Anum] = (1 - ALPHA) * q_table[S][Anum] + ALPHA * q_reward
            #print(q_table)
            S = S_  # move to next state
            update(S, episode, step_counter+1)
            step_counter += 1

    return q_table




if __name__ == "__main__":
    #q_table = RL('Qlearning')
    q_table = RL('Sarsa')
    print('\r\nQ-table:\n')
    print(q_table)
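
To compare the two methods, switch the commented-out line in __main__ and run the script twice. The returned q_table is a nested list with one row per state and one column per action in ACTIONS. A minimal sketch for printing it more readably (it assumes the q_table returned above):

    for s, (q_left, q_right) in enumerate(q_table):
        print('state %d: left=%.3f  right=%.3f' % (s, q_left, q_right))

In a run that has converged, the 'right' column should dominate in every state, since moving right is the only way to reach the reward.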