import numpy as np
import pandas as pd
import time
#
# Hyper-parameters and display settings for the 1-D treasure-hunt Q-learning demo.
np.random.seed(2)  # Seed NumPy's global RNG so the random action choices repeat between runs.
N_STATES = 6  # Number of cells in the corridor, including the treasure cell 'T'.
ACTIONS = ["left", "right"]  # Action labels; also used as the Q-table column names.
EPCILON = 0.9  # Greediness: a uniform draw above this value triggers a random action.
ALPHA = 0.1  # Learning rate applied to the temporal-difference error.
GAMMA = 0.9  # Discount factor applied to the next state's best Q-value.
MAX_EPISODE = 13  # Number of training episodes to run.
FRESH_TIME = 0.1  # Seconds to sleep after drawing each non-terminal frame.
def build_q_table(n_states, actions):
    """Create the initial Q-table with every state-action value set to zero.

    Args:
        n_states: Number of rows, one per state.
        actions: Sequence of action labels; each label becomes a column.

    Returns:
        A ``pandas.DataFrame`` of shape ``(n_states, len(actions))`` filled
        with ``0.0`` and whose columns are ``actions``.
    """
    return pd.DataFrame(
        np.zeros((n_states, len(actions))),
        columns=actions,
    )
def choose_action(state, table, epsilon=None):
    """Pick an action for ``state`` using an epsilon-greedy policy.

    A random action is taken when a uniform draw exceeds ``epsilon`` or when
    the state has no learned values yet (its whole row is zero); otherwise the
    action with the largest Q-value is returned.

    Args:
        state: Integer row position of the current state in ``table``.
        table: Q-table ``DataFrame`` whose columns are the action labels.
        epsilon: Probability threshold for acting greedily. Defaults to the
            module-level ``EPCILON`` when omitted.

    Returns:
        The chosen action label (one of ``table``'s column names).
    """
    if epsilon is None:
        epsilon = EPCILON
    actions = table.iloc[state, :]
    # ``actions.all() == 0`` was true as soon as ANY value was zero, which kept
    # the agent exploring in states that already had a learned preference.
    # Only a row that is entirely zero counts as untrained.
    untrained = (actions == 0).all()
    if np.random.uniform() > epsilon or untrained:
        # Draw from the table's own columns so the result is always a valid
        # column label for this table.
        return np.random.choice(list(table.columns))
    return actions.idxmax()
def env_feedback(S, A, n_states=None):
    """Return the environment's response to taking action ``A`` in state ``S``.

    Moving right from the cell just before the treasure ends the episode with
    reward 1. Every other move yields reward 0. Moving left from cell 0 leaves
    the agent where it is.

    Args:
        S: Current state as an integer cell index.
        A: Action label; ``"right"`` moves right, anything else moves left.
        n_states: Total number of cells including the treasure cell. Defaults
            to the module-level ``N_STATES`` when omitted.

    Returns:
        A tuple ``(S_, reward)`` where ``S_`` is the next cell index or the
        string ``"terminal"``, and ``reward`` is ``1`` or ``0``.
    """
    if n_states is None:
        n_states = N_STATES
    if A == "right":
        if S == n_states - 2:
            # The next cell is the treasure: the episode ends here.
            return "terminal", 1
        return S + 1, 0
    # Left move: the reward is 0 on both paths, including at the wall.
    S_ = S if S == 0 else S - 1
    return S_, 0
def print_env(S, episode, step_counter):
    """Draw the current frame of the corridor on a single console line.

    For a normal state the corridor is printed as dashes with ``o`` at the
    agent's cell and ``T`` at the treasure, followed by a ``FRESH_TIME``
    pause. For the terminal state an episode summary is shown for two
    seconds and then blanked out.

    Args:
        S: Current state, an integer cell index or the string ``'terminal'``.
        episode: Zero-based episode index; displayed as ``episode + 1``.
        step_counter: Number of steps taken so far in this episode.
    """
    if S == 'terminal':
        interaction = 'Episode %s: total_steps = %s' % (episode + 1, step_counter)
        print('\r{}'.format(interaction), end='')
        time.sleep(2)
        # Blank the whole summary; a single space after '\r' would erase only
        # its first character and leave the rest on screen.
        print('\r{}'.format(' ' * len(interaction)), end='')
    else:
        env_list = ['-'] * (N_STATES - 1) + ['T']
        env_list[S] = 'o'
        interaction = ''.join(env_list)
        print('\r{}'.format(interaction), end='')
        time.sleep(FRESH_TIME)
def q_learning():
    """Train a tabular Q-learning agent and return the learned Q-table.

    Runs ``MAX_EPISODE`` episodes. Each episode starts in cell 0 and repeats:
    choose an action, observe the next state and reward, then move
    ``Q(S, A)`` toward the target by a fraction ``ALPHA``. The target is the
    bare reward on the terminal transition, and otherwise the reward plus
    ``GAMMA`` times the best Q-value of the next state. Progress and the
    Q-table are printed after every step.

    Returns:
        The Q-table ``DataFrame`` after the final episode.
    """
    q_table = build_q_table(N_STATES, ACTIONS)
    for episode in range(MAX_EPISODE):
        is_terminated = False
        step_counter = 0
        S = 0  # every episode starts at the left-most cell
        print_env(S, episode, step_counter)
        while not is_terminated:
            A = choose_action(S, q_table)
            S_, r = env_feedback(S, A)
            q_predict = q_table.loc[S, A]
            if S_ == "terminal":
                # No future value beyond the terminal state.
                q_target = r
                is_terminated = True
                print(q_table)
                print('第'+str(episode+1)+'次')
                print('总共'+str(step_counter+1)+'步')
            else:
                # Bootstrap from the best action value of the next state.
                next_max = q_table.iloc[S_, :].max()
                q_target = r + GAMMA * next_max
                print('Q值为:'+str(q_target))
                print('S_max值为:' + str(next_max))
            # Temporal-difference update of the visited state-action pair.
            q_table.loc[S, A] += ALPHA * (q_target - q_predict)
            S = S_
            step_counter += 1
            print_env(S, episode, step_counter)
            print('\n'+'走了第'+str(step_counter)+'步')
            print('跟新后Q表:'+'\n'+str(q_table))
    return q_table
if __name__ == "__main__":
    # Train only when executed as a script, so importing this module does not
    # start the (slow, console-animated) training loop.
    q_table = q_learning()
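# Treasure-hunt Q-learning demo (modified version): prints each Q-table update, with annotations.
# (Web-page footer carried over from the source article: "latest recommended article published 2023-06-07 17:36:27".)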