C# Q-learning 与 SARSA（一）

最新推荐文章于 2024-08-24 15:52:22 发布

Donneyming

最新推荐文章于 2024-08-24 15:52:22 发布

阅读量63

点赞数

文章标签： c# java 开发语言

本文链接：https://blog.csdn.net/Donneyming/article/details/134349280

版权

本文介绍了如何用 C# 实现 Q-learning 算法：在由六个房间组成的环境中，通过反复迭代更新 Q 值表，学习从起始房间移动到目标房间（5 号房间）的路径。训练阶段智能体随机选择动作进行探索；测试阶段则依据训练得到的 Q 值，贪心地选择 Q 值最大的动作。

摘要由CSDN通过智能技术生成

using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;

namespace RL

{
/// <summary>
/// Tabular Q-learning agent for a six-room navigation task.
/// A state is the room the agent is in; action <c>k</c> means "move to room k".
/// <see cref="updateQ"/> trains the Q-table with purely random action selection and
/// <see cref="testQ"/> then walks greedily from the start room to the target room.
/// </summary>
class Qlearning
{
    // Source of randomness for the start room and for action selection during training.
    private readonly Random _random;

    // Number of actions available in every state (one per room).
    private readonly int _actionCount;

    // Number of training episodes (exploration runs).
    private readonly int _episodeCount;

    // Target state, i.e. the agent has to reach room 5.
    private readonly int _targetState;

    // Gamma, the discount rate; a value between 0 and 1.
    private readonly double _gamma;

    // Room in which every training episode and the test walk start.
    private readonly int _startRoom;

    // Reward matrix r[state, action]: -1 marks a move that is not allowed,
    // 0 an allowed move, 100 an allowed move that enters the target room.
    private readonly int[,] _rewards = new int[6, 6]
    {
        { -1, -1, -1, -1,  0,  -1 },
        { -1, -1, -1,  0, -1, 100 },
        { -1, -1, -1,  0, -1,  -1 },
        { -1,  0,  0, -1,  0,  -1 },
        {  0, -1, -1,  0, -1, 100 },
        { -1,  0, -1, -1,  0, 100 }
    };

    // Q-table q[state][action]; every entry starts at 0.
    private readonly double[][] _q = new double[6][]
    {
        new double[6],
        new double[6],
        new double[6],
        new double[6],
        new double[6],
        new double[6]
    };

    /// <summary>
    /// Creates an agent with 6 actions, 1000 training episodes, target room 5,
    /// discount rate 0.8 and a random start room in the range 0..4.
    /// </summary>
    public Qlearning()
    {
        _random = new Random();
        _actionCount = 6;
        _episodeCount = 1000;
        _targetState = 5;
        _gamma = 0.8;

        // Rooms 0 .. target-1: the start room is never the target itself.
        _startRoom = _random.Next(_targetState);
    }

    /// <summary>
    /// Returns the largest element of <paramref name="tmp"/>.
    /// </summary>
    /// <param name="tmp">Values to scan, e.g. one row of the Q-table.</param>
    /// <returns>The maximum element, or 0 when the array is empty.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="tmp"/> is null.</exception>
    public double maxReturn(double[] tmp)
    {
        if (tmp == null)
        {
            throw new ArgumentNullException("tmp");
        }

        if (tmp.Length == 0)
        {
            return 0;
        }

        // Seed with the first element rather than 0 so that an all-negative
        // array yields its true maximum instead of a value it does not contain.
        double best = tmp[0];
        for (int i = 1; i < tmp.Length; i++)
        {
            if (tmp[i] > best)
            {
                best = tmp[i];
            }
        }

        return best;
    }

    /// <summary>
    /// Trains the Q-table. Each episode starts in the start room and takes uniformly
    /// random actions until the target room is reached. The Q-table is printed after
    /// every episode and once more when training is finished.
    /// </summary>
    public void updateQ()
    {
        for (int episode = 0; episode < _episodeCount; episode++)
        {
            int state = _startRoom;
            while (state != _targetState)
            {
                int action = _random.Next(_actionCount);
                int reward = _rewards[state, action];

                if (reward < 0)
                {
                    // Move not allowed: record the negative reward and stay in the same room.
                    _q[state][action] = reward;
                    continue;
                }

                // Q(s, a) = r(s, a) + gamma * max_a' Q(s', a'), e.g.
                // Q(1, 5) = 100 + 0.8 * Max[Q(5, 1), Q(5, 4), Q(5, 5)] = 100 + 0.8 * Max{ 0, 0, 0} = 100
                int nextState = action;
                _q[state][action] = reward + _gamma * maxReturn(_q[nextState]);
                state = nextState;
            }

            Console.WriteLine("Q值表更新过程为：\n");
            PrintQTable();
        }

        Console.WriteLine(" 最终的Q值表为：\n");
        PrintQTable();
    }

    /// <summary>
    /// Walks from the start room to the target room, always taking the action with the
    /// highest Q-value in the current room, and prints every move.
    /// </summary>
    /// <returns>
    /// 0 when the target room was reached; -1 when the greedy walk was aborted because it
    /// could not reach the target (the Q-table has not converged and the policy cycles).
    /// </returns>
    public int testQ()
    {
        Console.WriteLine("start_room is {0}", _startRoom);
        int state = _startRoom;
        int steps = 0;

        // Testing phase: choose actions according to the trained Q-values.
        while (state != _targetState)
        {
            // The greedy policy is deterministic, so a walk that has already made as many
            // moves as there are rooms must have revisited a room and would never end.
            if (steps >= _q.Length)
            {
                Console.WriteLine("\n Agent 未能到达目标房间：Q 值表尚未收敛，贪心策略出现循环\n");
                return -1;
            }

            int nextState = ArgMaxLast(_q[state]);

            Console.WriteLine("\n Agent 由 {0} 号房间移动到了 {1} 号房间\n", state, nextState);
            state = nextState;
            steps += 1;
        }

        Console.WriteLine("\n Agent 在 {0} 号房间开始移动了 {1} 步到达了目标房间\n", _startRoom, steps);

        return 0;
    }

    // Index of the largest value in a non-empty array; on ties the highest index wins.
    // Plays the role of Python's np.argmax(), except for that tie-breaking rule.
    private static int ArgMaxLast(double[] values)
    {
        int best = 0;
        for (int i = 1; i < values.Length; i++)
        {
            if (values[i] >= values[best])
            {
                best = i;
            }
        }

        return best;
    }

    // Writes the Q-table to the console, one row per state, followed by a blank line each.
    private void PrintQTable()
    {
        for (int i = 0; i < _q.Length; i++)
        {
            for (int j = 0; j < _actionCount; j++)
            {
                Console.Write("{0},", _q[i][j]);
            }

            Console.WriteLine("\n");
        }
    }
}
}

Donneyming

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
C# Q-learning 与 SARSA（一）

Console.WriteLine("\n Agent 由 {0} 号房间移动到了 {1} 号房间\n", current_state, next_state);Console.WriteLine("\n Agent 在 {0} 号房间开始移动了 {1} 步到达了目标房间\n", start_room, step);// 这里相当于python中的 np.argmax()函数，即当 np.argmax(f(x))中的f(x)取最大值的时候输出 x 的取值大小。//γ，折损率，取值是 0 到 1 之间。
复制链接

扫一扫