智能体路径规划

最新推荐文章于 2024-08-09 07:37:35 发布

华丽邂逅_

最新推荐文章于 2024-08-09 07:37:35 发布

阅读量5.2k

点赞数 5

分类专栏： C++，强化学习算法文章标签： Reinforcement Learni Q-Learning Q学习智能体规划路径强化学习

本文链接：https://blog.csdn.net/Liangsm_/article/details/78464907

版权

本文通过C++和Python实现Q-learning算法，探讨智能体如何在马尔可夫决策过程中进行路径规划。通过模拟环境，智能体在探索与利用过程中逐渐学习到最优路径，最终达到目标房间。代码实现包括探索过程的记录和利用过程的可视化。

摘要由CSDN通过智能技术生成

C++实现：

代码如下：

#include <iostream>
#include <time.h>
#include <cstdlib>
#include <vector>
#include <algorithm>
#include <iterator>
#include <fstream>

using namespace std;

ofstream out_explore("d:\\explore.txt");
ofstream out_use("d:\\use.txt");

void access(int state, int q[6][6], int r[6][6])
{
vector<int> q_action;
q_action.push_back(state+1);
out_use<<"Start from : "<<state+1<<endl; //
while(state != 5)
{
int act = 0;
int max_state = q[state][0];
for(int action = 1; action < 6; ++action)
{
if(q[state][action] > max_state)
{
max_state = q[state][action];
act = action;
}
}
q_action.push_back(act+1);
state = act;
out_use<<"The most reward is from action "<<act+1 <<" ,and it is "<<max_state<<"."<<endl;
}
if(state == 5)
out_use<<"Reach the goal room!"<<endl;
copy(q_action.begin(), q_action.end(), ostream_iterator<int>(cout," -> "));
cout<<"ok"<<endl;
}

int special(int in_state, float gamma, int q[6][6], int r[6][6])
{
vector<int> r_action;
for(int action=0; action<6; ++action)
{
if(r[in_state][action] >=0 )
r_action.push_back(action);
}
int m = rand() % (r_action.size());
int next_state = r_action[m];
int max = q[next_state][0];
for(int i = 1; i < 6; ++i)
{
if(max < q[next_state][i])
max = q[next_state][i];
}
q[in_state][next_state] = r[in_state][next_state] + gamma * max;
return next_state;
}

void qlearning(float gamma, int r[6][6], int q[6][6])
{
//获得随机进入的门
int in_state = (rand() % 5); //产生0-5的随机数
out_explore<<"★ "<<in_state+1;
while (in_state != 5)
{
in_state = special(in_state, gamma, q, r);
out_explore<<"->"<<in_state+1;
if(in_state == 5)
{
special(in_state, gamma, q, r);
}
}
out_explore<<endl;
}

int main()
{
int q[6][6]= {0};
int r[6][6]=
{
{-1, 0, 0, 0,-1, -1},
{0, -1, -1, 0, -1, -1},
{0, -1, -1, -1, -1, -1},
{0, 0, -1, -1, 0, 100},
{-1, -1, -1, 0, -1, 100},
{-1, -1, -1, 0, 0, 100}
};

for(int i=0; i<100; i++) //执行多次qleaning函数，更新 q 数组
qlearning(0.8, r, q);

int max=0;