#include "iostream"
using namespace std;
// Side length of the square value grid: used for both dimensions of
// OldValueGrid/NewValueGrid and as the upper bound of the Row/Column loops.
const int GridCnt = 5;
// Iteration limit for the while loop in main; the loop condition is
// `IterationCnt <= MaxIterations`, so the bound itself is inclusive.
const int MaxIterations = 1000;
int main()
{
int IterationCnt = 0;
double Gamma = 0.9;
double SelectProbability = 0.25;
double TemporaryValue = 0.0;
double OldValueGrid[GridCnt][GridCnt];
double NewValueGrid[GridCnt][GridCnt];
memset(OldValueGrid, 0, sizeof(double) * GridCnt * GridCnt);
memset(NewValueGrid, 0, sizeof(double) * GridCnt * GridCnt);
int a[20];
memset(a, 10, sizeof(int));
while (IterationCnt <= MaxIterations)
{
for (int Row = 0; Row < GridCnt; Row++)
{
for (int Column = 0; Column < GridCnt; Column++)
{
TemporaryValue = 0.0;
//action = UP
if (Row == 0)
TemporaryValue = TemporaryValue + SelectProbability * (-1 + Gamma * OldValueGrid[Row][Column]);
else
TemporaryValue = TemporaryValue + S
Example 3.8: Gridworld
最新推荐文章于 2022-12-22 16:03:31 发布
这是一个使用 C++ 实现的 Gridworld 强化学习模型,通过迭代更新每个格子的价值。模型设置了四个可能的动作(上、下、左、右),每个动作被选择的概率相同(0.25),并在特定状态(State A 和 State B)给出了奖励值。模拟使用了折扣因子 Gamma(0.9),并在每次迭代中更新价值网格。
摘要由CSDN通过智能技术生成