RL-Qlearning

最新推荐文章于 2022-08-28 19:19:48 发布

kuizhao8951

最新推荐文章于 2022-08-28 19:19:48 发布

阅读量253

点赞数

分类专栏：强化学习

本文链接：https://blog.csdn.net/kuizhao8951/article/details/100436141

版权

强化学习专栏收录该内容

13 篇文章 4 订阅

订阅专栏

今天跟着莫烦学了Qlearning

自己手撸了一下

凑活着看

#include <iostream>
#include <time.h>
#include <string.h>
#include <math.h>
#include <cstdlib>
#include <map>
using namespace std;
// Pseudo-random integer in [0, x). The expansion is fully parenthesized so it
// behaves as a single operand inside larger expressions (e.g. 2*random(2)).
#define random(x) (rand()%(x))
// Sentinel state value meaning "the episode has reached the goal".
#define terminal (-1)
const int N_STATE =6;// number of cells on the 1-D track (rows of Qtable)
const int NumOfAction = 2;// number of actions (columns of Qtable)
const std::string ACTION[NumOfAction]={"left","right"};// action index -> name
const double EPSILON=0.9;// greedy policy: probability of taking the best-known action
const double ALPHA=0.5;// learning rate
const double LAMBDA=0.9;// discount factor
const int MAX_EPISODES=10;// number of training episodes
const double FRESH_TIME=0.3;// fresh time for one move
// Q-value table indexed as Qtable[state][action]; starts out all zero.
double Qtable[N_STATE][NumOfAction]={0}; 
/* run this program using the console pauser or add your own getch, system("pause") or input loop */
/**
 * Return the index of the largest Q-value stored for `state`.
 * When several actions share the largest value, the lowest index is returned.
 */
int maxAction(int state){
	const double *row=Qtable[state];
	int best=0;
	// Index 0 is the initial candidate, so scanning starts at 1.
	for(int a=1;a<NumOfAction;++a){
		if(row[a]>row[best]){
			best=a;
		}
	}
	return best;
}
int choose_action(int state){
	int flag=true,action;
	for(int i=0;i<NumOfAction;i++)flag&=(Qtable[state][i]==0);
	if((random(10)/10.0>EPSILON)||flag){
		action=random(NumOfAction);
	}
	else{
		action=maxAction(state);
	}
	return action;
}
/**
 * Apply action A to the state pointed to by S and return the reward.
 *
 * A == 1 moves one cell to the right; stepping right from cell N_STATE-2
 * sets *S to `terminal` and yields reward 1. Any other value of A moves one
 * cell to the left, except at cell 0 where the state is left unchanged.
 * Every non-terminal transition yields reward 0.
 *
 * @param S in/out: current state on entry, next state on return
 * @param A action index
 * @return reward for the transition (0 or 1)
 */
int get_env_feedback(int *S,int A){
	const bool move_right=(A==1);
	if(!move_right){
		// Cell 0 is the left wall: stay put instead of stepping off the track.
		if(*S!=0){
			--*S;
		}
		return 0;
	}
	if(*S==N_STATE-2){
		*S=terminal;
		return 1;
	}
	++*S;
	return 0;
}
/**
 * Print one line describing the current situation of an episode.
 *
 * For the terminal state the total number of steps is printed. Otherwise the
 * track is drawn as N_STATE-1 '-' cells followed by 'T', with 'o' written at
 * index S.
 *
 * @param S            current state, or `terminal`
 * @param episode      zero-based episode index (printed one-based)
 * @param step_counter number of steps taken so far in this episode
 */
void update_env(int S,int episode,int step_counter){
	if(S==terminal){
		std::cout<<"Episode "<<episode+1<<": total_step = "<<step_counter<<std::endl;
		return;
	}
	std::string track(N_STATE-1,'-');
	track.push_back('T');
	track[S]='o';
	std::cout<<"Episode "<<episode+1<<":"<<track<<std::endl;
}
/**
 * Run MAX_EPISODES episodes of tabular Q-learning, updating Qtable in place.
 *
 * Every episode starts in state 0 and runs until get_env_feedback() reports
 * the terminal state. The environment is printed after each move and the
 * whole Q-table is printed at the end of each episode.
 */
void RL(){
	for(int episode=0;episode<MAX_EPISODES;++episode){
		int steps=0;
		int S=0;
		update_env(S,episode,steps);
		for(bool done=false;!done;){
			const int prev=S;
			const int A=choose_action(prev);
			const int R=get_env_feedback(&S,A);
			done=(S==terminal);
			// Terminal transitions have no successor state, so the target is
			// just the reward; otherwise add the discounted best next Q-value.
			const double target=done?static_cast<double>(R):R+LAMBDA*Qtable[S][maxAction(S)];
			Qtable[prev][A]+=ALPHA*(target-Qtable[prev][A]);
			++steps;
			update_env(S,episode,steps);
		}
		cout<<"Qtable:"<<endl;
		for(int row=0;row<N_STATE;++row){
			for(int col=0;col<NumOfAction;++col){
				cout<<Qtable[row][col]<<' ';
			}
			cout<<endl;
		}
	}
}
/**
 * Program entry point: seed the random generator, clear the Q-table, wait
 * for a key press, run the training loop and print the final Q-table.
 */
int main(int argc, char** argv) {
	srand(static_cast<unsigned>(time(0)));
	// memset counts bytes, not elements: the old N_STATE*NumOfAction (12)
	// cleared only the first 12 bytes of the table.
	memset(Qtable,0,sizeof(Qtable));
	// Pause until the user presses a key. The stray "cout<<" that used to
	// precede this call printed the key's integer code, so it was dropped.
	getchar();
	RL();
	for(int i=0;i<N_STATE;i++){
		for(int j=0;j<NumOfAction;j++){
			cout<<Qtable[i][j]<<' ';
		}
		cout<<endl;
	}
	return 0;
}

kuizhao8951

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
RL-Qlearning

今天跟着莫烦学了Qlearning自己手撸了一下凑活着看#include <iostream>#include <time.h>#include <string.h>#include <math.h>#include <cstdlib>#include <map>using namespace std...
复制链接

扫一扫