今天跟着莫烦学了 Q-learning,
自己动手实现了一下,
凑合着看。
#include <math.h>
#include <string.h>
#include <time.h>

#include <cstdio>
#include <cstdlib>
#include <iostream>
#include <map>
#include <string>
using namespace std;
// Pseudo-random integer in [0, x) drawn from rand().
// The expansion is fully parenthesised so it composes safely inside larger
// expressions (e.g. `2 * random(n)` or `(double)random(n)`).
#define random(x) (rand() % (x))
// Sentinel state value that marks the end of an episode.
// Parenthesised so the leading minus cannot bind to a neighbouring operator.
#define terminal (-1)

const int N_STATE = 6;      // number of cells in the 1-D world; the last one holds the treasure 'T'
const int NumOfAction = 2;  // number of actions available in every state
const std::string ACTION[NumOfAction] = {"left", "right"};  // display names for action 0 and action 1
const double EPSILON = 0.9;     // greedy-policy threshold: choose_action explores when its random draw exceeds this
const double ALPHA = 0.5;       // learning rate
const double LAMBDA = 0.9;      // discount factor
const int MAX_EPISODES = 10;    // number of training episodes run by RL()
const double FRESH_TIME = 0.3;  // refresh time for one move (not referenced by the functions visible here)

// Q-value table indexed as Qtable[state][action]; starts out all zeros.
double Qtable[N_STATE][NumOfAction] = {0};
/* run this program using the console pauser or add your own getch, system("pause") or input loop */
/**
 * Return the index of the action with the largest Q-value in `state`.
 *
 * Ties are resolved in favour of the lowest action index, because a later
 * entry only replaces the current best when it is strictly greater.
 *
 * @param state row of Qtable to inspect
 * @return action index in [0, NumOfAction)
 */
int maxAction(int state){
    const double *row = Qtable[state];
    int best = 0;
    // Index 0 is the initial best, so the scan can start at 1.
    for (int candidate = 1; candidate < NumOfAction; ++candidate) {
        if (row[candidate] > row[best]) {
            best = candidate;
        }
    }
    return best;
}
int choose_action(int state){
int flag=true,action;
for(int i=0;i<NumOfAction;i++)flag&=(Qtable[state][i]==0);
if((random(10)/10.0>EPSILON)||flag){
action=random(NumOfAction);
}
else{
action=maxAction(state);
}
return action;
}
/**
 * Apply action `A` to the environment and report the reward.
 *
 * `S` is an in/out parameter: on entry it holds the current state, on return
 * it holds the next state (or `terminal` once the goal has been reached).
 *
 * Action 1 moves one cell to the right; stepping right from cell N_STATE-2
 * ends the episode with reward 1. Any other action moves one cell to the
 * left, except at cell 0 where the state stays unchanged.
 *
 * @param S pointer to the current state, overwritten with the next state
 * @param A action index
 * @return 1 when the move reaches the terminal state, 0 otherwise
 */
int get_env_feedback(int *S,int A){
    const int position = *S;

    if (A != 1) {
        // Move left; cell 0 is a wall, so the state is left as it is there.
        if (position != 0) {
            *S = position - 1;
        }
        return 0;
    }

    // Move right.
    if (position == N_STATE - 2) {
        *S = terminal;
        return 1;
    }
    *S = position + 1;
    return 0;
}
/**
 * Print one progress line for the current episode to standard output.
 *
 * When `S` is `terminal`, the line reports the number of steps taken.
 * Otherwise it shows the track: N_STATE-1 dashes followed by 'T', with an 'o'
 * written at index `S`.
 *
 * @param S            current state, or `terminal`
 * @param episode      zero-based episode index (printed one-based)
 * @param step_counter number of steps taken so far in this episode
 */
void update_env(int S,int episode,int step_counter){
    const int shown_episode = episode + 1;

    if (S == terminal) {
        std::cout << "Episode " << shown_episode << ": total_step = " << step_counter << std::endl;
        return;
    }

    std::string track(N_STATE - 1, '-');
    track.push_back('T');
    track[S] = 'o';
    std::cout << "Episode " << shown_episode << ":" << track << std::endl;
}
/**
 * Run the Q-learning training loop for MAX_EPISODES episodes.
 *
 * Every episode starts in state 0 and runs until get_env_feedback reports the
 * terminal state. After each step the chosen entry of Qtable is moved towards
 * the target (reward plus discounted best Q-value of the next state, or just
 * the reward on the terminal step) by a fraction ALPHA, and the environment
 * line is printed. The whole Qtable is printed at the end of every episode.
 */
void RL(){
    for (int episode = 0; episode < MAX_EPISODES; ++episode) {
        int steps = 0;
        int S = 0;
        update_env(S, episode, steps);

        for (bool done = false; !done; ) {
            const int previous = S;
            const int A = choose_action(previous);
            const int R = get_env_feedback(&S, A);  // advances S to the next state

            done = (S == terminal);

            // No future value is available once the episode has ended.
            double q_target = R;
            if (!done) {
                q_target = R + LAMBDA * Qtable[S][maxAction(S)];
            }

            double &entry = Qtable[previous][A];
            entry += ALPHA * (q_target - entry);

            ++steps;
            update_env(S, episode, steps);
        }

        std::cout << "Qtable:" << std::endl;
        for (int state = 0; state < N_STATE; ++state) {
            for (int action = 0; action < NumOfAction; ++action) {
                std::cout << Qtable[state][action] << ' ';
            }
            std::cout << std::endl;
        }
    }
}
/**
 * Program entry point: seeds the random generator, clears the Q-table, waits
 * for one character on standard input, runs training via RL(), and finally
 * prints the learned Q-table (one row per state).
 *
 * @param argc unused
 * @param argv unused
 * @return 0 on completion
 */
int main(int argc, char** argv) {
    // Seed rand() from the current time.
    srand(static_cast<unsigned int>(time(0)));

    // memset takes a size in bytes, so pass sizeof(Qtable) rather than the
    // element count; otherwise only part of the table would be cleared.
    memset(Qtable, 0, sizeof(Qtable));

    // Pause until a character is available on stdin. The character is
    // discarded on purpose: streaming it to cout would print its numeric code
    // in front of the training output.
    std::getchar();

    RL();

    for (int i = 0; i < N_STATE; i++) {
        for (int j = 0; j < NumOfAction; j++) {
            std::cout << Qtable[i][j] << ' ';
        }
        std::cout << std::endl;
    }
    return 0;
}