// 井字棋UCT — UCT (Monte Carlo tree search) player for a 4x4 tic-tac-toe board

#include "stdafx.h"
#include <cstdio>
#include <iostream>
#include <time.h>
#include <cstdlib>
#include <math.h>
// Visit count below which UCT runs a random playout from a node; at or above it the node is expanded / descended into.
#define SIM_MAX 50
// Visit count from which a node's UCB is recomputed from its statistics instead of staying at the initial INF.
#define SIM_MIN 30
// Initial UCB of a freshly created node; UCT uses INF * 2 to mark a node whose move wins immediately.
#define INF 100000
// Constant multiplied into the exploration term of the UCB formula.
#define UCB_K 1
// Search budget compared against elapsed clock() seconds in UCT.
#define Time 1
using namespace std;
// map is the real 4x4 board and copy_map the scratch board used while searching
// (0 = empty, 1 = the human's cells, 2 = the computer's cells).
// ans counts the random playouts run so far and is printed after each computer move.
int map[4][4] = {0}, copy_map[4][4], ans = 0;
struct tree {
 tree*father = nullptr;;
 tree*brother = nullptr;
 tree*son = nullptr;
 int player = 1;
 int visit = 0;
 int win = 0;
 double UCB = INF;
 int u, v;
};
// Board coordinate of a randomly chosen move: x is the first board index, y the second.
struct node {
    int x;
    int y;
};
void Copymap() {
 for (int i = 0; i < 4; ++i)
  for (int j = 0; j < 4; ++j)
   copy_map[i][j] = map[i][j];
}
// Allocates a new node for player 1's move at (u, v) and marks that cell as 1 on the real board.
// Now_tree is accepted but not used. The caller owns the returned node.
tree*build_tree(tree*Now_tree, int u, int v) {
    tree*created = new tree;
    created->player = 1;
    created->u = u;
    created->v = v;
    map[u][v] = 1;
    return created;
}
// Win test on the scratch board copy_map.
// Returns 1 when player num owns a complete line of four through cell (u, v):
// a diagonal that contains (u, v), column v, or row u. Returns 0 otherwise.
int check(int u, int v, int num) {
    // Main diagonal, only relevant when the cell lies on it.
    if (u == v) {
        bool full = true;
        for (int k = 0; k < 4 && full; ++k)
            full = (copy_map[k][k] == num);
        if (full)
            return 1;
    }
    // Anti-diagonal, only relevant when the cell lies on it.
    if (u == 3 - v) {
        bool full = true;
        for (int k = 0; k < 4 && full; ++k)
            full = (copy_map[k][3 - k] == num);
        if (full)
            return 1;
    }
    // Count the player's cells in column v and in row u.
    int in_column = 0;
    int in_row = 0;
    for (int k = 0; k < 4; ++k) {
        in_column += (copy_map[k][v] == num) ? 1 : 0;
        in_row += (copy_map[u][k] == num) ? 1 : 0;
    }
    return (in_column == 4 || in_row == 4) ? 1 : 0;
}
// Creates the child nodes of Now_tree: one new node per empty cell of copy_map,
// scanned row by row. Each child belongs to the opponent of Now_tree->player and
// is appended to the end of Now_tree's child list (after any children already there).
void search_move(tree*Now_tree) {
    // Find the current end of the child list once, instead of re-walking it for every new child.
    tree*tail = Now_tree->son;
    while (tail && tail->brother)
        tail = tail->brother;

    for (int i = 0; i < 4; ++i)
        for (int j = 0; j < 4; ++j) {
            if (copy_map[i][j])
                continue;  // occupied cell, not a legal move
            tree*q = new tree;
            q->u = i;
            q->v = j;
            q->player = 3 - Now_tree->player;
            q->father = Now_tree;
            if (tail)
                tail->brother = q;
            else
                Now_tree->son = q;
            tail = q;
        }
}
// Plays one uniformly random move for now_player on the scratch board copy_map.
// Returns the chosen cell; the cell is marked with now_player before returning.
// If copy_map has no empty cell, nothing is written and {-1, -1} is returned
// (callers are expected to stop before the board is full).
node RandomSelect(int now_player)
{
    // Seed exactly once. Re-seeding with time() on every call made every playout
    // within the same second replay the identical random sequence.
    static bool seeded = false;
    if (!seeded) {
        srand((unsigned)time(nullptr));
        seeded = true;
    }

    // Collect the empty cells so the choice always terminates, even on a full board.
    node empties[16];
    int count = 0;
    for (int u = 0; u < 4; ++u)
        for (int v = 0; v < 4; ++v)
            if (copy_map[u][v] == 0) {
                empties[count].x = u;
                empties[count].y = v;
                ++count;
            }

    node q;
    if (count == 0) {
        q.x = -1;
        q.y = -1;
        return q;
    }
    q = empties[rand() % count];
    copy_map[q.x][q.y] = now_player;
    return q;
}
int MCT(int player, int now_player) {//computer is player
 node q;
 int Count = 0, mark = 0;
 for (int i = 0; i < 4; ++i)
  for (int j = 0; j < 4; ++j)
   if (copy_map[i][j])
    Count++;
 if (Count == 16)
  return 0;
 while (1) {
  q = RandomSelect(now_player);
  Count++;
  if (check(q.x, q.y, now_player)) {
   if (now_player == player)
    return 1;
   else
    return -1;
  }
  else
   now_player = 3 - now_player;
  if (Count == 16)
   break;
 }
 return 0;
}
// Returns the child of Now_tree with the largest UCB value (the earliest child wins ties),
// or nullptr when Now_tree has no child whose UCB exceeds -INF.
tree*FoundMaxUCB(tree*Now_tree) {
    tree*best = nullptr;
    double best_score = -INF;
    for (tree*child = Now_tree->son; child != nullptr; child = child->brother) {
        if (child->UCB > best_score) {
            best = child;
            best_score = child->UCB;
        }
    }
    return best;
}
void UCT(tree*head) {
 int Win_flag = 0, is_Win;
 tree*q;
 search_move(head);
 int start_Time, end_Time;//开始时间,结束时间
 start_Time = end_Time = clock();
 while ((end_Time - start_Time) / CLOCKS_PER_SEC <= Time) {
  int a = (end_Time - start_Time) / CLOCKS_PER_SEC;
  q = FoundMaxUCB(head);
  copy_map[q->u][q->v] = q->player;
  if (q->UCB == INF * 2|| check(q->u, q->v, q->player)) //已经判胜
  {
   q->UCB = INF * 2; //无穷大
   q->win++;
   q->visit++;
   Win_flag = q->player; //胜者设置成q玩家
   while (1) //回溯
   {
    q = q->father;
    q->visit++;
    if (q->player == Win_flag)
     q->win++; //颜色相同,即可胜场数+1
    else if (Win_flag != 0)
     q->win--;
    if (q->father == nullptr)
     break;
    q->UCB = (double)q->win / (double)q->visit + sqrt(2 * UCB_K*log(q->father->visit) / q->visit);
   }
   Copymap();
   head = q;
  }
  else if (q->visit < SIM_MAX) {
   Win_flag = 0; 
   
   is_Win = MCT(3 - q->player, q->player);
   ans++;
   if (is_Win == 1) { //进行一次随机模拟+判断胜负
    q->win++;
   }
   else if (is_Win == -1) {
    q->win--;
    Win_flag = 3 - q->player;
   }
   q->visit++;
       if (q->visit >= SIM_MIN) //保护机制消失,走不走此节点就看其rate大小
    q->UCB = (double)q->win / (double)q->visit + sqrt(2 * UCB_K*log(q->father->visit) / q->visit);
   int start_time1 = clock(), end_time1;
   while (1) //回溯
   {
    q = q->father;
    q->visit++;
    if (q->player == Win_flag)
     q->win++; //颜色相同,即可胜场数+1
    else if (Win_flag != 0)
     q->win--;
    if (q->father == nullptr)
     break;
    q->UCB = (double)q->win / (double)q->visit + sqrt(2 * UCB_K*log(q->father->visit) / q->visit);
   }
   head = q;
   Copymap();
  }
  else if (q->visit >= SIM_MAX) {
   if (q->son)       //子节点不为空,指针下移
   {
    head = q;
   }
   else if (q->son == NULL)     //该节点的子节点为空,则扩展子节点
   {
    search_move(q); //扩展子节点
    if (q->son == NULL)      //扩展子节点失败
    {
     q->visit++;
     while (q->father != NULL){
      q = q->father;
      q->visit ++;
     }
                   Copymap();
    }
    else         //扩展子节点成功,指针下移
    {
     head = q;
    }
   }
  }
  end_Time = clock();
 }
 while (head->father) //回溯
  head = head->father;
 Copymap();
 q = FoundMaxUCB(head);
 copy_map[q->u][q->v] = q->player;
 head->son = q;
    q->brother = nullptr;
}
int main(){
 int u, v;
 tree*Now_tree = new tree;
  while (cin >> u >> v) {
  Now_tree->u = u;
  Now_tree->v = v;
  Now_tree->father = nullptr;
  Now_tree->son = nullptr;
  Now_tree->player = 1;
  map[u][v] = 1;
  Copymap();
  if (check(u, v, 1)) {
   printf("You win!");
   break;
  }
  else {
   UCT(Now_tree);
   Now_tree = Now_tree->son;
   printf("%d %d\n", Now_tree->u, Now_tree->v);
   map[Now_tree->u][Now_tree->v] = 2;
   Copymap();
   printf("%d\n", ans);
   if (check( Now_tree->u, Now_tree->v, 2)) {
    printf("Computer wins!");
   }
  }
 }
 return 0;
}


  • 1
    点赞
  • 5
    收藏
    觉得还不错? 一键收藏
  • 2
    评论
评论 2
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值