Implementing a Simple Gomoku AI with Monte Carlo Tree Search (MCTS)

Monte Carlo Tree Search estimates the probability of winning in each branch of the game tree by running self-play simulations, and uses those estimates to choose the best move. The code is split into a Node class and a State class: the Node class builds the tree by linking parent and child nodes and stores each node's statistics, while the State class encodes the game rules and decides the winner.

The model structure follows https://ai-boson.github.io/mcts/
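
The search itself just repeats the four standard MCTS phases: selection, expansion, simulation (rollout) and backpropagation. As a rough sketch of one iteration (using the Node interface defined below; mcts_iteration is only an illustrative name), the loop inside best_action does the following:

def mcts_iteration(root):
    leaf = root._tree_policy()   # selection + expansion: walk down the tree and add a new child
    reward = leaf.rollout()      # simulation: random self-play from that node to the end of the game
    leaf.backpropagate(reward)   # backpropagation: update visit counts and results up to the root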

First, import the required modules.

import numpy as np
import random as rd
import copy
from collections import defaultdict

VECTOR = 7  # board size; Gomoku is normally played on 15*15, but 7*7 is used here to reduce computation
SIMU_EPSDS = 5000  # number of self-play simulations per move; larger means more simulations and slower play

Create the node class

class Node():
    def __init__(self, state, parent=None, parent_action=None):  
        self.state = state
        self.parent = parent
        self.parent_action = parent_action
        self.children = []
        self._untried_actions = self.untried_actions()

        self._number_of_visits = 0
        self._results = defaultdict(int)  
        self._results[1] = 0
        self._results[-1] = 0

    # q: the number of wins recorded at this node minus the number of losses
    def q(self):  
        wins = self._results[1]
        loses = self._results[-1]
        return wins - loses

    # n: the number of times this node has been visited
    def n(self):  
        return self._number_of_visits
    
    # expand: take one untried action and add the resulting child node to the tree
    def expand(self):    
        action = self._untried_actions.pop()
        next_state = self.state.play(action,0)
        child_node = Node(next_state, parent=self, parent_action=action)
        self.children.append(child_node)
        return child_node
    
    def untried_actions(self):
        untried_actions = copy.deepcopy(self.state.get_legal_actions())
        return untried_actions

    # tree policy: starting from this node, if the node is not terminal and not fully expanded,
    # expand it; otherwise move to a randomly chosen child and repeat
    def _tree_policy(self):
        current_node = self
        while not current_node.is_terminal_node():
            if not current_node.is_fully_expanded():
                return current_node.expand()
            else:
                current_node = current_node.rd_child()
        return current_node
    
    def is_terminal_node(self):
        return self.state.is_game_over()
    
    def is_fully_expanded(self):
        return len(self._untried_actions) == 0
    
    # choose the best child (next state) among all children
    def best_child(self, c_param=0.1):
        current_state = self.state
        # UCT formula
        choices_weights = [(c.q() / c.n()) + c_param * np.sqrt((2*np.log(self.n()) / c.n())) for c in self.children]
        order = current_state.state[0]
        if order==1: # the current player moves first, so pick the child with the largest weight
            return self.children[np.argmax(choices_weights)]
        else:  # the current player moves second, so pick the child with the smallest weight (the state with the lowest win rate for the first player)
            return self.children[np.argmin(choices_weights)]
    
    def rd_child(self):
        # descend to a randomly chosen child
        return rd.choice(self.children)
         
    # rollout (self-play simulation): play randomly chosen actions until the game ends
    def rollout(self):
        current_rollout_state = self.state
        while not current_rollout_state.is_game_over():
            possible_moves = current_rollout_state.get_legal_actions()
            action = self.rollout_policy(possible_moves)
            current_rollout_state = current_rollout_state.play(action,0)
        return current_rollout_state.game_result()
    
    def rollout_policy(self, possible_moves):
        return possible_moves[np.random.randint(len(possible_moves))] #rd.choice(possible_moves)
    
    # backpropagate: pass this node's result up through its ancestors
    def backpropagate(self, result):
        self._number_of_visits += 1.
        self._results[result] += 1.
        if self.parent:
            self.parent.backpropagate(result)
    
    # computing the best action runs SIMU_EPSDS self-play simulations from this node,
    # records the results, and then picks the best child
    def best_action(self):
        for i in range(SIMU_EPSDS):
            v = self._tree_policy()
            reward = v.rollout()
            v.backpropagate(reward)
        return self.best_child()
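
The weight computed in best_child is the standard UCT score. For a child c of a node that has been visited n times in total:

    UCT(c) = q(c)/n(c) + c_param * sqrt(2 * ln(n) / n(c))

The first term favours children that have produced good results so far, the second favours children that have been visited rarely, and c_param controls the trade-off between exploitation and exploration.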

Create the state class

class State():
    def __init__(self, ini_state):  # the initial board state [order, board] is passed in at construction
        self.state=ini_state


    def play(self, action, is_expl):  # place a stone for the current player (1 - first player, -1 - second player); is_expl is currently unused
        order = self.state[0]
        board = copy.deepcopy(self.state[1])
            
        x=action[0]
        y=action[1]
        
        board[x][y]=order
                
        new_state=[-order,board]

        return State(new_state)
    
    def is_game_over(self):
        return self.game_result()!=999

    # current game result: 1 - the first player wins, -1 - the second player wins, 0 - no winner and no empty points left (draw), 999 - no winner yet, the game continues
    def game_result(self):

        # check for five in a row vertically; the first two and last two rows cannot be the centre of a window
        for i in range(2,VECTOR-2):
            for j in range(VECTOR):
                if self.state[1][i-2][j]==self.state[1][i-1][j]==self.state[1][i][j]==self.state[1][i+1][j]==self.state[1][i+2][j]!=0:
                    return self.state[1][i][j]
        
        # check for five in a row horizontally; the first two and last two columns cannot be the centre of a window
        for i in range(VECTOR):
            for j in range(2,VECTOR-2):
                if self.state[1][i][j-2]==self.state[1][i][j-1]==self.state[1][i][j]==self.state[1][i][j+1]==self.state[1][i][j+2]!=0:
                    return self.state[1][i][j]
        
        # check for five in a row on both diagonals; the outer two rows and columns cannot be the centre of a window
        for i in range(2,VECTOR-2):
            for j in range(2,VECTOR-2):
                if (self.state[1][i-2][j-2]==self.state[1][i-1][j-1]==self.state[1][i][j]==self.state[1][i+1][j+1]==self.state[1][i+2][j+2]!=0) \
                or (self.state[1][i-2][j+2]==self.state[1][i-1][j+1]==self.state[1][i][j]==self.state[1][i+1][j-1]==self.state[1][i+2][j-2]!=0):
                    return self.state[1][i][j]
        
        # no winner yet: count the remaining empty points; if none are left the game ends in a draw (return 0)
        blank_count=0
        for i in range(VECTOR):
            for j in range(VECTOR):
                if (self.state[1][i][j]==0):
                    blank_count+=1
        if blank_count==0:
            return 0
        else: # no winner yet and empty points remain, so the game continues
            return 999
    
    # all points on the board where a stone can still be placed
    def get_legal_actions(self): 
        legal_actions=[] 
        for i in range(VECTOR):
            for j in range(VECTOR):
                if self.state[1][i][j] == 0:
                    legal_actions.append([i,j])
        return legal_actions
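
As a quick illustration of the State interface (the printed values assume the 7*7 board and the classes defined above; this snippet is not part of the AI itself):

empty = State([1, np.zeros([VECTOR, VECTOR])])
print(empty.game_result())           # 999: no winner yet, the game continues
nxt = empty.play([3, 3], 0)          # the first player (1) takes the centre point
print(nxt.state[0])                  # -1: it is now the second player's turn
print(len(nxt.get_legal_actions()))  # 48 of the 49 points are still empty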

Main function: human-vs-AI play; the player can choose to move first or second.

if __name__ == '__main__':
    # initialize the state
    state=np.zeros([VECTOR,VECTOR])
    order=1
    state=State([order,state])
    first='player'  # 'player' - the human moves first, 'ai' - the human moves second

    while True:
        if first=='player':
            for i in state.state[1]:
                print(i)

            player_choice=[int(i) for i in input('input the location (x,y):').split(',')]
            state=state.play(player_choice,0)
            print('player:',player_choice)
            
            state=State([state.state[0],state.state[1]])
            for i in state.state[1]:                  
                print(i)
        
            tree=Node(state)
            ai_choice=tree.best_action().parent_action
            state=state.play(ai_choice,0)
            state=State([state.state[0],state.state[1]])
            
            print('ai:',ai_choice)

        else:
            state=State([state.state[0],state.state[1]])
            tree=Node(state)
            
            ai_choice=tree.best_action().parent_action  # take the move that leads to the best child, as in the player-first branch
            state=state.play(ai_choice,0)

            print('ai:',ai_choice)

            for i in state.state[1]:
                print(i)
            
            player_choice=[int(i) for i in input('input the location (x,y):').split(',')]
            state=state.play(player_choice,0)
            print('player:',player_choice)

            for i in state.state[1]:
                print(i)

An end-of-game option has not been added yet; when the game ends, an error is raised and the while loop terminates automatically.
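
One possible way to handle this (an untested sketch, not part of the original code) is to check the result after each move inside the while loop and stop once the game is decided:

result = state.game_result()
if result != 999:
    print('winner:', result)  # 1 - first player wins, -1 - second player wins, 0 - draw
    break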

Below is another Python code example of a Gomoku game AI based on MCTS and UCT:

```python
import random
import math

class TreeNode:
    def __init__(self, state, parent=None):
        self.state = state
        self.parent = parent
        self.children = []
        self.visits = 0
        self.score = 0

def UCT(node):
    C = 1.4
    if node.visits == 0:
        return float('inf')
    return (node.score / node.visits) + C * math.sqrt(math.log(node.parent.visits) / node.visits)

def MCTS(state, iterations):
    root = TreeNode(state)
    for i in range(iterations):
        node = root
        # selection
        while node.children:
            node = max(node.children, key=UCT)
        # expansion
        if node.visits > 0:
            moves = node.state.get_moves()
            for move in moves:
                if move not in [c.state.last_move for c in node.children]:
                    child_state = node.state.apply_move(move)
                    child_node = TreeNode(child_state, node)
                    node.children.append(child_node)
        # simulation
        sim_node = node
        while sim_node.children:
            sim_node = random.choice(sim_node.children)
        score = simulate(sim_node.state)
        # backpropagation
        while node:
            node.visits += 1
            node.score += score
            node = node.parent
    return max(root.children, key=lambda c: c.visits).state.last_move

def simulate(state):
    player = state.get_current_player()
    while not state.is_terminal():
        move = random.choice(state.get_moves())
        state = state.apply_move(move)
        player = state.get_current_player()
    if state.get_winner() == player:
        return 1
    elif state.get_winner() == None:
        return 0.5
    else:
        return 0

class Board:
    def __init__(self, width=15, height=15, win_length=5):
        self.width = width
        self.height = height
        self.win_length = win_length
        self.board = [[None for y in range(height)] for x in range(width)]
        self.last_move = None

    def get_moves(self):
        moves = []
        for x in range(self.width):
            for y in range(self.height):
                if self.board[x][y] == None:
                    moves.append((x, y))
        return moves

    def apply_move(self, move):
        x, y = move
        player = self.get_current_player()
        new_board = Board(self.width, self.height, self.win_length)
        new_board.board = [row[:] for row in self.board]
        new_board.board[x][y] = player
        new_board.last_move = move
        return new_board

    def get_current_player(self):
        if sum(row.count(None) for row in self.board) % 2 == 0:
            return "X"
        else:
            return "O"

    def is_terminal(self):
        if self.get_winner() != None:
            return True
        for x in range(self.width):
            for y in range(self.height):
                if self.board[x][y] == None:
                    return False
        return True

    def get_winner(self):
        for x in range(self.width):
            for y in range(self.height):
                if self.board[x][y] == None:
                    continue
                if x + self.win_length <= self.width:
                    if all(self.board[x+i][y] == self.board[x][y] for i in range(self.win_length)):
                        return self.board[x][y]
                if y + self.win_length <= self.height:
                    if all(self.board[x][y+i] == self.board[x][y] for i in range(self.win_length)):
                        return self.board[x][y]
                if x + self.win_length <= self.width and y + self.win_length <= self.height:
                    if all(self.board[x+i][y+i] == self.board[x][y] for i in range(self.win_length)):
                        return self.board[x][y]
                if x + self.win_length <= self.width and y - self.win_length >= -1:
                    if all(self.board[x+i][y-i] == self.board[x][y] for i in range(self.win_length)):
                        return self.board[x][y]
        return None

    def __str__(self):
        return "\n".join(" ".join(self.board[x][y] or "-" for x in range(self.width)) for y in range(self.height))

if __name__ == "__main__":
    board = Board()
    while not board.is_terminal():
        if board.get_current_player() == "X":
            x, y = map(int, input("Enter move (x y): ").split())
            board = board.apply_move((x, y))
        else:
            move = MCTS(board, 1000)
            print("AI move:", move)
            board = board.apply_move(move)
        print(board)
    print("Winner:", board.get_winner())
```

This code defines a `TreeNode` class that stores each node's state and statistics, and implements the UCT algorithm based on the UCB formula together with an MCTS/UCT Gomoku AI. It also defines a `Board` class that represents the state and rules of the game and provides methods for deciding the winner and listing the available moves. In the `__main__` block, the human player's moves and the AI's moves are applied alternately, giving a human-vs-AI game.