用蒙特卡洛树搜索(UCT)算法解决黑白棋问题

游戏规则:

1. 棋盘大小为8*8,横向从A到H,纵向从1到8。棋局开始时黑棋位于E4和D5,白棋位于D4和E5

2. 黑方先行,双方交替下棋

3. 在空格处落子后,必须能翻转对方的棋子才行

4. 新落子与已有同色棋子间被夹住的对方棋子翻转颜色,包括横夹、竖夹、斜夹。夹住的位置上必须全部是对方棋子,不能有空格。

5. 除非翻转了对手至少一个棋子,否则不能落子,只能弃权,由对手继续落子。

6. 结束条件为棋盘填满或双方都无法落子,棋子多者胜。

代码示例:

from func_timeout import func_timeout, FunctionTimedOut
import datetime
import random
from math import log, sqrt
from time import time
from copy import deepcopy

class ReversiBoard(object):
    """8x8 Reversi board.

    Cells are stored in ``self._board[row][col]`` and hold 'X' (black),
    'O' (white) or ``self.empty``.  Moves are exchanged with callers in
    board notation: a column letter A-H followed by a row digit 1-8
    (e.g. 'D3'), which maps to the index pair ``(row, col) == (2, 3)``.
    """

    # Scan order used when collecting flipped stones; kept as in the
    # original so the order of the returned flip list does not change.
    _FLIP_DIRECTIONS = ((0, 1), (1, 1), (1, 0), (1, -1),
                        (0, -1), (-1, -1), (-1, 0), (-1, 1))
    # Scan order used when collecting candidate squares; it determines the
    # order in which get_legal_actions yields moves.
    _NEIGHBOUR_DIRECTIONS = ((-1, 0), (-1, 1), (0, 1), (1, 1),
                             (1, 0), (1, -1), (0, -1), (-1, -1))

    def __init__(self):
        self.board_init()

    def board_init(self):
        """Reset the board to the starting position (X on E4/D5, O on D4/E5)."""
        self.empty = '.'
        self._board = [[self.empty for _ in range(8)] for _ in range(8)]
        self._board[3][4], self._board[4][3] = 'X', 'X'
        self._board[3][3], self._board[4][4] = 'O', 'O'

    def display(self, step_time=None, total_time=None):
        """Print the board followed by stone counts and timing statistics.

        step_time / total_time are dicts keyed by 'X' and 'O'; when either
        is missing or empty, zeros are shown for both.
        """
        print(' ', ' '.join(list('ABCDEFGH')))
        for i, row in enumerate(self._board):
            print(str(i + 1), ' '.join(row))
        if (not step_time) or (not total_time):
            step_time = {"X": 0, "O": 0}
            total_time = {"X": 0, "O": 0}
        print("统计棋局: 棋子总数 / 每一步耗时 / 总时间 ")
        print("黑   棋: " + str(self.count('X')) + ' / ' + str(step_time['X']) + ' / ' + str(total_time['X']))
        print("白   棋: " + str(self.count('O')) + ' / ' + str(step_time['O']) + ' / ' + str(total_time['O']) + '\n')

    def count(self, color):
        """Return how many cells currently hold ``color``."""
        return sum(row.count(color) for row in self._board)

    def get_winner(self):
        """Return ``(winner, diff)``.

        winner is 0 when black has more stones, 1 when white has more and
        2 on a tie; diff is the absolute difference in stone counts.
        """
        black_count, white_count = self.count('X'), self.count('O')
        if black_count > white_count:
            return 0, black_count - white_count
        if black_count < white_count:
            return 1, white_count - black_count
        return 2, 0

    def _move(self, action, color):
        """Place ``color`` at ``action`` and flip the captured stones.

        action is either board notation ('D3') or a ``(row, col)`` pair.
        Returns the list of flipped squares in board notation, or False
        when the move is malformed or flips nothing (board unchanged).
        """
        if isinstance(action, str):
            action = self.board_num(action)
        fliped = self._can_fliped(action, color)
        if not fliped:
            return False
        for flip in fliped:
            x, y = self.board_num(flip)
            self._board[x][y] = color
        x, y = action
        self._board[x][y] = color
        return fliped

    def backpropagation(self, action, flipped_pos, color):
        """Undo a move: empty ``action`` and give ``flipped_pos`` back to the opponent.

        action and the entries of flipped_pos may be board notation or
        ``(row, col)`` pairs; color is the side that made the move.
        """
        if isinstance(action, str):
            action = self.board_num(action)
        self._board[action[0]][action[1]] = self.empty
        op_color = "O" if color == "X" else "X"

        for p in flipped_pos:
            if isinstance(p, str):
                p = self.board_num(p)
            self._board[p[0]][p[1]] = op_color

    def is_on_board(self, x, y):
        """Return True when (x, y) is a valid index pair of the 8x8 grid."""
        return 0 <= x <= 7 and 0 <= y <= 7

    def _can_fliped(self, action, color):
        """Return the squares ``color`` would flip by playing ``action``.

        The result is a list in board notation, or False when the target
        is malformed, off the board, occupied, or captures nothing.  The
        board is not modified.
        """
        if isinstance(action, str):
            action = self.board_num(action)
        if action is None:
            # Malformed coordinate: reject instead of failing on unpacking.
            return False
        xstart, ystart = action
        if not self.is_on_board(xstart, ystart) or self._board[xstart][ystart] != self.empty:
            return False
        op_color = "O" if color == "X" else "X"
        flipped_pos = []
        for xdirection, ydirection in self._FLIP_DIRECTIONS:
            x, y = xstart + xdirection, ystart + ydirection
            run = []
            # Collect the contiguous line of opponent stones in this direction.
            while self.is_on_board(x, y) and self._board[x][y] == op_color:
                run.append([x, y])
                x += xdirection
                y += ydirection
            # The line only counts when it is closed by one of our own stones.
            if run and self.is_on_board(x, y) and self._board[x][y] == color:
                # Farthest stone first, matching the original walk-back order.
                flipped_pos.extend(reversed(run))
        if not flipped_pos:
            return False
        return [self.num_board(fp) for fp in flipped_pos]

    def get_legal_actions(self, color):
        """Yield every legal move for ``color`` in board notation.

        Candidates are the empty squares adjacent to an opponent stone;
        each one is kept only if playing it would flip something.
        """
        op_color = "O" if color == "X" else "X"
        board = self._board
        # A dict is used as an insertion-ordered set of candidate squares.
        candidates = {}
        for i in range(8):
            for j in range(8):
                if board[i][j] != op_color:
                    continue
                for dx, dy in self._NEIGHBOUR_DIRECTIONS:
                    x, y = i + dx, j + dy
                    if 0 <= x <= 7 and 0 <= y <= 7 and board[x][y] == self.empty:
                        candidates[(x, y)] = None
        for p in candidates:
            if self._can_fliped(p, color):
                yield self.num_board(p)

    def board_num(self, action):
        """Convert board notation ('D3') to a ``(row, col)`` index pair.

        Only the first two characters are read.  Returns None when the
        input is too short or does not name a square.
        """
        try:
            col, row = str(action[0]).upper(), str(action[1]).upper()
        except (IndexError, TypeError, KeyError):
            return None
        # Length checks keep '' and multi-character pieces from passing the
        # substring tests below.
        if len(row) == 1 and len(col) == 1 and row in '12345678' and col in 'ABCDEFGH':
            return '12345678'.index(row), 'ABCDEFGH'.index(col)
        return None

    def num_board(self, action):
        """Convert a ``(row, col)`` index pair to board notation, or None if out of range."""
        row, col = action
        valid = range(8)
        if col in valid and row in valid:
            return chr(ord('A') + col) + str(row + 1)
        return None
class Game(object):
    """Interactive Reversi match between two player objects.

    Each player must expose ``get_move(board=...)``; the game assigns
    ``color`` 'X' to the black player and 'O' to the white player.
    """

    def __init__(self, black_player, white_player):
        # Keep the players that were actually passed in rather than relying
        # on module-level variables that happen to share these names.
        self.black_player = black_player
        self.white_player = white_player
        self.game_init()

    def game_init(self):
        """Create a fresh board and reset turn order and player colors."""
        self.board = ReversiBoard()
        self.current_player = None
        self.black_player.color = "X"
        self.white_player.color = "O"

    def switch_player(self, black_player, white_player):
        """Return the player to move next; black starts when nobody has moved yet."""
        if self.current_player is None:
            return black_player
        if self.current_player == self.black_player:
            return white_player
        return black_player

    def print_winner(self, winner):
        """Print the result line for winner 0 (black), 1 (white) or 2 (draw)."""
        print(['黑棋获胜!', '白棋获胜!', '平局'][winner])

    def force_loss(self, is_timeout=False, is_board=False, is_legal=False):
        """Declare the current player the loser and report why.

        Returns ``(winner, diff)`` where winner is the opponent's index
        (0 black, 1 white) and diff is always 0.
        """
        if self.current_player == self.black_player:
            win_color = '白棋 - O'
            loss_color = '黑棋 - X'
            winner = 1
        else:
            win_color = '黑棋 - X'
            loss_color = '白棋 - O'
            winner = 0
        if is_timeout:
            print('\n{} 思考超过 60s, {} 胜'.format(loss_color, win_color))
        if is_legal:
            print('\n{} 落子 3 次不符合规则,故 {} 胜'.format(loss_color, win_color))
        if is_board:
            print('\n{} 擅自改动棋盘判输,故 {} 胜'.format(loss_color, win_color))
        diff = 0
        return winner, diff

    def run(self):
        """Play the game to completion, printing the board after every move.

        A player loses immediately on a timeout (60s per move), on three
        illegal moves in one turn, or on modifying the board inside
        ``get_move``.  Returning "Q" from ``get_move`` ends the game and
        scores the current position.
        """
        total_time = {"X": 0, "O": 0}
        step_time = {"X": 0, "O": 0}
        winner = None
        diff = -1
        print('\n=====开始游戏!=====\n')
        self.board.display(step_time, total_time)
        while True:
            self.current_player = self.switch_player(self.black_player, self.white_player)
            start_time = datetime.datetime.now()
            color = "X" if self.current_player == self.black_player else "O"
            legal_actions = list(self.board.get_legal_actions(color))
            if len(legal_actions) == 0:
                if self.game_over():
                    winner, diff = self.board.get_winner()
                    break
                # Current player must pass; the turn goes back to the opponent.
                continue
            # Snapshot used afterwards to detect a player tampering with the board.
            board = deepcopy(self.board._board)
            try:
                for i in range(0, 3):
                    action = func_timeout(60, self.current_player.get_move,kwargs={'board': self.board})
                    if action == "Q":
                        break
                    if action not in legal_actions:
                        print("你落子不符合规则,请重新落子!")
                        continue
                    else:
                        break
                else:
                    # Three attempts used up without a legal move.
                    winner, diff = self.force_loss(is_legal=True)
                    break
            except FunctionTimedOut:
                winner, diff = self.force_loss(is_timeout=True)
                break
            end_time = datetime.datetime.now()
            if board != self.board._board:
                winner, diff = self.force_loss(is_board=True)
                break
            if action == "Q":
                winner, diff = self.board.get_winner()
                break
            if action is None:
                continue
            else:
                es_time = (end_time - start_time).seconds
                if es_time > 60:
                    print('\n{} 思考超过 60s'.format(self.current_player))
                    winner, diff = self.force_loss(is_timeout=True)
                    break
                self.board._move(action, color)
                if self.current_player == self.black_player:
                    step_time["X"] = es_time
                    total_time["X"] += es_time
                else:
                    step_time["O"] = es_time
                    total_time["O"] += es_time
                self.board.display(step_time, total_time)
                if self.game_over():
                    winner, diff = self.board.get_winner()
                    break
        print('\n=====游戏结束!=====\n')
        self.board.display(step_time, total_time)
        self.print_winner(winner)

    def game_over(self):
        """Return True when neither side has a legal move."""
        b_list = list(self.board.get_legal_actions('X'))
        w_list = list(self.board.get_legal_actions('O'))
        is_over = len(b_list) == 0 and len(w_list) == 0
        return is_over
class HumanPlayer:
    """Player whose moves are typed on standard input."""

    def __init__(self, color):
        self.color = color

    def get_move(self, board):
        """Prompt until the user enters a legal move or quits.

        Returns the move in upper-case board notation (e.g. 'D3'), or "Q"
        when the user types 'Q'/'q'.  board must provide
        ``get_legal_actions(color)``.
        """
        player = "黑棋" if self.color == "X" else "白棋"
        prompt = "请'{}-{}'方输入一个合法的坐标(e.g. 'D3',若不想进行,请务必输入'Q'结束游戏。): ".format(player,self.color)
        while True:
            action = input(prompt).strip()
            if action == "Q" or action == 'q':
                return "Q"
            # Exactly two characters are required, so empty or one-character
            # input is rejected here instead of raising IndexError.
            if len(action) == 2:
                # Legal moves are reported in upper case; normalise so that
                # 'd3' is accepted like 'D3'.
                action = action.upper()
                col, row = action[0], action[1]
                if row in '12345678' and col in 'ABCDEFGH':
                    if action in board.get_legal_actions(self.color):
                        return action
            # Reached for malformed input and for well-formed but illegal moves.
            print("你的输入不合法,请重新输入!")
def oppo(color):
    """Return the opposing color: 'O' for 'X', and 'X' for anything else."""
    return 'O' if color == 'X' else 'X'
class TreeNode():
    """One node of the search tree built by AIPlayer."""

    def __init__(self, parent, color):
        # w accumulates back-propagated scores, n counts visits.
        self.w, self.n = 0, 0
        # Maps a move to the TreeNode reached by playing it.
        self.child = {}
        # Color whose legal moves are used to expand this node.
        self.color = color
        # None for the root.
        self.parent = parent
class SilentGame(object):
    """Reversi game loop without board output or move checks, used for playouts.

    The game works on its own deep copy of ``board``.  ``current_player``
    is the player treated as having moved last: ``run`` switches players
    before the first move, so the opposite player moves first (black when
    it is None).
    """

    def __init__(self, black_player, white_player, board=None, current_player=None):
        # A None sentinel replaces the former ``board=ReversiBoard()`` default,
        # which was evaluated once when the class body was executed.
        self.board = ReversiBoard() if board is None else deepcopy(board)
        self.current_player = current_player
        self.black_player = black_player
        self.white_player = white_player
        self.black_player.color = "X"
        self.white_player.color = "O"

    def switch_player(self, black_player, white_player):
        """Return the player to move next; black starts when nobody has moved yet."""
        if self.current_player is None:
            return black_player
        if self.current_player == self.black_player:
            return white_player
        return black_player

    def print_winner(self, winner):
        """Print the result line for winner 0 (black), 1 (white) or 2 (draw)."""
        print(['黑棋获胜!', '白棋获胜!', '平局'][winner])

    def force_loss(self, is_timeout=False, is_board=False, is_legal=False):
        """Declare the current player the loser and report why.

        Returns ``(winner, diff)`` where winner is the opponent's index
        (0 black, 1 white) and diff is always 0.
        """
        if self.current_player == self.black_player:
            win_color = '白棋 - O'
            loss_color = '黑棋 - X'
            winner = 1
        else:
            win_color = '黑棋 - X'
            loss_color = '白棋 - O'
            winner = 0
        if is_timeout:
            print('\n{} 思考超过 60s, {} 胜'.format(loss_color, win_color))
        if is_legal:
            print('\n{} 落子 3 次不符合规则,故 {} 胜'.format(loss_color, win_color))
        if is_board:
            print('\n{} 擅自改动棋盘判输,故 {} 胜'.format(loss_color, win_color))
        diff = 0
        return winner, diff

    def run(self):
        """Play until neither side can move and return ``(winner, diff)``.

        The pair comes from ``board.get_winner()``.
        """
        winner = None
        diff = -1
        while True:
            self.current_player = self.switch_player(self.black_player, self.white_player)
            color = "X" if self.current_player == self.black_player else "O"
            legal_actions = list(self.board.get_legal_actions(color))
            if len(legal_actions) == 0:
                if self.game_over():
                    winner, diff = self.board.get_winner()
                    break
                # Current player must pass; the turn goes back to the opponent.
                continue
            action = self.current_player.get_move(self.board)
            if action is None:
                continue
            self.board._move(action, color)
            if self.game_over():
                winner, diff = self.board.get_winner()
                break
        return winner, diff

    def game_over(self):
        """Return True when neither side has a legal move."""
        b_list = list(self.board.get_legal_actions('X'))
        w_list = list(self.board.get_legal_actions('O'))
        is_over = len(b_list) == 0 and len(w_list) == 0
        return is_over
class RoxannePlayer(object):
    """Player that picks moves from a fixed priority table of squares.

    The table is scanned tier by tier; the first legal move found in the
    earliest tier is played.
    """

    def __init__(self, color):
        self.color = color
        # Tiers of squares in descending priority.
        self.roxanne_table = [
            ['A1', 'H1', 'A8', 'H8'],
            ['C3', 'F3', 'C6', 'F6'],
            ['C4', 'F4', 'C5', 'F5', 'D3', 'E3', 'D6', 'E6'],
            ['A3', 'H3', 'A6', 'H6', 'C1', 'F1', 'C8', 'F8'],
            ['A4', 'H4', 'A5', 'H5', 'D1', 'E1', 'D8', 'E8'],
            ['B3', 'G3', 'B6', 'G6', 'C2', 'F2', 'C7', 'F7'],
            ['B4', 'G4', 'B5', 'G5', 'D2', 'E2', 'D7', 'E7'],
            ['B2', 'G2', 'B7', 'G7'],
            ['A2', 'H2', 'A7', 'H7', 'B1', 'G1', 'B8', 'G8']
        ]

    def roxanne_select(self, board):
        """Return a legal move from the highest-priority tier, or None if there is none.

        Each tier is shuffled in place before it is scanned, so the choice
        among equally ranked squares is random.
        """
        legal = list(board.get_legal_actions(self.color))
        if not legal:
            return None
        for tier in self.roxanne_table:
            random.shuffle(tier)
            for candidate in tier:
                if candidate in legal:
                    return candidate

    def get_move(self, board):
        """Return the move chosen by ``roxanne_select`` for ``board``."""
        return self.roxanne_select(board)
class AIPlayer(object):
    """Monte Carlo tree search (UCT) player using RoxannePlayer playouts.

    ``time_limit`` is the per-move budget in seconds; the search stops
    one second before it elapses, measured from ``self.tick``.
    """

    def __init__(self, color, time_limit=2):
        self.time_limit = time_limit
        self.tick = 0
        self.sim_black = RoxannePlayer('X')
        self.sim_white = RoxannePlayer('O')
        self.color = color

    def mcts(self, board):
        """Search from ``board`` and return the most visited root move.

        At least one iteration always runs, so the root is expanded even
        when the time budget is already used up (``time_limit <= 1`` or a
        stale ``tick``).  Returns None only when this player has no legal
        move.
        """
        root = TreeNode(None, self.color)
        while True:
            sim_board = deepcopy(board)
            choice = self.select(root, sim_board)
            self.expand(choice, sim_board)
            winner, diff = self.simulate(choice, sim_board)
            # Playout result as a score for black: win 1, loss 0, draw 0.5.
            back_score = [1, 0, 0.5][winner]
            # A node's w is scored for the side that moved into it, i.e. the
            # opponent of node.color; flip when that side is white.
            if choice.color == 'X':
                back_score = 1 - back_score
            self.back_prop(choice, back_score)
            if time() - self.tick >= self.time_limit - 1:
                break
        if not root.child:
            return None
        # max() keeps the first move among equally visited children, as the
        # original strict '>' comparison did.
        return max(root.child, key=lambda move: root.child[move].n)

    def select(self, node, board):
        """Descend from ``node`` to a leaf, playing the chosen moves on ``board``.

        An unvisited child is taken as soon as one is met; otherwise the
        child with the highest UCB1 score is followed.
        """
        while len(node.child) != 0:
            best_score = -1
            best_move = None
            for move, child in node.child.items():
                if child.n == 0:
                    best_move = move
                    break
                score = child.w / child.n + sqrt(2 * log(node.n) / child.n)
                if score > best_score:
                    best_score = score
                    best_move = move
            board._move(best_move, node.color)
            node = node.child[best_move]
        return node

    def expand(self, node, board):
        """Add one child per legal move of ``node.color`` on ``board``."""
        for move in board.get_legal_actions(node.color):
            node.child[move] = TreeNode(node, oppo(node.color))

    def simulate(self, node, board):
        """Play a SilentGame from ``board`` and return its ``(winner, diff)``.

        SilentGame switches players before the first move, so the player
        opposite to ``node.color`` is passed as the current one; that makes
        ``node.color`` the first to move.
        """
        if node.color == 'O':
            current_player = self.sim_black
        else:
            current_player = self.sim_white
        sim_game = SilentGame(self.sim_black, self.sim_white, board, current_player)
        return sim_game.run()

    def back_prop(self, node, score):
        """Add a visit and ``score`` to ``node``, then walk up to the root.

        The score is replaced by ``1 - score`` at every level.
        """
        while node is not None:
            node.n += 1
            node.w += score
            score = 1 - score
            node = node.parent

    def get_move(self, board):
        """Start the move timer and return the move found by ``mcts``.

        The search runs on a deep copy, so ``board`` itself is not modified.
        """
        self.tick = time()
        return self.mcts(deepcopy(board))
# Script entry: a human plays black ('X') against the MCTS AI playing white ('O').
# NOTE(review): Game.game_init reads module-level names `black_player` and
# `white_player`, so these two variables must keep exactly these names.
black_player =  HumanPlayer("X")
white_player = AIPlayer("O")
game = Game(black_player, white_player)
# Starts the interactive game loop; it prompts on standard input.
game.run()

运行示例: 

  • 3
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值