αβ剪枝

from collections import namedtuple
import random

from utils import argmax
from canvas import Canvas
"""负无穷"""
# Positive infinity. The search routines in this module use ``infinity`` as
# the initial upper bound and ``-infinity`` as the initial lower bound.
infinity = float('inf')

# A game state is stored as a namedtuple called 'GameState' with fields:
#   to_move -- whose turn it is to place a piece
#   utility -- utility value cached for this board state; it is filled in
#              while the search recursively evaluates states
#   board   -- the positions already occupied by each player's pieces
#   moves   -- the empty positions that can still be played
GameState = namedtuple('GameState', 'to_move, utility, board, moves')

# ______________________________________________________________________________
# Minimax Search


def minimax_decision(state, game):
    """Choose the best move from ``state`` by exhaustive minimax search.

    The game tree is explored all the way down to terminal states
    [Figure 5.3]. Utilities are always measured from the point of view of
    the player who is to move in ``state``.

    Returns the action whose resulting state has the highest minimax value.
    """
    player = game.to_move(state)

    def max_value(node):
        """Minimax value of ``node`` when the searching player moves."""
        # A terminal node is simply worth its utility.
        if game.terminal_test(node):
            return game.utility(node, player)
        best = -infinity
        # Each move leads to a MIN node; MAX keeps the largest MIN value.
        for move in game.actions(node):
            reply = min_value(game.result(node, move))
            if reply > best:
                best = reply
        return best

    def min_value(node):
        """Minimax value of ``node`` when the opponent moves."""
        if game.terminal_test(node):
            return game.utility(node, player)
        worst = infinity
        # Each move leads to a MAX node; MIN keeps the smallest MAX value.
        for move in game.actions(node):
            reply = max_value(game.result(node, move))
            if reply < worst:
                worst = reply
        return worst

    def score(move):
        """Value of playing ``move`` now: the opponent replies next."""
        return min_value(game.result(state, move))

    # Rank every action available in the root state by its MIN value.
    return argmax(game.actions(state), key=score)

# ______________________________________________________________________________


def alphabeta_full_search(state, game):
    """Find the best action from ``state`` using alpha-beta pruning.

    As in [Figure 5.7], the search runs all the way to the leaves; no depth
    limit or evaluation function is used. Utilities are measured for the
    player who is to move in ``state``.

    Returns the best action, or None when ``state`` offers no actions.
    """
    player = game.to_move(state)

    def max_value(node, alpha, beta):
        """Value of a MAX node.

        node:  the current MAX node's state
        alpha: best value found so far for MAX along the path
        beta:  the bound inherited from the parent MIN node
        """
        # Terminal state: stop and report its utility.
        if game.terminal_test(node):
            return game.utility(node, player)
        value = -infinity
        for move in game.actions(node):
            # Value of the MIN node reached by this move.
            child = min_value(game.result(node, move), alpha, beta)
            if child > value:
                value = child
            # The parent MIN node will never allow a value this high,
            # so the remaining moves need not be examined.
            if value >= beta:
                return value
            # Raise alpha to the largest value seen so far.
            if value > alpha:
                alpha = value
        return value

    def min_value(node, alpha, beta):
        """Value of a MIN node; mirror image of ``max_value``."""
        if game.terminal_test(node):
            return game.utility(node, player)
        value = infinity
        for move in game.actions(node):
            child = max_value(game.result(node, move), alpha, beta)
            if child < value:
                value = child
            # The parent MAX node already has something at least this
            # good, so prune the rest of this node's moves.
            if value <= alpha:
                return value
            # Lower beta to the smallest value seen so far.
            if value < beta:
                beta = value
        return value

    # Root: take the action whose MIN value is largest. The best score so
    # far is passed down as alpha so later siblings can be pruned.
    top_score = -infinity
    top_action = None
    upper_bound = infinity
    for action in game.actions(state):
        score = min_value(game.result(state, action), top_score, upper_bound)
        if score > top_score:
            top_score, top_action = score, action
    return top_action


def alphabeta_search(state, game, d=4, cutoff_test=None, eval_fn=None):
    """Find the best action from ``state`` with depth-limited alpha-beta.

    Unlike the full search, this version stops descending when
    ``cutoff_test(state, depth)`` is true and scores that state with
    ``eval_fn(state)``.

    d:           depth limit used by the default cutoff test
    cutoff_test: optional callable (state, depth) -> bool; by default the
                 search stops when depth exceeds ``d`` or the state is
                 terminal
    eval_fn:     optional callable state -> value; by default the game's
                 utility for the player to move in ``state``

    Returns the best action, or None when ``state`` offers no actions.
    """
    player = game.to_move(state)

    # Fill in the defaults first; the helpers below look these names up
    # when they are called.
    if not cutoff_test:
        def cutoff_test(node, depth):
            return depth > d or game.terminal_test(node)
    if not eval_fn:
        def eval_fn(node):
            return game.utility(node, player)

    def max_value(node, alpha, beta, depth):
        """Value of a MAX node, or its evaluation once the cutoff is hit."""
        if cutoff_test(node, depth):
            return eval_fn(node)
        value = -infinity
        for move in game.actions(node):
            child = min_value(game.result(node, move),
                              alpha, beta, depth + 1)
            if child > value:
                value = child
            if value >= beta:
                return value
            if value > alpha:
                alpha = value
        return value

    def min_value(node, alpha, beta, depth):
        """Value of a MIN node, or its evaluation once the cutoff is hit."""
        if cutoff_test(node, depth):
            return eval_fn(node)
        value = infinity
        for move in game.actions(node):
            child = max_value(game.result(node, move),
                              alpha, beta, depth + 1)
            if child < value:
                value = child
            if value <= alpha:
                return value
            if value < beta:
                beta = value
        return value

    # Root: children are searched at depth 1, with the best score so far
    # passed down as alpha.
    top_score = -infinity
    top_action = None
    upper_bound = infinity
    for action in game.actions(state):
        score = min_value(game.result(state, action),
                          top_score, upper_bound, 1)
        if score > top_score:
            top_score, top_action = score, action
    return top_action
def query_player(game, state):
    """Ask a human for a move on standard input.

    Prints the current state (via ``game.display``) and the available
    moves, then prompts for a move.

    Returns the typed text parsed as a Python literal (for example
    ``(1, 2)`` becomes a tuple). Text that is not a valid literal is
    returned unchanged as a string.
    """
    # Local import so this function does not depend on the file's import block.
    import ast

    print("current state:")
    game.display(state)
    print("available moves: {}".format(game.actions(state)))
    print("")
    move_string = input('Your move? ')
    try:
        # SECURITY: this used to call eval() on the raw input, which runs
        # arbitrary code typed at the prompt. literal_eval accepts only
        # literals (numbers, strings, tuples, lists, dicts, ...).
        return ast.literal_eval(move_string.strip())
    except (ValueError, TypeError, SyntaxError):
        # Not a literal (a bare word, an expression, empty input, ...):
        # hand the text back as-is instead of crashing.
        return move_string


def random_player(game, state):
    """Pick one of the legal moves in ``state`` at random.

    The candidates are whatever ``game.actions(state)`` returns.
    """
    legal_moves = game.actions(state)
    return random.choice(legal_moves)


def alphabeta_player(game, state):
    """Player that picks its move with the full alpha-beta pruning search."""
    chosen_move = alphabeta_full_search(state, game)
    return chosen_move


class Game:
    """Base class for turn-taking games.

    A game resembles a search problem, except that it has a utility for
    each state and a terminal test in place of a path cost and a goal
    test. Subclasses implement ``actions``, ``result`` and ``utility``
    (and may override ``terminal_test`` and ``display``), and must set the
    ``initial`` attribute to the starting state, e.g. in the constructor.
    """

    def actions(self, state):
        """Return the list of moves allowed in ``state``.

        Must be overridden; the base implementation raises.
        """
        raise NotImplementedError

    def result(self, state, move):
        """Return the state reached by making ``move`` in ``state``.

        Must be overridden; the base implementation raises.
        """
        raise NotImplementedError

    def utility(self, state, player):
        """Return the value of the final ``state`` to ``player``.

        Intended to be called on end-of-game states only. Must be
        overridden; the base implementation raises.
        """
        raise NotImplementedError

    def terminal_test(self, state):
        """Return True when ``state`` ends the game.

        By default a state is final when it has no available actions.
        """
        legal_moves = self.actions(state)
        return not legal_moves

    def to_move(self, state):
        """Return the player whose turn it is in ``state``."""
        return state.to_move

    def display(self, state):
        """Show ``state``; the default simply prints it."""
        print(state)

    def __repr__(self):
        class_name = self.__class__.__name__
        return '<{}>'.format(class_name)

    def play_game(self, *players):
        """Play an n-person, move-alternating game.

        Each player is a callable ``player(game, state)`` returning a move.
        Players are asked in turn, round after round, until a terminal
        state is reached. That state is displayed and its utility for the
        player who moves first in ``self.initial`` is returned.
        """
        current = self.initial
        while True:
            for choose_move in players:
                current = self.result(current, choose_move(self, current))
                if not self.terminal_test(current):
                    continue
                self.display(current)
                first_player = self.to_move(self.initial)
                return self.utility(current, first_player)

"""ttt类继承Game类"""
class TicTacToe(Game):
    """Tic-tac-toe on an h x v board where k marks in a line win.

    Max (the first player) plays 'X'. A state holds the player to move, a
    cached utility, the list of free squares as (x, y) positions, and the
    board as a dict of {(x, y): Player} entries, where Player is 'X' or 'O'.
    """

    def __init__(self, h=3, v=3, k=3):
        # The board has h rows and v columns; k marks in a line win.
        self.h, self.v, self.k = h, v, k
        # Every square is free at the start; coordinates are 1-based.
        free_squares = []
        for row in range(1, h + 1):
            for col in range(1, v + 1):
                free_squares.append((row, col))
        # Starting position: empty board, 'X' to move, nobody has won.
        self.initial = GameState(to_move='X', utility=0,
                                 board={}, moves=free_squares)

    def actions(self, state):
        """Return the legal moves: every square not yet taken."""
        free_squares = state.moves
        return free_squares

    def result(self, state, move):
        """Return the state that follows ``move`` by the player to move.

        A legal move is placed on a copy of the board and removed from a
        copy of the move list. A move that is not in ``state.moves`` leaves
        the board and the move list as they are. In both cases the turn
        passes to the other player and the utility is recomputed for the
        mover at ``move``.
        """
        mover = state.to_move
        next_player = 'O' if mover == 'X' else 'X'
        if move in state.moves:
            new_board = state.board.copy()
            new_board[move] = mover
            remaining = list(state.moves)
            remaining.remove(move)
        else:
            # Illegal move: nothing is placed on the board.
            new_board, remaining = state.board, state.moves
        return GameState(to_move=next_player,
                         utility=self.compute_utility(new_board, move, mover),
                         board=new_board, moves=remaining)

    def utility(self, state, player):
        """Return the value of ``state`` to ``player``.

        The cached utility is stored from 'X''s point of view (1 for a
        win, -1 for a loss, 0 otherwise), so it is negated for any other
        player. Meant to be called on final states.
        """
        if player == 'X':
            return state.utility
        return -state.utility

    def terminal_test(self, state):
        """A state is terminal if it is won or there are no empty squares."""
        if state.utility != 0:
            return True
        return len(state.moves) == 0

    def display(self, state):
        """Print the board row by row, using '.' for empty squares."""
        cells = state.board
        print("Now board state:")
        for row in range(1, self.h + 1):
            line = ''.join(str(cells.get((row, col), '.')) + ' '
                           for col in range(1, self.v + 1))
            print(line)

    def compute_utility(self, board, move, player):
        """Return 1 if 'X' wins with this move, -1 if 'O' wins, else 0."""
        # Four line directions through the move are checked, in this order.
        directions = ((0, 1), (1, 0), (1, -1), (1, 1))
        if not any(self.k_in_row(board, move, player, step)
                   for step in directions):
            return 0
        return +1 if player == 'X' else -1

    def k_in_row(self, board, move, player, delta_x_y):
        """Return true if ``player`` has a line of at least k through ``move``.

        The line is followed along ``delta_x_y`` and along the opposite
        direction, counting consecutive squares held by ``player``.
        """
        (step_x, step_y) = delta_x_y
        # Both walks start on ``move`` itself, so start at -1 to count
        # that square only once.
        run_length = -1
        for sign in (1, -1):
            cx, cy = move
            while board.get((cx, cy)) == player:
                run_length += 1
                cx, cy = cx + sign * step_x, cy + sign * step_y
        return run_length >= self.k




评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值