boardgame.io Gomoku (Part 2): Upgrading the AI

TL;DR: click here for the complete code.

In the previous post we built a simple Gomoku game with boardgame.io + wgo.js + React and used the AI that ships with the engine. That AI is rather weak, though, so let's push it in the direction of AlphaGo and build an AI of our own.

Monte Carlo

We know AlphaGo combines Monte Carlo tree search with neural networks, and the Debug panel reveals that the AI bundled with boardgame.io is also based on a Monte Carlo algorithm. However, if we try to pair its Monte Carlo search with an AlphaGo-style neural network, we find that its tree nodes are missing some of the required information: the algorithm supports neither noise injection, nor prior probabilities, nor weight-based node selection. So we need to build a Monte Carlo tree search of our own.

Based on this analysis, our Monte Carlo implementation needs to provide the following:

  1. A pluggable evaluator, so we can implement both neural-network evaluation and random-rollout evaluation
  2. Noise support
  3. Node selection based on visit weights and a temperature parameter
  4. Virtual loss, so searches can run in parallel; in particular, with a neural-network evaluator we can evaluate several positions in one batch

Create src/MCTS.js and add the following:

import { dirichletK, randomPick } from "./Random";

export function Node(a) {
    return {
        a: a,
        p: 1,
        q: 0,
        w: 0,
        n: 0,
        // null - unexpanded, [] - terminal node, [...] - intermediate node
        children: null
    };
}

function isLeaf(node) {
    return node.n === 0 || node.children === null || node.children.length === 0;
}

// PUCT-style score: the exploitation term q plus a prior-weighted exploration term.
function ucbScore(node, c) {
    return node.q + (c * node.p) / (node.n + 1);
}

function ucbSelectChild(node) {
    const c = 5 * Math.sqrt(node.n); // exploration constant c_puct = 5, scaled by the parent's visit count
    let best = node.children[0];
    let bestScore = ucbScore(best, c);

    for (let i = 1; i < node.children.length; i++) {
        const child = node.children[i];
        const score = ucbScore(child, c);

        if (score > bestScore) {
            best = child;
            bestScore = score;
        }
    }

    return best;
}

// Attach prior probabilities to a node's children; with noise enabled,
// mix in Dirichlet noise as AlphaZero does to encourage exploration.
function applyPrioProb(root, probs, useNoise) {
    if (!useNoise) {
        for (let i = 0; i < root.children.length; ++i) {
            const child = root.children[i];
            child.p = probs[child.a];
        }

        return;
    }

    const dir = dirichletK(root.children.length, 0.03);

    for (let i = 0; i < root.children.length; ++i) {
        const child = root.children[i];
        child.p = dir[i] * 0.25 + 0.75 * probs[child.a];
    }
}

function backprop(path, r) {
    let i = path.length;

    while (i-- > 0) {
        let leaf = path[i];
        leaf.n += 1;
        leaf.w += r;
        leaf.q = leaf.w / leaf.n;
        r = -r;
    }
}

// applyVirtualLoss already incremented n along the path, so only w is updated here.
function backpropAndRevertVirtualLoss(path, r) {
    let i = path.length;

    while (i-- > 0) {
        let leaf = path[i];
        leaf.w += r;
        leaf.q = leaf.w / leaf.n;
        r = -r;
    }
}

// Pretend this path lost once (n goes up, w stays put), steering
// concurrent selections toward other branches.
function applyVirtualLoss(path) {
    let i = path.length;

    while (i-- > 0) {
        let leaf = path[i];
        leaf.n += 1;
        leaf.q = leaf.w / leaf.n;
    }
}

function revertVirtualLoss(path) {
    let i = path.length;

    while (i-- > 0) {
        let leaf = path[i];
        leaf.n -= 1;
        leaf.q = leaf.w / leaf.n;
    }
}

export class MCTS {
    constructor({ evaluator, maxIteration, maxTime, useNoise }) {
        if (!maxIteration && !maxTime)
            throw new Error("maxIteration and maxTime cannot be 0 at same time");
        this._eval = evaluator;
        this._maxIteration = maxIteration;
        this._maxTime = maxTime;
        this._batch = new Set();
        this._batchSize = 8;
        this._useNoise = useNoise;
        this._searching = false;
        this._stop = false;
        this._timer = null;
    }

    async exec(root, state, opts) {
        if (this._searching)
            throw new Error("another searching is in progress!");
        let { maxIteration, maxTime, tao } = {
            maxIteration: this._maxIteration,
            maxTime: this._maxTime,
            tao: 0.001, // temperature τ; a tiny value makes the pick almost deterministic
            ...opts
        };
        if (maxIteration === 0 && maxTime === 0)
            throw new Error(
                "maxIteration and maxTime cannot be 0 at same time"
            );

        if (maxTime > 0) {
            this._timer = setTimeout(() => {
                this.stop();
            }, maxTime);
        }

        if (!maxIteration) maxIteration = Number.MAX_SAFE_INTEGER;
        this._searching = true;

        for (let it = 0; it < maxIteration && !this._stop; ++it)
            await this._step(root, state.clone());

        await this._flush();
        this._searching = false;
        this._stop = false;
        clearTimeout(this._timer);

        let probs = getActionProbs(root, tao);

        return {
            bestChild: randomPick(root.children, probs),
            actionProbs: probs.reduce((acc, p, i) => {
                acc[root.children[i].a] = p;
                return acc;
            }, {})
        };
    }

    stop() {
        if (!this._searching) return;
        this._stop = true;
        clearTimeout(this._timer);
    }

    async _step(root, st) {
        const path = [root];
        let leaf = root;

        while (!isLeaf(leaf)) {
            leaf = ucbSelectChild(leaf);
            path.push(leaf);

            st.makeMove(leaf.a);
        }

        const gameover = st.gameover();
        if (gameover) {
            leaf.children = [];
            let score = 0;
            if (gameover.winner === st.currentPlayer) score = 1;
            else if (gameover.draw) score = 0;
            else score = -1;

            return backprop(path, -score);
        } else if (leaf.children === null) { // expand early (note: parallel selections may collide on the same node; not handled here)
            let actions = st.legalMoves();
            leaf.children = actions.map((a) => Node(a));
        }
        
        applyVirtualLoss(path);

        let job = {
            state: st,
            node: leaf,
            path: path
        };
        if (this._batch.add(job).size === this._batchSize) {
            await this._flush();
        }
    }

    async _flush() {
        if (this._batch.size === 0) return;
        const list = Array.from(this._batch.values());
        const vals = await this._eval(list.map((b) => b.state));

        for (let i = 0; i < list.length; i++) {
            const info = list[i];
            const leaf = info.node;

            applyPrioProb(leaf, vals[i].probs, this._useNoise);
            backpropAndRevertVirtualLoss(info.path, -vals[i].value);
        }

        this._batch.clear();
    }
}

// π(a) ∝ n(a)^(1/τ): turn visit counts into move probabilities, sharpened
// by the temperature (τ→0 approaches argmax; τ=1 is proportional to visits).
function getActionProbs(root, tao) {
    tao = 1 / tao;
    let maxv = root.children.reduce((x, c) => Math.max(x, c.n), 0);
    let sum = 0;
    let probs = root.children.map((child) => {
        const p = Math.pow(child.n / maxv, tao);
        sum += p;
        return p;
    });

    for (let i = 0; i < probs.length; i++) probs[i] /= sum;

    return probs;
}
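
This file imports dirichletK and randomPick from src/Random.js, which the post doesn't list (it's in the complete code linked above). To keep this section self-contained, here is a minimal sketch matching how MCTS.js uses them: dirichletK(k, alpha) draws one sample from a symmetric Dirichlet(alpha) distribution over k categories, and randomPick(items, probs) picks an element according to a probability vector. The real file may differ in details.

// src/Random.js: a minimal sketch; the actual file in the linked code may differ.

// Standard normal via the Box–Muller transform.
function gaussian() {
    let u = 0;
    while (u === 0) u = Math.random();
    return Math.sqrt(-2 * Math.log(u)) * Math.cos(2 * Math.PI * Math.random());
}

// Gamma(shape, 1) via Marsaglia–Tsang; shapes below 1 use the boosting trick.
function sampleGamma(shape) {
    if (shape < 1)
        return sampleGamma(shape + 1) * Math.pow(Math.random(), 1 / shape);
    const d = shape - 1 / 3;
    const c = 1 / Math.sqrt(9 * d);
    for (;;) {
        let x, v;
        do {
            x = gaussian();
            v = 1 + c * x;
        } while (v <= 0);
        v = v * v * v;
        const u = Math.random();
        if (u < 1 - 0.0331 * x * x * x * x) return d * v;
        if (Math.log(u) < 0.5 * x * x + d * (1 - v + Math.log(v))) return d * v;
    }
}

// One draw from a symmetric Dirichlet(alpha) over k categories:
// sample k gammas and normalize.
export function dirichletK(k, alpha) {
    const g = [];
    let sum = 0;
    for (let i = 0; i < k; i++) {
        const x = sampleGamma(alpha);
        g.push(x);
        sum += x;
    }
    // Guard against underflow when alpha is tiny (e.g. 0.03).
    if (sum === 0) return g.map(() => 1 / k);
    return g.map((x) => x / sum);
}

// Pick items[i] with probability probs[i]; probs is assumed to sum to 1.
export function randomPick(items, probs) {
    let r = Math.random();
    for (let i = 0; i < items.length; i++) {
        r -= probs[i];
        if (r <= 0) return items[i];
    }
    return items[items.length - 1];
}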

The exec function takes two parameters so that old nodes can be reused. If every search rebuilt the tree from the current state, all of the information from the previous search (already-expanded nodes, visit counts, and so on) would be thrown away.
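
For example, a driver loop might look like the following hypothetical sketch (assuming an MCTS instance named mcts, with Node and State as defined in this post and BOARD_SIZE from src/Consts.js), where the chosen child becomes the next root so its subtree carries over into the next search:

// Hypothetical sketch of reusing the search tree across moves.
let root = Node(null);
const state = new State({ boardSize: BOARD_SIZE });

async function aiMove() {
    const { bestChild } = await mcts.exec(root, state);
    state.makeMove(bestChild.a);
    // Keep the chosen subtree: its visit counts and expanded children
    // give the next search a head start.
    root = bestChild;
}

The Game class we build later does exactly this in its _advance method, for both players' moves.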

To keep future training decoupled from boardgame.io, we also abstract our own State, with a small set of methods every implementation must provide:

  1. legalMoves() enumerates all legal moves, each represented as a number
  2. makeMove() plays a move and swaps the current player
  3. gameover() returns the game result
  4. currentPlayer is the player to move
  5. clone() clones the current state

The New State

Next, let's create the new state.

Create src/State.js and add the following:

export const PLAYER_BLACK = 1;
export const PLAYER_WHITE = -1;

// Scan [start, end) with the given stride, looking for five in a row of clr.
function checkWinnerByLine(stones, clr, start, end, stride) {
    let cnt = 0;

    for (; cnt < 5 && start !== end; start += stride) {
        if (stones[start] === clr) cnt++;
        else cnt = 0;
    }

    return cnt >= 5;
}

export function checkWinnerByMove(boardSize, stones, p) {
    const _min = 4;
    const c = stones[p];
    if (c === 0) return 0;
    let x0 = p % boardSize; // distance to the left edge
    let y0 = Math.floor(p / boardSize); // distance to the top edge
    let x1 = boardSize - 1 - x0; // distance to the right edge
    let y1 = boardSize - 1 - y0; // distance to the bottom edge
    let start = 0,
        end = 0,
        stride = 1;
    // Horizontal: clamp the window to at most 4 cells on each side of p.
    x0 = Math.min(x0, _min);
    x1 = Math.min(x1, _min);
    start = p - x0;
    end = p + x1 + 1;
    if (checkWinnerByLine(stones, c, start, end, 1)) return c;

    stride = boardSize; // vertical
    y0 = Math.min(y0, _min);
    y1 = Math.min(y1, _min);
    start = p - y0 * stride;
    end = p + (y1 + 1) * stride;
    if (checkWinnerByLine(stones, c, start, end, stride)) return c;

    stride = boardSize + 1; // diagonal (top-left to bottom-right)
    let ma = Math.min(x0, y0),
        mb = Math.min(x1, y1);
    start = p - ma * stride;
    end = p + (mb + 1) * stride;
    if (checkWinnerByLine(stones, c, start, end, stride)) return c;

    stride = boardSize - 1; // anti-diagonal (top-right to bottom-left)
    ma = Math.min(x1, y0);
    mb = Math.min(x0, y1);
    start = p - ma * stride;
    end = p + (mb + 1) * stride;
    if (checkWinnerByLine(stones, c, start, end, stride)) return c;

    return 0;
}

export class State {
    constructor({ boardSize }) {
        this.boardSize = boardSize;
        this.stones = new Array(boardSize * boardSize).fill(0);
        this.currentPlayer = PLAYER_BLACK;
        this.moveHistory = [];
        this._gameover = null;
    }

    clone() {
        let newObj = new State({
            boardSize: this.boardSize
        });
        newObj.copy(this);
        return newObj;
    }

    copy(src) {
        if (src.boardSize !== this.boardSize)
            throw new Error("incompatible board");

        for (let i = 0; i < src.stones.length; i++) {
            this.stones[i] = src.stones[i];
        }

        this.currentPlayer = src.currentPlayer;

        for (let i = 0; i < src.moveHistory.length; i++) {
            this.moveHistory[i] = src.moveHistory[i];
        }

        this.moveHistory.length = src.moveHistory.length;
        this._gameover = src._gameover;
    }

    makeMove(mov) {
        if (this._gameover) return this;
        this.stones[mov] = this.currentPlayer;
        this.moveHistory.push(mov);
        this.currentPlayer = -this.currentPlayer;
        return this;
    }

    legalMoves() {
        let moves = [];
        for (let i = 0; i < this.stones.length; i++) {
            if (this.stones[i] === 0) moves.push(i);
        }
        return moves;
    }

    gameover() {
        if (this._gameover || this.moveHistory.length === 0)
            return this._gameover;
        const mov = this.moveHistory[this.moveHistory.length - 1];
        const winner = checkWinnerByMove(this.boardSize, this.stones, mov);
        if (winner !== 0) {
            this._gameover = { winner };
        } else if (this.moveHistory.length === this.stones.length) {
            this._gameover = { draw: true };
        }
        return this._gameover;
    }
}
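
As a quick sanity check (a hypothetical snippet, not one of the project files): Black plays five in a row along the top edge while White answers on the second row, and gameover() reports Black as the winner.

import { State } from "./State";

const s = new State({ boardSize: 15 });
// Players alternate automatically: Black takes 0..4 on row 0,
// White takes 15..18 on row 1.
[0, 15, 1, 16, 2, 17, 3, 18, 4].forEach((m) => s.makeMove(m));
console.log(s.gameover()); // { winner: 1 }, i.e. PLAYER_BLACK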

The Evaluator

Next, the evaluator. An evaluator estimates the win rate of the current state together with a probability for each legal move, and it should support evaluating several states at once. We don't have a neural network yet, so we'll start with a Monte Carlo evaluator that simply plays random moves until the game ends. This rollout evaluator combined with the search above amounts to classic Monte Carlo tree search.

Create src/Evals.js and paste in the following:

import { BOARD_SIZE } from "./Consts";

export function MCEvaluator() {
    return async function evaluator(ss) {
        // Yield to the event loop so long evaluations don't freeze the UI.
        // Note that setImmediate is not a standard browser API; depending on
        // the build setup, a setTimeout(resolve, 0) fallback may be needed.
        await new Promise((resolve) => setImmediate(resolve));
        return ss.map((s) => {
            const boardSize = BOARD_SIZE;
            const acts = s.legalMoves();
            const v = randomPlay(s.clone(), acts);
            const p = new Array(boardSize * boardSize).fill(0);

            for (let i = 0; i < acts.length; i++) p[acts[i]] = 1 / acts.length;

            return {
                value: v,
                probs: p
            };
        });
    };
}

// Play random moves to the end of the game; return the result from the
// perspective of the player to move in st.
function randomPlay(st, acts) {
    const p = st.currentPlayer;
    let gameover = st.gameover();

    for (let i = 0; i < acts.length && !gameover; i++) {
        // Fisher–Yates-style pick: draw a random move from the remaining ones.
        const j = i + Math.floor(Math.random() * (acts.length - i));
        const x = acts[j];
        acts[j] = acts[i];
        acts[i] = x;

        gameover = st.makeMove(x).gameover();
    }

    if (p === gameover.winner) return 1;
    else if (gameover.draw) return 0;
    return -1;
}
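
Because an evaluator receives an array of states and returns one { value, probs } result per state, swapping in a neural network later only requires honoring the same contract. A purely hypothetical sketch (model and its predict method are placeholders, not a real API):

// Hypothetical sketch of a future neural-network evaluator.
export function NNEvaluator(model) {
    return async function evaluator(ss) {
        // One batched forward pass evaluates all leaf positions at once;
        // this is where the virtual-loss batching in MCTS pays off.
        const outputs = await model.predict(ss.map((s) => s.stones));
        // Assume the model returns one { value, probs } per position, with
        // value from the perspective of s.currentPlayer, matching MCEvaluator.
        return outputs;
    };
}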

Putting It Together

With the Monte Carlo search and the new state in hand, let's wire everything into a new game with the AI built in. We define a new Game class that ties together the boardgame.io Client, the MCTS, and so on; each Game instance represents a single play-through.

Rename src/Game.js to src/GameDef.js, then create a new src/Game.js and add the following:

import { Client } from "boardgame.io/client";
import { MCTS, Node } from "./MCTS";
import { MCEvaluator } from "./Evals";
import { State } from "./State";
import { BOARD_SIZE } from "./Consts";
import { Gomoku } from "./GameDef";

export class Game {
    constructor({ playAs }) {
        this._client = Client({ game: Gomoku });
        this._mcts = new MCTS({
            evaluator: MCEvaluator(),
            useNoise: false,
            maxIteration: 3200 * 2
        });
        this._root = Node(null);
        this._state = new State({ boardSize: BOARD_SIZE });
        this._playAs = playAs;
        this._started = false;
        this._stopped = false;
        this._aiPlayer = { 0: "1", 1: "0" }[this._playAs]; // the AI takes the seat we don't
        this._currentPlayer = this.getState().ctx.currentPlayer;
        this._stateId = this.getState()._stateID - 1;
    }

    get currentPlayer() {
        return this._currentPlayer;
    }

    get playAs() {
        return this._playAs;
    }

    getState() {
        return this._client.getState();
    }

    putStone(id) {
        if (!this._started || this._stopped) return;
        if (this._playAs && this._playAs !== this.currentPlayer) return;
        this._client.moves.putStone(id);
    }

    subscribe(f) {
        return this._client.subscribe(f);
    }

    start() {
        if (this._started || this._stopped) return;

        this._client.subscribe((s) => {
            this._currentPlayer = s.ctx.currentPlayer;

            // Replay the moves recorded since the last update into our own
            // State and search tree.
            let moves = (s.deltalog || [])
                .filter((log) => log.action.type === "MAKE_MOVE")
                .map((log) => log.action.payload.args[0]);
            for (let mov of moves) {
                this._advance(mov);
            }

            if (s.ctx.gameover) return;
            if (s._stateID === this._stateId) return;

            this._stateId = s._stateID;

            if (this._aiPlayer === s.ctx.currentPlayer) {
                this._mcts.exec(this._root, this._state).then((result) => {
                    if (this._stopped) return;

                    this._client.moves.putStone(result.bestChild.a);
                });
            }
        });

        this._client.start();

        this._started = true;
    }

    stop() {
        this._client.stop();
        this._mcts.stop();
        this._stopped = true;
    }

    _advance(mov) {
        this._state.makeMove(mov);
        let root = this._root;
        // Reuse the matching subtree when it exists; otherwise start a fresh node.
        if (!root.children) root = Node(mov);
        else if (root.children.length === 0)
            throw new Error("try to make move on terminal node");
        else root = root.children.find((c) => c.a === mov);
        this._root = root;
    }
}

Next, let's rework src/App.js:

//import { Client } from "boardgame.io/react";

import React, {
    useState,
    useMemo,
    useEffect,
    useReducer
} from "react";

import { GomokuBoard } from "./Board";
import { Game } from "./Game";

//const App = Client({ game: Gomoku });

function App() {
    const [gameId, newGame] = useReducer((id) => id + 1, 1);
    const game = useMemo(() => {
        let game = new Game({
            playAs: Math.random() > 0.5 ? "0" : "1"
        });
        return game;
    }, [gameId]);
    const [state, setState] = useState(game.getState());

    const moves = useMemo(
        () => ({
            putStone: (id) => game.putStone(id)
        }),
        [game]
    );

    useEffect(() => {
        game.start();
        let unsub = game.subscribe(setState);
        return () => {
            unsub();
            game.stop();
        };
    }, [game]);

    const currentPlayer = game.currentPlayer;
    let status = "Your move";
    const gameover = state.ctx.gameover;

    if (gameover) {
        if (gameover.winner === "0") status = "Black wins";
        else if (gameover.winner === "1") status = "White wins";
        else status = "Draw";
    } else if (game.playAs && game.playAs !== currentPlayer) {
        status = "Thinking...";
    }

    return (
        <>
            <div>
                <button onClick={newGame}>Restart</button>
            </div>
            <GomokuBoard {...state} moves={moves} />
            <div>
                <p>{status}</p>
            </div>
        </>
    );
}

export default App;

That's everything in place. Let's see it in action.

Note that our new Game class synchronizes state on its own and triggers the AI automatically, which conflicts with the log rewind feature of the Debug panel. The Debug panel's AI features should also be used with care, since they may conflict with the Game class as well.

Summary

In this post we implemented our own Monte Carlo tree search and our own State, and kept that State in sync with boardgame.io's. On top of that search we built a Gomoku game with an AI opponent. With this infrastructure in place, we can move step by step toward an AlphaGo-style Gomoku AI. Next time we'll add a little extra fun to the game, and that bit of fun is also an important step on the road toward AlphaGo-style Gomoku.
