[BZOJ2246][SDOI2011]迷宫探险（状压&概率DP）

最新推荐文章于 2019-06-06 16:22:03 发布

xyz32768

最新推荐文章于 2019-06-06 16:22:03 发布

阅读量436

点赞数

分类专栏： BZOJ、UOJ、LOJ 文章标签： dp 搜索

本文链接：https://blog.csdn.net/xyz32768/article/details/78118221

版权

BZOJ、UOJ、LOJ 专栏收录该内容

341 篇文章 1 订阅

订阅专栏

1、DP模型

用 $3$ 进制数表示陷阱的状态， $0$ 表示无害， $1$ 表示有害， $2$ 表示未知。可建立DP模型：
$f[x][y][S][h]$ 表示从 $(x,y)$ 开始，当前陷阱的状态为 $S$ ，血量为 $h$ ，活着走出迷宫的概率。使用记忆化搜索。

2、边界&转移

边界为：
$f[x][y][S][0]=0$
当 $(x,y)$ 为终点时 $f[x][y][S][h]=1$
转移为（以下 $(tx,ty)$ 为 $(x,y)$ 走一步能到达的格子，且 $(tx,ty)$ 不为墙）：
当 $(tx,ty)$ 为平地，起点，终点或无害陷阱时，
$f[x][y][S][h]=\max(f[x][y][S][h],f[tx][ty][S][h])$
当 $(tx,ty)$ 为有害陷阱时，
$f[x][y][S][h]=\max(f[x][y][S][h],f[tx][ty][S][h-1])$
当 $(tx,ty)$ 为未知陷阱时，设陷阱编号为 $tt$ ，则
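$f[x][y][S][h]=\max\bigl(f[x][y][S][h],\ f[tx][ty][S-3^{tt-1}][h-1]\cdot g[S][tt]+f[tx][ty][S-2\cdot 3^{tt-1}][h]\cdot(1-g[S][tt])\bigr)$ 。
其中 $S-3^{tt-1}$ 表示把第 $tt$ 位由 $2$ 改为 $1$（确认有害，血量减 $1$ ）， $S-2\cdot 3^{tt-1}$ 表示把第 $tt$ 位由 $2$ 改为 $0$（确认无害，血量不变）。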

3、关于g数组

上面 $g[S][tt]$ 表示当前状态为 $S$ 时陷阱 $tt$ 有害的概率。
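预处理 $g$ ：枚举每个状态 $S$ ，再枚举 $0$ 到 $2^K-1$ 的每一种陷阱有害情况 $i$ （ $i$ 的第 $j$ 位为 $1$ 表示陷阱 $j$ 有害）。只保留与 $S$ 中已知陷阱不矛盾的 $i$ ，把它们的权值累加为总和；对 $S$ 中每个未知陷阱 $tt$ ，再把其中陷阱 $tt$ 有害的那些 $i$ 的权值累加进 $g[S][tt]$ ，最后将 $g[S][tt]$ 除以总和。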

4、总结

结合「SCOI2008奖励关」「JLOI2013卡牌游戏」两题可以得出，像这样有限制条件，且在转移的过程中限制条件不断变化的概率DP，一般模型为：
$f[state]$ 表示从 $state$ 状态到达目标状态的最大/小概率/期望。

5、代码

#include <cmath>
#include <cstdio>
#include <cstring>
#include <iostream>
#include <algorithm>
using namespace std;
// Reads the next decimal integer (optionally prefixed with '-') from stdin.
// Every character before the first digit or '-' is skipped, and the single
// character that terminates the digit run is consumed as well.
// Returns 0 when the input ends before a digit or '-' is found; previously the
// result of getchar() was stored in a char and EOF was never checked, so the
// skip loop never terminated on truncated input.
inline int read() {
    int c = getchar();  // keep the int so EOF stays distinct from every character
    while (c != EOF && c != '-' && (c < '0' || c > '9')) c = getchar();
    if (c == EOF) return 0;
    const bool negative = (c == '-');
    int res = negative ? 0 : c - '0';
    // EOF compares below '0', so it also ends this loop.
    for (c = getchar(); c >= '0' && c <= '9'; c = getchar())
        res = res * 10 + (c - '0');
    return negative ? -res : res;
}
// Reads characters from stdin until one of the recognised map symbols appears
// ('A'..'E', '@', '$', '.', '#') and returns it; anything else is skipped.
// Returns '\0' when the input ends first; previously EOF was never checked and
// the loop never terminated on truncated input.
inline char get() {
    for (;;) {
        const int c = getchar();  // int, so EOF is distinguishable
        if (c == EOF) return '\0';
        const bool trapLetter = (c >= 'A' && c <= 'E');
        if (trapLetter || c == '@' || c == '$' || c == '.' || c == '#')
            return static_cast<char>(c);
    }
}
// Array capacities.
const int N = 35;   // side length of the grid arrays (cells are indexed from 1)
const int C = 267;  // bound on the number of base-3 trap states
const int R = 45;   // capacity of pb
const int V = 10;   // capacity of pw; also the last dimension of f, gw and vis

// Values read in main: grid rows, grid columns, trap count, starting hit points.
int m, n, K, H;

// Row / column offsets of the four neighbouring cells.
int dx[] = {1, 0, -1, 0};
int dy[] = {0, -1, 0, 1};

int pw[V];  // pw[i] = 3^i, filled in main
int pb[R];  // pb[i] = input weight of harmful-trap bitmask i, filled in main

char s[N][N];          // the maze; s[i][j] is the symbol at row i, column j
double f[N][N][C][V];  // memo table of DP, indexed [x][y][S][h]
double gw[C][V];       // gw[S][j], filled by init
bool vis[N][N][C][V];  // marks DP states that have already been entered
// Raises `a` to `b` when `b` is larger and returns the resulting value of `a`.
// The function is declared to return double but previously had no return
// statement, which is undefined behaviour even when the result is ignored.
double chkmax(double &a, double b) {
    if (a < b) a = b;
    return a;
}
// Returns the base-3 digit of S that belongs to trap x (traps are numbered
// from 1, so trap x uses the digit with weight pw[x - 1]).
int cyx(int S, int x) {
    const int shifted = S / pw[x - 1];
    return shifted % 3;
}
// Returns S with the base-3 digit of trap x replaced by y.
int lpf(int S, int x, int y) {
    const int unit = pw[x - 1];
    const int current = cyx(S, x);
    // Drop the old digit's contribution and add the new one in a single step.
    return S + (y - current) * unit;
}
// Fills gw[S][j] for every state S in [0, pw[K]) and every trap j in 1..K.
// For each bitmask of harmful traps that agrees with the digits of S that are
// not 2, its weight pb[mask] is added to a running total; it is also added to
// gw[S][j] for every trap j whose digit in S is 2 and whose bit is set in the
// mask. Each gw[S][j] is finally divided by the total.
void init() {
    const int maskCount = 1 << K;
    for (int state = 0; state < pw[K]; ++state) {
        int total = 0;
        for (int mask = 0; mask < maskCount; ++mask) {
            // A mask is kept only if it matches every digit of the state that is not 2.
            bool consistent = true;
            for (int t = 1; t <= K && consistent; ++t) {
                const int digit = cyx(state, t);
                if (digit != 2 && digit != ((mask >> (t - 1)) & 1))
                    consistent = false;
            }
            if (!consistent) continue;

            total += pb[mask];
            for (int t = 1; t <= K; ++t) {
                const bool unknown = (cyx(state, t) == 2);
                const bool harmful = ((mask >> (t - 1)) & 1) != 0;
                if (unknown && harmful) gw[state][t] += pb[mask];
            }
        }
        for (int t = 1; t <= K; ++t) gw[state][t] /= total;
    }
}
// Memoised search for f[x][y][S][h]: per the accompanying write-up, the best
// probability of leaving the maze alive when standing on cell (x, y) with
// trap-knowledge state S (base 3, one digit per trap: 0 harmless, 1 harmful,
// 2 unknown) and h hit points left.
// Returns the value stored in f[x][y][S][h].
// NOTE(review): a state is marked in vis before its neighbours are explored, so
// a recursive call that loops back to a state still being computed receives
// its partial value; the result may depend on visiting order — confirm this is
// acceptable for the intended inputs.
double DP(int x, int y, int S, int h) {
    // State already entered (finished or still in progress): reuse what is stored.
    if (vis[x][y][S][h]) return f[x][y][S][h];
    // Standing on the exit cell: value 1. This is checked before the h == 0 case.
    if (s[x][y] == '@') return vis[x][y][S][h] = 1, f[x][y][S][h] = 1;
    // No hit points left: value 0.
    if (h == 0) return vis[x][y][S][h] = 1, f[x][y][S][h] = 0;
    vis[x][y][S][h] = 1; int i;
    for (i = 0; i < 4; i++) {
        // tt is the 1-based trap number when the neighbour holds a letter
        // starting at 'A'; every use below is guarded by 1 <= tt <= K.
        // s[tx][ty] is read before the bounds check — assumes the neighbour
        // index always stays inside s; TODO confirm against input limits.
        int tx = x + dx[i], ty = y + dy[i], tt = s[tx][ty] - 'A' + 1;
        // Skip neighbours outside the m x n grid and wall cells.
        if (tx < 1 || tx > m || ty < 1 || ty > n || s[tx][ty] == '#')
            continue;
        // '.', '@', '$' or a trap whose digit is 0: move with S and h unchanged.
        if (s[tx][ty] == '.' || s[tx][ty] == '@' || s[tx][ty] == '$' ||
            (tt >= 1 && tt <= K && cyx(S, tt) == 0))
                chkmax(f[x][y][S][h], DP(tx, ty, S, h));
        // Trap whose digit is 1: moving there costs one hit point.
        if (tt >= 1 && tt <= K && cyx(S, tt) == 1)
            chkmax(f[x][y][S][h], DP(tx, ty, S, h - 1));
        // Trap whose digit is 2: weight the two outcomes by gw[S][tt] —
        // digit becomes 1 and one hit point is lost, or digit becomes 0 and
        // h is kept.
        if (tt >= 1 && tt <= K && cyx(S, tt) == 2)
            chkmax(f[x][y][S][h], DP(tx, ty, lpf(S, tt, 1), h - 1) * gw[S][tt]
                + DP(tx, ty, lpf(S, tt, 0), h) * (1.0 - gw[S][tt]));
    }
    return f[x][y][S][h];
}
// Reads the maze dimensions, trap count and starting hit points, then the
// grid and the 2^K bitmask weights; builds the powers of three, runs init(),
// and prints DP from the start cell with every trap unknown, to three decimals.
int main() {
    m = read(); n = read(); K = read(); H = read();

    // -1 marks "no start cell seen". Previously these were left uninitialised
    // and then used as array indices when the grid contained no '$'.
    int Sx = -1, Sy = -1;
    for (int i = 1; i <= m; i++)
        for (int j = 1; j <= n; j++) {
            s[i][j] = get();
            if (s[i][j] == '$') { Sx = i; Sy = j; }
        }

    for (int i = 0; i < (1 << K); i++) pb[i] = read();

    pw[0] = 1;
    for (int i = 1; i <= K; i++) pw[i] = pw[i - 1] * 3;
    init();

    if (Sx < 0) {
        // No start cell: there is nowhere to search from, so report 0.
        printf("%.3lf\n", 0.0);
        return 0;
    }
    // pw[K] - 1 has every base-3 digit equal to 2, i.e. all traps unknown.
    printf("%.3lf\n", DP(Sx, Sy, pw[K] - 1, H));
    return 0;
}

xyz32768

关注

0
点赞
踩
1

收藏

觉得还不错? 一键收藏
0
评论
[BZOJ2246][SDOI2011]迷宫探险（状压&概率DP）

1、DP模型：用 $3$ 进制数表示陷阱的状态， $0$ 表示无害， $1$ 表示有害， $2$ 表示未知。可建立DP模型： $f[x][y][S][h]$ 表示从 $(x,y)$ 开始，当前陷阱的状态为 $S$ ，血量为 $h$ ，活着走出迷宫的概率。使用记忆化搜索。2、边界&转移：边界为 $f[x][y][S][0]=0$ ；当 $(x,y)$ 为终点时 $f[x][y][S][h]=1$
复制链接

扫一扫

专栏目录