下面是一个简单的马尔可夫博弈的 Python 代码：
import numpy as np
def play_game(T, reward, start_state=0):
# 初始化状态
state = start_state
total_reward = 0
# 循环T次
for i in range(T):
# 选择下一步的状态
state = np.random.choice([0, 1], p=[0.5, 0.5])
# 根据当前状态计算收益
total_rewa