# Code listing
import numpy as np
import tkinter as tk
import time
import random
class TreasureHuntEnv:
def __init__(self, grid_size=10):
    """Create the environment and bring it into its initial state.

    Args:
        grid_size: Side length of the square grid. Defaults to 10, the
            value that used to be hard-coded here.

    Raises:
        ValueError: If ``grid_size`` is smaller than 2. ``reset`` places
            the agent at ``(0, 0)`` and the treasure at
            ``(grid_size - 1, grid_size - 1)``, so on a 1x1 grid both
            would land on the same cell.
    """
    if grid_size < 2:
        raise ValueError(f"grid_size must be at least 2, got {grid_size!r}")
    self.grid_size = grid_size
    # The four movement actions, in their original order.
    self.actions = ['up', 'down', 'left', 'right']
    # reset() reads self.grid_size, so it must run after the size is stored.
    self.reset()
def reset(self):
"""重置环境,返回初始状态"""
self.grid = np.zeros((self.grid_size, self.grid_size), dtype=int)
self.agent_pos = (0, 0)
self.grid[self.agent_pos] = 1
self.treasure_pos = (self.grid_size - 1, self.grid_size - 1)
self.grid[self.treasure_pos] = 2
for _ in range(10):
x, y = random.randint(0, self.grid_size - 1), random.randint(0, self.grid_size - 1)
if (x, y) not in [self.agent_pos, self.treasure_pos]:
self.grid[x, y] = 3
for _ in range(5):
x, y = random.randint(0, self.grid_size - 1