import pygame
import numpy as np
import random
import sys
# 定义迷宫环境
class Maze:
    """Grid-world maze: a 10x10 board with wall cells, a start and a goal.

    Positions are ``(row, col)`` tuples. ``self.maze`` holds 0 for a free
    cell and 1 for a wall.
    """

    def __init__(self):
        self.size = 10
        self.start = (0, 0)
        self.goal = (9, 9)
        grid = np.zeros((self.size, self.size))
        grid[4, 2:7] = 1  # horizontal wall segment
        grid[2, 1] = 1  # single wall cell
        self.maze = grid
        self.current_position = self.start

    def reset(self):
        """Move the agent back to the start cell and return that position."""
        self.current_position = self.start
        return self.current_position

    def manhattan_distance(self):
        """Return the Manhattan distance from the current position to the goal."""
        return sum(
            abs(here - target)
            for here, target in zip(self.current_position, self.goal)
        )

    def step(self, action):
        """Apply one move and return ``(position, reward, done)``.

        Action codes: 0 = up, 1 = right, 2 = down, 3 = left. Any other value
        leaves the candidate cell equal to the current cell.

        A move that leaves the board or enters a wall keeps the position
        unchanged, yields reward -100 and ends the episode. Reaching the goal
        yields reward 100 and ends the episode. Every other move costs -1.
        """
        row, col = self.current_position
        # (d_row, d_col) indexed by action code.
        offsets = ((-1, 0), (0, 1), (1, 0), (0, -1))
        for code, (d_row, d_col) in enumerate(offsets):
            if action == code:
                row, col = row + d_row, col + d_col
                break

        inside = 0 <= row < self.size and 0 <= col < self.size
        # The bounds test short-circuits, so the grid is only indexed when
        # the candidate cell is on the board.
        if not inside or self.maze[row, col] != 0:
            return self.current_position, -100, True

        self.current_position = (row, col)
        reached_goal = self.current_position == self.goal
        return self.current_position, (100 if reached_goal else -1), reached_goal

    def render(self, screen):
        """Draw every cell as a 40x40 square on ``screen`` and flip the display.

        Colour priority: goal (red) over agent (green) over free (white) /
        wall (black).
        """
        # NOTE(review): the listing had lost its indentation; the display
        # flip is assumed to happen once, after all cells are drawn - confirm.
        for row in range(self.size):
            for col in range(self.size):
                cell = (row, col)
                if cell == self.goal:
                    color = (255, 0, 0)
                elif cell == self.current_position:
                    color = (0, 255, 0)
                elif self.maze[row, col] == 0:
                    color = (255, 255, 255)
                else:
                    color = (0, 0, 0)
                pygame.draw.rect(screen, color, (col * 40, row * 40, 40, 40))
        pygame.display.flip()
# Q-learning
class QLearning:
    """Tabular Q-learning agent for a square grid environment.

    The Q-table has shape ``(env.size, env.size, 4)``: one value per grid
    cell and per action index 0-3.
    """

    def __init__(self, env):
        """Create a zero-initialised Q-table sized from ``env.size``."""
        self.env = env
        self.q_table = np.zeros((env.size, env.size, 4))
        self.gamma = 0.9  # discount factor
        self.epsilon = 0.1  # probability of taking a random action
        self.alpha = 0.1  # learning rate

    def select_action(self, state):
        """Return an action index (a plain ``int``) chosen epsilon-greedily.

        With probability ``epsilon`` a uniformly random action in 0-3 is
        returned; otherwise the action with the highest Q-value for
        ``state``. Ties resolve to the lowest index.
        """
        if random.random() < self.epsilon:
            return random.randint(0, 3)
        x, y = state
        # Convert numpy's integer scalar so both branches return the same type.
        return int(np.argmax(self.q_table[x, y]))

    def update(self, state, action, reward, next_state, done=False):
        """Apply one Q-learning update for the observed transition.

        Args:
            state: ``(row, col)`` the action was taken from.
            action: action index that was taken.
            reward: reward received for the transition.
            next_state: ``(row, col)`` reached after the action.
            done: True when the transition ended the episode. A terminal
                transition has no future return, so the target is just
                ``reward``. The default (False) keeps the bootstrapped
                target ``reward + gamma * max Q(next_state)``.
        """
        x, y = state
        if done:
            # Do not bootstrap past the end of an episode. This matters for
            # terminal moves that leave the agent in place, where next_state
            # equals state and the old target fed back the state's own values.
            future_rewards = 0.0
        else:
            nx, ny = next_state
            future_rewards = np.max(self.q_table[nx, ny])
        td_target = reward + self.gamma * future_rewards
        self.q_table[x, y, action] += self.alpha * (td_target - self.q_table[x, y, action])


# Main program
def main():
    """Train a Q-learning agent on the maze for 10000 episodes.

    Opens a 400x400 pygame window. Episodes from 8000 onward are drawn and
    throttled by ``clock.tick(10)``; earlier episodes run without drawing.
    Closing the window quits pygame and exits the process.
    """
    pygame.init()
    screen = pygame.display.set_mode((400, 400))
    clock = pygame.time.Clock()
    maze = Maze()
    agent = QLearning(maze)
    for episode in range(10000):
        state = maze.reset()
        done = False
        while not done:
            action = agent.select_action(state)
            next_state, reward, done = maze.step(action)
            # Pass the terminal flag so the final transition of an episode
            # is not bootstrapped from next_state's Q-values.
            agent.update(state, action, reward, next_state, done)
            state = next_state
            # NOTE(review): the listing had lost its indentation; event
            # handling and drawing are assumed to run once per step - confirm.
            for event in pygame.event.get():
                if event.type == pygame.QUIT:
                    pygame.quit()
                    sys.exit()
            if episode >= 8000:
                screen.fill((0, 0, 0))
                maze.render(screen)
                clock.tick(10)
# Run the training demo only when this file is executed as a script.
if __name__ == "__main__":
    main()
# 运行效果: