import numpy as np
import random
import copy
import time,re
# Action names; a name's position here is the index used for it on the
# last axis of the Q-table.
at = ["up", "down", "left", "right"]
# Integer index of every action, in the same order as `at`.
at1 = list(range(len(at)))
# Lookup from action name to its integer index.
cc = {name: idx for name, idx in zip(at, at1)}
# Zero-initialised Q-table: 6 x 6 grid cells, one value per action.
Qtable = np.zeros((6, 6, len(at)))
def choice(s, qt, epsilon=0.7, actions=None):
    """Pick an action name for state ``s`` with an epsilon-greedy rule.

    With probability ``epsilon`` a uniformly random action is returned;
    otherwise the action whose entry in ``qt[s[0], s[1], :]`` is largest
    is returned.

    Args:
        s: Indexable state; ``s[0]`` and ``s[1]`` index the first two axes
            of ``qt``.
        qt: Q-table indexed as ``qt[s[0], s[1], action_index]``.
        epsilon: Probability of taking a random action. Defaults to 0.7,
            the value that used to be hard-coded.
        actions: Action names ordered by their index on the last axis of
            ``qt``. Defaults to the module-level ``at`` list.

    Returns:
        The chosen action name (an element of ``actions``).
    """
    if actions is None:
        actions = at
    if random.random() < epsilon:
        return random.choice(actions)
    # np.argmax returns the first maximum, so ties (e.g. an all-zero row)
    # resolve to the lowest action index.
    best = int(np.argmax(qt[s[0], s[1], :]))
    return actions[best]
# Move table: action name -> (index into the state, step, wall coordinate).
# A move is blocked when the state already sits on the wall coordinate.
_MOVES = {
    "up": (0, -1, 0),
    "down": (0, 1, 5),
    "left": (1, -1, 0),
}
# "right" is the fallback branch: any action not listed above moves right.
_FALLBACK_MOVE = (1, 1, 5)
# Cell that yields reward 1.
_GOAL_CELL = [5, 5]
# Cells that yield reward -1.
_PENALTY_CELLS = ([2, 2], [2, 4], [3, 1])


def next(s, a):
    """Apply action ``a`` to state ``s`` and return the new state and reward.

    The state is a two-element ``[s0, s1]`` position. "up"/"down" change
    ``s[0]`` by -1/+1 and "left"/"right" change ``s[1]`` by -1/+1. A move
    that starts on the wall coordinate (0 for up/left, 5 for down/right)
    leaves the position unchanged.

    Args:
        s: Current position as a two-element sequence. It is never mutated.
        a: Action name. Anything other than "up", "down" or "left" is
            treated as "right", matching the original fallthrough.

    Returns:
        A ``(s1, r)`` tuple. ``s1`` is always a new list, never ``s``
        itself, so callers may mutate it without affecting ``s``. ``r``
        is 1 when ``s1`` is ``[5, 5]``, -1 when ``s1`` is ``[2, 2]``,
        ``[2, 4]`` or ``[3, 1]``, and 0 otherwise.
    """
    axis, step, wall = _MOVES.get(a, _FALLBACK_MOVE)

    # Copy first: returning `s` itself for blocked moves would alias the
    # caller's state with the returned one.
    s1 = list(s)
    if s[axis] != wall:
        s1[axis] = s[axis] + step

    if s1 == _GOAL_CELL:
        r = 1
    elif s1 in _PENALTY_CELLS:
        r = -1
    else:
        r = 0
    return s1, r
# Quick manual check: next([3, 3], "left") should return ([3, 2], 0).
# z = next([3, 3], "left")
# print(z)
def f(qt):