Learning Python by Writing Code: CS188 Q-Learning

# qlearningAgents.py
# ------------------
# Licensing Information:  You are free to use or extend these projects for
# educational purposes provided that (1) you do not distribute or publish
# solutions, (2) you retain this notice, and (3) you provide clear
# attribution to UC Berkeley, including a link to http://ai.berkeley.edu.
#
# Attribution Information: The Pacman AI projects were developed at UC Berkeley.
# The core projects and autograders were primarily created by John DeNero
# (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu).
# Student side autograding was added by Brad Miller, Nick Hay, and
# Pieter Abbeel (pabbeel@cs.berkeley.edu).


from game import *
from learningAgents import ReinforcementAgent
from featureExtractors import *

import random, util, math

class QLearningAgent(ReinforcementAgent):
    """
      Q-Learning Agent

      Functions you should fill in:
        - computeValueFromQValues
        - computeActionFromQValues
        - getQValue
        - getAction
        - update

      Instance variables you have access to
        - self.epsilon (exploration prob)
        - self.alpha (learning rate)
        - self.discount (discount rate)

      Functions you should use
        - self.getLegalActions(state)
          which returns legal actions for a state
    """
    def __init__(self, **args):
        "You can initialize Q-values here..."
        ReinforcementAgent.__init__(self, **args)

        "*** YOUR CODE HERE ***"
        # Store Q-values in a util.Counter keyed by (state, action);
        # unseen keys default to 0, which serves as the initial Q-value.
        self.q_values = util.Counter()

    def getQValue(self, state, action):
        """
          Returns Q(state,action)
          Should return 0.0 if we have never seen a state
          or the Q node value otherwise
        """
        "*** YOUR CODE HERE ***"
        # Return the stored Q-value; util.Counter yields 0.0 for
        # (state, action) pairs that have never been updated.
        return self.q_values[(state, action)]

    def computeValueFromQValues(self, state):
        """
          Returns max_action Q(state,action)
          where the max is over legal actions.  Note that if
          there are no legal actions, which is the case at the
          terminal state, you should return a value of 0.0.
        """
        "*** YOUR CODE HERE ***"
        # Collect all legal actions; a terminal state has none.
        legalActions = self.getLegalActions(state)
        if len(legalActions) == 0:
            return 0.0
        tmp = util.Counter()
        for action in legalActions:
            tmp[action] = self.getQValue(state, action)
        # util.Counter is an enhanced dict: argMax() returns the key with the
        # largest value, so this gives the highest Q-value among legal actions.
        return tmp[tmp.argMax()]

    def computeActionFromQValues(self, state):
        """
          Compute the best action to take in a state.  Note that if there
          are no legal actions, which is the case at the terminal state,
          you should return None.
        """
        "*** YOUR CODE HERE ***"
        # Get all legal actions; if there are none we fall through and return None.
        actions = self.getLegalActions(state)
        best_action = None
        # Scan the actions and keep the one with the largest Q-value.
        max_val = float('-inf')
        for action in actions:
            q_value = self.q_values[(state, action)]
            if max_val < q_value:
                max_val = q_value
                best_action = action
        return best_action

    def getAction(self, state):
        """
          Compute the action to take in the current state.  With
          probability self.epsilon, we should take a random action and
          take the best policy action otherwise.  Note that if there are
          no legal actions, which is the case at the terminal state, you
          should choose None as the action.

          HINT: You might want to use util.flipCoin(prob)
          HINT: To pick randomly from a list, use random.choice(list)
        """
        # Pick Action
        legalActions = self.getLegalActions(state)
        action = None
        "*** YOUR CODE HERE ***"
        # At a terminal state there are no legal actions, so return None.
        if len(legalActions) == 0:
            return action
        # With probability epsilon take a random action (exploration), otherwise
        # follow the greedy policy.  This epsilon-greedy behaviour also covers Q7.
        explore = util.flipCoin(self.epsilon)
        if explore:
            return random.choice(legalActions)
        else:
            return self.getPolicy(state)

    def update(self, state, action, nextState, reward):
        """
          The parent class calls this to observe a
          state = action => nextState and reward transition.
          You should do your Q-Value update here

          NOTE: You should never call this function,
          it will be called on your behalf
        """
        "*** YOUR CODE HERE ***"
        # Step 1: fetch the old Q-value and rename the hyperparameters so the
        # update expressions below stay readable.
        old_Q = self.getQValue(state, action)
        a = self.alpha
        r = reward
        g = self.discount
        if nextState:
            self.q_values[(state, action)] = (1 - a) * old_Q + a * (r + g * self.getValue(nextState))
        else:
            self.q_values[(state, action)] = (1 - a) * old_Q + a * r
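
    # For reference, the branch above implements the standard tabular Q-learning
    # update (this notation is added here, it is not part of the assignment text):
    #     Q(s,a) <- (1 - alpha) * Q(s,a) + alpha * (r + gamma * max_a' Q(s',a'))
    # where max_a' Q(s',a') is getValue(nextState), and the terminal-state branch
    # simply drops the discounted future-value term.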

    def getPolicy(self, state):
        return self.computeActionFromQValues(state)

    def getValue(self, state):
        return self.computeValueFromQValues(state)

class PacmanQAgent(QLearningAgent):
    "Exactly the same as QLearningAgent, but with different default parameters"

    def __init__(self, epsilon=0.05, gamma=0.8, alpha=0.2, numTraining=0, **args):
        """
        These default parameters can be changed from the pacman.py command line.
        For example, to change the exploration rate, try:
            python pacman.py -p PacmanQLearningAgent -a epsilon=0.1

        alpha       - learning rate
        epsilon     - exploration rate
        gamma       - discount factor
        numTraining - number of training episodes, i.e. no learning after these many episodes
        """
        args['epsilon'] = epsilon
        args['gamma'] = gamma
        args['alpha'] = alpha
        args['numTraining'] = numTraining
        self.index = 0  # This is always Pacman
        QLearningAgent.__init__(self, **args)

    def getAction(self, state):
        """
        Simply calls the getAction method of QLearningAgent and then
        informs parent of action for Pacman.  Do not change or remove this
        method.
        """
        action = QLearningAgent.getAction(self, state)
        self.doAction(state, action)
        return action
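
    # A typical way to exercise this agent, taken from the CS188 reinforcement
    # project (the exact flags may vary between versions of the assignment):
    #     python pacman.py -p PacmanQAgent -x 2000 -n 2010 -l smallGrid
    # trains for 2000 episodes and then plays 10 games with learning disabled.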

class ApproximateQAgent(PacmanQAgent):
    """
      ApproximateQLearningAgent

      You should only have to overwrite getQValue
      and update.  All other QLearningAgent functions
      should work as is.
    """
    def __init__(self, extractor='IdentityExtractor', **args):
        self.featExtractor = util.lookup(extractor, globals())()
        PacmanQAgent.__init__(self, **args)
        self.weights = util.Counter()

    def getWeights(self):
        return self.weights

    def getQValue(self, state, action):
        """
          Should return Q(state,action) = w * featureVector
          where * is the dotProduct operator
        """
        "*** YOUR CODE HERE ***"
        # Extract the feature vector for this (state, action) pair.
        features = self.featExtractor.getFeatures(state, action)
        # Take the dot product of the feature vector and the weight vector
        # to obtain the approximate Q-value.
        total = 0
        for i in features:
            total += features[i] * self.weights[i]
        return total
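
    # Put differently, the approximate Q-value is linear in the features
    # (a restatement of the docstring above, not new assignment material):
    #     Q(s,a) = sum_i w_i * f_i(s,a)
    # with f_i taken from the feature extractor and w_i from self.weights.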

    def update(self, state, action, nextState, reward):
        """
          Should update your weights based on transition
        """
        "*** YOUR CODE HERE ***"
        # Compute the correction term 'diff' from the formula, then use it to
        # update every weight in the weight vector.
        diff = (reward + self.discount * self.getValue(nextState)) - self.getQValue(state, action)
        features = self.featExtractor.getFeatures(state, action)
        for i in features:
            self.weights[i] = self.weights[i] + self.alpha * diff * features[i]
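
    # The loop above is the approximate Q-learning weight update:
    #     diff = (r + gamma * max_a' Q(s',a')) - Q(s,a)
    #     w_i  <- w_i + alpha * diff * f_i(s,a)
    # so every feature active in (s,a) is nudged in proportion to the TD error.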

    def final(self, state):
        "Called at the end of each game."
        # call the super-class final method
        PacmanQAgent.final(self, state)

        # did we finish training?
        if self.episodesSoFar == self.numTraining:
            # you might want to print your weights here for debugging
            "*** YOUR CODE HERE ***"
            # Print a short training summary; adapt this debug output as you like.
            print("Approximate Q-Learning Summary")
            print("Learning rate (alpha)     : {0}".format(self.alpha))
            print("Discount rate (gamma)     : {0}".format(self.discount))
            print("Exploration rate (epsilon): {0}".format(self.epsilon))
            print("Training episodes         : {0}".format(self.numTraining))
            print("======= Feature Weights =======")
            for i in self.weights:
                print("{0}: {1}".format(i, self.weights[i]))
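
To see the tabular update in isolation, here is a minimal, self-contained sketch of the same epsilon-greedy selection and Q-value update on a made-up two-action problem. The names (q_values, legal_actions, q_update) and the toy transition are mine, not part of the CS188 code; alpha, gamma and epsilon play the same roles as self.alpha, self.discount and self.epsilon above.

import random
from collections import defaultdict

q_values = defaultdict(float)          # (state, action) -> Q estimate, defaults to 0.0
alpha, gamma, epsilon = 0.5, 0.9, 0.1  # learning rate, discount, exploration rate

def legal_actions(state):
    return ['stay', 'move']            # every state offers the same two actions here

def epsilon_greedy(state):
    # With probability epsilon explore; otherwise exploit the current estimates.
    if random.random() < epsilon:
        return random.choice(legal_actions(state))
    return max(legal_actions(state), key=lambda a: q_values[(state, a)])

def q_update(state, action, next_state, reward):
    # Same rule as QLearningAgent.update:
    #   Q(s,a) <- (1 - alpha) * Q(s,a) + alpha * (r + gamma * max_a' Q(s',a'))
    best_next = max(q_values[(next_state, a)] for a in legal_actions(next_state))
    sample = reward + gamma * best_next
    q_values[(state, action)] = (1 - alpha) * q_values[(state, action)] + alpha * sample

# One simulated transition: moving from state 'A' to state 'B' yields reward 1.
q_update('A', 'move', 'B', 1.0)
print(q_values[('A', 'move')])         # 0.5 after a single update
print(epsilon_greedy('A'))             # usually 'move', occasionally a random action

In the full project, the approximate agent is usually trained with a feature extractor, for example python pacman.py -p ApproximateQAgent -a extractor=SimpleExtractor -x 50 -n 60 -l mediumGrid (flags as in the CS188 reinforcement assignment; check your project version).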
