Google dopamine 搜索框架算法 -py 语言-立哥开发

最新推荐文章于 2024-07-11 16:57:30 发布

上海交大果粒人工智能学者全栈工程师

最新推荐文章于 2024-07-11 16:57:30 发布

阅读量163

点赞数

分类专栏：网络爬虫代码 文章标签：tensorflow docker 深度学习 maven 分布式

本文链接：https://blog.csdn.net/weixin_45806384/article/details/108694063

版权

网络爬虫代码专栏收录该内容

3 篇文章 0 订阅

订阅专栏

# Copyright 2020 Jacky Zong. All rights reserved.
#coding=utf-8

"""Tests for dopamine.agents.rainbow.rainbow_agent.
"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from dopamine.agents.dqn import dqn_agent
from dopamine.agents.rainbow import rainbow_agent
from dopamine.discrete_domains import atari_lib
from dopamine.utils import test_utils
import numpy as np
import tensorflow as tf

class ProjectDistributionTest(tf.test.TestCase):
    """Argument-validation tests for `rainbow_agent.project_distribution`.

    Each scenario is exercised twice: once with `tf.constant` inputs, where
    the tests expect a `ValueError` while the op is being built, and once
    with shape-less placeholders, where the tests expect a
    `tf.errors.InvalidArgumentError` when the graph is run.
    """

    def _assert_feed_raises_invalid_argument(
            self, supports, weights, target_support, expected_regex=None):
        """Feeds the values through placeholders and expects a runtime failure.

        Args:
          supports: nested list fed as the `supports` argument.
          weights: nested list fed as the `weights` argument.
          target_support: scalar or (nested) list fed as `target_support`.
          expected_regex: optional pattern the error message must match;
            when None only the exception type is checked.
        """
        # Placeholders are created with shape=None so nothing can be
        # checked statically; the error has to surface inside sess.run.
        supports_ph = tf.compat.v1.placeholder(tf.float32, None)
        weights_ph = tf.compat.v1.placeholder(tf.float32, None)
        target_support_ph = tf.compat.v1.placeholder(tf.float32, None)
        # NOTE(review): validate_args=True presumably turns on the runtime
        # assertions these tests rely on -- confirm in rainbow_agent.
        projection = rainbow_agent.project_distribution(
            supports_ph, weights_ph, target_support_ph, validate_args=True)
        feed_dict = {
            supports_ph: supports,
            weights_ph: weights,
            target_support_ph: target_support,
        }
        if expected_regex is None:
            expectation = self.assertRaises(tf.errors.InvalidArgumentError)
        else:
            expectation = self.assertRaisesRegex(
                tf.errors.InvalidArgumentError, expected_regex)
        # cached_session() replaces the deprecated test_session().
        with self.cached_session() as sess:
            tf.compat.v1.global_variables_initializer().run()
            with expectation:
                sess.run(projection, feed_dict=feed_dict)

    def testInconsistentSupportsAndWeightsParameters(self):
        """Rows of 5 supports vs. rows of 4 weights raise ValueError."""
        supports = tf.constant(
            [[0, 2, 4, 6, 8], [3, 4, 5, 6, 7]], dtype=tf.float32)
        weights = tf.constant(
            [[0.1, 0.2, 0.3, 0.2], [0.1, 0.2, 0.3, 0.2]], dtype=tf.float32)
        target_support = tf.constant([4, 5, 6, 7, 8], dtype=tf.float32)
        with self.assertRaisesRegex(ValueError, 'are incompatible'):
            rainbow_agent.project_distribution(
                supports, weights, target_support)

    def testInconsistentSupportsAndWeightsWithPlaceholders(self):
        """Same supports/weights mismatch, detected when the graph runs."""
        self._assert_feed_raises_invalid_argument(
            supports=[[0, 2, 4, 6, 8], [3, 4, 5, 6, 7]],
            weights=[[0.1, 0.2, 0.3, 0.2], [0.1, 0.2, 0.3, 0.2]],
            target_support=[4, 5, 6, 7, 8],
            expected_regex='assertion failed')

    def testInconsistentSupportsAndTargetSupportParameters(self):
        """Rows of 5 supports vs. a 3-element target raise ValueError."""
        supports = tf.constant(
            [[0, 2, 4, 6, 8], [3, 4, 5, 6, 7]], dtype=tf.float32)
        weights = tf.constant(
            [[0.1, 0.2, 0.3, 0.2, 0.2], [0.1, 0.2, 0.3, 0.2, 0.2]],
            dtype=tf.float32)
        target_support = tf.constant([4, 5, 6], dtype=tf.float32)
        with self.assertRaisesRegex(ValueError, 'are incompatible'):
            rainbow_agent.project_distribution(
                supports, weights, target_support)

    def testInconsistentSupportsAndTargetSupportWithPlaceholders(self):
        """Same supports/target mismatch, detected when the graph runs."""
        self._assert_feed_raises_invalid_argument(
            supports=[[0, 2, 4, 6, 8], [3, 4, 5, 6, 7]],
            weights=[[0.1, 0.2, 0.3, 0.2, 0.2], [0.1, 0.2, 0.3, 0.2, 0.2]],
            target_support=[4, 5, 6],
            expected_regex='assertion failed')

    def testZeroDimensionalTargetSupport(self):
        """A scalar target_support raises ValueError at build time."""
        supports = tf.constant(
            [[0, 2, 4, 6, 8], [3, 4, 5, 6, 7]], dtype=tf.float32)
        weights = tf.constant(
            [[0.1, 0.2, 0.3, 0.2, 0.2], [0.1, 0.2, 0.3, 0.2, 0.2]],
            dtype=tf.float32)
        target_support = tf.constant(3, dtype=tf.float32)
        with self.assertRaisesRegex(ValueError, 'Index out of range'):
            rainbow_agent.project_distribution(
                supports, weights, target_support)

    def testZeroDimensionalTargetSupportWithPlaceholders(self):
        """A scalar target_support fed at run time is rejected."""
        self._assert_feed_raises_invalid_argument(
            supports=[[0, 2, 4, 6, 8], [3, 4, 5, 6, 7]],
            weights=[[0.1, 0.2, 0.3, 0.2, 0.2], [0.1, 0.2, 0.3, 0.2, 0.2]],
            target_support=3)

    def testMultiDimensionalTargetSupport(self):
        """A rank-2 target_support raises ValueError at build time."""
        supports = tf.constant(
            [[0, 2, 4, 6, 8], [3, 4, 5, 6, 7]], dtype=tf.float32)
        weights = tf.constant(
            [[0.1, 0.2, 0.3, 0.2, 0.2], [0.1, 0.2, 0.3, 0.2, 0.2]],
            dtype=tf.float32)
        target_support = tf.constant([[3]], dtype=tf.float32)
        with self.assertRaisesRegex(ValueError, 'out of bounds'):
            rainbow_agent.project_distribution(
                supports, weights, target_support)

    def testMultiDimensionalTargetSupportWithPlaceholders(self):
        """A rank-2 target_support fed at run time is rejected."""
        self._assert_feed_raises_invalid_argument(
            supports=[[0, 2, 4, 6, 8], [3, 4, 5, 6, 7]],
            weights=[[0.1, 0.2, 0.3, 0.2, 0.2], [0.1, 0.2, 0.3, 0.2, 0.2]],
            target_support=[[3]])

上海交大果粒人工智能学者全栈工程师

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
Google dopamine 搜索框架算法 -py 语言-立哥开发

# Copy Right 2020 Jacky Zong. All rights reserved.#coding=utf-8"""Tests for dopamine.agents.rainbow.rainbow_agent."""from __future__ import absolute_importfrom __future__ import divisionfrom __future__ import print_functionfrom dopamine.agents.dq
复制链接

扫一扫