# Reinventing the wheel is not very meaningful, but typing it out by hand is still instructive.
import tensorflow as tf
import random
from collections import namedtuple
import math
import numpy as np
def huber_loss(y_true, y_pred, clip_delta=1.0):
    '''
    Huber loss: a regression loss function, used here for the DQN algorithm.

    Where the absolute error is below ``clip_delta`` the loss is
    ``0.5 * error ** 2``; elsewhere it is
    ``clip_delta * (|error| - 0.5 * clip_delta)``.

    :param y_true: ground-truth values
    :param y_pred: predicted values
    :param clip_delta: hyperparameter; the absolute-error threshold at which
        the loss switches from the quadratic to the linear form
    :return: loss values selected element-wise via ``tf.where`` (no reduction)
    '''
    K = tf.keras.backend
    residual = y_true - y_pred
    magnitude = K.abs(residual)
    quadratic = 0.5 * K.square(residual)
    linear = clip_delta * (magnitude - 0.5 * clip_delta)
    # Quadratic branch for small residuals, linear branch otherwise.
    return tf.where(magnitude < clip_delta, quadratic, linear)
def huber_loss_mean(y_true, y_pred, clip_delta=1.0):
    '''
    Huber loss reduced with ``tf.keras.backend.mean``.

    :param y_true: ground-truth values
    :param y_pred: predicted values
    :param clip_delta: threshold forwarded unchanged to ``huber_loss``
    :return: mean of the values returned by ``huber_loss``
    '''
    per_element = huber_loss(y_true, y_pred, clip_delta)
    return tf.keras.backend.mean(per_element)
class BasicPool(object):
def __init__(self, capacity):
'''
基于python内置环境,开辟数据池空间。理论上更快。
:param capacity: 数据池容量
'''
self.capacity = capacity
self.memory = []
self.position = 0
self.Transition = namedtuple('Transition',('state', 'action', 'next_state', 'reward'))
def push(self, *args):
'''
存入任意参数
'''
if len(self.memory) < self.capacity:
self.memory.append(None)
self.memory[self.position] = self.Transition(*args)
self.positi