Python TensorFlow module: count_nonzero() example source code

def build_reward(self):

    with tf.name_scope('permutations'):

        # Reorder the input according to the predicted tour
        self.permutations = tf.stack(
            [tf.tile(tf.expand_dims(tf.range(self.batch_size, dtype=tf.int32), 1), [1, self.max_length + 2]),
             self.positions], 2)
        self.ordered_input_ = tf.gather_nd(self.input_, self.permutations)
        # [batch_size, seq_length+1, features] to [features, seq_length+1, batch_size]
        # Note: +1 because end = start = depot
        self.ordered_input_ = tf.transpose(self.ordered_input_, [2, 1, 0])

        # Ordered coordinates
        ordered_x_ = self.ordered_input_[0]  # [seq_length+1, batch_size]
        delta_x2 = tf.transpose(tf.square(ordered_x_[1:] - ordered_x_[:-1]), [1, 0])  # [batch_size, seq_length], delta_x**2
        ordered_y_ = self.ordered_input_[1]  # [seq_length+1, batch_size]
        delta_y2 = tf.transpose(tf.square(ordered_y_[1:] - ordered_y_[:-1]), [1, 0])  # [batch_size, seq_length], delta_y**2

        # Ordered time-window (TW) constraints
        self.ordered_tw_mean_ = tf.transpose(self.ordered_input_[2][:-1], [1, 0])   # [seq_length, batch_size] to [batch_size, seq_length]
        self.ordered_tw_width_ = tf.transpose(self.ordered_input_[3][:-1], [1, 0])  # [seq_length, batch_size] to [batch_size, seq_length]
        self.ordered_tw_open_ = self.ordered_tw_mean_ - self.ordered_tw_width_ / 2
        self.ordered_tw_close_ = self.ordered_tw_mean_ + self.ordered_tw_width_ / 2

    with tf.name_scope('environment'):

        # Tour length: Euclidean distance between consecutive cities, depot --> ... --> depot
        inter_city_distances = tf.sqrt(delta_x2 + delta_y2)  # sqrt(delta_x**2 + delta_y**2), [batch_size, seq_length]
        self.distances = tf.reduce_sum(inter_city_distances, axis=1)  # [batch_size]
        variable_summaries('tour_length', self.distances, with_max_min=True)

        # Arrival time at each city if there were no constraints
        # Note: -10 so the vehicle is on time at the depot (t_mean centered)
        self.time_at_cities = (1 / self.speed) * tf.cumsum(inter_city_distances, axis=1, exclusive=True) - 10  # [batch_size, seq_length]

        # Apply the time-window constraints at each city
        self.constrained_delivery_time = []
        cumul_lateness = 0
        for time_open, delivery_time in zip(tf.unstack(self.ordered_tw_open_, axis=1), tf.unstack(self.time_at_cities, axis=1)):  # unstack along seq_length
            delayed_delivery = delivery_time + cumul_lateness
            cumul_lateness += tf.maximum(time_open - delayed_delivery, tf.zeros([self.batch_size]))  # if you have to wait... wait (impacts later states)
            self.constrained_delivery_time.append(delivery_time + cumul_lateness)
        self.constrained_delivery_time = tf.stack(self.constrained_delivery_time, 1)

        # Delay perceived by the client (the client doesn't care whether the driver waits)
        self.delay = tf.maximum(self.constrained_delivery_time - self.ordered_tw_close_ - 0.0001, tf.zeros([self.batch_size, self.max_length + 1]))
        self.delay = tf.count_nonzero(self.delay, 1)  # number of late deliveries per tour
        variable_summaries('delay', tf.cast(self.delay, tf.float32), with_max_min=True)

        # Reward from tour length & delay: length + beta * sqrt(number of late deliveries)
        self.reward = tf.cast(self.distances, tf.float32) + self.beta * tf.sqrt(tf.cast(self.delay, tf.float32))
        variable_summaries('reward', self.reward, with_max_min=True)
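
The key call for this page is tf.count_nonzero, which collapses the per-city delay matrix into a per-tour count of late deliveries. Below is a minimal, self-contained sketch (not from the original repository) showing that behavior on a hypothetical delay tensor, assuming TensorFlow 1.x where tf.count_nonzero and tf.Session are available (in TensorFlow 2.x the equivalent is tf.math.count_nonzero):

import tensorflow as tf

# Hypothetical delays for a batch of 2 tours with 3 cities each;
# a positive entry means the delivery at that city was late.
delay = tf.constant([[0.0, 2.5, 0.0],
                     [0.0, 0.0, 0.0]])

# Count the nonzero (late) entries along axis 1: one count per tour.
late_deliveries = tf.count_nonzero(delay, 1)  # int64 tensor of shape [2]

with tf.Session() as sess:
    print(sess.run(late_deliveries))  # [1 0]

In build_reward the same pattern is applied to self.delay, so the reward penalizes the number of violated time windows rather than the total lateness.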
