1. The TensorFlowTrainable class
```python
import numpy as np
import tensorflow as tf  # TensorFlow 1.x API (tf.get_variable)


class TensorFlowTrainable(object):
    def __init__(self):
        self.parameters = []

    def get_weights(self, dim_in, dim_out, name, trainable=True):
        shape = (dim_out, dim_in)
        weightsInitializer = tf.constant_initializer(
            self.truncated_normal(shape=shape, stddev=0.01, mean=0.))
        # Note: the original passed trainable=True here unconditionally;
        # forwarding the argument keeps it consistent with the bookkeeping below.
        weights = tf.get_variable(
            initializer=weightsInitializer, shape=shape, trainable=trainable, name=name)
        if trainable:
            self.parameters.append(weights)
        return weights

    def get_4Dweights(self, filter_height, filter_width, in_channels, out_channels, name, trainable=True):
        shape = (filter_height, filter_width, in_channels, out_channels)
        weightsInitializer = tf.constant_initializer(
            self.truncated_normal(shape=shape, stddev=0.01, mean=0.))
        weights = tf.get_variable(
            initializer=weightsInitializer, shape=shape, trainable=trainable, name=name)
        if trainable:
            self.parameters.append(weights)
        return weights

    def get_biases(self, dim_out, name, trainable=True):
        shape = (dim_out, 1)
        initialBiases = tf.constant_initializer(np.zeros(shape))
        biases = tf.get_variable(
            initializer=initialBiases, shape=shape, trainable=trainable, name=name)
        if trainable:
            self.parameters.append(biases)
        return biases

    @staticmethod
    def truncated_normal(shape, stddev, mean=0.):
        # Draw normal samples, then clamp anything outside
        # [mean - 2*stddev, mean + 2*stddev] to the boundary
        # (unlike tf.truncated_normal, which re-draws out-of-range samples).
        rand_init = np.random.normal(loc=mean, scale=stddev, size=shape)
        inf_mask = rand_init < (mean - 2 * stddev)
        rand_init = rand_init * np.abs(1 - inf_mask) + inf_mask * (mean - 2 * stddev)
        sup_mask = rand_init > (mean + 2 * stddev)
        rand_init = rand_init * np.abs(1 - sup_mask) + sup_mask * (mean + 2 * stddev)
        return rand_init
```
@staticmethod
A static method can be called on the class itself, without instantiating it (it also works on an instance).
Note, then, that the self.truncated_normal() used by the methods above is not tf.truncated_normal(), the TensorFlow normal-distribution function I first took it for (which is why the call is self. rather than tf., despite the identical name).
This method takes shape and stddev (the shape and the standard deviation) and returns an array of shape shape drawn from a truncated normal distribution.
As for the other methods: get_weights returns truncated-normal weights of shape=(dim_out, dim_in), get_4Dweights returns truncated-normal weights of shape=(filter_height, filter_width, in_channels, out_channels), and get_biases returns a zero-initialized bias of shape=(dim_out, 1).
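A quick sanity check of the clamping behaviour (a sketch, assuming the class above is in scope; the bounds follow from mean ± 2·stddev):

```python
import numpy as np

samples = TensorFlowTrainable.truncated_normal(shape=(1000,), stddev=0.01)
# Every sample is clamped into [mean - 2*stddev, mean + 2*stddev] = [-0.02, 0.02]
print(samples.min() >= -0.02, samples.max() <= 0.02)  # True True
```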
2. The LSTMCell class
```python
class LSTMCell(TensorFlowTrainable):
    def __init__(self, num_units, **kwargs):
        super(LSTMCell, self).__init__()
        self._num_units = num_units  # number of hidden units
        # dim_in is 2 * num_units because the input and the previous
        # hidden state are concatenated into H in process().
        self.w_i = self.get_weights(dim_in=2 * self._num_units, dim_out=self._num_units, name="w_i")  # input gate weights
        self.w_f = self.get_weights(dim_in=2 * self._num_units, dim_out=self._num_units, name="w_f")  # forget gate weights
        self.w_o = self.get_weights(dim_in=2 * self._num_units, dim_out=self._num_units, name="w_o")  # output gate weights
        self.w_c = self.get_weights(dim_in=2 * self._num_units, dim_out=self._num_units, name="w_c")  # candidate (cell input) weights
        self.b_i = self.get_biases(dim_out=self._num_units, name="b_i")  # input gate bias
        self.b_f = self.get_biases(dim_out=self._num_units, name="b_f")  # forget gate bias
        self.b_o = self.get_biases(dim_out=self._num_units, name="b_o")  # output gate bias
        self.b_c = self.get_biases(dim_out=self._num_units, name="b_c")  # candidate bias
        # Initial cell state: a non-trainable zero vector.
        self.c = [self.get_biases(dim_out=self._num_units, name="c", trainable=False)]

    def initialize_something(self, input):
        # Build a (1, batch_size) vector of ones from the input:
        # transpose, unstack the first row, expand dims, then 1 + 0 * x.
        self.batch_size_vector = 1 + 0 * tf.expand_dims(tf.unstack(tf.transpose(input, [1, 0]))[0], 0)
        # Initial hidden state, broadcast across the batch.
        self.h = [self.get_biases(dim_out=self._num_units, name="h", trainable=False) * self.batch_size_vector]

    def process(self, input, **kwargs):
        # Merge the current input with the previous step's hidden state.
        H = tf.concat([tf.transpose(input, perm=[1, 0]), self.h[-1]], 0)
        i = tf.sigmoid(x=tf.add(tf.matmul(self.w_i, H), self.b_i))  # input gate
        f = tf.sigmoid(x=tf.add(tf.matmul(self.w_f, H), self.b_f))  # forget gate
        o = tf.sigmoid(x=tf.add(tf.matmul(self.w_o, H), self.b_o))  # output gate
        c = f * self.c[-1] + i * tf.tanh(x=tf.add(tf.matmul(self.w_c, H), self.b_c))  # new cell state
        # Original code: h = o * tf.tanh(x=self.c[-1]); that applies tanh to
        # the *previous* cell state, while the standard LSTM uses the new c.
        h = o * tf.tanh(x=c)
        self.c.append(c)
        self.h.append(h)

    @property
    def features(self):
        return self.h[-1]  # hidden state of the most recent step
```
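For reference, process implements the standard LSTM step (writing $H_t$ for the concatenation of the transposed input with the previous hidden state $h_{t-1}$, and $\odot$ for element-wise multiplication):

$$
\begin{aligned}
i_t &= \sigma(W_i H_t + b_i), \qquad
f_t = \sigma(W_f H_t + b_f), \qquad
o_t = \sigma(W_o H_t + b_o) \\
c_t &= f_t \odot c_{t-1} + i_t \odot \tanh(W_c H_t + b_c) \\
h_t &= o_t \odot \tanh(c_t)
\end{aligned}
$$

The last line is where the original code diverged: it used $c_{t-1}$ instead of $c_t$ inside the tanh.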
tf.transpose(input, [dimension_1, dimension_2, ..., dimension_n]): here [1, 0] swaps dimensions 0 and 1.
tf.stack() joins tensors, and tf.unstack() is its inverse, splitting a tensor apart.
stack joins two matrices along a new axis, which distinguishes it from tf.concat.
For example, joining two matrices of shape=(4, 3):
concat along axis=0 gives shape=(8, 3); along axis=1, shape=(4, 6).
stack along axis=0 gives shape=(2, 4, 3); along axis=1, shape=(4, 2, 3); along axis=2, shape=(4, 3, 2).
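A quick sketch verifying these shapes (static shapes only, so no session is needed):

```python
import tensorflow as tf

a = tf.ones((4, 3))
b = tf.ones((4, 3))

print(tf.concat([a, b], axis=0).shape)  # (8, 3)
print(tf.concat([a, b], axis=1).shape)  # (4, 6)
print(tf.stack([a, b], axis=0).shape)   # (2, 4, 3)
print(tf.stack([a, b], axis=1).shape)   # (4, 2, 3)
print(tf.stack([a, b], axis=2).shape)   # (4, 3, 2)
```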
Tracing shapes through process, with m = batch size and n = num_units:
input.shape=(m, n)
H.shape=(2n, m)
i.shape=(n, m)
c.shape=(n, m)
h.shape=(n, m)
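A hypothetical end-to-end check of these shapes (a sketch under TF 1.x; note the input dimension must equal num_units for the matmul in process to line up):

```python
tf.reset_default_graph()
cell = LSTMCell(num_units=8)                   # n = 8
x = tf.placeholder(tf.float32, shape=(4, 8))   # (m, n) with batch size m = 4
cell.initialize_something(input=x)             # h[0] broadcast to (n, m)
cell.process(input=x)                          # one LSTM step
print(cell.features.shape)                     # (8, 4), i.e. (n, m)
```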
The @property decorator
The decorator turns get/set methods into attribute-style access:
```python
class Student(object):
    def __init__(self, name, score):
        self.name = name
        self.__score = score

    def get_score(self):
        return self.__score

    def set_score(self, score):
        if score < 0 or score > 100:
            raise ValueError('invalid score')
        self.__score = score
```
→
```python
class Student(object):
    def __init__(self, name, score):
        self.name = name
        self.__score = score

    @property
    def score(self):
        return self.__score

    @score.setter
    def score(self, score):
        if score < 0 or score > 100:
            raise ValueError('invalid score')
        self.__score = score
```
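A hypothetical usage, showing that the property version reads like plain attribute access while still validating:

```python
s = Student('Alice', 90)
print(s.score)  # 90, via the getter
s.score = 75    # assignment goes through the setter's validation
s.score = 101   # raises ValueError: invalid score
```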
For details, see the @property decorator tutorial.
Python super:
See the article "Python: 你不知道的 super".
Summary
- In fact, super has no intrinsic connection to the parent class: super(cls, inst) returns the class that follows cls in inst's MRO (method resolution order) list.
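A minimal sketch of that point: on an instance of D below, super(B, self) resolves to C (the class after B in D's MRO), not to B's parent A:

```python
class A(object):
    def hello(self):
        print('A.hello')

class B(A):
    def hello(self):
        print('B.hello')
        super(B, self).hello()  # next class after B in type(self).__mro__

class C(A):
    def hello(self):
        print('C.hello')
        super(C, self).hello()

class D(B, C):
    pass

print([k.__name__ for k in D.__mro__])  # ['D', 'B', 'C', 'A', 'object']
D().hello()  # prints B.hello, C.hello, A.hello in turn
```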