Learning Theano: scan loops

\(1. Scan\)

  1. A general form of recurrence, which can be used for looping
  2. Reduction and map (looping over the leading dimension) are special cases of \(scan\)
  3. You \(scan\) a function along an input sequence, producing an output at each time step
  4. \(scan\) can see the previous \(k\) time steps of your function
  5. Given an initial value \(z=0\), a \(sum()\) over a list can be computed by scanning the function \(z+x(i)\) over that list (a minimal sketch follows this list)
  6. Usually a \(for\) loop can be expressed as a \(scan()\); \(scan\) is the closest thing \(Theano\) has to a loop
  7. Advantages of expressing loops with \(scan\):
    • The number of iterations becomes part of the symbolic graph
    • Minimizes \(GPU\) transfers (if a \(GPU\) is used)
    • Executes slightly faster than a \(for\) loop in \(Python\) around compiled Theano functions
    • Can lower overall memory usage by detecting the actual amount of memory needed
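
As mentioned in item 5, here is a minimal sketch of that running-sum recurrence \(z + x(i)\) written with \(scan\) (this block is not from the original post, and the names compute_cumsum and x_i are my own):

import theano
import theano.tensor as T
import numpy as np
X = T.vector("X")
# z starts at 0; at each step scan computes z + x(i), i.e. a running sum
results, updates = theano.scan(lambda x_i, z: z + x_i, sequences = X, outputs_info = T.zeros_like(X[0]))
compute_cumsum = theano.function(inputs = [X], outputs = results)
x = np.arange(1, 6, dtype = theano.config.floatX)
print(compute_cumsum(x))  # running sums; the last entry is the sum of the list
print(np.cumsum(x))       # NumPy check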

\(Scan\) example: compute \(tanh(x(t).dot(W) + b)\)

import theano
import theano.tensor as T
import numpy as np
X = T.matrix("X")
W = T.matrix("W")
b_sym = T.vector("b_sym")
results, updates = theano.scan(lambda v: T.tanh(T.dot(v, W) + b_sym), sequences = X)
compute_elementwise = theano.function(inputs = [X, W, b_sym], outputs = results)
x = np.eye(2, dtype = theano.config.floatX)
w = np.ones((2, 2), dtype = theano.config.floatX)
b = np.ones((2), dtype = theano.config.floatX)
b[1] = 2
print('x: ', x)
print('w: ', w)
print('b: ', b)
print(compute_elementwise(x, w, b))
print(np.tanh(x.dot(w) + b))
#####print
x:  [[ 1.  0.]
 [ 0.  1.]]
w:  [[ 1.  1.]
 [ 1.  1.]]
b:  [ 1.  2.]
[[ 0.96402758  0.99505475]
 [ 0.96402758  0.99505475]]
[[ 0.96402758  0.99505475]
 [ 0.96402758  0.99505475]]
#####

\(Scan\) example: compute the sequence \(x(t) = tanh(x(t-1).dot(W) + y(t).dot(U) + p(T-t).dot(V))\)

import theano
import theano.tensor as T
import numpy as np
X = T.vector('X')
W = T.matrix('W')
U = T.matrix('U')
Y = T.matrix('Y')
V = T.matrix('V')
P = T.matrix('P')
results, updates = theano.scan(lambda y, p, x_tm1: T.tanh(T.dot(x_tm1, W) + T.dot(y, U) + T.dot(p, V)), sequences = [Y, P[::-1]], outputs_info = [X])
compute_seq = theano.function(inputs = [X, W, Y, U, P, V], outputs = results)
x = np.zeros((2), dtype = theano.config.floatX)
x[1] = 1
w = np.ones((2, 2), dtype = theano.config.floatX)
y = np.ones((5, 2), dtype = theano.config.floatX)
y[0, :] = -3
u = np.ones((2, 2), dtype = theano.config.floatX)
p = np.ones((5, 2), dtype = theano.config.floatX)
p[0, :] = 3
v = np.ones((2, 2), dtype = theano.config.floatX)
print(compute_seq(x, w, y, u, p, v))
x_res = np.zeros((5, 2), dtype = theano.config.floatX)
x_res[0] = np.tanh(x.dot(w) + y[0].dot(u) + p[4].dot(v))
for i in range(1, 5):
    x_res[i] = np.tanh(x_res[i - 1].dot(w) + y[i].dot(u) + p[4- i].dot(v))
print(x_res)
#####print
[[-0.99505475 -0.99505475]
 [ 0.96471973  0.96471973]
 [ 0.99998585  0.99998585]
 [ 0.99998771  0.99998771]
 [ 1.          1.        ]]
[[-0.99505475 -0.99505475]
 [ 0.96471973  0.96471973]
 [ 0.99998585  0.99998585]
 [ 0.99998771  0.99998771]
 [ 1.          1.        ]]
#####

\(Scan\) example: compute the norms of the rows of \(X\)

import theano
import theano.tensor as T
import numpy as np
X = T.matrix("X")
results, updates = theano.scan(lambda x_i: T.sqrt((x_i ** 2).sum()), sequences = [X])
compute_norm_lines = theano.function(inputs = [X], outputs = results)
x = np.diag(np.arange(1, 6, dtype = theano.config.floatX), 1)
print(compute_norm_lines(x))
print(np.sqrt((x ** 2).sum(1)))
#####print
[ 1.  2.  3.  4.  5.  0.]
[ 1.  2.  3.  4.  5.  0.]
#####

\(Scan\) example: compute the norms of the columns of \(X\)

import theano
import theano.tensor as T
import numpy as np
X = T.matrix("X")
results, updates = theano.scan(lambda x_i: T.sqrt((x_i ** 2).sum()), sequences = [X.T])
compute_norm_cols = theano.function(inputs = [X], outputs = results)
x = np.diag(np.arange(1, 6, dtype = theano.config.floatX), 1)
print(compute_norm_cols(x))
print(np.sqrt((x ** 2).sum(0)))
#####print
[ 0.  1.  2.  3.  4.  5.]
[ 0.  1.  2.  3.  4.  5.]
#####

\(Scan\) example: compute the trace of \(X\)

import theano
import theano.tensor as T
import numpy as np
floatX = 'float32'
X = T.matrix("X")
results, updates = theano.scan(lambda i, j, t_f: T.cast(X[i, j] + t_f, floatX), sequences = [T.arange(X.shape[0]), T.arange(X.shape[1])], outputs_info = np.asarray(0, dtype = floatX))
result = results[-1]
compute_trace = theano.function(inputs = [X], outputs = result)
x = np.eye(5, dtype = theano.config.floatX)
x[0] = np.arange(5, dtype = theano.config.floatX)
print(compute_trace(x))
print(np.diagonal(x).sum())
#####print
4.0
4.0
#####

\(Scan\) example: compute the sequence \(x(t) = x(t-2).dot(U) + x(t-1).dot(V) + tanh(x(t-1).dot(W) + b)\)

import theano
import theano.tensor as T
import numpy as np
X = T.matrix("X")
W = T.matrix("W")
b_sym = T.vector("b_sym")
U = T.matrix("U")
V = T.matrix("V")
n_sym = T.iscalar("n_sym")
results, updates = theano.scan(lambda x_tm2, x_tm1: T.dot(x_tm2, U) + T.dot(x_tm1, V) + T.tanh(T.dot(x_tm1, W) + b_sym), n_steps=n_sym, outputs_info=[dict(initial=X, taps=[-2, -1])])
compute_seq2 = theano.function(inputs=[X, U, V, W, b_sym, n_sym], outputs=results)
x = np.zeros((2, 2), dtype=theano.config.floatX)
x[1, 1] = 1
w = 0.5 * np.ones((2, 2), dtype=theano.config.floatX)
u = 0.5 * (np.ones((2, 2), dtype=theano.config.floatX) - np.eye(2, dtype=theano.config.floatX))
v = 0.5 * np.ones((2, 2), dtype=theano.config.floatX)
n = 10
b = np.ones((2), dtype=theano.config.floatX)
print(compute_seq2(x, u, v, w, b, n))
x_res = np.zeros((10, 2))
x_res[0] = x[0].dot(u) + x[1].dot(v) + np.tanh(x[1].dot(w) + b)
x_res[1] = x[1].dot(u) + x_res[0].dot(v) + np.tanh(x_res[0].dot(w) + b)
for i in range(2, 10):
    x_res[i] = (x_res[i - 2].dot(u) + x_res[i - 1].dot(v) + np.tanh(x_res[i - 1].dot(w) + b))
print(x_res)
#####print
[[  1.40514825   1.40514825]
 [  2.88898899   2.38898899]
 [  4.34018291   4.34018291]
 [  6.53463142   6.78463142]
 [  9.82972243   9.82972243]
 [ 14.22203814  14.09703814]
 [ 20.07439936  20.07439936]
 [ 28.12291843  28.18541843]
 [ 39.1913681   39.1913681 ]
 [ 54.28407732  54.25282732]]
[[  1.40514825   1.40514825]
 [  2.88898899   2.38898899]
 [  4.34018291   4.34018291]
 [  6.53463142   6.78463142]
 [  9.82972243   9.82972243]
 [ 14.22203814  14.09703814]
 [ 20.07439936  20.07439936]
 [ 28.12291843  28.18541843]
 [ 39.1913681   39.1913681 ]
 [ 54.28407732  54.25282732]]
#####

\(Scan\) example: compute the \(Jacobian\) of \(y = tanh(v.dot(A))\)

import theano
import theano.tensor as T
import numpy as np
v = T.vector()
A = T.matrix()
y = T.tanh(T.dot(v, A))
results, updates = theano.scan(lambda i: T.grad(y[i], v), sequences = [T.arange(y.shape[0])])
compute_jac_t = theano.function([A, v], results, allow_input_downcast = True)
x = np.eye(5, dtype = theano.config.floatX)[0]
w = np.eye(5, 3, dtype = theano.config.floatX)
w[2] = np.ones((3), dtype = theano.config.floatX)
print(compute_jac_t(w, x))
print(((1 - np.tanh(x.dot(w)) ** 2) * w).T)
#####print
[[ 0.41997434  0.          0.41997434  0.          0.        ]
 [ 0.          1.          1.          0.          0.        ]
 [ 0.          0.          1.          0.          0.        ]]
[[ 0.41997434  0.          0.41997434  0.          0.        ]
 [ 0.          1.          1.          0.          0.        ]
 [ 0.          0.          1.          0.          0.        ]]
#####

Note: we iterate over the indices of \(y\), not over the elements of \(y\). The reason is that \(scan\) creates a placeholder variable for its internal function, and this placeholder does not have the same dependencies as the variable it replaces.
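
As an aside (not from the original post), Theano also ships a helper, theano.gradient.jacobian, that wraps essentially this scan-over-indices pattern; a minimal sketch for the same \(y = tanh(v.dot(A))\), assuming the helper is available in your Theano version:

import theano
import theano.tensor as T
import numpy as np
v = T.vector()
A = T.matrix()
y = T.tanh(T.dot(v, A))
J = theano.gradient.jacobian(y, v)  # builds the scan over T.arange(y.shape[0]) for you
compute_jac = theano.function([A, v], J, allow_input_downcast = True)
x = np.eye(5, dtype = theano.config.floatX)[0]
w = np.eye(5, 3, dtype = theano.config.floatX)
w[2] = np.ones((3), dtype = theano.config.floatX)
print(compute_jac(w, x))  # should match the scan-based result above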

\(Scan\) example: count the number of loop iterations of \(scan\)

import theano
import theano.tensor as T
k = theano.shared(0)
n_sym = T.iscalar("n_sym")
results, updates = theano.scan(lambda: {k: (k + 1)}, n_steps = n_sym)
accumulator = theano.function([n_sym], [], updates = updates, allow_input_downcast = True)
print(k.get_value())  #print 0
accumulator(5)
print(k.get_value())  #print 5

\(Scan\) example: compute \(tanh(v.dot(W) + b) * d\), where \(d\) follows a binomial distribution

import theano
import theano.tensor as T
import numpy as np
X = T.matrix('X')
W = T.matrix('W')
b_sym = T.vector('b_sym')
trng = T.shared_randomstreams.RandomStreams(1234)
d = trng.binomial(size = W[1].shape)
results, updates = theano.scan(lambda v: T.tanh(T.dot(v, W) + b_sym) * d, sequences = X)
compute_with_bnoise = theano.function(inputs = [X, W, b_sym], outputs = results, updates = updates, allow_input_downcast = True)
x = np.eye(10, 2, dtype = theano.config.floatX)
w = np.ones((2, 2), dtype = theano.config.floatX)
b = np.ones((2), dtype = theano.config.floatX)
print(compute_with_bnoise(x, w, b))
#####print
[[ 0.96402758  0.        ]
 [ 0.          0.96402758]
 [ 0.          0.        ]
 [ 0.76159416  0.76159416]
 [ 0.76159416  0.        ]
 [ 0.          0.76159416]
 [ 0.          0.76159416]
 [ 0.          0.76159416]
 [ 0.          0.        ]
 [ 0.76159416  0.76159416]]
#####

Note: if you do not want the random variable \(d\) to change at each iteration of the \(scan\) loop, you should pass it as a non_sequences argument.
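
Following that note, here is a minimal sketch (not from the original post; the names d_fixed and compute_with_fixed_noise are my own) that passes \(d\) through non_sequences so the same binomial mask is reused at every step:

import theano
import theano.tensor as T
import numpy as np
X = T.matrix('X')
W = T.matrix('W')
b_sym = T.vector('b_sym')
trng = T.shared_randomstreams.RandomStreams(1234)
d = trng.binomial(size = W[1].shape)
# d is sampled once per function call and handed to every scan step unchanged
results, updates = theano.scan(lambda v, d_fixed: T.tanh(T.dot(v, W) + b_sym) * d_fixed, sequences = X, non_sequences = d)
compute_with_fixed_noise = theano.function(inputs = [X, W, b_sym], outputs = results, updates = updates, allow_input_downcast = True)
x = np.eye(10, 2, dtype = theano.config.floatX)
w = np.ones((2, 2), dtype = theano.config.floatX)
b = np.ones((2), dtype = theano.config.floatX)
print(compute_with_fixed_noise(x, w, b))  # every row is masked by the same d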

\(Scan\) example: compute \(pow(A, k)\) element-wise

import theano
import theano.tensor as T
theano.config.warn.subtensor_merge_bug = False
k = T.iscalar('k')
A = T.vector('A')
def inner_fct(prior_result, B):
    return prior_result * B

result, updates = theano.scan(fn = inner_fct, outputs_info = T.ones_like(A), non_sequences = A, n_steps = k)
final_result = result[-1]
power = theano.function(inputs = [A, k], outputs = final_result, updates = updates)
print(power(range(10), 2))
#####print
[  0.   1.   4.   9.  16.  25.  36.  49.  64.  81.]
#####

\(Scan\) example: evaluate a polynomial

import theano
import theano.tensor as T
import numpy
theano.config.warn.subtensor_merge_bug = False
coefficients = theano.tensor.vector("coefficients")
x = T.scalar('x')
max_coefficients_supported = 10000
full_range = theano.tensor.arange(max_coefficients_supported)
components, updates = theano.scan(fn = lambda coeff, power, free_var: coeff * (free_var ** power), outputs_info = None, sequences = [coefficients, full_range], non_sequences = x)
polynomial = components.sum()
calculate_polynomial = theano.function(inputs = [coefficients, x], outputs = polynomial)
test_coeff = numpy.asarray([1, 0, 2], dtype = numpy.float32)
print(calculate_polynomial(test_coeff, 3))
#####print
19.0
#####

Reposted from: https://www.cnblogs.com/shihuayun/p/7161571.html
