Theano Tutorial文档翻译（七） - 循环

最新推荐文章于 2020-09-24 04:30:24 发布

Viggo_yuan

最新推荐文章于 2020-09-24 04:30:24 发布

阅读量361

点赞数

分类专栏： Theano

Theano 专栏收录该内容

10 篇文章 0 订阅

订阅专栏

原文地址：http://deeplearning.net/software/theano/tutorial/loop.html

Scan

一个常用的循环形式，可以用于循环操作。
Reduction和map是scan的特殊例子。(在主要维度循环)
你扫描一个函数的输入值，在每一步输出一个输出值。
这个函数可以看你函数的前k步。
sum()可以通过在一个列表扫描z+x(i)，初始状态为z=0。
经常用scan()来表达循环操作，Theano的循环最接近scan()。
使用scan()的好处：
- 迭代的次数是符号图的一部分
- 最小化GPU的转变。
- 连续步骤的梯度计算。
- 在Theano里比Python的循环要快一点
- 可以减少总体的储存使用通过估计准确的储存使用。

Scan的例子：依次计算 tanh(x(t.dot(W) + b)

import theano
import theano,tensor as T
import numpy as np

X = T.matrix("X")
W = T.matrix("W")
b_sym = T.vector("b_sym")

results, updates = theano.scan(lambda v: T.tanh(T.dot(v, W) + b_sym), sequences=X)
compute_elementwise = theano.function(inputs=[X,W,b_sym], outputs=results)

x = np.eye(2, dtype=theano.config.floatX)
w = np.ones((2,2), dtype=theano.config.floatX)
b = np.ones((2), dtype=theano.config.floatX)
b[1] = 2

print(compute_elementwise(x,w,b))
print(np.tanh(x.dot(w)+b))

[[ 0.96402758  0.99505475]
 [ 0.96402758  0.99505475]]
[[ 0.96402758  0.99505475]
 [ 0.96402758  0.99505475]]

Scan的例子：计算 x(t) = tanh(x(t)-1).dat(W) + y(t).dot(U) + p(T - t).dot(V))

import theano
import theano.tensor as T
import numpy as np

X = T.vector("X")
W = T.vector("W")
b_sym = T.vector("b_sym")
U = T,matrix("U")
Y = T.matrix("Y")
V = T.matrix("V")
P = T.matrix("P")

results, updates = theano.scan(lambda y,p,x_tm1: T.tanh(T.dot(x_tm1, W) + T.dot(y,U) + T.dot(p,V)), sequences=[Y,P[::-1]], outputs_info=[X])

[[-0.99505475 -0.99505475]
 [ 0.96471973  0.96471973]
 [ 0.99998585  0.99998585]
 [ 0.99998771  0.99998771]
 [ 1.          1.        ]]
[[-0.99505475 -0.99505475]
 [ 0.96471973  0.96471973]
 [ 0.99998585  0.99998585]
 [ 0.99998771  0.99998771]
 [ 1.          1.        ]]

Scan的例子：计算矩阵的范数

import theano
import theano.tensor as T
import numpy as np

# define tensor variable
X = T.matrix("X")
results, updates = theano.scan(lambda x_i: T.sqrt((x_i ** 2).sum()), sequences=[X.T])
compute_norm_cols = theano.function(inputs=[X], outputs=results)

# test value
x = np.diag(np.arange(1, 6, dtype=theano.config.floatX), 1)
print(compute_norm_cols(x))

# comparison with numpy
print(np.sqrt((x ** 2).sum(0)))

[ 0.  1.  2.  3.  4.  5.]
[ 0.  1.  2.  3.  4.  5.]

Scan的例子：计算X的秩

import theano
import theano.tensor as T
import numpy as np
floatX = "float32"

# define tensor variable
X = T.matrix("X")
results, updates = theano.scan(lambda i, j, t_f: T.cast(X[i, j] + t_f, floatX),
                  sequences=[T.arange(X.shape[0]), T.arange(X.shape[1])],
                  outputs_info=np.asarray(0., dtype=floatX))
result = results[-1]
compute_trace = theano.function(inputs=[X], outputs=result)

# test value
x = np.eye(5, dtype=theano.config.floatX)
x[0] = np.arange(5, dtype=theano.config.floatX)
print(compute_trace(x))

# comparison with numpy
print(np.diagonal(x).sum())

4.0
4.0

Scan的例子：计算x(t) = x(t - 2).dot(U) + x(t - 1).dot(V) + tanh(x(t - 1).dot(W) + b)

import theano
import theano.tensor as T
import numpy as np

# define tensor variables
X = T.matrix("X")
W = T.matrix("W")
b_sym = T.vector("b_sym")
U = T.matrix("U")
V = T.matrix("V")
n_sym = T.iscalar("n_sym")

results, updates = theano.scan(lambda x_tm2, x_tm1: T.dot(x_tm2, U) + T.dot(x_tm1, V) + T.tanh(T.dot(x_tm1, W) + b_sym),
                    n_steps=n_sym, outputs_info=[dict(initial=X, taps=[-2, -1])])
compute_seq2 = theano.function(inputs=[X, U, V, W, b_sym, n_sym], outputs=results)

# test values
x = np.zeros((2, 2), dtype=theano.config.floatX) # the initial value must be able to return x[-2]
x[1, 1] = 1
w = 0.5 * np.ones((2, 2), dtype=theano.config.floatX)
u = 0.5 * (np.ones((2, 2), dtype=theano.config.floatX) - np.eye(2, dtype=theano.config.floatX))
v = 0.5 * np.ones((2, 2), dtype=theano.config.floatX)
n = 10
b = np.ones((2), dtype=theano.config.floatX)

print(compute_seq2(x, u, v, w, b, n))

# comparison with numpy
x_res = np.zeros((10, 2))
x_res[0] = x[0].dot(u) + x[1].dot(v) + np.tanh(x[1].dot(w) + b)
x_res[1] = x[1].dot(u) + x_res[0].dot(v) + np.tanh(x_res[0].dot(w) + b)
x_res[2] = x_res[0].dot(u) + x_res[1].dot(v) + np.tanh(x_res[1].dot(w) + b)
for i in range(2, 10):
    x_res[i] = (x_res[i - 2].dot(u) + x_res[i - 1].dot(v) +
                np.tanh(x_res[i - 1].dot(w) + b))
print(x_res)

[[  1.40514825   1.40514825]
 [  2.88898899   2.38898899]
 [  4.34018291   4.34018291]
 [  6.53463142   6.78463142]
 [  9.82972243   9.82972243]
 [ 14.22203814  14.09703814]
 [ 20.07439936  20.07439936]
 [ 28.12291843  28.18541843]
 [ 39.1913681   39.1913681 ]
 [ 54.28407732  54.25282732]]
[[  1.40514825   1.40514825]
 [  2.88898899   2.38898899]
 [  4.34018291   4.34018291]
 [  6.53463142   6.78463142]
 [  9.82972243   9.82972243]
 [ 14.22203814  14.09703814]
 [ 20.07439936  20.07439936]
 [ 28.12291843  28.18541843]
 [ 39.1913681   39.1913681 ]
 [ 54.28407732  54.25282732]]

Scan的例子：计算y=tanh(v.dot(A))关于x的雅可比

import theano
import theano.tensor as T
import numpy as np

# define tensor variables
v = T.vector()
A = T.matrix()
y = T.tanh(T.dot(v, A))
results, updates = theano.scan(lambda i: T.grad(y[i], v), sequences=[T.arange(y.shape[0])])
compute_jac_t = theano.function([A, v], results, allow_input_downcast=True) # shape (d_out, d_in)

# test values
x = np.eye(5, dtype=theano.config.floatX)[0]
w = np.eye(5, 3, dtype=theano.config.floatX)
w[2] = np.ones((3), dtype=theano.config.floatX)
print(compute_jac_t(w, x))

# compare with numpy
print(((1 - np.tanh(x.dot(w)) ** 2) * w).T)

[[ 0.41997434  0.          0.41997434  0.          0.        ]
 [ 0.          1.          1.          0.          0.        ]
 [ 0.          0.          1.          0.          0.        ]]
[[ 0.41997434  0.          0.41997434  0.          0.        ]
 [ 0.          1.          1.          0.          0.        ]
 [ 0.          0.          1.          0.          0.        ]]

Scan的例子：计算累计和

import theano
import theano.tensor as T
import numpy as np

# define shared variables
k = theano.shared(0)
n_sym = T.iscalar("n_sym")

results, updates = theano.scan(lambda:{k:(k + 1)}, n_steps=n_sym)
accumulator = theano.function([n_sym], [], updates=updates, allow_input_downcast=True)

k.get_value()
accumulator(5)
k.get_value()

Scan的例子：计算 tanh(v.dot(W) + b) * d 当d是个二项式

import theano
import theano.tensor as T
import numpy as np

# define tensor variables
X = T.matrix("X")
W = T.matrix("W")
b_sym = T.vector("b_sym")

# define shared random stream
trng = T.shared_randomstreams.RandomStreams(1234)
d=trng.binomial(size=W[1].shape)

results, updates = theano.scan(lambda v: T.tanh(T.dot(v, W) + b_sym) * d, sequences=X)
compute_with_bnoise = theano.function(inputs=[X, W, b_sym], outputs=results,
                          updates=updates, allow_input_downcast=True)
x = np.eye(10, 2, dtype=theano.config.floatX)
w = np.ones((2, 2), dtype=theano.config.floatX)
b = np.ones((2), dtype=theano.config.floatX)

print(compute_with_bnoise(x, w, b))

[[ 0.96402758  0.        ]
 [ 0.          0.96402758]
 [ 0.          0.        ]
 [ 0.76159416  0.76159416]
 [ 0.76159416  0.        ]
 [ 0.          0.76159416]
 [ 0.          0.76159416]
 [ 0.          0.76159416]
 [ 0.          0.        ]
 [ 0.76159416  0.76159416]]

Scan的例子：计算pow(A， k)

import theano
import theano.tensor as T
theano.config.warn.subtensor_merge_bug = False

k = T.iscalar("k")
A = T.vector("A")

def inner_fct(prior_result, B):
    return prior_result * B

# Symbolic description of the result
result, updates = theano.scan(fn=inner_fct,
                            outputs_info=T.ones_like(A),
                            non_sequences=A, n_steps=k)

# Scan has provided us with A ** 1 through A ** k.  Keep only the last
# value. Scan notices this and does not waste memory saving them.
final_result = result[-1]

power = theano.function(inputs=[A, k], outputs=final_result,
                      updates=updates)

print(power(range(10), 2))

[  0.   1.   4.   9.  16.  25.  36.  49.  64.  81.]

Scan的例子：计算一个多项式

import numpy
import theano
import theano.tensor as T
theano.config.warn.subtensor_merge_bug = False

coefficients = theano.tensor.vector("coefficients")
x = T.scalar("x")
max_coefficients_supported = 10000

# Generate the components of the polynomial
full_range=theano.tensor.arange(max_coefficients_supported)
components, updates = theano.scan(fn=lambda coeff, power, free_var:
                                   coeff * (free_var ** power),
                                outputs_info=None,
                                sequences=[coefficients, full_range],
                                non_sequences=x)

polynomial = components.sum()
calculate_polynomial = theano.function(inputs=[coefficients, x],
                                     outputs=polynomial)

test_coeff = numpy.asarray([1, 0, 2], dtype=numpy.float32)
print(calculate_polynomial(test_coeff, 3))