Python--Theano学习笔记(一)--基础1
1、标量运算
import numpy
import theano.tensor as T
from theano import function
# Define zero-dimensional arrays (i.e. scalars).
# T.dscalar('a') creates a symbolic double-precision scalar named 'a' that stands in for a float value.
x = T.dscalar('x')
y = T.dscalar('y')
# z is the symbolic sum of x and y; print(theano.pp(z)) shows the expression it holds.
z = x + y
# function([inputs], output) compiles a callable taking x and y and returning z.
f = function([x, y], z)
>>> f(2, 3)
array(5.0)
>>> numpy.allclose(f(16.3, 12.1), 28.4)
True
2、矩阵运算
# Define matrix variables: dmatrix is a matrix of doubles; everything else mirrors the scalar example.
x = T.dmatrix('x')
y = T.dmatrix('y')
z = x + y
f = function([x, y], z)
>>> f([[1,2],[1,2]],[[4,5],[6,7]])
array([[ 5., 7.],
[ 7., 9.]])
同理,可以实现标量与矩阵相加,矢量与矩阵相加,标量与矢量相加,等等。这种性质遵循Numpy的广播准则。
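可用的数据类型为(前缀字母表示元素类型,后缀表示维度):
byte(8位整数):bscalar, bvector, bmatrix, brow, bcol, btensor3, btensor4
16位整数:wscalar, wvector, wmatrix, wrow, wcol, wtensor3, wtensor4
32位整数:iscalar, ivector, imatrix, irow, icol, itensor3, itensor4
64位整数:lscalar, lvector, lmatrix, lrow, lcol, ltensor3, ltensor4
float(单精度浮点):fscalar, fvector, fmatrix, frow, fcol, ftensor3, ftensor4
double(双精度浮点):dscalar, dvector, dmatrix, drow, dcol, dtensor3, dtensor4
complex(复数):cscalar, cvector, cmatrix, crow, ccol, ctensor3, ctensor4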
3、函数方程
比简单的数值计算稍微复杂一点点。Theano-Tutorial中以逻辑函数S(x)为例。
import theano
import theano.tensor as T
x = T.dmatrix('x')
# Logistic function expression s(x) = 1 / (1 + exp(-x)), applied to each element of x.
s = 1 / (1 + T.exp(-x))
# Compile a callable mapping a matrix to its elementwise logistic values.
logistic = theano.function([x], s)
>>> logistic([[-21, 1], [-1, -2]])
array([[ 7.58256042e-10, 7.31058579e-01],
[ 2.68941421e-01, 1.19202922e-01]])
4、实现多输出
import theano
import theano.tensor as T
# Compile a single function that returns several outputs at once:
# the difference, absolute difference and squared difference of two matrices.
x = T.dmatrix('x')  # NOTE(review): x is not used by f below
a, b = T.dmatrices('a', 'b')
diff = a - b
abs_diff = abs(diff)
diff_squared = diff**2
# Passing a list as the output makes f return a list of arrays, one per expression.
f = theano.function([a, b], [diff, abs_diff, diff_squared])
>>> print f([[423,5],[54,654],[3,2]],[[53,65],[342,5],[0,-12]])
[array([[ 370., -60.],
[-288., 649.],
[ 3., 14.]]), array([[ 370., 60.],
[ 288., 649.],
[ 3., 14.]]), array([[ 1.36900000e+05, 3.60000000e+03],
[ 8.29440000e+04, 4.21201000e+05],
[ 9.00000000e+00, 1.96000000e+02]])]
5、默认值设置
import theano
import theano.tensor as T
from theano import In
from theano import function
x, y = T.dscalars('x', 'y')
z = x + y
# Wrap y in In(...) to give it a default value of 2.5, used when the caller omits y.
f = function([x, In(y, value=2.5)], z)
>>> f(35)
array(37.5)
>>> f(35,1)
array(36.0)
同时,可以通过In定义变量的名称:
import theano
import theano.tensor as T
from theano import In
from theano import function
x, y, w = T.dscalars('x', 'y', 'w')
z = (x + y) * w
# y defaults to 1 and w defaults to 2; name='w_by_name' sets the keyword under which
# w must be passed to f (the variable's own name 'w' is then not accepted as a keyword).
f = function([x, In(y, value=1), In(w, value=2, name='w_by_name')], z)
>>> f(33)
array(68.0)
>>> f(33,2)
array(70.0)
>>> f(33,0,1)
array(33.0)
>>> f(33,w_by_name=1)
array(34.0)
>>> f(33,w_by_name=1,y=-1)
array(32.0)
这时已经将w的名称定义为w_by_name,如果仍然给w赋值,就会出现问题:
>>> f(33,w=1,y=-1)
Traceback (most recent call last):
Python Shell, prompt 8, line 1
File "D:\Anaconda\Lib\site-packages\theano\compile\function_module.py", line 801, in __call__
self[k] = arg
File "D:\Anaconda\Lib\site-packages\theano\compile\function_module.py", line 548, in __setitem__
self.value[item] = value
File "D:\Anaconda\Lib\site-packages\theano\compile\function_module.py", line 499, in __setitem__
(str(item), msg))
TypeError: Unknown input or state: w. The function has 3 named inputs (x, y, w_by_name).
6、共享变量(updates与givens的用法)
from theano import shared
from theano import tensor as T
from theano import function
# Define the shared variable state; read it with .get_value() and reset it with .set_value().
state = shared(0)
inc = T.iscalar('inc')
# Input is inc and the output is the current value of state (before the update);
# the updates argument then assigns state + inc back to state after each call.
accumulator = function([inc], state, updates=[(state, state+inc)])
>>> state.get_value()
array(0)
>>> accumulator(20)
array(0)
>>> state.get_value()
array(20)
>>> accumulator(10)
array(20)
>>> state.get_value()
array(30)
>>> state.set_value(1)
>>> state.get_value()
array(1)
共享变量可应用于不同的function中,例如再定义一个减法:
# Second function sharing the same state variable: returns the current state, then subtracts inc from it.
decrementor = function([inc], state, updates=[(state, state-inc)])
>>> state.get_value()
array(0)
>>> accumulator(10)
array(0)
>>> state.get_value()
array(10)
>>> decrementor(1)
array(10)
>>> state.get_value()
array(9)
有些情况下,我们所用的function中需要调用某共享变量,但并不想用它的值,此时,我们使用function的givens参数来进行修改。
import theano
from theano import tensor as T
from theano import function
from theano import shared
# Initial value of state.
state = shared(0)
inc = T.iscalar('inc')
# Expression written in terms of the shared variable state.
fn_of_state = state * 2 + inc
foo = T.scalar(dtype=state.dtype) # foo must have the same dtype as state.
# givens substitutes foo for state inside this function's graph only.
# skip_shared therefore computes foo * 2 + inc and leaves the value stored in state unchanged.
skip_shared = function([inc, foo], fn_of_state, givens=[(state, foo)])
skip_shared(1, 3)
>>> state.get_value()
array(0)
>>> skip_shared(3,3)
array(9)
>>> state.get_value()
array(0)
7、复制function
import theano
import theano.tensor as T
state = theano.shared(0)
inc = T.iscalar('inc')
# Returns the current state, then adds inc to it.
accumulator = theano.function([inc], state, updates=[(state, state+inc)])
# Use .copy() to duplicate the compiled function, swapping its shared variable
# state for new_state so the copy reads and updates new_state instead.
new_state = theano.shared(0)
new_accumulator = accumulator.copy(swap={state:new_state})
>>> state.get_value()
array(0)
>>> new_state.get_value()
array(0)
>>> accumulator(1)
array(0)
>>> new_accumulator(2)
[array(0)]
>>> state.get_value()
array(1)
>>> new_state.get_value()
array(2)
8、随机数
from theano.tensor.shared_randomstreams import RandomStreams
from theano import function
srng = RandomStreams(seed=234)
# rv_u is a random stream of 2x2 matrices drawn from a uniform distribution;
# rv_n is a random stream of 2x2 matrices drawn from a normal distribution.
rv_u = srng.uniform((2,2))
rv_n = srng.normal((2,2))
f = function([], rv_u)
# no_default_updates=True leaves rv_n's generator state untouched, so g returns the same numbers on every call.
g = function([], rv_n, no_default_updates=True)
# A random variable is drawn at most once per function call, so all three occurrences of
# rv_u share the same draw and the result is (approximately) zero.
nearly_zeros = function([], rv_u + rv_u - 2 * rv_u)
>>> f()
array([[ 0.12672381, 0.97091597],
[ 0.13989098, 0.88754825]])
>>> f()
array([[ 0.31971415, 0.47584377],
[ 0.24129163, 0.42046081]])
>>> f()
array([[ 0.44078224, 0.26993381],
[ 0.14317277, 0.43571539]])
>>> g()
array([[ 0.37328447, -0.65746672],
[-0.36302373, -0.97484625]])
>>> g()
array([[ 0.37328447, -0.65746672],
[-0.36302373, -0.97484625]])
>>> nearly_zeros()
array([[ 0., 0.],
[ 0., 0.]])
可见,f()每次会抽取不同的随机数,而g()不会变。
9、种子流
可以单独播种也可以集体播种:
rng_val = rv_u.rng.get_value(borrow=True)
# Reseed only rv_u: seed the generator object obtained from rv_u.rng, then store it back.
rng_val.seed(89234)
rv_u.rng.set_value(rng_val, borrow=True)
# Reseed every random variable allocated by srng (here rv_u and rv_n) in one call.
srng.seed(902340)
10、functions之间共享流
from theano import function
from theano.tensor.shared_randomstreams import RandomStreams
srng = RandomStreams(seed=234)
rv_u = srng.uniform((2,2))
# f and nearly_zeros both draw from rv_u, so they share one generator.
f = function([], rv_u)
nearly_zeros = function([], rv_u + rv_u- 2 * rv_u)
# Record the generator state before any draw as state_after_v0.
state_after_v0 =rv_u.rng.get_value().get_state()
# This call advances rv_u's generator even though its result is zeros.
nearly_zeros()
v1 = f()
# Restore the generator state to state_after_v0.
rng = rv_u.rng.get_value(borrow=True)
rng.set_state(state_after_v0)
rv_u.rng.set_value(rng, borrow=True)
# Per the Theano tutorial: v2 != v1 (v2 replays the draw consumed by nearly_zeros) and v3 == v1.
v2 = f()
v3 = f()
11、Theano图之间复制状态
创建两个Theano Graph
from __future__ import print_function
import theano
import numpy
import theano.tensor as T
from theano.sandbox.rng_mrg import MRG_RandomStreams
from theano.tensor.shared_randomstreams import RandomStreams
class Graph():
    """Minimal Theano graph that owns its own random stream.

    Attributes set by ``__init__``:
        rng: a ``RandomStreams`` instance seeded with ``seed``.
        y: a symbolic uniform random variable of shape ``(1,)`` drawn from ``rng``.
    """

    def __init__(self, seed=123):
        """Create the random stream and allocate the random variable ``y``.

        Args:
            seed: seed passed to ``RandomStreams`` (default 123).
        """
        self.rng = RandomStreams(seed)
        self.y = self.rng.uniform(size=(1,))
# Build two independent graphs with different seeds and compile one sampling function for each.
g1 = Graph(seed=123)
f1 = theano.function([], g1.y)
g2 = Graph(seed=987)
f2 = theano.function([], g2.y)
两个函数在默认状态下的输出为:
>>> f1()
array([ 0.72803009])
>>> f2()
array([ 0.55056769])
复制状态:
def copy_random_state(g1, g2):
    """Copy the random-generator state of graph ``g1`` into graph ``g2``.

    Args:
        g1: source graph; must expose ``rng`` with a ``state_updates`` sequence.
        g2: destination graph with the same layout of random variables as ``g1``.

    Returns:
        None. ``g2``'s generator state is overwritten in place.
    """
    # MRG streams additionally keep their seed state in ``rstate``.
    if isinstance(g1.rng, MRG_RandomStreams):
        g2.rng.rstate = g1.rng.rstate
    # Pair up the state updates of both graphs and copy each generator state
    # (element 0 of every entry) from g1 to g2.
    for (su1, su2) in zip(g1.rng.state_updates, g2.rng.state_updates):
        su2[0].set_value(su1[0].get_value())
>>> copy_random_state(g1, g2)
>>> f2()
array([ 0.23715077])
>>> f1()
array([ 0.23715077])
此外,还可通过MRG或CURAND实现其他类型的随机分布。RandomStreams只支持CPU,CURAND只支持GPU,而MRG两者皆可。
12、一个简单的例子--逻辑回归函数的实现
import numpy
import theano
import theano.tensor as T
# Logistic regression trained with gradient descent on random data.
rng = numpy.random

N = 400      # number of training samples
feats = 784  # features per sample (e.g. one flattened 28x28 image each)

# Generate the dataset D = (inputs, targets): an N x feats matrix of random
# inputs and a vector of N random 0/1 labels.
D = (rng.randn(N, feats), rng.randint(size=N, low=0, high=2))
training_steps = 10000

# Declare the Theano symbolic variables.
x = T.dmatrix("x")
y = T.dvector("y")

# The weight vector w is a shared variable so that it keeps its value
# between training iterations and can be updated by them.
w = theano.shared(rng.randn(feats), name="w")

# The bias b is a shared variable as well.
b = theano.shared(0., name="b")

print("Initial model:")
print(w.get_value())
print(b.get_value())

# Build the activation and the loss.
p_1 = 1 / (1 + T.exp(-T.dot(x, w) - b))        # logistic activation
prediction = p_1 > 0.5                         # thresholded prediction
xent = -y * T.log(p_1) - (1-y) * T.log(1-p_1)  # cross-entropy per sample
cost = xent.mean() + 0.01 * (w ** 2).sum()     # mean loss plus L2 penalty on w
gw, gb = T.grad(cost, [w, b])                  # gradients of the cost w.r.t. w and b

# Compile the training step; each call applies one gradient-descent update
# with step size 0.1 to w and b.
train = theano.function(
    inputs=[x, y],
    outputs=[prediction, xent],
    updates=((w, w - 0.1 * gw), (b, b - 0.1 * gb)))
predict = theano.function(inputs=[x], outputs=prediction)

# Train.
for i in range(training_steps):
    pred, err = train(D[0], D[1])

print("Final model:")
print(w.get_value())
print(b.get_value())
print("target values for D:")
print(D[1])  # original labels
print("prediction on D:")
# Run the prediction once and reuse it, instead of re-evaluating the whole
# dataset for every sample.
predictions = predict(D[0])
print(predictions)  # model output for the training samples

# Accuracy = matching labels / number of samples (N rather than a literal 400).
m = int(numpy.sum(D[1] == predictions))
acc = m / float(N)
# %-formatting prints the same text under both Python 2 and Python 3.
print('The accuracy is %s' % acc)