Basic usage of TensorFlow
Introduction
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
import warnings
warnings.filterwarnings("ignore")

# let TF grow GPU memory on demand instead of grabbing the whole card
gpu_options = tf.GPUOptions(allow_growth=True)
Basic usage
First define a session, then define the variables and the computation graph (i.e. the formulas), then initialize the variables and call run. A Variable must be initialized before use, otherwise TF raises an error; a Constant needs no initialization. If you do not want to initialize every variable one by one, global_variables_initializer initializes all of them at once.
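Before the full example below, a minimal sketch of the initialization rule (assuming only the imports above):

c = tf.constant(5)
v = tf.Variable(5)
with tf.Session() as sess:
    print(sess.run(c))        # 5 -- constants need no initialization
    # sess.run(v)             # would raise FailedPreconditionError: uninitialized variable
    sess.run(v.initializer)
    print(sess.run(v))        # 5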
with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess:
    x = tf.Variable(3, name="x")
    y = tf.Variable(3, name="y")
    f = x**2 * y + y + 2
    sess.run(x.initializer)
    sess.run(y.initializer)
    print(sess.run(f))
x = tf.Variable(3, name="x")
y = tf.Variable(3, name="y")
init = tf.global_variables_initializer()
with tf.Session():
    init.run()
    print(f.eval())
32
32
Working with graphs
Every node created in TF is added to a graph at creation time. To manage several independent graphs, define them separately so that different variables and nodes belong to different graphs. When evaluating a node, TF first finds all the nodes its value depends on and evaluates those too; so if nodes are evaluated one at a time, any shared upstream nodes get recomputed for each evaluation. To evaluate several nodes efficiently, fetch them together in a single run call.
x1 = tf.Variable(1)
print(x1.graph is tf.get_default_graph())

graph = tf.Graph()
with graph.as_default():
    x2 = tf.Variable(2)
print(x2.graph is tf.get_default_graph())

w = tf.constant(3)
x = w + 2
y = x + 5
z = x * 3
with tf.Session() as sess:
    print(y.eval(), z.eval())          # x (= w + 2) is evaluated twice here
with tf.Session() as sess:
    y_val, z_val = sess.run([y, z])    # x is evaluated only once
    print(y_val, z_val)
True
False
10 15
10 15
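One detail the example above leaves implicit: tf.Session() launches the default graph, so a node that belongs to another graph cannot be run in it directly. A minimal sketch (the graph g and constant v are illustrative):

g = tf.Graph()
with g.as_default():
    v = tf.constant(7)
# tf.Session() here would use the default graph and could not run v;
# passing graph=g binds the session to the graph that owns v.
with tf.Session(graph=g) as sess:
    print(sess.run(v))   # 7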
Linear regression with TF
Add a bias column to the raw data, then fit the linear-regression parameters with the normal equation

$\theta = (X^T X)^{-1} X^T y$
from sklearn.datasets import fetch_california_housing

housing = fetch_california_housing(data_home="./dataset")
m, n = housing.data.shape
housing_data_plus_bias = np.c_[np.ones((m, 1)), housing.data]

X = tf.constant(housing_data_plus_bias, dtype=tf.float32, name="X")
y = tf.constant(housing.target.reshape(-1, 1), dtype=tf.float32, name="y")
XT = tf.transpose(X)
theta = tf.matmul(tf.matmul(tf.matrix_inverse(tf.matmul(XT, X)), XT), y)
with tf.Session() as sess:
    print(theta.eval().T)
[[-3.7465141e+01 4.3573415e-01 9.3382923e-03 -1.0662201e-01
6.4410698e-01 -4.2513184e-06 -3.7732250e-03 -4.2664889e-01
-4.4051403e-01]]
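As a sanity check, a sketch of the same normal equation in plain NumPy (reusing housing_data_plus_bias from the cell above); it should agree with the TF result up to float32 precision:

X_np = housing_data_plus_bias
y_np = housing.target.reshape(-1, 1)
# theta = (X^T X)^{-1} X^T y, evaluated with NumPy's float64 linear algebra
theta_np = np.linalg.inv(X_np.T @ X_np) @ X_np.T @ y_np
print(theta_np.T)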
Gradient descent (GD)
Gradients can be computed by hand or left to TF's automatic differentiation; here autodiff turns out to be faster than the hand-written version.
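For this model the hand-derived gradient used below follows from $\mathrm{MSE}(\theta) = \frac{1}{m}\lVert X\theta - y \rVert^2$, whose gradient is $\nabla_\theta \mathrm{MSE} = \frac{2}{m} X^T (X\theta - y)$; this is exactly the 2 / m * tf.matmul(tf.transpose(X), error) line in the code.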
import time
from sklearn.preprocessing import StandardScaler

def reset_graph(seed=42):
    tf.reset_default_graph()
    tf.set_random_seed(seed)
    np.random.seed(seed)

scaler = StandardScaler()
scaled_housing_data = scaler.fit_transform(housing.data)
scaled_housing_data_plus_bias = np.c_[np.ones((scaled_housing_data.shape[0], 1)), scaled_housing_data]

n_epochs = 1000
learning_rate = 0.01

X = tf.constant(scaled_housing_data_plus_bias, dtype=tf.float32, name="X")
y = tf.constant(housing.target.reshape(-1, 1), dtype=tf.float32, name="y")
theta = tf.Variable(tf.random_uniform([n + 1, 1], -1.0, 1.0, seed=42), name="theta")
y_pred = tf.matmul(X, theta, name="predictions")
error = y_pred - y
mse = tf.reduce_mean(tf.square(error), name="mse")
gradients = 2 / m * tf.matmul(tf.transpose(X), error)   # hand-derived MSE gradient
train_op = tf.assign(theta, theta - learning_rate * gradients)

init = tf.global_variables_initializer()
start = time.perf_counter()   # time.clock() was removed in Python 3.8
with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess:
    sess.run(init)
    for epoch in range(n_epochs):
        if epoch % 100 == 0:
            print("epoch:", epoch, "MSE = ", mse.eval())
        sess.run(train_op)
    best_theta = theta.eval()
    print(best_theta.T)
finish = time.perf_counter()
print("manual GD time : ", finish - start)
epoch: 0 MSE = 2.7544262
epoch: 100 MSE = 0.63222194
epoch: 200 MSE = 0.5727803
epoch: 300 MSE = 0.5585008
epoch: 400 MSE = 0.54907
epoch: 500 MSE = 0.54228795
epoch: 600 MSE = 0.5373791
epoch: 700 MSE = 0.53382194
epoch: 800 MSE = 0.53124255
epoch: 900 MSE = 0.5293705
[[ 2.06855226e+00 7.74078071e-01 1.31192401e-01 -1.17845066e-01
1.64778143e-01 7.44081335e-04 -3.91945131e-02 -8.61356676e-01
-8.23479772e-01]]
manual GD time : 1.7389979999999987
import time
from sklearn.preprocessing import StandardScaler

def reset_graph(seed=42):
    tf.reset_default_graph()
    tf.set_random_seed(seed)
    np.random.seed(seed)

scaler = StandardScaler()
scaled_housing_data = scaler.fit_transform(housing.data)
scaled_housing_data_plus_bias = np.c_[np.ones((scaled_housing_data.shape[0], 1)), scaled_housing_data]

n_epochs = 1000
learning_rate = 0.01

X = tf.constant(scaled_housing_data_plus_bias, dtype=tf.float32, name="X")
y = tf.constant(housing.target.reshape(-1, 1), dtype=tf.float32, name="y")
theta = tf.Variable(tf.random_uniform([n + 1, 1], -1.0, 1.0, seed=42), name="theta")
y_pred = tf.matmul(X, theta, name="predictions")
error = y_pred - y
mse = tf.reduce_mean(tf.square(error), name="mse")
gradients = tf.gradients(mse, [theta])[0]   # let TF derive the gradient
train_op = tf.assign(theta, theta - learning_rate * gradients)

init = tf.global_variables_initializer()
start = time.perf_counter()
with tf.Session() as sess:
    sess.run(init)
    for epoch in range(n_epochs):
        if epoch % 100 == 0:
            print("epoch:", epoch, "MSE = ", mse.eval())
        sess.run(train_op)
    best_theta = theta.eval()
    print(best_theta.T)
finish = time.perf_counter()
print("auto diff GD time : ", finish - start)
epoch: 0 MSE = 2.7544262
epoch: 100 MSE = 0.6322219
epoch: 200 MSE = 0.5727803
epoch: 300 MSE = 0.5585008
epoch: 400 MSE = 0.54907
epoch: 500 MSE = 0.54228795
epoch: 600 MSE = 0.5373791
epoch: 700 MSE = 0.53382194
epoch: 800 MSE = 0.5312425
epoch: 900 MSE = 0.5293705
[[ 2.06855226e+00 7.74078071e-01 1.31192386e-01 -1.17845066e-01
1.64778143e-01 7.44077959e-04 -3.91945131e-02 -8.61356676e-01
-8.23479772e-01]]
auto diff GD time : 1.0426760000000002
Computing gradients by hand can be tedious, error-prone, and inefficient, especially for models such as DNNs. Take the function $f(x) = \exp(\exp(\exp(x)))$ as an example: its derivative is $f'(x) = \exp(x) \cdot \exp(\exp(x)) \cdot \exp(\exp(\exp(x)))$, but the efficient way to evaluate this is to compute $\exp(x)$ first and then build the remaining two factors from the previous results; evaluating the symbolic expression naively would recompute $\exp(x)$ several times. TF's reverse-mode autodiff performs exactly this kind of optimization, and tf.gradients makes computing derivatives and partial derivatives convenient. For GD you can code the update rule yourself, but TF also ships many common optimizers, GD among them, and the built-in optimizer runs faster than the hand-rolled version.
reset_graph()

n_epochs = 1000
learning_rate = 0.01

X = tf.constant(scaled_housing_data_plus_bias, dtype=tf.float32, name="X")
y = tf.constant(housing.target.reshape(-1, 1), dtype=tf.float32, name="y")
theta = tf.Variable(tf.random_uniform([n + 1, 1], -1.0, 1.0, seed=42), name="theta")
y_pred = tf.matmul(X, theta, name="predictions")
error = y_pred - y
mse = tf.reduce_mean(tf.square(error), name="mse")
optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
training_op = optimizer.minimize(mse)

init = tf.global_variables_initializer()
start = time.perf_counter()
with tf.Session() as sess:
    sess.run(init)
    for epoch in range(n_epochs):
        if epoch % 100 == 0:
            print("Epoch", epoch, "MSE =", mse.eval())
        sess.run(training_op)
    best_theta = theta.eval()
    print(best_theta.T)
finish = time.perf_counter()
print("time : ", finish - start)
Epoch 0 MSE = 9.161542
Epoch 100 MSE = 0.7145004
Epoch 200 MSE = 0.56670487
Epoch 300 MSE = 0.5555718
Epoch 400 MSE = 0.54881126
Epoch 500 MSE = 0.5436363
Epoch 600 MSE = 0.53962916
Epoch 700 MSE = 0.5365092
Epoch 800 MSE = 0.53406775
Epoch 900 MSE = 0.5321473
[[ 2.0685523 0.8874027 0.14401656 -0.3477088 0.36178365 0.00393811
-0.04269556 -0.66145283 -0.6375278 ]]
time : 0.6122350000000001
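Other built-in optimizers are drop-in replacements for GradientDescentOptimizer. A hedged sketch (the momentum value is illustrative, and mse is the loss defined in the cell above):

optimizer = tf.train.MomentumOptimizer(learning_rate=0.01, momentum=0.9)
training_op = optimizer.minimize(mse)   # the training loop stays unchanged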
a = tf.Variable(1, dtype=tf.float32)
b = tf.Variable(2, dtype=tf.float32)
z = tf.pow(a, 2) + 3 * b
init = tf.global_variables_initializer()
c = tf.gradients(z, [a, b])   # [dz/da, dz/db] = [2a, 3]
with tf.Session() as sess:
    init.run()
    print(sess.run(c))
    print(len(c), c)
[2.0, 3.0]
2 [<tf.Tensor 'gradients_1/Pow_grad/Reshape:0' shape=() dtype=float32>, <tf.Tensor 'gradients_1/mul_grad/Reshape_1:0' shape=() dtype=float32>]
Feeding data into the training algorithm
The GD code above consumes the whole dataset at every step. With mini-batch GD, each iteration trains on only part of the data, so at every iteration X and y must be replaced with new values. TF's placeholder implements exactly this, and placeholder shapes can be specified quite loosely (a dimension can be left as None).
A = tf.placeholder(tf.float32, shape=(None, 3))
B = A + 5
with tf.Session() as sess:
    print(B.eval(feed_dict={A: [[1, 2, 3]]}))
    print(B.eval(feed_dict={A: [[4, 5, 6], [7, 8, 9]]}))
[[6. 7. 8.]]
[[ 9. 10. 11.]
[12. 13. 14.]]
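A placeholder has no value of its own, so evaluating a node that depends on one without feeding it fails. A minimal sketch, reusing A and B from above:

with tf.Session() as sess:
    try:
        B.eval()   # nothing fed for A
    except tf.errors.InvalidArgumentError:
        print("placeholder A must be fed through feed_dict")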
reset_graph()

learning_rate = 0.01

X = tf.placeholder(tf.float32, shape=(None, n + 1), name="X")
y = tf.placeholder(tf.float32, shape=(None, 1), name="y")
theta = tf.Variable(tf.random_uniform([n + 1, 1], -1.0, 1.0, seed=42), name="theta")
y_pred = tf.matmul(X, theta, name="predictions")
error = y_pred - y
mse = tf.reduce_mean(tf.square(error), name="mse")
optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
training_op = optimizer.minimize(mse)

init = tf.global_variables_initializer()

n_epochs = 10
batch_size = 100
n_batches = int(np.ceil(m / batch_size))

def fetch_batch(epoch, batch_index, batch_size):
    np.random.seed(epoch * n_batches + batch_index)   # reproducible batches
    indices = np.random.randint(m, size=batch_size)   # sampled with replacement
    X_batch = scaled_housing_data_plus_bias[indices]
    y_batch = housing.target.reshape(-1, 1)[indices]
    return X_batch, y_batch

with tf.Session() as sess:
    sess.run(init)
    for epoch in range(n_epochs):
        for batch_index in range(n_batches):
            X_batch, y_batch = fetch_batch(epoch, batch_index, batch_size)
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})
    best_theta = theta.eval()
    print(best_theta.T)
[[ 2.0703337 0.8637145 0.12255152 -0.31211877 0.38510376 0.00434168
-0.0123295 -0.83376896 -0.8030471 ]]
Saving a trained model
reset_graph()

n_epochs = 1000
learning_rate = 0.01

X = tf.constant(scaled_housing_data_plus_bias, dtype=tf.float32, name="X")
y = tf.constant(housing.target.reshape(-1, 1), dtype=tf.float32, name="y")
theta = tf.Variable(tf.random_uniform([n + 1, 1], -1.0, 1.0, seed=42), name="theta")
y_pred = tf.matmul(X, theta, name="predictions")
error = y_pred - y
mse = tf.reduce_mean(tf.square(error), name="mse")
optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
training_op = optimizer.minimize(mse)

init = tf.global_variables_initializer()
saver = tf.train.Saver()

with tf.Session() as sess:
    sess.run(init)
    for epoch in range(n_epochs):
        if epoch % 100 == 0:
            print("Epoch", epoch, "MSE =", mse.eval())
            save_path = saver.save(sess, "models/my_model.ckpt")   # periodic checkpoint
        sess.run(training_op)
    best_theta = theta.eval()
    save_path = saver.save(sess, "models/my_model_final.ckpt")
Epoch 0 MSE = 9.161542
Epoch 100 MSE = 0.7145004
Epoch 200 MSE = 0.56670487
Epoch 300 MSE = 0.5555718
Epoch 400 MSE = 0.54881126
Epoch 500 MSE = 0.5436363
Epoch 600 MSE = 0.53962916
Epoch 700 MSE = 0.5365092
Epoch 800 MSE = 0.53406775
Epoch 900 MSE = 0.5321473
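Restoring goes through the same Saver; a sketch assuming the graph, saver, theta, and best_theta from the cell above are still in place:

with tf.Session() as sess:
    saver.restore(sess, "models/my_model_final.ckpt")   # no init needed; restore sets theta
    restored_theta = theta.eval()
    print(np.allclose(restored_theta, best_theta))      # True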
Graph visualization and TensorBoard basics
TensorFlow can visualize the logs it writes during computation and display the structure of the graph. To reach a TensorBoard instance running on a remote server, build an SSH tunnel from a local port to the remote port, which makes the remote port accessible locally. Setting up the tunnel with putty is described here: http://blog.csdn.net/lifan5/article/details/33709271 (with OpenSSH the equivalent is roughly ssh -L 16006:127.0.0.1:6006 user@server, where user@server stands in for your own login). To keep log files from different runs apart, use a timestamp in the log-directory name. Once the code below has produced its logs, run tensorboard --logdir="./" in a shell on the server, then open 127.0.0.1:16006 in a local browser to see TensorBoard. Later models such as DNNs have many interleaved nodes, which makes the visualized graph very cluttered; hence the notion of a name_scope: nodes in different name scopes are not drawn tangled together, and the nodes within one name scope are collapsed into a single node.
reset_graph()

from datetime import datetime

now = datetime.utcnow().strftime("%Y%m%d%H%M%S")
root_logdir = "tf_logs/mse"
logdir = "{}/run-{}/".format(root_logdir, now)   # timestamped run directory

learning_rate = 0.01

X = tf.placeholder(tf.float32, shape=(None, n + 1), name="X")
y = tf.placeholder(tf.float32, shape=(None, 1), name="y")
theta = tf.Variable(tf.random_uniform([n + 1, 1], -1.0, 1.0, seed=42), name="theta")
y_pred = tf.matmul(X, theta, name="predictions")
with tf.name_scope("loss_test") as scope:
    error = y_pred - y
    mse = tf.reduce_mean(tf.square(error), name="mse")
optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
training_op = optimizer.minimize(mse)

init = tf.global_variables_initializer()
mse_summary = tf.summary.scalar('MSE', mse)
file_writer = tf.summary.FileWriter(logdir, tf.get_default_graph())

n_epochs = 10
batch_size = 100
n_batches = int(np.ceil(m / batch_size))

with tf.Session() as sess:
    sess.run(init)
    for epoch in range(n_epochs):
        for batch_index in range(n_batches):
            X_batch, y_batch = fetch_batch(epoch, batch_index, batch_size)
            if batch_index % 10 == 0:
                summary_str = mse_summary.eval(feed_dict={X: X_batch, y: y_batch})
                step = epoch * n_batches + batch_index
                file_writer.add_summary(summary_str, step)
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})
    best_theta = theta.eval()

file_writer.close()
print(mse_summary.op.name)

with tf.name_scope("loss") as scope:
    test_loss = tf.Variable(1, dtype=tf.float32)
print(test_loss.op.name)
MSE
loss/Variable
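A related detail: opening a name scope with a name that already exists does not merge into it but appends a suffix. A minimal sketch on a fresh graph:

tf.reset_default_graph()
with tf.name_scope("loss"):
    c1 = tf.constant(1, name="c")
with tf.name_scope("loss"):
    c2 = tf.constant(1, name="c")
print(c1.op.name, c2.op.name)   # loss/c loss_1/c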
Modular code and shared variables in TF
Some functions, ReLU for instance, are tedious to implement from scratch every time; you can call TF's built-in ops or wrap the computation in a function of your own.
Building such functions inside a name_scope greatly simplifies the visualized graph.
get_variable works as follows: if the shared variable does not exist yet, it is created; if it does, it is reused.
tf.Variable creates a brand-new variable on every call, whereas get_variable first searches for and reuses an existing variable; if two variables with the same name appear in one scope, tf.Variable disambiguates them through a renaming mechanism, while get_variable raises an error instead (a minimal sketch of this renaming-vs-sharing difference follows below).
name_scope exists to make the ops in the visualized graph clearer; variable_scope exists to manage and share variables, and is the more important of the two. Roughly: name_scope is for ops, variable_scope is for variables.
For convenience, get_variable can also be called directly inside the scope that owns the shared variable.
Reference: https://www.zhihu.com/question/54513728
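A minimal sketch of the renaming-vs-sharing behaviour described above (names are illustrative):

tf.reset_default_graph()
v1 = tf.Variable(0.0, name="v")
v2 = tf.Variable(0.0, name="v")
print(v1.name, v2.name)          # v:0 v_1:0 -- tf.Variable silently renames
with tf.variable_scope("s"):
    g1 = tf.get_variable("v", shape=(), initializer=tf.zeros_initializer())
    # tf.get_variable("v", shape=()) again here would raise ValueError
with tf.variable_scope("s", reuse=True):
    g2 = tf.get_variable("v")    # returns the existing variable
print(g1 is g2)                  # True -- shared, not duplicated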
reset_graph()

def relu(X):
    w_shape = (int(X.shape[1]), 1)
    w = tf.Variable(tf.random_normal(w_shape), name="weights")
    b = tf.Variable(0.0, name="bias")
    z = tf.add(tf.matmul(X, w), b, name="z")
    return tf.maximum(z, 0, name="relu")

n_features = 3
X = tf.placeholder(tf.float32, shape=(None, n_features), name="X")
relus = [relu(X) for i in range(5)]
output = tf.add_n(relus, name="output")
print(output)
file_writer = tf.summary.FileWriter("tf_logs/relu1", tf.get_default_graph())
Tensor("output:0", shape=(?, 1), dtype=float32)
reset_graph()

def relu(X):
    with tf.name_scope("relu"):
        w_shape = (int(X.shape[1]), 1)
        w = tf.Variable(tf.random_normal(w_shape), name="weights")
        b = tf.Variable(0.0, name="bias")
        z = tf.add(tf.matmul(X, w), b, name="z")
        return tf.maximum(z, 0, name="relu")

n_features = 3
X = tf.placeholder(tf.float32, shape=(None, n_features), name="X")
relus = [relu(X) for i in range(5)]
output = tf.add_n(relus, name="output")
print(output)
file_writer = tf.summary.FileWriter("tf_logs/relu2", tf.get_default_graph())
Tensor("output:0", shape=(?, 1), dtype=float32)
reset_graph()

def relu(X, thres):
    with tf.name_scope("relu"):
        w_shape = (int(X.shape[1]), 1)
        w = tf.Variable(tf.random_normal(w_shape), name="weights")
        b = tf.Variable(0.0, name="bias")
        z = tf.add(tf.matmul(X, w), b, name="z")
        return tf.maximum(z, thres, name="relu")

threshold = tf.Variable(0.0, name="threshold")   # one shared threshold, passed in explicitly
X = tf.placeholder(tf.float32, shape=(None, n_features), name="X")
relus = [relu(X, threshold) for i in range(5)]
output = tf.add_n(relus, name="output")
file_writer = tf.summary.FileWriter("tf_logs/relu3", tf.get_default_graph())
reset_graph()

def relu(X):
    with tf.variable_scope("relu", reuse=True):
        threshold = tf.get_variable("threshold")   # reuse the shared variable
        w_shape = int(X.get_shape()[1]), 1
        w = tf.Variable(tf.random_normal(w_shape), name="weights")
        b = tf.Variable(0.0, name="bias")
        z = tf.add(tf.matmul(X, w), b, name="z")
        return tf.maximum(z, threshold, name="max")

X = tf.placeholder(tf.float32, shape=(None, n_features), name="X")
with tf.variable_scope("relu"):
    threshold = tf.get_variable("threshold", shape=(),
                                initializer=tf.constant_initializer(0.0))   # create it first
relus = [relu(X) for relu_index in range(5)]
output = tf.add_n(relus, name="output")
file_writer = tf.summary.FileWriter("tf_logs/relu4", tf.get_default_graph())
reset_graph()

def relu(X):
    threshold = tf.get_variable("threshold", shape=(),
                                initializer=tf.constant_initializer(0.0))
    w_shape = (int(X.get_shape()[1]), 1)
    w = tf.Variable(tf.random_normal(w_shape), name="weights")
    b = tf.Variable(0.0, name="bias")
    z = tf.add(tf.matmul(X, w), b, name="z")
    return tf.maximum(z, threshold, name="max")

X = tf.placeholder(tf.float32, shape=(None, n_features), name="X")
relus = []
for relu_index in range(5):
    with tf.variable_scope("relu", reuse=(relu_index >= 1)) as scope:   # create once, then reuse
        relus.append(relu(X))
output = tf.add_n(relus, name="output")
file_writer = tf.summary.FileWriter("tf_logs/relu5", tf.get_default_graph())
file_writer.close()
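A hedged variant of the loop above: open the scope once and switch it to reuse mode after the first call via scope.reuse_variables(), assuming the relu helper just defined:

reset_graph()

X = tf.placeholder(tf.float32, shape=(None, n_features), name="X")
with tf.variable_scope("relu") as scope:
    relus = [relu(X)]            # first call creates relu/threshold
    scope.reuse_variables()      # subsequent calls reuse it
    relus += [relu(X) for _ in range(4)]
output = tf.add_n(relus, name="output")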
The relationship between Session and Graph in TF
with tf.Graph().as_default():
    a = tf.constant([5], name='a')
    b = tf.constant([5], name='b')
    init = tf.global_variables_initializer()
    with tf.Session() as sess:
        sess.run(init)
        print(sess.run(b))
[5]
# A Session launches the default graph unless one is passed explicitly;
# b lives in the graph built above, so bind the sessions to b.graph.
s1 = tf.Session(graph=b.graph)
s1.run(init)
print(s1.run(b))

s2 = tf.Session(graph=b.graph)
s2.run(init)
print(s2.run(b))

s1.close()
s2.close()
[5]
[5]