Basic Usage
- Use a graph to represent the computation task.
- Execute the graph in a context called a Session.
- Use tensors to represent data.
- Maintain state with Variables.
- Use feed and fetch to supply values to, or retrieve data from, arbitrary operations (ops); see the sketch after this list.
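A minimal sketch of feed and fetch (the names are illustrative): values are fed into placeholder ops through feed_dict, and several ops are fetched in a single run call.

import tensorflow as tf

a = tf.placeholder(tf.float32)
b = tf.placeholder(tf.float32)
add = a + b
mul = a * b

with tf.Session() as sess:
    # feed: supply values for the placeholders; fetch: retrieve both results at once
    add_val, mul_val = sess.run([add, mul], feed_dict={a: 3.0, b: 4.0})
    print(add_val, mul_val)  # 7.0 12.0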
Overview
TensorFlow is a programming system that uses graphs to represent computation tasks. The nodes in a graph are called ops (short for operations). An op takes zero or more Tensors, performs some computation, and produces zero or more Tensors. Each Tensor is a typed multi-dimensional array. For example, a mini-batch of images can be represented as a four-dimensional array of floating-point numbers, whose four dimensions are [batch, height, width, channels].
A TensorFlow graph describes a computation; to actually compute anything, the graph must be launched in a Session. The Session places the graph's ops onto devices such as CPUs or GPUs and provides methods to execute them. These methods return the tensors that are produced: in Python the returned tensor is a numpy ndarray object, while in C and C++ it is a tensorflow::Tensor instance.
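To make this concrete, here is a minimal sketch that builds a two-op graph, launches it in a Session, and checks that the fetched result is indeed a numpy ndarray:

import tensorflow as tf
import numpy as np

# two constant ops feed a matmul op; nothing is computed yet
m1 = tf.constant([[3.0, 3.0]])
m2 = tf.constant([[2.0], [2.0]])
product = tf.matmul(m1, m2)

with tf.Session() as sess:
    result = sess.run(product)             # the graph is executed here
    print(result)                          # [[12.]]
    print(isinstance(result, np.ndarray))  # True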
MNIST
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data

print(tf.__version__)

# 1. create data
mnist = input_data.read_data_sets('../MNIST_data', one_hot=True)

with tf.variable_scope('Input'):
    tf_x = tf.placeholder(tf.float32, [None, 28 * 28], name='x')
    image = tf.reshape(tf_x, [-1, 28, 28, 1], name='image')
    tf_y = tf.placeholder(tf.float32, [None, 10], name='y')
    is_training = tf.placeholder(tf.bool, None)

# 2. define network
with tf.variable_scope('Net'):
    """
    'SAME' padding:
        out_height = ceil(in_height / strides[1])   # ceil rounds up
        out_width  = ceil(in_width / strides[2])
    'VALID' padding:
        out_height = ceil((in_height - filter_height + 1) / strides[1])
        out_width  = ceil((in_width - filter_width + 1) / strides[2])
    """
    conv1 = tf.layers.conv2d(inputs=image, filters=32, kernel_size=5,
                             strides=1, padding='same',
                             activation=tf.nn.relu)                           # -> (28, 28, 32)
    pool1 = tf.layers.max_pooling2d(inputs=conv1, pool_size=2, strides=2)     # -> (14, 14, 32)
    conv2 = tf.layers.conv2d(pool1, 64, 3, 1, 'same', activation=tf.nn.relu)  # -> (14, 14, 64)
    pool2 = tf.layers.max_pooling2d(conv2, 2, 2)                              # -> (7, 7, 64)
    pool2_flat = tf.reshape(pool2, [-1, 7 * 7 * 64])
    fc1 = tf.layers.dense(pool2_flat, 1024, tf.nn.relu)
    fc1 = tf.layers.dropout(fc1, rate=0.5, training=is_training)
    # output raw logits: softmax_cross_entropy below applies softmax itself,
    # so adding tf.nn.softmax here would apply it twice
    predict = tf.layers.dense(fc1, 10)

# 3. define loss
with tf.name_scope('loss'):
    loss = tf.losses.softmax_cross_entropy(onehot_labels=tf_y, logits=predict)
    tf.summary.scalar('loss', loss)

with tf.name_scope('accuracy'):
    # tf.metrics.accuracy() returns (accuracy, update_op);
    # [1] is the running accuracy after updating with the current batch
    accuracy = tf.metrics.accuracy(labels=tf.argmax(tf_y, axis=1),
                                   predictions=tf.argmax(predict, axis=1))[1]
    tf.summary.scalar('accuracy', accuracy)

# 4. define optimizer
with tf.name_scope('train'):
    optimizer = tf.train.AdamOptimizer(1e-4).minimize(loss)

# 5. initialize
init_op = tf.group(tf.global_variables_initializer(),
                   tf.local_variables_initializer())

# 6. train
saver = tf.train.Saver()
with tf.Session() as sess:
    sess.run(init_op)
    # =================
    merge_op = tf.summary.merge_all()
    train_writer = tf.summary.FileWriter('logs/train', sess.graph)
    test_writer = tf.summary.FileWriter('logs/test', sess.graph)
    # tensorboard --logdir=logs
    # =================
    for step in range(10001):
        batch_x, batch_y = mnist.train.next_batch(100)
        _, ls, train_output = sess.run([optimizer, loss, merge_op],
                                       feed_dict={tf_x: batch_x, tf_y: batch_y,
                                                  is_training: True})
        if step % 100 == 0:
            acc_test, test_output = sess.run([accuracy, merge_op],
                                             feed_dict={tf_x: mnist.test.images,
                                                        tf_y: mnist.test.labels,
                                                        is_training: False})
            print('Step:', step, '| train loss: %.4f' % ls,
                  '| test accuracy: %.2f' % acc_test)
            # reset the metric's local variables; without this line,
            # accuracy would be a cumulative average over all evaluations
            sess.run(tf.local_variables_initializer())
            train_writer.add_summary(train_output, step)
            test_writer.add_summary(test_output, step)
    save_path = saver.save(sess, './cnn_mnist.ckpt')

with tf.Session() as sess:
    sess.run(init_op)
    saver.restore(sess, './cnn_mnist.ckpt')
    acc_test = sess.run(accuracy, feed_dict={tf_x: mnist.test.images,
                                             tf_y: mnist.test.labels,
                                             is_training: False})
    print(acc_test)
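As a quick sanity check of the padding formulas in the comments above, this standalone sketch (with a hypothetical dummy input) prints the static output shapes for 'same' and 'valid' padding:

import tensorflow as tf

x = tf.placeholder(tf.float32, [None, 28, 28, 1])  # dummy 28x28 single-channel input
same = tf.layers.conv2d(x, filters=32, kernel_size=5, strides=1, padding='same')
valid = tf.layers.conv2d(x, filters=32, kernel_size=5, strides=1, padding='valid')
print(same.shape)   # (?, 28, 28, 32): ceil(28 / 1) = 28
print(valid.shape)  # (?, 24, 24, 32): ceil((28 - 5 + 1) / 1) = 24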
Constants: constant
tf.constant(
    value,
    dtype=None,
    shape=None,
    name='Const',
    verify_shape=False
)
Example:
import tensorflow as tf
import numpy as np

def my_func(arg):
    arg = tf.convert_to_tensor(arg, dtype=tf.float32)
    return tf.matmul(arg, arg) + arg

# The following calls are equivalent.
value_1 = my_func(tf.constant([[1.0, 2.0], [3.0, 4.0]]))
value_2 = my_func([[1.0, 2.0], [3.0, 4.0]])
value_3 = my_func(np.array([[1.0, 2.0], [3.0, 4.0]], dtype=np.float32))

with tf.Session() as sess:
    print(value_1.eval())
    print(value_2.eval())
    print(value_3.eval())
'''
[[ 8. 12.]
 [18. 26.]]
[[ 8. 12.]
 [18. 26.]]
[[ 8. 12.]
 [18. 26.]]
'''
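Per the signature above, tf.constant can also fill a given shape from a scalar value; a small sketch:

import tensorflow as tf

# the scalar -1.0 is tiled to fill the requested [2, 3] shape
c = tf.constant(-1.0, shape=[2, 3])
with tf.Session() as sess:
    print(sess.run(c))
    # [[-1. -1. -1.]
    #  [-1. -1. -1.]]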
Variables: Variable
A variable starts its life when its initializer is run, and it ends when the session is closed.
__init__(
    initial_value=None,
    trainable=None,
    collections=None,
    validate_shape=True,
    caching_device=None,
    name=None,
    variable_def=None,
    dtype=None,
    expected_shape=None,
    import_scope=None,
    constraint=None,
    use_resource=None,
    synchronization=tf.VariableSynchronization.AUTO,
    aggregation=tf.VariableAggregation.NONE,
    shape=None
)
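A minimal sketch of the lifetime rule stated above: reading a variable before its initializer has run raises an error, and its value disappears with the session.

import tensorflow as tf

v = tf.Variable(3.0, name='v')
with tf.Session() as sess:
    # sess.run(v) here would raise FailedPreconditionError: v is uninitialized
    sess.run(v.initializer)  # the variable's life starts now
    print(sess.run(v))       # 3.0
# the session is closed; the variable's value is gone with it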
Taking linear regression as an example:
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt

print(tf.__version__)

# 1. create data
np.random.seed(0)
n = 100
x_data = np.random.rand(n).astype(np.float32)
y_data = 0.1 * x_data + 0.3 + np.random.randint(-20, 20, size=(n)) / 10000.0

# 2. create tensorflow structure
weights = tf.Variable(tf.random_uniform([1], -1.0, 1.0))  # random initialization
biases = tf.Variable(tf.zeros([1]))

# 3. define loss function
y = weights * x_data + biases
loss = tf.reduce_mean(tf.square(y - y_data))

# 4. define optimizer
optimizer = tf.train.GradientDescentOptimizer(0.5).minimize(loss)

# 5. initialize variables
init = tf.global_variables_initializer()

# ============================== config GPU
config = tf.ConfigProto()
config.gpu_options.allow_growth = True
# ==============================
sess = tf.Session(config=config)
sess.run(init)

# 6. train
with sess.as_default():
    for step in range(201):
        train_loss, wcoeff, bias, _ = sess.run([loss, weights, biases, optimizer])
        if step % 20 == 0:
            print(step, train_loss, wcoeff, bias)

# ================================================================ #
#                        Evaluate and plot                         #
# ================================================================ #
Input_values, Labels = x_data, y_data
Prediction_values = x_data * wcoeff + bias
plt.plot(Input_values, Labels, 'ro', label='main')
plt.plot(Input_values, Prediction_values, label='Predicted')

# show the result
plt.legend()
plt.show()
plt.close()
Placeholders: placeholder
tf.placeholder(
    dtype,
    shape=None,
    name=None
)
Example code:
import tensorflow as tf
import numpy as np

x = tf.placeholder(tf.float32, shape=(1024, 1024))
y = tf.matmul(x, x)

with tf.Session() as sess:
    rand_array = np.random.rand(1024, 1024)
    print(sess.run(y, feed_dict={x: rand_array}))
Adding a dimension: expand_dims
tf.expand_dims(
    input,
    axis=None,
    name=None,
    dim=None
)
Example code:
'''
# 't' is a tensor of shape [2]
tf.shape(tf.expand_dims(t, 0))   # [1, 2]
tf.shape(tf.expand_dims(t, 1))   # [2, 1]
tf.shape(tf.expand_dims(t, -1))  # [2, 1]

# 't2' is a tensor of shape [2, 3, 5]
tf.shape(tf.expand_dims(t2, 0))  # [1, 2, 3, 5]
tf.shape(tf.expand_dims(t2, 2))  # [2, 3, 1, 5]
tf.shape(tf.expand_dims(t2, 3))  # [2, 3, 5, 1]
'''
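A common use is adding a batch axis to a single example before feeding it to a model; a runnable sketch:

import tensorflow as tf
import numpy as np

image = np.zeros((28, 28, 1), dtype=np.float32)  # one image, no batch axis
batched = tf.expand_dims(image, 0)               # insert a new axis 0

with tf.Session() as sess:
    print(sess.run(tf.shape(batched)))  # [ 1 28 28  1]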
reshape
tf.reshape(
    tensor,
    shape,
    name=None
)
Example code:
# tensor 't' is [1, 2, 3, 4, 5, 6, 7, 8, 9]
# tensor 't' has shape [9]
reshape(t, [3, 3]) ==> [[1, 2, 3],
                        [4, 5, 6],
                        [7, 8, 9]]

# tensor 't' is [[[1, 1], [2, 2]],
#                [[3, 3], [4, 4]]]
# tensor 't' has shape [2, 2, 2]
reshape(t, [2, 4]) ==> [[1, 1, 2, 2],
                        [3, 3, 4, 4]]

# tensor 't' is [[[1, 1, 1],
#                 [2, 2, 2]],
#                [[3, 3, 3],
#                 [4, 4, 4]],
#                [[5, 5, 5],
#                 [6, 6, 6]]]
# tensor 't' has shape [3, 2, 3]
# pass '[-1]' to flatten 't'
reshape(t, [-1]) ==> [1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 6]

# -1 can also be used to infer the shape
# -1 is inferred to be 9:
reshape(t, [2, -1]) ==> [[1, 1, 1, 2, 2, 2, 3, 3, 3],
                         [4, 4, 4, 5, 5, 5, 6, 6, 6]]
# -1 is inferred to be 2:
reshape(t, [-1, 9]) ==> [[1, 1, 1, 2, 2, 2, 3, 3, 3],
                         [4, 4, 4, 5, 5, 5, 6, 6, 6]]
# -1 is inferred to be 3:
reshape(t, [2, -1, 3]) ==> [[[1, 1, 1],
                             [2, 2, 2],
                             [3, 3, 3]],
                            [[4, 4, 4],
                             [5, 5, 5],
                             [6, 6, 6]]]

# tensor 't' is [7]
# shape `[]` reshapes to a scalar
reshape(t, []) ==> 7
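Note that -1 may appear at most once in shape, since only one dimension can be inferred. A quick runnable check of the inference:

import tensorflow as tf

t = tf.range(18)               # 18 elements
r = tf.reshape(t, [2, -1, 3])  # -1 is inferred as 18 / (2 * 3) = 3

with tf.Session() as sess:
    print(sess.run(tf.shape(r)))  # [2 3 3]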
Variable sharing
with tf.variable_scope("my_scope"): x0 = tf.get_variable("x", shape=(), initializer=tf.constant_initializer(0.)) x1 = tf.Variable(0., name="x") x2 = tf.Variable(0., name="x") with tf.variable_scope("my_scope", reuse=True): x3 = tf.get_variable("x") x4 = tf.Variable(0., name="x") with tf.variable_scope("", default_name="", reuse=True): x5 = tf.get_variable("my_scope/x") print("x0:", x0.op.name) # x0: my_scope/x print("x1:", x1.op.name) # x1: my_scope/x_1 print("x2:", x2.op.name) # x2: my_scope/x_2 print("x3:", x3.op.name) # x3: my_scope/x print("x4:", x4.op.name) # x4: my_scope_1/x print("x5:", x5.op.name) # x5: my_scope/x print(x0 is x3 and x3 is x5) # True """ 第一个 variable_scope() 首先创建了一个共享变量 x0,即 my_scope/x. 对于除共享变量以外的所有操作来说, variable scope 实际上只相当于 name scope,因此随后创建的两个变量 x1, x2 名为 my_scope/x_1, my_scope/x_2. 第二个 variable_scope() 首先重用了 my_scope 范围里的共享变量,这里 x3 就是 x0. 同样的,对于其他非共享 变量来说 variable scope 只是个 name scope,而又因为和第一个variable_scope() 分开与不同的 block, 因此 x4 被命名为 my_scope_1/x. 第三个 variable_scope() 展示了获取共享变量 my_scope/x 的另一种方法,即在根作用域上创建一个variable_scope() """
Type casting
# tensor `a` is [1.8, 2.2], dtype=tf.float32
tf.cast(a, tf.int32)  # ==> [1, 2], dtype=tf.int32
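A runnable version of the cast above; note that tf.cast truncates toward zero rather than rounding:

import tensorflow as tf

a = tf.constant([1.8, 2.2], dtype=tf.float32)
b = tf.cast(a, tf.int32)  # fractional parts are dropped, not rounded

with tf.Session() as sess:
    print(sess.run(b))  # [1 2]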