Merge and Split
- tf.concat
- tf.split
- tf.stack
- tf.unstack
tf.concat
- Joins tensors along an existing dim/axis; all other dims must match
# tf.concat
a = tf.ones([4, 35, 8])
b = tf.ones([2, 35, 8])
c = tf.concat([a, b], axis=0)  # -> [6, 35, 8]

a = tf.ones([4, 32, 8])
b = tf.ones([4, 3, 8])
d = tf.concat([a, b], axis=1)  # -> [4, 35, 8]
tf.split
# tf.split
a = tf.ones([4, 35, 8])
b = tf.ones([4, 35, 8])
c = tf.stack([a, b])  # -> [2, 4, 35, 8]
res = tf.split(c, axis=3, num_or_size_splits=2)          # 2 tensors of [2, 4, 35, 4]
res = tf.split(c, axis=3, num_or_size_splits=[2, 2, 4])  # tensors of width 2, 2, 4 along axis 3
tf.stack
- Creates a new dim; all inputs must have identical shapes
# tf.stack
a = tf.ones([4, 35, 8])
b = tf.ones([4, 35, 8])
c = tf.concat([a, b], axis=-1)  # -> [4, 35, 16], no new dim
d = tf.stack([a, b], axis=0)    # -> [2, 4, 35, 8]
e = tf.stack([a, b], axis=3)    # -> [4, 35, 8, 2]
tf.unstack
# tf.unstack
a = tf.ones([4, 35, 8])
b = tf.ones([4, 35, 8])
c = tf.stack([a, b])            # -> [2, 4, 35, 8]
aa, bb = tf.unstack(c, axis=0)  # two tensors of [4, 35, 8]
res = tf.unstack(c, axis=3)     # list of 8 tensors of [2, 4, 35]
Data Statistics
- tf.norm: vector norm
- tf.reduce_min/max/mean
- tf.argmax/argmin
- tf.equal
- tf.unique
Vector Norm
Euclidean norm: $||x||_2 = \left(\sum_k x_k^2\right)^{1/2}$
Max norm: $||x||_\infty = \max_k |x_k|$
$L_1$ norm: $||x||_1 = \sum_k |x_k|$
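For example, with $x = (1, -2, 2)$: $||x||_2 = \sqrt{1+4+4} = 3$, $||x||_\infty = 2$, and $||x||_1 = 5$.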
tf.norm
a = tf.ones([2, 2])
tf.norm(a)  # Euclidean norm over all entries -> 2.0
tf.sqrt(tf.reduce_sum(tf.square(a)))  # same result, written out

b = tf.ones([4, 28, 28, 3])
tf.norm(b)
tf.sqrt(tf.reduce_sum(tf.square(b)))  # equals tf.norm(b)
L1 Norm
a = tf.ones([2, 2])
tf.norm(a)                 # L2 norm over the whole tensor -> 2.0
tf.norm(a, ord=2, axis=1)  # row-wise L2 norm -> [1.414, 1.414]
tf.norm(a, ord=1)          # L1 norm over the whole tensor -> 4.0
tf.norm(a, ord=1, axis=0)  # column-wise L1 norm -> [2., 2.]
tf.norm(a, ord=1, axis=1)  # row-wise L1 norm -> [2., 2.]
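The max norm from the table above can be requested the same way; a minimal sketch, assuming NumPy is available (tf.norm follows NumPy's ord convention here):

import numpy as np

tf.norm(a, ord=np.inf, axis=1)  # max_k |x_k| per row -> [1., 1.]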
reduce_min/max/mean
a = tf.random.normal([4, 10])
tf.reduce_mean(a), tf.reduce_min(a), tf.reduce_max(a)  # scalars: reduce over all dims
tf.reduce_mean(a, axis=1), tf.reduce_min(a, axis=1), tf.reduce_max(a, axis=1)  # shape [4]: reduce along dim 1
argmax/argmin
a = tf.random.normal([4, 10])
a.shape             # [4, 10]
tf.argmax(a).shape  # [10]: argmax defaults to axis=0
tf.argmax(a)        # index of the max along each column
tf.argmin(a)        # index of the min along each column
tf.argmin(a).shape  # [10]
tf.equal
a = tf.constant([1, 2, 3, 2, 5])
b = tf.range(5)  # [0, 1, 2, 3, 4]
tf.equal(a, b)   # element-wise bool comparison
res = tf.equal(a, b)
tf.reduce_sum(tf.cast(res, dtype=tf.int32))  # count the matches -> 0 here
tf.unique
a = tf.range(5)
tf.unique(a)  # already unique: y = [0, 1, 2, 3, 4], idx = [0, 1, 2, 3, 4]

a = tf.constant([4, 2, 2, 4, 3])
tf.unique(a)  # y = [4, 2, 3], idx = [0, 1, 1, 0, 2]
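tf.unique returns both the unique values and an index mapping back into them, so the original tensor can be rebuilt with tf.gather; a quick check:

u, idx = tf.unique(tf.constant([4, 2, 2, 4, 3]))
tf.gather(u, idx)  # -> [4, 2, 2, 4, 3], reconstructs the input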
Tensor Sorting
- Sort / argsort
- Top-k
- Top-5 Acc.
Sort / argsort
a = tf.random.shuffle(tf.range(5))
tf.sort(a, direction='DESCENDING')     # sorted values
tf.argsort(a, direction='DESCENDING')  # indices that would sort a
idx = tf.argsort(a, direction='DESCENDING')
tf.gather(a, idx)  # gathering by the argsort indices recovers the sorted values

a = tf.random.uniform([3, 3], maxval=10, dtype=tf.int32)
tf.sort(a)  # sorts each row (last axis) ascending by default
tf.sort(a, direction='DESCENDING')
idx = tf.argsort(a)
Top-k
Returns only the top-k values and their indices.
a = tf.random.uniform([3, 3], maxval=10, dtype=tf.int32)
res = tf.math.top_k(a, 2)  # top-2 along the last axis
res.indices  # shape [3, 2]
res.values   # shape [3, 2]
Top-k accuracy
- Prob: [0.1, 0.2, 0.3, 0.4]
- Label: [2]
- Only consider top-1 prediction: [3] → miss
- Only consider top-2 prediction: [3, 2] → hit
- Only consider top-3 prediction: [3, 2, 1] → hit
prob = tf.constant([[0.1, 0.2, 0.7], [0.2, 0.7, 0.1]])
target = tf.constant([2, 0])
k_b = tf.math.top_k(prob, 3).indices      # [2, 3]: top-3 class indices per sample
k_b = tf.transpose(k_b, [1, 0])           # [3, 2]: rank-major layout
target = tf.broadcast_to(target, [3, 2])  # repeat the labels so each rank can be compared
def accuracy(output, target, topk=(1,)):
    maxk = max(topk)
    batch_size = target.shape[0]

    pred = tf.math.top_k(output, maxk).indices
    pred = tf.transpose(pred, perm=[1, 0])
    target_ = tf.broadcast_to(target, pred.shape)
    correct = tf.equal(pred, target_)

    res = []
    for k in topk:
        correct_k = tf.cast(tf.reshape(correct[:k], [-1]), dtype=tf.float32)
        correct_k = tf.reduce_sum(correct_k)
        acc = float(correct_k / batch_size)
        res.append(acc)
    return res
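A quick sanity check of the helper, reusing the prob/target pair from the walk-through above (the expected values follow directly from the top-k indices worked out there):

prob = tf.constant([[0.1, 0.2, 0.7], [0.2, 0.7, 0.1]])
target = tf.constant([2, 0])
print(accuracy(prob, target, topk=(1, 2, 3)))  # -> [0.5, 1.0, 1.0]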
Padding and Tiling
- pad
- tile
- broadcast_to
pad
a = tf.reshape(tf.range(9), [3, 3])
# each pair is [before, after] padding for that dim
tf.pad(a, [[0, 0], [0, 0]])  # no padding
tf.pad(a, [[1, 0], [0, 0]])  # one row on top
tf.pad(a, [[1, 1], [0, 0]])  # one row on top and one on the bottom
tf.pad(a, [[1, 1], [1, 0]])  # plus one column on the left
tf.pad(a, [[1, 1], [1, 1]])  # rows and columns on all sides
Image padding
a = tf.random.normal([4, 28, 28, 3])
b = tf.pad(a, [[0, 0], [2, 2], [2, 2], [0, 0]])  # pad height and width by 2 on each side
b.shape  # [4, 32, 32, 3]
tile
- Repeats the data along a dim n times (see the 1-D sketch after this list)
- [a, b, c], 2
- → [a, b, c, a, b, c]
- broadcast_to gives the same logical result without copying the data
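A minimal sketch of the 1-D case from the bullets above, using numbers in place of a, b, c:

tf.tile(tf.constant([1, 2, 3]), [2])  # -> [1, 2, 3, 1, 2, 3]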
The multiples argument gives one factor per dim; tiling is applied to the inner dim first.
a = tf.reshape(tf.range(9), [3, 3])
tf.tile(a, [1, 2])  # -> [3, 6]: columns repeated
tf.tile(a, [2, 1])  # -> [6, 3]: rows repeated
tf.tile(a, [2, 2])  # -> [6, 6]
tile vs. broadcast_to
a = tf.reshape(tf.range(9), [3, 3])
aa = tf.expand_dims(a, axis=0)  # [1, 3, 3]
tf.tile(aa, [2, 1, 1])          # [2, 3, 3], data physically copied
tf.broadcast_to(aa, [2, 3, 3])  # [2, 3, 3], same values without copying
Tensor Clipping
- clip_by_value
- relu
- clip_by_norm
- gradient clipping
clip_by_value
# clip_by_value
a = tf.range(10)
tf.maximum(a, 2)  # clip from below: values < 2 become 2
tf.minimum(a, 8)  # clip from above: values > 8 become 8
tf.clip_by_value(a, 2, 8)  # both at once, same as tf.minimum(tf.maximum(a, 2), 8)
relu
a = a - 5  # shift so some entries are negative
tf.nn.relu(a)     # relu(x) = max(x, 0)
tf.maximum(a, 0)  # identical result
clip_by_norm
a = tf.random.normal([2, 2], mean=10)
tf.norm(a)  # typically around 20 here
aa = tf.clip_by_norm(a, 15)  # rescale so the L2 norm is at most 15
tf.norm(aa)  # -> 15.0
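clip_by_norm only rescales the magnitude; the direction is untouched. A quick check, assuming the a/aa pair above:

# the unit vectors match, so only the length changed
tf.reduce_max(tf.abs(a / tf.norm(a) - aa / tf.norm(aa)))  # ~0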
gradient clipping
- Addresses gradient exploding (and, indirectly, vanishing) during training
- set lr=1 (a large rate makes the explosion easy to reproduce)
- new_grads, total_norm = tf.clip_by_global_norm(grads, 25)
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import datasets, layers, optimizers
import os

os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
print(tf.__version__)

(x, y), _ = datasets.mnist.load_data()
x = tf.convert_to_tensor(x, dtype=tf.float32) / 50.  # note: /50., not the usual /255.
y = tf.convert_to_tensor(y)
y = tf.one_hot(y, depth=10)
print('x:', x.shape, 'y:', y.shape)

train_db = tf.data.Dataset.from_tensor_slices((x, y)).batch(128).repeat(30)
x, y = next(iter(train_db))
print('sample:', x.shape, y.shape)
# print(x[0], y[0])


def main():
    # 784 => 512
    w1, b1 = tf.Variable(tf.random.truncated_normal([784, 512], stddev=0.1)), tf.Variable(tf.zeros([512]))
    # 512 => 256
    w2, b2 = tf.Variable(tf.random.truncated_normal([512, 256], stddev=0.1)), tf.Variable(tf.zeros([256]))
    # 256 => 10
    w3, b3 = tf.Variable(tf.random.truncated_normal([256, 10], stddev=0.1)), tf.Variable(tf.zeros([10]))

    optimizer = optimizers.SGD(learning_rate=0.01)

    for step, (x, y) in enumerate(train_db):
        # [b, 28, 28] => [b, 784]
        x = tf.reshape(x, (-1, 784))

        with tf.GradientTape() as tape:
            # layer1
            h1 = x @ w1 + b1
            h1 = tf.nn.relu(h1)
            # layer2
            h2 = h1 @ w2 + b2
            h2 = tf.nn.relu(h2)
            # output
            out = h2 @ w3 + b3
            # out = tf.nn.relu(out)

            # compute loss
            # [b, 10] - [b, 10]
            loss = tf.square(y - out)
            # [b, 10] => [b]
            loss = tf.reduce_mean(loss, axis=1)
            # [b] => scalar
            loss = tf.reduce_mean(loss)

        # compute gradient
        grads = tape.gradient(loss, [w1, b1, w2, b2, w3, b3])
        # print('==before==')
        # for g in grads:
        #     print(tf.norm(g))
        # clip so the global norm of all gradients is at most 15
        grads, _ = tf.clip_by_global_norm(grads, 15)
        # print('==after==')
        # for g in grads:
        #     print(tf.norm(g))
        # update w' = w - lr*grad
        optimizer.apply_gradients(zip(grads, [w1, b1, w2, b2, w3, b3]))

        if step % 100 == 0:
            print(step, 'loss:', float(loss))


if __name__ == '__main__':
    main()
Advanced Features
- where
- scatter_nd
- meshgrid
where(tensor)
a = tf.random.normal([3, 3])
mask = a > 0              # bool mask of the positive entries
tf.boolean_mask(a, mask)  # the positive values themselves
indices = tf.where(mask)  # coordinates where mask is True
tf.gather_nd(a, indices)  # the same positive values, recovered via the indices
- where(cond, A, B): element-wise, picks from A where cond is True, else from B
# reusing the bool mask from above
A = tf.ones([3, 3])
B = tf.zeros([3, 3])
tf.where(mask, A, B)  # 1.0 where a > 0, else 0.0
scatter_nd
- tf.scatter_nd(indices, updates, shape): scatters updates into an all-zero tensor of the given shape
indices = tf.constant([[4], [3], [1], [7]])
updates = tf.constant([9, 10, 11, 12])
shape = tf.constant([8])
tf.scatter_nd(indices, updates, shape)  # -> [0, 11, 0, 10, 9, 0, 0, 12]
indices = tf.constant([[0], [2]])
updates = tf.constant([[[5, 5, 5, 5], [6, 6, 6, 6],
                        [7, 7, 7, 7], [8, 8, 8, 8]],
                       [[5, 5, 5, 5], [6, 6, 6, 6],
                        [7, 7, 7, 7], [8, 8, 8, 8]]])
updates.shape  # [2, 4, 4]
shape = tf.constant([4, 4, 4])
tf.scatter_nd(indices, updates, shape)  # writes the two [4, 4] slabs into slices 0 and 2
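scatter_nd always scatters into an all-zero base. To update an existing tensor instead, TensorFlow provides tf.tensor_scatter_nd_update; a minimal sketch, reusing the 1-D indices/updates from above:

base = tf.ones([8], dtype=tf.int32)
tf.tensor_scatter_nd_update(base, tf.constant([[4], [3], [1], [7]]),
                            tf.constant([9, 10, 11, 12]))
# -> [1, 11, 1, 10, 9, 1, 1, 12]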
meshgrid
import tensorflow as tf
import matplotlib.pyplot as plt


def func(x):
    """
    :param x: [b, b, 2] grid of (x, y) coordinates
    :return: z = sin(x) + sin(y), shape [b, b]
    """
    z = tf.math.sin(x[..., 0]) + tf.math.sin(x[..., 1])
    return z


x = tf.linspace(0., 2 * 3.14, 500)
y = tf.linspace(0., 2 * 3.14, 500)
# [500, 500]
point_x, point_y = tf.meshgrid(x, y)
# [500, 500, 2]
points = tf.stack([point_x, point_y], axis=2)
# points = tf.reshape(points, [-1, 2])
print('points:', points.shape)

z = func(points)
print('z:', z.shape)

plt.figure('plot 2d func value')
plt.imshow(z, origin='lower', interpolation='none')
plt.colorbar()

plt.figure('plot 2d func contour')
plt.contour(point_x, point_y, z)
plt.colorbar()
plt.show()