Tensor data types
- list
- np.array
- tf.Tensor
Tensor
- scalar: 1.01
- vector: [1.1], [1.1, 2.2, …]
- matrix: [[1.1, 2.2], [3.3, 4.4], [5.5, 6.6]]
- tensor: rank > 2
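A quick sketch of these four ranks in code (the values are illustrative):

#rank examples
import tensorflow as tf
scalar = tf.constant(1.01) ## rank 0
vector = tf.constant([1.1, 2.2]) ## rank 1
matrix = tf.constant([[1.1, 2.2], [3.3, 4.4], [5.5, 6.6]]) ## rank 2
tensor = tf.ones([2, 3, 4]) ## rank 3
print(scalar.ndim, vector.ndim, matrix.ndim, tensor.ndim) ## 0 1 2 3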
TensorFlow is a scientific computing library; supporting computation is its first priority. The basic data types:
- int
- float
- double
- bool
- string
Creation:
tf.constant(1)
tf.constant(1.)
tf.constant(1.2, dtype=tf.int32) ## raises an error: 1.2 cannot be converted to int32
tf.constant(1., dtype=tf.double)
tf.constant([True, False])
tf.constant('hello world.')
Tensor properties
with tf.device("cpu"):
    a = tf.constant([0])
with tf.device("gpu"):
    b = tf.constant([0])
a.device
b.device
aa = a.gpu() ## moves the tensor to GPU (removed in later TF 2.x releases)
bb = b.cpu() ## moves the tensor to CPU
b.numpy()
b.ndim ## 1
tf.rank(b) ##<tf.Tensor: id=147, shape=(), dtype=int32, numpy=1>
tf.rank(tf.ones([3,4,2])) ##<tf.Tensor: id=139, shape=(), dtype=int32, numpy=3>
tf.is_tensor(a)
a.dtype == tf.float32 ## True; dtype is read-only, so it can only be compared, not assigned
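The a.gpu() / b.cpu() methods come from early TF 2.x and were removed later; a minimal sketch of the current idiom, assuming a visible GPU, is to copy the tensor under a device scope:

#device transfer, newer TF 2.x idiom
with tf.device("CPU:0"):
    a = tf.constant([0])
with tf.device("GPU:0"):
    aa = tf.identity(a) ## copies a onto the GPU
print(a.device, aa.device)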
Basic operations
a = np.arange(5)
a.dtype
aa = tf.convert_to_tensor(a)
aa = tf.convert_to_tensor(a, dtype=tf.int32)
tf.cast(aa, dtype=tf.float32)
b = tf.constant([0,1])
bb = tf.cast(b, dtype=tf.bool)
a = tf.range(5)
b = tf.Variable(a)
b.dtype
b.name
b = tf.Variable(a, name='input_data')
b.name
b.trainable
tf.is_tensor(b)
b.numpy()
a = tf.ones([])
a.numpy()
int(a)
float(a)
Creating tensors
- from numpy, list
- zeros, ones
- fill
- random
- constant
- Application
#from numpy, list
tf.convert_to_tensor(np.ones([2,3]))
#tf.zeros
tf.zeros([])
tf.zeros([1])
tf.zeros([2,2])
tf.zeros([2,3,3])
tf.zeros_like(a)
#tf.ones
tf.ones([])
tf.ones([1])
tf.ones([2,2])
tf.ones([2,3,3])
tf.ones_like(a)
#tf.fill
tf.fill([2,2], 0)
tf.fill([2,2], 9)
#Normal
tf.random.normal([2,2], mean=1, stddev=1)
tf.random.truncated_normal([2,2], mean=0, stddev=1)
#Uniform
tf.random.uniform([2,2], minval=0, maxval=100)
#Random Permutation
idx = tf.range(10)
idx = tf.random.shuffle(idx)
a = tf.random.normal([10,784])
b = tf.random.uniform([10], maxval=10, dtype=tf.int32)
a = tf.gather(a, idx)
b = tf.gather(b, idx)
#tf.constant
tf.constant(1)
tf.constant([1])
tf.constant([1,2.])
Several typical kinds of Tensor
- Scalar
Scalars are commonly used for loss and accuracy.
#Loss
out = tf.random.uniform([4,10])
y = tf.range(4)
y = tf.one_hot(y, depth=10)
loss = tf.keras.losses.mse(y, out)
loss = tf.reduce_mean(loss)
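Accuracy is the other common scalar; a minimal sketch reusing out and y from above (argmax simply recovers the class indices from the one-hot y, for illustration only):

#Accuracy
pred = tf.argmax(out, axis=1)
label = tf.argmax(y, axis=1)
acc = tf.reduce_mean(tf.cast(tf.equal(pred, label), tf.float32)) ## scalar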
- Vector
Bias term
[out_dim]
net = tf.keras.layers.Dense(10)
net.build((4,8))
net.kernel
net.bias
- Matrix
input x: [b, vec_dim]
weight: [input_dim, output_dim]
W = \begin{bmatrix} w_{1,1} & w_{1,2} & \dots & w_{1,R} \\ w_{2,1} & w_{2,2} & \dots & w_{2,R} \\ \vdots & \vdots & & \vdots \\ w_{S,1} & w_{S,2} & \dots & w_{S,R} \end{bmatrix}
#matrix
x = tf.random.normal([4,784])
net = tf.keras.layers.Dense(10)
net.build((4, 784))
net(x).shape
net.kernel.shape
net.bias.shape
- Dim = 3 Tensor
x: [b, seq_len, word_dim]
#Dim = 3 Tensor
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.imdb.load_data(num_words=10000)
x_train = tf.keras.preprocessing.sequence.pad_sequences(x_train, maxlen=80)
x_train.shape
- Dim = 4 Tensor
Image: [b, h, w, 3]
feature maps: [b, h, w, c]
#Dim = 4 Tensor
x = tf.random.normal((4,32,32,3))
net = tf.keras.layers.Conv2D(16, kernel_size=3)
net(x)
- Dim = 5 Tensor
Single task: [b, h, w, 3]
meta-learning:
- [task_b, b, h, w, 3]
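A sketch of assembling such a 5-D tensor by stacking per-task image batches; the sizes here are made-up example values:

#Dim = 5 Tensor via stacking
task_b, b, h, w = 4, 32, 28, 28
tasks = [tf.random.normal([b, h, w, 3]) for _ in range(task_b)]
x = tf.stack(tasks, axis=0)
x.shape ## (4, 32, 28, 28, 3)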
Indexing and slicing
- Basic indexing: [idx][idx][idx]
- Same as NumPy: [idx, idx, …]
- start : end
- start : end : step
- …
Basic indexing
a = tf.ones([1,5,5,3])
a[0][0]
a[0][0][0]
a[0][0][0][2]
Numpy-style indexing
a = tf.random.normal([4,28,28,3])
a[1].shape
a[1,2].shape
a[1,2,3].shape
a[1,2,3,2].shape
start : end
a = tf.range(10)
a[-1:]
a[-2:]
a[:2]
a[:-1]
a = tf.random.normal([4, 28, 28, 3])
a.shape
a[0].shape
a[0,:,:,:].shape
a[0,1,:,:].shape
a[:,:,:,0].shape
a[:,:,:,2].shape
a[:,0,:,:].shape
a.shape
a[0:2,:,:,:].shape
a[:,0:28:2,0:28:2,:].shape
a[:,:14,:14,:].shape
a[:,14:,14:,:].shape
a[:,::2,::2,:].shape
start : end : step
a = tf.range(4)
a[::-1]
a[::-2]
a[2::-2]
Indexing with …
a = tf.random.normal([2,4,28,28,3])
a[0].shape
a[0,:,:,:,:].shape
a[0,...].shape
a[:,:,:,:,0].shape
a[...,0].shape
a[0,...,2].shape
a[1,0,...,0].shape
Selective Indexing
- tf.gather
- tf.gather_nd
- tf.boolean_mask
tf.gather
- data: [classes, students, subjects]
#tf.gather
a = tf.random.normal([4, 35, 8])
tf.gather(a, axis=0, indices=[2,3]).shape
a[2:4].shape
tf.gather(a, axis=0, indices=[2,1,2,0]).shape
tf.gather(a, axis=1, indices=[2,3,7,9,16]).shape
tf.gather(a, axis=2, indices=[2,3,7]).shape
tf.gather_nd
- data: [classes, students, subjects]
- What if sample several students and their several subjects?
- aa = tf.gather(a, axis, [several students])
- aaa = tf.gather(aa, axis, [several subjects])
- What if we sample several (class, student) pairs?
- e.g.:
- [class1_student1, class2_student2, class3_student3, class4_student4]
- recommended indices format:
[[0], [1],…]
[[0,0], [1,1],…]
[[0,0,0], [1,1,1],…]
#tf.gather_nd
a.shape
tf.gather_nd(a, [0]).shape
tf.gather_nd(a, [0,1]).shape
tf.gather_nd(a, [0,1,2]).shape
tf.gather_nd(a, [[0,1,2]]).shape
tf.gather_nd(a, [[0,0],[1,1]]).shape
tf.gather_nd(a,[[0,0],[1,1],[2,2]]).shape
tf.gather_nd(a,[[0,0,0],[1,1,1],[2,2,2]]).shape
tf.gather_nd(a,[[[0,0,0],[1,1,1],[2,2,2]]]).shape
tf.boolean_mask
#tf.boolean_mask
a = tf.random.normal([4, 28, 28, 3])
a.shape
tf.boolean_mask(a, mask=[True,True,False,False]).shape
tf.boolean_mask(a, mask=[True,True,False], axis=3).shape
a = tf.ones([2,3,4])
tf.boolean_mask(a, mask=[[True,False,False],[False,False,True]])
Dimension transformations
Outline
- shape, ndim
- reshape
- expand_dims/squeeze
- transpose
- broadcast_to
View
- [b, 28, 28]
- →[b, 28 * 28]
- →[b, 2, 14 * 28]
- →[b, 28, 28, 1]
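Two of the listed views written out, using b = 4 as an arbitrary example batch size:

#View examples
x = tf.ones([4, 28, 28])
tf.reshape(x, [4, 2, 14*28]).shape ## (4, 2, 392)
tf.reshape(x, [4, 28, 28, 1]).shape ## (4, 28, 28, 1)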
Reshape
a = tf.random.normal([4,28,28,3])
a.shape, a.ndim
tf.reshape(a, [4, 28*28, 3])
tf.reshape(a, [4, -1, 3])
tf.reshape(a, [4, 28*28*3])
tf.reshape(a, [-1])
Reshape is flexible
a = tf.random.normal([4,28,28,3])
tf.reshape(tf.reshape(a, [4, -1]), [4, 28, 28, 3])
tf.reshape(tf.reshape(a, [4, -1]), [4, 14, 56, 3])
tf.reshape(tf.reshape(a, [4, -1]), [4, 1, 784, 3])
tf.transpose
#tf.transpose
a = tf.random.normal((4, 3, 2, 1))
tf.transpose(a)
tf.transpose(a, perm=[0, 1, 3, 2])
#->[b, 3, h, w]
a = tf.random.normal([4, 28, 28, 3])
a = tf.transpose(a, [0, 2, 1, 3])
a = tf.transpose(a, [0, 3, 2, 1])
a = tf.transpose(a, [0, 3, 1, 2])
Squeeze VS Expand_dims
Expand dim
- a: [classes, students, subjects]
- [4, 35, 8]: add a school dim
- [1, 4, 35, 8] + [1, 4, 35, 8] → [2, 4, 35, 8]
#Expand dim
a = tf.random.normal([4, 35, 8])
tf.expand_dims(a, axis=0)
tf.expand_dims(a, axis=3)
tf.expand_dims(a, axis=-1)
tf.expand_dims(a, axis=-4)
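To finish the school example above, a sketch that merges two schools into [2, 4, 35, 8]; the second school's data is random, purely for illustration:

#concat two schools
school1 = tf.expand_dims(a, axis=0) ## [1, 4, 35, 8]
school2 = tf.expand_dims(tf.random.normal([4, 35, 8]), axis=0)
tf.concat([school1, school2], axis=0).shape ## (2, 4, 35, 8)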
Squeeze dim
- Only squeeze for shape=1 dim
- [4, 35, 8, 1]
- [1, 4, 35, 8]
- [1, 4, 35, 1]
#tf.squeeze
tf.squeeze(tf.zeros([1, 2, 1, 1, 3]))
a = tf.zeros([1, 2, 1, 3])
tf.squeeze(a, axis=0)
tf.squeeze(a, axis=2)
tf.squeeze(a, axis=-2)
tf.squeeze(a, axis=-4)
Broadcasting
- expand
- without copying data
- VS tf.tile
- tf.broadcast_to
#Broadcasting
x = tf.random.normal([4, 32, 32, 3])
(x + tf.random.normal([3]))
(x + tf.random.normal([32, 32, 1]))
(x + tf.random.normal([4, 1, 1, 1]))
(x + tf.random.normal([1, 4, 1, 1])) #error: the dim of size 4 cannot broadcast against 32
#tf.broadcast_to
x.shape
(x + tf.random.normal([4, 1, 1, 1]))
b = tf.broadcast_to(tf.random.normal([4, 1, 1, 1]), [4, 32, 32, 3])
#Broadcast VS Tile
a = tf.ones([3, 4])
a1 = tf.broadcast_to(a, [2, 3, 4])
a2 = tf.expand_dims(a, axis=0)
a2 = tf.tile(a2, [2, 1, 1])
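Both routes produce tensors with identical shape and values; a quick check on a1 and a2 from above:

#broadcast_to and tile agree
tf.reduce_all(tf.equal(a1, a2)) ## True
a1.shape, a2.shape ## (2, 3, 4), (2, 3, 4)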
Math operations
Outline
- + - * /
- **, pow, square
- sqrt
- //, %
- exp, log
- @, matmul
- linear layer
Operation type
- element-wise: + - * /
- matrix-wise: @, matmul
- dim-wise: reduce_mean/max/min/sum
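A short sketch exercising each group on small tensors, plus the linear layer from the outline:

#operation types
a = tf.fill([2, 2], 2.)
b = tf.ones([2, 2])
a + b, a - b, a * b, a / b ## element-wise
a ** 2, tf.pow(a, 2), tf.square(a)
tf.sqrt(a)
b // a, b % a
tf.exp(a), tf.math.log(a) ## natural log
a @ b, tf.matmul(a, b) ## matrix-wise
tf.reduce_mean(a), tf.reduce_max(a), tf.reduce_min(a), tf.reduce_sum(a) ## dim-wise
#linear layer
x = tf.random.normal([4, 784])
w = tf.random.normal([784, 10])
(x @ w + tf.zeros([10])).shape ## (4, 10)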
Forward propagation
out = relu\{relu\{relu[X@W_1 + b_1]@W_2 + b_2\}@W_3 + b_3\}
pred = argmax(out)
loss = MSE(out, label)
minimize\ loss \Rightarrow [W_1^{'}, b_1^{'}, W_2^{'}, b_2^{'}, W_3^{'}, b_3^{'}]
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import datasets
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
# x: [60k, 28, 28],
# y: [60k]
(x, y), _ = datasets.mnist.load_data()
# x: [0~255] => [0~1.]
x = tf.convert_to_tensor(x, dtype=tf.float32) / 255.
y = tf.convert_to_tensor(y, dtype=tf.int32)
print(x.shape, y.shape, x.dtype, y.dtype)
print(tf.reduce_min(x), tf.reduce_max(x))
print(tf.reduce_min(y), tf.reduce_max(y))
train_db = tf.data.Dataset.from_tensor_slices((x,y)).batch(128)
train_iter = iter(train_db)
sample = next(train_iter)
print('batch:', sample[0].shape, sample[1].shape)
# [b, 784] => [b, 256] => [b, 128] => [b, 10]
# [dim_in, dim_out], [dim_out]
w1 = tf.Variable(tf.random.truncated_normal([784, 256], stddev=0.1))
b1 = tf.Variable(tf.zeros([256]))
w2 = tf.Variable(tf.random.truncated_normal([256, 128], stddev=0.1))
b2 = tf.Variable(tf.zeros([128]))
w3 = tf.Variable(tf.random.truncated_normal([128, 10], stddev=0.1))
b3 = tf.Variable(tf.zeros([10]))
lr = 1e-3
for epoch in range(10): # iterate over the dataset for 10 epochs
    for step, (x, y) in enumerate(train_db): # for every batch
        # x: [128, 28, 28]
        # y: [128]
        # [b, 28, 28] => [b, 28*28]
        x = tf.reshape(x, [-1, 28*28])
        with tf.GradientTape() as tape: # records ops on tf.Variable by default
            # x: [b, 28*28]
            # h1 = x@w1 + b1
            # [b, 784]@[784, 256] + [256] => [b, 256] + [256] => [b, 256] + [b, 256]
            h1 = x@w1 + tf.broadcast_to(b1, [x.shape[0], 256])
            h1 = tf.nn.relu(h1)
            # [b, 256] => [b, 128]
            h2 = h1@w2 + b2
            h2 = tf.nn.relu(h2)
            # [b, 128] => [b, 10]
            out = h2@w3 + b3
            # compute loss
            # out: [b, 10]
            # y: [b] => [b, 10]
            y_ = tf.one_hot(y, depth=10)
            # mse = mean((y-out)^2)
            # [b, 10]
            loss = tf.square(y_ - out)
            # mean: scalar
            loss = tf.reduce_mean(loss)
        # compute gradients
        grads = tape.gradient(loss, [w1, b1, w2, b2, w3, b3])
        # print(grads)
        # w1 = w1 - lr * w1_grad
        w1.assign_sub(lr * grads[0])
        b1.assign_sub(lr * grads[1])
        w2.assign_sub(lr * grads[2])
        b2.assign_sub(lr * grads[3])
        w3.assign_sub(lr * grads[4])
        b3.assign_sub(lr * grads[5])
        if step % 100 == 0:
            print(epoch, step, 'loss:', float(loss))