Data Types
Python Data Containers
- list: high memory/compute overhead (general-purpose Python objects)
- np.array: no GPU support
- tf.tensor
TF is a computing lib
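A tiny sketch (my own example, not from the notes) of moving data between the three containers:
import numpy as np
import tensorflow as tf

lst = [1., 2., 3.]               # plain Python list
arr = np.array(lst)              # numpy array: fast, but CPU-only
t = tf.convert_to_tensor(arr)    # tf.Tensor: can be placed on a GPU and used with GradientTape
t.numpy()                        # back to a numpy array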
Create
tf.constant(1)                       # int32
tf.constant(1.)                      # float32
tf.constant(2.2, dtype=tf.int32)     # error: a float value cannot be given an int32 dtype
tf.constant(2., dtype=tf.double)     # float64
tf.constant([True, False])           # bool
tf.constant('hello,world.')          # string
Property
with tf.device("cpu"):
    a = tf.constant([1])
with tf.device("gpu"):
    b = tf.range(4)
a.device                  # .../device:CPU:0
b.device                  # .../device:GPU:0
aa = a.gpu()              # copy to GPU (deprecated in later TF versions)
aa.device
bb = b.cpu()              # copy to CPU
bb.device
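In later TF 2.x releases Tensor.gpu() / Tensor.cpu() are deprecated; a sketch of the usual replacement, copying a tensor by running tf.identity under a device scope:
with tf.device("CPU:0"):
    b_cpu = tf.identity(b)   # copy of b placed on the CPU
b_cpu.device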
b.numpy()                     # convert back to a numpy array
b.ndim                        # number of dimensions, as a Python int
tf.rank(b)                    # rank, returned as a 0-d tensor
tf.rank(tf.ones([3,4,2]))     # 3
b.name                        # meaningless in eager mode -- a TF 1.x graph leftover
Check Tensor Type
isinstance(a, tf.Tensor)
tf.is_tensor(b)
a.dtype
Convert
a = np.arange(5)
a.dtype
# int64
aa = tf.convert_to_tensor(a) # int64
aa = tf.convert_to_tensor(a, dtype=tf.int32) #int32
tf.cast(aa, dtype=tf.float32)
tf.cast(aa, dtype=tf.double)
tf.cast(aa, dtype=tf.int32)
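Another cast that comes up often (my own illustrative example): converting between integers and booleans.
b = tf.constant([0, 1])
bb = tf.cast(b, dtype=tf.bool)   # [False, True]
tf.cast(bb, dtype=tf.int32)      # [0, 1]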
tf.Variable
Use tf.Variable for parameters that need to be optimized (trainable parameters).
a = tf.range(5)
b = tf.Variable(a)
b.dtype                        # int32
b.name                         # 'Variable:0'
b = tf.Variable(a, name='input_data')
b.name                         # 'input_data:0'
b.trainable                    # True
isinstance(b, tf.Tensor)       # False
isinstance(b, tf.Variable)     # True
tf.is_tensor(b)                # True -- prefer tf.is_tensor over isinstance
b.numpy()
# a scalar (0-d) tensor converts directly to a Python number
a = tf.ones([])
int(a)
float(a)
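A minimal sketch (my own example) of why trainable parameters are wrapped in tf.Variable: tf.GradientTape watches Variables automatically, while a plain tensor would need an explicit tape.watch.
w = tf.Variable(1.0)
x = tf.constant(2.0)
with tf.GradientTape() as tape:
    y = x * w ** 2
tape.gradient(y, w)   # dy/dw = 2*x*w = 4.0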
Creating Tensors
- from numpy, list
- zeros, ones
- fill
- random
- constant
- Application
From Numpy, List
tf.convert_to_tensor(np.ones([2,3]))
tf.convert_to_tensor(np.zeros([2,3]))
tf.convert_to_tensor([1, 2])
tf.convert_to_tensor([1, 2.])
tf.convert_to_tensor([[1], [2.]])
zeros, ones
# zeros
a = tf.zeros([2, 3, 3])      # the argument is the shape
tf.zeros_like(a)             # same as tf.zeros(a.shape)
tf.zeros_like(a.shape)       # a.shape is treated as a tensor, so this gives [0, 0, 0]
# ones
tf.ones([2, 3])
tf.ones_like(a)
Fill
tf.fill([2, 3], 0)    # shape [2, 3], every element set to the given value
Random
Normal
tf.random.normal([2, 2], mean=1, stddev=1)
tf.random.normal([2, 2])                               # defaults: mean=0, stddev=1
tf.random.truncated_normal([2, 2], mean=0, stddev=1)   # resamples values beyond 2 stddev from the mean
Uniform
tf.random.uniform([2,2],minval=0, maxval=1)
tf.random.uniform([2,2],minval=0, maxval=100)
Random Permutation
idx = tf.range(10)
idx = tf.random.shuffle(idx)
a = tf.random.normal([10, 784])
b = tf.random.uniform([10], maxval=10, dtype=tf.int32)
a = tf.gather(a, idx)    # shuffle samples and labels with the same idx
b = tf.gather(b, idx)    # so the (a, b) pairs stay aligned
Application
Scalar
loss / accuracy
out = tf.random.uniform([4,10])
y = tf.range(4)
y = tf.one_hot(y, depth=10)
loss = tf.keras.losses.mse(y, out)   # per-sample MSE, shape [4]
loss = tf.reduce_mean(loss)          # scalar loss
Vector
Bias
net = layers.Dense(10)    # layers is tensorflow.keras.layers
net.build((4, 8))         # input shape [4, 8]
net.kernel                # weight matrix, shape [8, 10]
net.bias                  # bias vector, shape [10]
Matrix
x = tf.random.normal([4,784])
net = layers.Dense(10)
net.build((4,784))
net(x).shape #[4,10]
net.kernel.shape # [784,10]
net.bias.shape    # [10]
Tensor
dim = 3
NLP: [b, seq_len, word_dim]
(X_train, y_train), (X_test, y_test) = keras.datasets.imdb.load_data(num_words=10000)
x_train = keras.preprocessing.sequence.pad_sequences(X_train, maxlen=80)
x_train.shape              # (25000, 80)
emb = embedding(x_train)   # embedding / rnn are layers defined elsewhere (see the sketch below)
emb.shape                  # (25000, 80, word_dim)
out = rnn(emb[:4])
out.shape
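The embedding and rnn objects above are not defined in these notes; a minimal sketch of what they might look like (layer types and sizes are my assumptions):
embedding = layers.Embedding(input_dim=10000, output_dim=100)   # word_dim = 100 (assumed)
rnn = layers.SimpleRNN(units=64)                                # 64 hidden units (assumed)
emb = embedding(x_train)   # [25000, 80, 100]
out = rnn(emb[:4])         # [4, 64]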
dim = 4
- image: [b, h, w, 3]
- feature map: [b, h, w, c]
x = tf.random.normal((4, 32, 32, 3))
net = layers.Conv2D(16, kernel_size=3)
net(x)    # output shape [4, 30, 30, 16] with the default 'valid' padding
dim = 5
- Single task: [b, h, w, 3]
- meta-learning: [task_b, b, h, w, 3]
Indexing and Slicing
Indexing
- Basic indexing
- [idx][idx][idx]
a = tf.ones([1, 5, 5, 3])
a[0][0]           # shape [5, 3]
a[0][0][0]        # shape [3]
a[0][0][0][2]     # a scalar
- Numpy Style
- [idx, idx, …]
a = tf.random.normal([4, 28, 28, 3])
a[1].shape            # [28, 28, 3]
a[1, 2].shape         # [28, 3]
a[1, 2, 3].shape      # [3]
a[1, 2, 3, 2].shape   # []
- start:end (a half-open interval [start, end))
a = tf.range(10)
a[-1:]    # [9]
a[-2:]    # [8, 9]
a[:2]     # [0, 1]
a[:-1]    # [0, 1, ..., 8]
- Indexing by :
a.shape                # [4, 28, 28, 3]
a[0].shape             # [28, 28, 3]
a[0, :, :, :].shape    # [28, 28, 3]
a[:, :, :, 0].shape    # [4, 28, 28]
- Indexing by ::
- start:end:step
- ::step
a.shape                          # [4, 28, 28, 3]
a[0:2, :, :, :].shape            # [2, 28, 28, 3]
a[:, 0:28:2, 0:28:2, :].shape    # [4, 14, 14, 3]
a[:, :14, :14, :].shape          # [4, 14, 14, 3]
a[:, 14:, 14:, :].shape          # [4, 14, 14, 3]
a[:, ::2, ::2, :].shape          # [4, 14, 14, 3]
- ::-1 (a negative step reverses the order)
a = tf.range(4)
a[::-1]     # [3, 2, 1, 0]
a[::-2]     # [3, 1]
a[2::-2]    # [2, 0]
- ... (ellipsis stands for any number of ':' dimensions)
a = tf.random.normal([2, 4, 28, 28, 3])
a[0].shape               # [4, 28, 28, 3]
a[0, :, :, :, :].shape   # [4, 28, 28, 3]
a[0, ...].shape          # [4, 28, 28, 3]
a[:, :, :, :, 0].shape   # [2, 4, 28, 28]
a[..., 0].shape          # [2, 4, 28, 28]
a[0, ..., 2].shape       # [4, 28, 28]
- Selective Indexing
- tf.gather
# data: [classes, students, subjects]
a = tf.random.normal([4, 35, 8])
tf.gather(a, axis=0, indices=[2, 3]).shape             # [2, 35, 8]
tf.gather(a, axis=0, indices=[2, 1, 3, 0]).shape       # [4, 35, 8]
tf.gather(a, axis=1, indices=[2, 3, 7, 9, 16]).shape   # [4, 5, 8]
- tf.gather_nd
- several students and several subjects (two gathers in sequence):
aa = tf.gather(a, axis=1, indices=[several students])
aaa = tf.gather(aa, axis=2, indices=[several subjects])
- several (student, subject) pairs at once:
a.shape                                                      # [4, 35, 8]
tf.gather_nd(a, [0]).shape                                   # [35, 8]
tf.gather_nd(a, [0, 1]).shape                                # [8]
tf.gather_nd(a, [0, 1, 2]).shape                             # []
tf.gather_nd(a, [[0, 1, 2]]).shape                           # [1]
tf.gather_nd(a, [[0, 0], [1, 1]]).shape                      # [2, 8]
tf.gather_nd(a, [[0, 0], [1, 1], [2, 2]]).shape              # [3, 8]
tf.gather_nd(a, [[0, 0, 0], [1, 1, 1], [2, 2, 2]]).shape     # [3]
tf.gather_nd(a, [[[0, 0, 0], [1, 1, 1], [2, 2, 2]]]).shape   # [1, 3]
- tf.boolean_mask
# a: [4, 28, 28, 3]
a.shape
tf.boolean_mask(a, mask=[True, True, False, False]).shape    # [2, 28, 28, 3]
tf.boolean_mask(a, mask=[True, True, False], axis=3).shape   # [4, 28, 28, 2]
a = tf.ones([2, 3, 4])
tf.boolean_mask(a, mask=[[True, False, False], [True, True, False]]).shape   # [3, 4]
Dimension Transformation
- shape, ndim
- [b, h, w, c]
- reshape
a = tf.random.normal([4, 28, 28, 3])
a.shape, a.ndim                      # [4, 28, 28, 3], 4
tf.reshape(a, [4, 784, 3]).shape     # [4, 784, 3]
tf.reshape(a, [4, -1, 3]).shape      # -1 is inferred (784)
tf.reshape(a, [4, 784*3]).shape      # [4, 2352]
tf.reshape(a, [4, -1]).shape         # [4, 2352]
aa = tf.reshape(a, [4, -1])
tf.reshape(aa, [4, 28, 28, 3]).shape
tf.reshape(aa, [4, 14, 56, 3]).shape
tf.reshape(aa, [4, 1, 784, 3]).shape
- expand_dims
a = tf.random.normal([4, 35, 8])
tf.expand_dims(a, axis=0).shape     # [1, 4, 35, 8]
tf.expand_dims(a, axis=3).shape     # [4, 35, 8, 1]
tf.expand_dims(a, axis=-1).shape    # [4, 35, 8, 1]
tf.expand_dims(a, axis=-4).shape    # [1, 4, 35, 8]
- squeeze dim
tf.squeeze(tf.zeros([1, 2, 1, 1, 3])).shape   # [2, 3]: all size-1 dims removed
a = tf.zeros([1, 2, 1, 3])
tf.squeeze(a, axis=0).shape    # [2, 1, 3]
tf.squeeze(a, axis=2).shape    # [1, 2, 3]
tf.squeeze(a, axis=-2).shape   # [1, 2, 3]
tf.squeeze(a, axis=-4).shape   # [2, 1, 3]
- transpose
a = tf.random.normal((4, 3, 2, 1))
a.shape                                    # [4, 3, 2, 1]
tf.transpose(a).shape                      # [1, 2, 3, 4]
tf.transpose(a, perm=[0, 1, 3, 2]).shape   # [4, 3, 1, 2]
# convert to the PyTorch layout: [b, h, w, c] -> [b, c, h, w]
tf.transpose(a, [0, 3, 1, 2]).shape
Broadcasting
- expand
- insert a size-1 dim on the left if needed
- expand size-1 dims to match the target size
- e.g. target [4, 32, 32, 3]: [3] -> [1, 1, 1, 3] -> [4, 32, 32, 3]
- without copying data
- tf.broadcast_to
x = tf.random.normal([4, 32, 32, 3])
x.shape
(x + tf.random.normal([32, 32, 1])).shape    # [4, 32, 32, 3]
(x + tf.random.normal([1, 4, 1, 1])).shape   # error: 4 cannot broadcast against 32
b = tf.broadcast_to(tf.random.normal([4, 1, 1, 1]), [4, 32, 32, 3])
b.shape                                      # [4, 32, 32, 3]
- Broadcasting vs tile
# memory footprint differs: broadcasting does not physically copy the data, tile does
a = tf.ones([3, 4])
a1 = tf.broadcast_to(a, [2, 3, 4])
a2 = tf.expand_dims(a, axis=0)   # [1, 3, 4]
a2 = tf.tile(a2, [2, 1, 1])      # [2, 3, 4]
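A quick check (my own addition) that both routes produce the same values:
tf.reduce_all(tf.equal(a1, a2))   # True: same values, only the memory behavior differs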
Math Operations
- element-wise (see the sketch below)
- + - * / % //
- tf.math.log / tf.exp
- tf.pow / tf.sqrt
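A minimal sketch of the element-wise operators listed above (values are my own):
a = tf.fill([2, 2], 2.)
b = tf.ones([2, 2])
a + b, a - b, a * b, a / b     # element-wise arithmetic
a // b, a % b                  # floor division and modulo
tf.math.log(a)                 # natural log (divide by tf.math.log(2.) for log2)
tf.exp(a)
tf.pow(a, 3)                   # same as a ** 3
tf.sqrt(a)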
- matrix-wise
- @ / tf.matmul
- with broadcasting
a = tf.ones([4, 2, 3])
b = tf.fill([4, 3, 5], 2.)
a @ b             # batched matmul, result shape [4, 2, 5]
tf.matmul(a, b)
b = tf.fill([3, 5], 2.)
bb = tf.broadcast_to(b, [4, 3, 5])
a @ bb
- axis-wise (reductions along a given axis; see the sketch below)
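The axis-wise category is not expanded in these notes; a minimal sketch (my own example) using the reduce ops:
a = tf.random.normal([4, 10])
tf.reduce_mean(a)           # scalar: mean over all elements
tf.reduce_sum(a, axis=1)    # shape [4]: sum over the last axis
tf.reduce_max(a, axis=0)    # shape [10]: max over the batch axis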
Forward Propagation (Tensors)
- $out = relu\{relu\{relu[X@W_1 + b_1]@W_2 + b_2\}@W_3 + b_3\}$
- $pred = argmax(out)$
- $loss = MSE(out, label)$
- $minimize\ loss$
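A minimal sketch (my own code, assuming an MNIST-style input of shape [b, 784] and hidden sizes 256/128/10) of the forward pass and loss written above:
x = tf.random.normal([128, 784])   # batch of flattened images (sizes assumed)
y = tf.one_hot(tf.random.uniform([128], maxval=10, dtype=tf.int32), depth=10)

w1 = tf.Variable(tf.random.truncated_normal([784, 256], stddev=0.1))
b1 = tf.Variable(tf.zeros([256]))
w2 = tf.Variable(tf.random.truncated_normal([256, 128], stddev=0.1))
b2 = tf.Variable(tf.zeros([128]))
w3 = tf.Variable(tf.random.truncated_normal([128, 10], stddev=0.1))
b3 = tf.Variable(tf.zeros([10]))

with tf.GradientTape() as tape:
    h1 = tf.nn.relu(x @ w1 + b1)      # [128, 256]
    h2 = tf.nn.relu(h1 @ w2 + b2)     # [128, 128]
    out = tf.nn.relu(h2 @ w3 + b3)    # [128, 10]
    loss = tf.reduce_mean(tf.keras.losses.mse(y, out))

grads = tape.gradient(loss, [w1, b1, w2, b2, w3, b3])   # gradients for the optimizer step
pred = tf.argmax(out, axis=1)                           # predicted class per sample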