TensorFlow 2.0 Notes (Part 1): Basic Operations

Tensor Data Types

list
np.array
tf.Tensor

Tensor
  • scalar: 1.01
  • vector: [1.1], [1.1, 2.2, …]
  • matrix: [[1.1, 2.2], [3.3, 4.4], [5.5, 6.6]]
  • tensor: rank > 2
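
A quick sketch of the rank of each kind, using the same creation functions shown later in these notes:

#rank of each kind
tf.constant(1.01).ndim                        ## 0, scalar
tf.constant([1.1, 2.2]).ndim                  ## 1, vector
tf.constant([[1.1, 2.2], [3.3, 4.4]]).ndim    ## 2, matrix
tf.ones([3, 4, 2]).ndim                       ## 3, rank > 2 tensor
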
Data types:

TensorFlow is a scientific computing library; supporting numerical computation is its first priority.

  • int
  • float
  • double
  • bool
  • string
Creation:
tf.constant(1)
tf.constant(1.)
tf.constant(1.2, dtype=tf.int32)    # raises an error: a float value cannot be converted to int32
tf.constant(1., dtype=tf.double)
tf.constant([True, False])
tf.constant('hello world.')
Tensor Attributes

with tf.device("cpu"):
    a = tf.constant([0])

with tf.device("gpu"):
    b = tf.constant([0])

a.device
b.device

aa = a.gpu()    # copies to GPU; .gpu()/.cpu() are deprecated in later TF 2.x, where tf.device with tf.identity is preferred
bb = b.cpu()
b.numpy()
b.ndim							## 1
tf.rank(b) 						##<tf.Tensor: id=147, shape=(), dtype=int32, numpy=1>
tf.rank(tf.ones([3,4,2]))    	##<tf.Tensor: id=139, shape=(), dtype=int32, numpy=3>
tf.is_tensor(a)
a.dtype == tf.float32    ## dtype is read-only; use comparison, not assignment
Basic Operations
a = np.arange(5)
a.dtype

aa = tf.convert_to_tensor(a)
aa = tf.convert_to_tensor(a, dtype=tf.int32)
tf.cast(aa, dtype=tf.float32)

b = tf.constant([0,1])
bb = tf.cast(b, dtype=tf.bool)

a = tf.range(5)
b = tf.Variable(a)
b.dtype
b.name
b = tf.Variable(a, name='input_data')
b.name
b.trainable

tf.is_tensor(b)

b.numpy()

a = tf.ones([])
a.numpy()
int(a)
float(a)
Creating Tensors
  • from numpy, list
  • zeros, ones
  • fill
  • random
  • constant
  • Application
#from numpy, list
tf.convert_to_tensor(np.ones([2,3]))

#tf.zeros
tf.zeros([])
tf.zeros([1])
tf.zeros([2,2])
tf.zeros([2,3,3])
tf.zeros_like(a)

#tf.ones
tf.ones([])
tf.ones([1])
tf.ones([2,2])
tf.ones([2,3,3])
tf.ones_like(a)

#tf.fill
tf.fill([2,2], 0)
tf.fill([2,2], 9)

#Normal
tf.random.normal([2,2], mean=1, stddev=1)
tf.random.truncated_normal([2,2], mean=0, stddev=1)

#Uniform
tf.random.uniform([2,2], minval=0, maxval=100)

#Random Permutation
idx = tf.range(10)
idx = tf.random.shuffle(idx)
a = tf.random.normal([10,784])
b = tf.random.uniform([10], maxval=10, dtype=tf.int32)
a = tf.gather(a, idx)
b = tf.gather(b, idx)

#tf.constant
tf.constant(1)
tf.constant([1])
tf.constant([1,2.])
Typical Kinds of Tensors
  • Scalar

Scalars are used for loss and accuracy (an accuracy sketch follows the loss example below).

#Loss

out = tf.random.uniform([4,10])
y = tf.range(4)
y = tf.one_hot(y, depth=10)

loss = tf.keras.losses.mse(y, out)
loss = tf.reduce_mean(loss)
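
Accuracy is also reduced to a scalar. A minimal sketch, reusing out and y from the loss example above:

#Accuracy
pred = tf.argmax(out, axis=1)                       # [4], predicted class per sample
correct = tf.equal(pred, tf.argmax(y, axis=1))      # y is one-hot, so recover the label index
acc = tf.reduce_mean(tf.cast(correct, tf.float32))  # scalar in [0, 1]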
  • Vector

Bias term
[out_dim]

net = tf.keras.layers.Dense(10)
net.build((4,8))
net.kernel
net.bias
  • Matrix

input x: [b, vec_dim]
weight: [input_dim, output_dim]
$W = \begin{bmatrix} w_{1,1} & w_{1,2} & \dots & w_{1,R} \\ w_{2,1} & w_{2,2} & \dots & w_{2,R} \\ \vdots & \vdots & \ddots & \vdots \\ w_{S,1} & w_{S,2} & \dots & w_{S,R} \end{bmatrix}$

#matrix
x = tf.random.normal([4,784])
net = tf.keras.layers.Dense(10)
net.build((4, 784))
net(x).shape
net.kernel.shape
net.bias.shape
  • Dim = 3 Tensor

x: [b, seq_len, word_dim]

#Dim = 3 Tensor
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.imdb.load_data(num_words=10000)
x_train = tf.keras.preprocessing.sequence.pad_sequences(x_train, maxlen=80)
x_train.shape
  • Dim = 4 Tensor

Image: [b, h, w, 3]
feature maps: [b, h, w, c]

#Dim = 4 Tensor
x = tf.random.normal((4,32,32,3))
net = tf.keras.layers.Conv2D(16, kernel_size=3)
net(x)
  • Dim = 5 Tensor

Single task: [b, h, w, 3]
meta-learning:

  • [task_b, b, h, w, 3]
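
A minimal sketch of such a 5-D tensor (the sizes task_b=2, b=4 and 32x32 RGB are chosen only for illustration):

#Dim = 5 Tensor
x = tf.random.normal([2, 4, 32, 32, 3])   # [task_b, b, h, w, 3]
x.shape                                   ## TensorShape([2, 4, 32, 32, 3])
x[0].shape                                ## one task: [b, h, w, 3]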
Indexing and Slicing
  • Basic indexing
    [idx][idx][idx]
  • Same with Numpy
    [idx, idx,…]
  • start : end
  • start : end : step

Basic indexing

a = tf.ones([1,5,5,3])
a[0][0]
a[0][0][0]
a[0][0][0][2]

Numpy-style indexing

a = tf.random.normal([4,28,28,3])
a[1].shape
a[1,2].shape
a[1,2,3].shape
a[1,2,3,2].shape

start : end

a = tf.range(10)
a[-1:]
a[-2:]
a[:2]
a[:-1]
a = tf.random.normal([4,28,28,3])
a.shape
a[0].shape
a[0,:,:,:].shape
a[0,1,:,:].shape
a[:,:,:,0].shape
a[:,:,:,2].shape
a[:,0,:,:].shape

a.shape
a[0:2,:,:,:].shape
a[:,0:28:2,0:28:2,:].shape
a[:,:14,:14,:].shape
a[:,14:,14:,:].shape
a[:,::2,::2,:].shape

start : end : step

a = tf.range(4)
a[::-1]
a[::-2]
a[2::-2]
a = tf.random.normal([2,4,28,28,3])
a[0].shape
a[0,:,:,:,:].shape
a[0,...].shape
a[:,:,:,:,0].shape
a[...,0].shape
a[0,...,2].shape
a[1,0,...,0].shape

Selective Indexing

  • tf.gather
  • tf.gather_nd
  • tf.boolean_mask

tf.gather

  • data: [classes, students, subjects]
#tf.gather
a = tf.random.normal([4, 35, 8])   # [classes, students, subjects]
tf.gather(a, axis=0, indices=[2,3]).shape
a[2:4].shape
tf.gather(a, axis=0, indices=[2,1,2,0]).shape
tf.gather(a, axis=1, indices=[2,3,7,9,16]).shape
tf.gather(a, axis=2, indices=[2,3,7]).shape

tf.gather_nd

  • data: [classes, students, subjects]
  • What if sample several students and their several subjects?
  • aa = tf.gather(a, axis, [several students])
  • aaa = tf.gather(aa, axis, [several subjects])
  • What if sample several (classes and students)?
  • For instance:
    • [class1_student1, class2_student2, class3_student3, class4_student4]
  • recommended indices format:
    [[0], [1],…]
    [[0,0], [1,1],…]
    [[0,0,0], [1,1,1],…]
#tf.gather_nd
a.shape
tf.gather_nd(a, [0]).shape
tf.gather_nd(a, [0,1]).shape
tf.gather_nd(a, [0,1,2]).shape
tf.gather_nd(a, [[0,1,2]]).shape
tf.gather_nd(a, [[0,0],[1,1]]).shape
tf.gather_nd(a,[[0,0],[1,1],[2,2]]).shape
tf.gather_nd(a,[[0,0,0],[1,1,1],[2,2,2]]).shape
tf.gather_nd(a,[[[0,0,0],[1,1,1],[2,2,2]]]).shape

tf.boolean_mask

#tf.boolean_mask
a = tf.random.normal([4, 28, 28, 3])
a.shape
tf.boolean_mask(a, mask=[True,True,False,False]).shape
tf.boolean_mask(a, mask=[True,True,False], axis=3).shape
a = tf.ones([2,3,4])
tf.boolean_mask(a, mask=[[True,False,False],[False,False,True]])
Dimension Transformations

Outline

  • shape, ndim
  • reshape
  • expand_dims/squeeze
  • transpose
  • broadcast_to

View

  • [b, 28, 28]
  • →[b, 28 * 28]
  • →[b, 2, 14 * 28]
  • →[b, 28, 28, 1]
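
A short sketch of the views listed above, with b = 4 chosen for illustration; the total number of elements stays the same under every view:

#View
a = tf.random.normal([4, 28, 28])
tf.reshape(a, [4, 28*28]).shape        ## [4, 784]
tf.reshape(a, [4, 2, 14*28]).shape     ## [4, 2, 392]
tf.reshape(a, [4, 28, 28, 1]).shape    ## [4, 28, 28, 1]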

Reshape

a = tf.random.normal([4,28,28,3])
a.shape, a.ndim
tf.reshape(a, [4, 28*28, 3])
tf.reshape(a, [4, -1, 3])
tf.reshape(a, [4, 28*28*3])
tf.reshape(a, [-1])

Reshape is flexible

a = tf.random.normal([4,28,28,3])
tf.reshape(tf.reshape(a, [4, -1]), [4, 28, 28, 3])
tf.reshape(tf.reshape(a, [4, -1]), [4, 14, 56, 3])
tf.reshape(tf.reshape(a, [4, -1]), [4, 1, 784, 3])

tf.transpose

#tf.transpose
a = tf.random.normal((4, 3, 2, 1))
tf.transpose(a)
tf.transpose(a, perm=[0, 1, 3, 2])
#->[b, 3, h, w]
a = tf.random.normal([4, 28, 28, 3])
a = tf.transpose(a, [0, 2, 1, 3])
a = tf.transpose(a, [0, 3, 2, 1])
a = tf.transpose(a, [0, 3, 1, 2])

Squeeze VS Expand_dims

Expand dim

  • a: [classes, students, subjects]
    [4, 35, 8]
  • add school dim
  • [1, 4, 35, 8] + [1, 4, 35, 8]
    [2, 4, 35, 8] (see the tf.concat sketch after the code below)
#Expand dim
a = tf.random.normal([4, 35, 8])
tf.expand_dims(a, axis=0)
tf.expand_dims(a, axis=3)
tf.expand_dims(a, axis=-1)
tf.expand_dims(a, axis=-4)
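
As referenced in the school example above, once the school dim is added, two schools' data can be stacked along it. A minimal sketch using tf.concat (the second tensor here is hypothetical, standing in for another school):

#add school dim, then stack two schools
school1 = tf.expand_dims(tf.random.normal([4, 35, 8]), axis=0)   # [1, 4, 35, 8]
school2 = tf.expand_dims(tf.random.normal([4, 35, 8]), axis=0)   # [1, 4, 35, 8]
tf.concat([school1, school2], axis=0).shape                      ## [2, 4, 35, 8]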

Squeeze dim

  • Only dims of size 1 can be squeezed
  • [4, 35, 8, 1]
  • [1, 4, 35, 8]
  • [1, 4, 35, 1]
#tf.squeeze
tf.squeeze(tf.zeros([1, 2, 1, 1, 3]))
a = tf.zeros([1, 2, 1, 3])
tf.squeeze(a, axis=0)
tf.squeeze(a, axis=2)
tf.squeeze(a, axis=-2)
tf.squeeze(a, axis=-4)
Broadcasting
  • expand
  • without copying data
    • VS tf.tile
  • tf.broadcast_to
#Broadcasting
x = tf.random.normal([4, 32, 32, 3])
(x + tf.random.normal([3]))
(x + tf.random.normal([32, 32, 1]))
(x + tf.random.normal([4, 1, 1, 1]))
(x + tf.random.normal([1, 4, 1, 1])) #error

#tf.broadcast_to
x.shape
(x + tf.random.normal([4, 1, 1, 1]))
b = tf.broadcast_to(tf.random.normal([4, 1, 1, 1]), [4, 32, 32, 3])

#Broadcast VS Tile
a = tf.ones([3, 4])
a1 = tf.broadcast_to(a, [2, 3, 4])
a2 = tf.expand_dims(a, axis=0)
a2 = tf.tile(a2, [2, 1, 1])
Math Operations

Outline

  • + - * /
  • **, pow, square
  • sqrt
  • //, %
  • exp, log
  • @, matmul
  • linear layer
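
A minimal sketch of these operators on small tensors (values chosen only for illustration):

#basic operators
a = tf.fill([2, 2], 2.)
b = tf.ones([2, 2])
a + b, a - b, a * b, a / b             # element-wise arithmetic
a ** 2, tf.pow(a, 3.), tf.square(a)
tf.sqrt(a)
a // b, a % b                          # floor division and remainder
tf.exp(a), tf.math.log(a)              # log is the natural logarithm
a @ b, tf.matmul(a, b)                 # matrix multiplication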

Operation type

  • element-wise
    + - * /
  • matrix-wise
    @, matmul
  • dim-wise
    reduce_mean/max/min/sum
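
A short sketch contrasting the three kinds of operations (shapes chosen only for illustration):

#element-wise vs matrix-wise vs dim-wise
a = tf.random.normal([4, 10])
b = tf.random.normal([4, 10])
a + b                                 # element-wise: [4, 10]
a @ tf.random.normal([10, 3])         # matrix-wise: [4, 10] @ [10, 3] -> [4, 3]
tf.reduce_mean(a)                     # dim-wise: scalar mean over all elements
tf.reduce_max(a, axis=1)              # max over the last dim -> [4]
tf.reduce_min(a, axis=0)              # min over the batch dim -> [10]
tf.reduce_sum(a, axis=1)              # sum over the last dim -> [4]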
Forward Propagation

$out = relu\{relu\{relu[X@W_1 + b_1]@W_2 + b_2\}@W_3 + b_3\}$
$pred = argmax(out)$
$loss = MSE(out, label)$
minimize $loss$ to obtain the updated parameters
$[W_1', b_1', W_2', b_2', W_3', b_3']$

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import datasets
import os

os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

# x: [60k, 28, 28],
# y: [60k]
(x, y), _ = datasets.mnist.load_data()
# x: [0~255] => [0~1.]
x = tf.convert_to_tensor(x, dtype=tf.float32) / 255.
y = tf.convert_to_tensor(y, dtype=tf.int32)

print(x.shape, y.shape, x.dtype, y.dtype)
print(tf.reduce_min(x), tf.reduce_max(x))
print(tf.reduce_min(y), tf.reduce_max(y))


train_db = tf.data.Dataset.from_tensor_slices((x,y)).batch(128)
train_iter = iter(train_db)
sample = next(train_iter)
print('batch:', sample[0].shape, sample[1].shape)


# [b, 784] => [b, 256] => [b, 128] => [b, 10]
# [dim_in, dim_out], [dim_out]
w1 = tf.Variable(tf.random.truncated_normal([784, 256], stddev=0.1))
b1 = tf.Variable(tf.zeros([256]))
w2 = tf.Variable(tf.random.truncated_normal([256, 128], stddev=0.1))
b2 = tf.Variable(tf.zeros([128]))
w3 = tf.Variable(tf.random.truncated_normal([128, 10], stddev=0.1))
b3 = tf.Variable(tf.zeros([10]))

lr = 1e-3

for epoch in range(10): # iterate db for 10
    for step, (x, y) in enumerate(train_db): # for every batch
        # x:[128, 28, 28]
        # y: [128]

        # [b, 28, 28] => [b, 28*28]
        x = tf.reshape(x, [-1, 28*28])

        with tf.GradientTape() as tape: # GradientTape tracks tf.Variable objects by default
            # x: [b, 28*28]
            # h1 = x@w1 + b1
            # [b, 784]@[784, 256] + [256] => [b, 256] + [256] => [b, 256] + [b, 256]
            h1 = x@w1 + tf.broadcast_to(b1, [x.shape[0], 256])
            h1 = tf.nn.relu(h1)
            # [b, 256] => [b, 128]
            h2 = h1@w2 + b2
            h2 = tf.nn.relu(h2)
            # [b, 128] => [b, 10]
            out = h2@w3 + b3

            # compute loss
            # out: [b, 10]
            # y: [b] => [b, 10]
            y_ = tf.one_hot(y, depth=10)

            # mse = mean(sum(y-out)^2)
            # [b, 10]
            loss = tf.square(y_ - out)
            # mean: scalar
            loss = tf.reduce_mean(loss)

        # compute gradients
        grads = tape.gradient(loss, [w1, b1, w2, b2, w3, b3])
        # print(grads)
        # w1 = w1 - lr * w1_grad
        w1.assign_sub(lr * grads[0])
        b1.assign_sub(lr * grads[1])
        w2.assign_sub(lr * grads[2])
        b2.assign_sub(lr * grads[3])
        w3.assign_sub(lr * grads[4])
        b3.assign_sub(lr * grads[5])

        if step % 100 == 0:
            print(epoch, step, 'loss:', float(loss))
