Tensor data types
- list
- np.array
- tf.Tensor
Tensor
- scalar: 1.01
- vector: [1.1], [1.1, 2.2, …]
- matrix: [[1.1, 2.2], [3.3, 4.4], [5.5, 6.6]]
- tensor: rank > 2
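A quick sketch of these four ranks in code (the values are illustrative):

#rank examples
import tensorflow as tf
scalar = tf.constant(1.01) ## rank 0
vector = tf.constant([1.1, 2.2]) ## rank 1
matrix = tf.constant([[1.1, 2.2], [3.3, 4.4], [5.5, 6.6]]) ## rank 2
tensor = tf.ones([2, 3, 4]) ## rank 3
print(scalar.ndim, vector.ndim, matrix.ndim, tensor.ndim) ## 0 1 2 3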
TensorFlow is a scientific computing library; supporting computation is its first priority. The basic data types:
- int
- float
- double
- bool
- string
Creation:
tf.constant(1)
tf.constant(1.)
tf.constant(1.2, dtype=tf.int32) ## raises an error: 1.2 cannot be converted to int32
tf.constant(1., dtype=tf.double)
tf.constant([True, False])
tf.constant('hello world.')
Tensor properties
with tf.device("cpu"):
    a = tf.constant([0])
with tf.device("gpu"):
    b = tf.constant([0])
a.device
b.device
aa = a.gpu() ## moves the tensor to GPU (removed in later TF 2.x releases)
bb = b.cpu() ## moves the tensor to CPU
b.numpy()
b.ndim ## 1
tf.rank(b) ##<tf.Tensor: id=147, shape=(), dtype=int32, numpy=1>
tf.rank(tf.ones([3,4,2])) ##<tf.Tensor: id=139, shape=(), dtype=int32, numpy=3>
tf.is_tensor(a)
a.dtype == tf.float32 ## True; dtype is read-only, so it can only be compared, not assigned
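The a.gpu() / b.cpu() methods come from early TF 2.x and were removed later; a minimal sketch of the current idiom, assuming a visible GPU, is to copy the tensor under a device scope:

#device transfer, newer TF 2.x idiom
with tf.device("CPU:0"):
    a = tf.constant([0])
with tf.device("GPU:0"):
    aa = tf.identity(a) ## copies a onto the GPU
print(a.device, aa.device)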
Basic operations
a = np.arange(5)
a.dtype
aa = tf.convert_to_tensor(a)
aa = tf.convert_to_tensor(a, dtype=tf.int32)
tf.cast(aa, dtype=tf.float32)
b = tf.constant([0,1])
bb = tf.cast(b, dtype=tf.bool)
a = tf.range(5)
b = tf.Variable(a)
b.dtype
b.name
b = tf.Variable(a, name='input_data')
b.name
b.trainable
tf.is_tensor(b)
b.numpy()
a = tf.ones([])
a.numpy()
int(a)
float(a)
Creating tensors
- from numpy, list
- zeros, ones
- fill
- random
- constant
- Application
#from numpy, list
tf.convert_to_tensor(np.ones([2,3]))
#tf.zeros
tf.zeros([])
tf.zeros([1])
tf.zeros([2,2])
tf.zeros([2,3,3])
tf.zeros_like(a)
#tf.ones
tf.ones([])
tf.ones([1])
tf.ones([2,2])
tf.ones([2,3,3])
tf.ones_like(a)
#tf.fill
tf.fill([2,2], 0)
tf.fill([2,2], 9)
#Normal
tf.random.normal([2,2], mean=1, stddev=1)
tf.random.truncated_normal([2,2], mean=0, stddev=1)
#Uniform
tf.random.uniform([2,2], minval=0, maxval=100)
#Random Permutation
idx = tf.range(10)
idx = tf.random.shuffle(idx)
a = tf.random.normal([10,784])
b = tf.random.uniform([10], maxval=10, dtype=tf.int32)
a = tf.gather(a, idx)
b = tf.gather(b, idx)
#tf.constant
tf.constant(1)
tf.constant([1])
tf.constant([1,2.])
Several typical kinds of Tensor
- Scalar
Scalars are commonly used for loss and accuracy.
#Loss
out = tf.random.uniform([4,10])
y = tf.range(4)
y = tf.one_hot(y, depth=10)
loss = tf.keras.losses.mse(y, out)
loss = tf.reduce_mean(loss)
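Accuracy is the other common scalar; a minimal sketch reusing out and y from above (argmax simply recovers the class indices from the one-hot y, for illustration only):

#Accuracy
pred = tf.argmax(out, axis=1)
label = tf.argmax(y, axis=1)
acc = tf.reduce_mean(tf.cast(tf.equal(pred, label), tf.float32)) ## scalar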
- Vector
Bias term
[out_dim]
net = tf.keras.layers.Dense(10)
net.build((4,8))
net.kernel
net.bias
- Matrix
input x: [b, vec_dim]
weight: [input_dim, output_dim]
W = \begin{bmatrix} w_{1,1} & w_{1,2} & \dots & w_{1,R} \\ w_{2,1} & w_{2,2} & \dots & w_{2,R} \\ \vdots & \vdots & & \vdots \\ w_{S,1} & w_{S,2} & \dots & w_{S,R} \end{bmatrix}
#matrix
x = tf.random.normal([4,784])
net = tf.keras.layers.Dense(10)
net.build((4, 784))
net(x).shape
net.kernel.shape
net.bias.shape
- Dim = 3 Tensor
x: [b, seq_len, word_dim]
#Dim = 3 Tensor
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.imdb.load_data(num_words=10000)
x_train = tf.keras.preprocessing.sequence.pad_sequences(x_train, maxlen=80)
x_train.shape
- Dim = 4 Tensor
Image: [b, h, w, 3]
feature maps: [b, h, w, c]
#Dim = 4 Tensor
x = tf.random.normal((4,32,32,3))
net = tf.keras.layers.Conv2D(16, kernel_size=3)
net(x)
- Dim = 5 Tensor
Single task: [b, h, w, 3]
meta-learning:
- [task_b, b, h, w, 3]
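A sketch of assembling such a 5-D tensor by stacking per-task image batches; the sizes here are made-up example values:

#Dim = 5 Tensor via stacking
task_b, b, h, w = 4, 32, 28, 28
tasks = [tf.random.normal([b, h, w, 3]) for _ in range(task_b)]
x = tf.stack(tasks, axis=0)
x.shape ## (4, 32, 28, 28, 3)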
Indexing and slicing
- Basic indexing: [idx][idx][idx]
- Same as NumPy: [idx, idx, …]
- start : end
- start : end : step
- …
Basic indexing
a = tf.ones([1,5,5,3])
a[0][0]
a[0][0][0]
a[0][0][0][2]
Numpy-style indexing
a = tf.random.normal([4,28,28,3])
a[1].shape
a[1,2].shape
a[1,2,3].shape
a[1,2,3,2].shape
start : end
a = tf.range(10)
a[-1:]
a[-2:]
a[:2]
a[:-1]
a = tf.random.normal([4, 28, 28, 3])
a.shape
a[0].shape
a[0,:,:,:].shape
a[0,1,:,:].shape
a[:,:,:,0].shape
a[:,:,:,2].shape
a[:,0,:,:].shape
a.shape
a[0:2,:,:,:].shape
a[:,0:28:2,0:28:2,:].shape
a[:,:14,:14,:].shape
a[:,14:,14:,:].shape
a[:,::2,::2,:].shape
start : end : step
a = tf.range(4)
a[::-1]
a[::-2]
a[2::-2]
Indexing with …
a = tf.random.normal([2,4,28,28,3])
a[0].shape
a[0,:,:,:,:].shape
a[0,...].shape
a[:,:,:,:,0].shape
a[...,0].shape
a[0,...,2].shape
a[1,0,...,0].shape
Selective Indexing
- tf.gather
- tf.gather_nd
- tf.boolean_mask
tf.gather
- data: [classes, students, subjects]
#tf.gather
a = tf.random.normal([4, 35, 8])
tf.gather(a, axis=0, indices=[2,3]).shape
a[2:4].shape
tf.gather(a, axis=0, indices=[2,1,2,0]).shape
tf.gather(a, axis=1, indices=[2,3,7,9,16]).shape
tf.gather(a, axis=2, indices=[2,3,7]).shape
tf.gather_nd
- data: [classes, students, subjects]
- What if sample several students and their several subjects?
- aa = tf.gather(a, axis, [several students])
- aaa = tf.gather(aa, axis, [several subjects])
- What if we sample several (class, student) pairs?
- e.g.:
- [class1_student1, class2_student2, class3_student3, class4_student4]
- recommended indices format:
[[0], [1],…]
[[0,0], [1,1],…]
[[0,0,0], [1,1,1],…]
#tf.gather_nd
a.shape
tf.gather_nd(a, [0]).shape
tf.gather_nd(a, [0,1]).shape
tf.gather_nd(a, [0,1,2]).shape
tf.gather_nd(a, [[0,1,2]]).shape
tf.gather_nd(a, [[0,0],[1,1]]).shape
tf.gather_nd(a,[[0,0],[1,1],[2,2]]).shape
tf.gather_nd(a,[[0,0,0],[1,1,1],[2,2,2]]).shape
tf.gather_nd(a,[[[0,0,0],[1,1,1],[2,2,2]]]).shape
tf.boolean_mask
#tf.boolean_mask
a = tf.random.normal([4, 28, 28, 3])
a.shape
tf.boolean_mask(a, mask=[True,True,False,False]).shape
tf.boolean_mask(a, mask=[True,True,False], axis=3).shape
a = tf.ones([2,3,4])
tf.boolean_mask(a, mask=[[True,False,False],[False,False,True]])
Dimension transformations
Outline
- shape, ndim
- reshape
- expand_dims/squeeze
- transpose
- broadcast_to
View
- [b, 28, 28]
- →[b, 28 * 28]
- →[b, 2, 14 * 28]
- →[b, 28, 28, 1]
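Two of the listed views written out, using b = 4 as an arbitrary example batch size:

#View examples
x = tf.ones([4, 28, 28])
tf.reshape(x, [4, 2, 14*28]).shape ## (4, 2, 392)
tf.reshape(x, [4, 28, 28, 1]).shape ## (4, 28, 28, 1)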
Reshape
a = tf.random.normal([4,28,28,3])
a.shape, a.ndim
tf.reshape(a, [4, 28*28, 3])
tf.reshape(a, [4, -1, 3])
tf.reshape(a, [4, 28*28*3])
tf.reshape(a, [-1])
Reshape is flexible
a = tf.random.normal([4,28,28,3])
tf.reshape(tf.reshape(a, [4, -1]), [4, 28, 28, 3])
tf.reshape(tf.reshape(a, [4, -1]), [4, 14, 56, 3])
tf.reshape(tf.reshape(a, [4, -1]), [4, 1, 784, 3])
tf.transpose
#tf.transpose
a = tf.random.normal((4, 3, 2, 1))
tf.transpose(a)
tf.transpose(a, perm=[0, 1, 3, 2])
#->[b, 3, h, w]
a = tf.random.normal([4, 28, 28, 3])
a = tf.transpose(a, [0, 2, 1, 3])
a = tf.transpose(a, [0, 3, 2, 1])
a = tf.transpose(a, [0, 3, 1, 2])
Squeeze VS Expand_dims
Expand dim
- a: [classes, students, subjects]
- [4, 35, 8]: add a school dim
- [1, 4, 35, 8] + [1, 4, 35, 8] → [2, 4, 35, 8]
#Expand dim
a = tf.random.normal([4, 35, 8])
tf.expand_dims(a, axis=0)
tf.expand_dims(a, axis=3)
tf.expand_dims(a, axis=-1)
tf.expand_dims(a, axis=-4)
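To finish the school example above, a sketch that merges two schools into [2, 4, 35, 8]; the second school's data is random, purely for illustration:

#concat two schools
school1 = tf.expand_dims(a, axis=0) ## [1, 4, 35, 8]
school2 = tf.expand_dims(tf.random.normal([4, 35, 8]), axis=0)
tf.concat([school1, school2], axis=0).shape ## (2, 4, 35, 8)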
Squeeze dim
- Only squeeze for shape=1 dim
- [4, 35, 8, 1]
- [1, 4, 35, 8]
- [1, 4, 35, 1]
#tf.squeeze
tf.squeeze(tf.zeros([1, 2, 1, 1, 3]))
a = tf.zeros([1, 2, 1, 3])
tf.squeeze(a, axis=0)
tf.squeeze(a, axis=2)
tf.squeeze(a, axis=-2)
tf.squeeze(a, axis=-4)
Broadcasting
- expand
- without copying data
- VS tf.tile
- tf.broadcast_to
#Broadcasting
x = tf.random.normal([4, 32, 32, 3])
(x + tf.random.normal([3]))
(x + tf.random.normal([32, 32, 1]))
(x + tf.random.normal([4, 1, 1, 1]))
(x + tf.random.normal([1, 4, 1, 1])) #error: the dim of size 4 cannot broadcast against 32
#tf.broadcast_to
x.shape
(x + tf.random.normal([4, 1, 1, 1]))
b = tf.broadcast_to(tf.random.normal([4, 1, 1, 1]), [4, 32, 32, 3])
#Broadcast VS Tile
a = tf.ones([3, 4])
a1 = tf.broadcast_to(a, [2, 3, 4])
a2 = tf.expand_dims(a, axis=0)
a2 = tf.tile(a2, [2, 1, 1])
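Both routes produce tensors with identical shape and values; a quick check on a1 and a2 from above:

#broadcast_to and tile agree
tf.reduce_all(tf.equal(a1, a2)) ## True
a1.shape, a2.shape ## (2, 3, 4), (2, 3, 4)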
Math operations
Outline
- + - * /
- **, pow, square
- sqrt
- //, %
- exp, log
- @, matmul
- linear layer
Operation type
- element-wise: + - * /
- matrix-wise: @, matmul
- dim-wise: reduce_mean/max/min/sum
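A short sketch exercising each group on small tensors, plus the linear layer from the outline:

#operation types
a = tf.fill([2, 2], 2.)
b = tf.ones([2, 2])
a + b, a - b, a * b, a / b ## element-wise
a ** 2, tf.pow(a, 2), tf.square(a)
tf.sqrt(a)
b // a, b % a
tf.exp(a), tf.math.log(a) ## natural log
a @ b, tf.matmul(a, b) ## matrix-wise
tf.reduce_mean(a), tf.reduce_max(a), tf.reduce_min(a), tf.reduce_sum(a) ## dim-wise
#linear layer
x = tf.random.normal([4, 784])
w = tf.random.normal([784, 10])
(x @ w + tf.zeros([10])).shape ## (4, 10)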
Forward propagation
out = relu\{relu\{relu[X@W_1 + b_1]@W_2 + b_2\}@W_3 + b_3\}
pred = argmax(out)
loss = MSE(out, label)
minimize\ loss \Rightarrow [W_1^{'}, b_1^{'}, W_2^{'}, b_2^{'}, W_3^{'}, b_3^{'}]
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import datasets
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
# x: [60k, 28, 28],
# y: [60k]
(x, y), _ = datasets.mnist.load_data()
# x: [0~255] => [0~1.]
x = tf.convert_to_tensor(x, dtype=tf.float32) / 255.
y = tf.convert_to_tensor(y, dtype=tf.int32)
print(x.shape, y.shape, x.dtype, y.dtype)
print(tf.reduce_min(x), tf.reduce_max(x))
print(tf.reduce_min(y), tf.reduce_max(y))
train_db = tf.data.Dataset.from_tensor_slices((x,y)).batch(128)
train_iter = iter(train_db)
sample = next(train_iter)
print('batch:', sample[0].shape, sample[1].shape)
# [b, 784] => [b, 256] => [b, 128] => [b, 10]
# [dim_in, dim_out], [dim_out]
w1 = tf.Variable(tf.random.truncated_normal([784, 256], stddev=0.1))
b1 = tf.Variable(tf.zeros([256]))
w2 = tf.Variable(tf.random.truncated_normal([256, 128], stddev=0.1))
b2 = tf.Variable(tf.zeros([128]))
w3 = tf.Variable(tf.random.truncated_normal([128, 10], stddev=0.1))
b3 = tf.Variable(tf.zeros([10]))
lr = 1e-3
for epoch in range(10): # iterate over the dataset for 10 epochs
    for step, (x, y) in enumerate(train_db): # for every batch
        # x: [128, 28, 28]
        # y: [128]
        # [b, 28, 28] => [b, 28*28]
        x = tf.reshape(x, [-1, 28*28])
        with tf.GradientTape() as tape: # records ops on tf.Variable by default
            # x: [b, 28*28]
            # h1 = x@w1 + b1
            # [b, 784]@[784, 256] + [256] => [b, 256] + [256] => [b, 256] + [b, 256]
            h1 = x@w1 + tf.broadcast_to(b1, [x.shape[0], 256])
            h1 = tf.nn.relu(h1)
            # [b, 256] => [b, 128]
            h2 = h1@w2 + b2
            h2 = tf.nn.relu(h2)
            # [b, 128] => [b, 10]
            out = h2@w3 + b3
            # compute loss
            # out: [b, 10]
            # y: [b] => [b, 10]
            y_ = tf.one_hot(y, depth=10)
            # mse = mean((y-out)^2)
            # [b, 10]
            loss = tf.square(y_ - out)
            # mean: scalar
            loss = tf.reduce_mean(loss)
        # compute gradients
        grads = tape.gradient(loss, [w1, b1, w2, b2, w3, b3])
        # print(grads)
        # w1 = w1 - lr * w1_grad
        w1.assign_sub(lr * grads[0])
        b1.assign_sub(lr * grads[1])
        w2.assign_sub(lr * grads[2])
        b2.assign_sub(lr * grads[3])
        w3.assign_sub(lr * grads[4])
        b3.assign_sub(lr * grads[5])
        if step % 100 == 0:
            print(epoch, step, 'loss:', float(loss))