四、TF2.0中张量的数学运算

最新推荐文章于 2023-01-15 09:46:53 发布

置顶 feifeiyechuan

最新推荐文章于 2023-01-15 09:46:53 发布

阅读量561

点赞数

分类专栏： # 【算法】TF2.0 文章标签： TF2运算

本文链接：https://blog.csdn.net/feifeiyechuan/article/details/110774176

版权

【算法】TF2.0 专栏收录该内容

11 篇文章 1 订阅

订阅专栏

TF2.0中的运算

一、标量运算

a = tf.constant([
    [1.0, 2], [-3, 4.0]
])
b = tf.constant([
    [5.0, 6], [7.0, 8.0]
])

print(a + b)
	tf.Tensor(
	[[ 6.  8.]
	 [ 4. 12.]], shape=(2, 2), dtype=float32)
	 
print(a - b)
	tf.Tensor(
	[[ -4.  -4.]
	 [-10.  -4.]], shape=(2, 2), dtype=float32)
	 
print(a * b)
	tf.Tensor(
	[[  5.  12.]
	 [-21.  32.]], shape=(2, 2), dtype=float32)
	 
print(a / b)
	tf.Tensor(
	[[ 0.2         0.33333334]
	 [-0.42857143  0.5       ]], shape=(2, 2), dtype=float32)

print(a ** 0.5)
	 tf.Tensor(
	[[1.        1.4142135]
	 [      nan 2.       ]], shape=(2, 2), dtype=float32)
	 
print(a % 3)
	tf.Tensor(
	[[ 1.  2.]
	 [-0.  1.]], shape=(2, 2), dtype=float32)
	 
print(a // 3)
	tf.Tensor(
	[[ 0.  0.]
	 [-1.  1.]], shape=(2, 2), dtype=float32)
	 
print(a >= 2)
	tf.Tensor(
	[[False  True]
	 [False  True]], shape=(2, 2), dtype=bool)
	 
print((a>=2)&(a<=3))
	tf.Tensor(
	[[False  True]
	 [False False]], shape=(2, 2), dtype=bool)
	 
print((a>=2)|(a<=3))
	tf.Tensor(
	[[ True  True]
	 [ True  True]], shape=(2, 2), dtype=bool)
	 
print(a==5)
	tf.Tensor(
	[[False False]
	 [False False]], shape=(2, 2), dtype=bool)
	 
print(tf.sqrt(a))
	tf.Tensor(
	[[1.        1.4142135]
	 [      nan 2.       ]], shape=(2, 2), dtype=float32)
	 
print(tf.maximum(a, b))
	tf.Tensor(
	[[5. 6.]
	 [7. 8.]], shape=(2, 2), dtype=float32)
	 
print(tf.minimum(a, b))
	tf.Tensor(
	[[ 1.  2.]
	 [-3.  4.]], shape=(2, 2), dtype=float32)
	 
print(tf.constant([2.6, -2.7]))
	tf.Tensor([ 2.6 -2.7], shape=(2,), dtype=float32)



a = tf.constant([
    [0.5, 0.6], [-1.2, -6]
])

print(tf.math.round(a))
	tf.Tensor(
	[[ 0.  1.]
	 [-1. -6.]], shape=(2, 2), dtype=float32)
	 
print(tf.math.floor(a))
	tf.Tensor(
	[[ 0.  0.]
	 [-2. -6.]], shape=(2, 2), dtype=float32)
	 
print(tf.math.ceil(a))
tf.Tensor(
	[[ 1.  1.]
	 [-1. -6.]], shape=(2, 2), dtype=float32)

import tensorflow as tf

x = tf.constant([0.9,-0.8,100.0,-20.0,0.7])
y = tf.clip_by_value(x,clip_value_min=-1,clip_value_max=1)
z = tf.clip_by_norm(x,clip_norm = 3)

tf.print(y)
tf.print(z)

>> [0.9 -0.8 1 -1 0.7]
>> [0.0264732055 -0.0235317405 2.94146752 -0.588293493 0.0205902718]

解释：
	1）tf.clip_by_value 其实很好理解：将大于 clip_value_max 的元素置为 clip_value_max，将小于 clip_value_min 的元素置为 clip_value_min，其余元素保持不变。
	2) tf.clip_by_norm 是一种防止梯度爆炸的方式，也是一种比较常用的梯度裁剪（gradient clipping）方式。
	源码：
	def clip_by_norm(t, clip_norm, axes=None, name=None):
		with ops.name_scope(name, "clip_by_norm", [t, clip_norm]) as name:
    	t = ops.convert_to_tensor(t, name="t")

    	# Calculate L2-norm, clip elements by ratio of clip_norm to L2-norm
    	l2norm_inv = math_ops.rsqrt(
        math_ops.reduce_sum(t * t, axes, keep_dims=True))
    	tclip = array_ops.identity(t * clip_norm * math_ops.minimum(
        l2norm_inv, constant_op.constant(1.0, dtype=t.dtype) / clip_norm),
                               name=name)

  		return tclip
 
 原理解释：
  	通过上述表达可以很清楚的明白：其作用是将传入的梯度张量 t 的L2范数进行了上限约束，约束值即为clip_norm,如果 t 的L2范数超过了
  	clip_norm， 则变换为t * clip_norm / l2norm(t), 这样，变换后的 t 的 L2 范数就恰好等于 clip_norm；如果没有超过，则 t 保持不变。

 reference: https://blog.csdn.net/linuxwindowsios/article/details/67635867

二、向量运算

1) 一维度
import tensorflow as tf
a = tf.range(1,10)		
tf.print(tf.convert_to_tensor(a))	# [1 2 3 ... 7 8 9]   转为tensor
tf.print(tf.reduce_sum(a))			# 45	求和
tf.print(tf.reduce_mean(a))			# 5		求平均数
tf.print(tf.reduce_max(a))			# 9		求最大值
tf.print(tf.reduce_min(a))			# 1		求最小值
tf.print(tf.reduce_prod(a))			# 362880	返回所有元素的乘积（可用 axis 指定沿某一维度求积），例如对张量的 shape 求积即可得到其元素（参数）个数。
									# reference: https://blog.csdn.net/qq_29566629/article/details/90178503

p = tf.constant([True,False,False])	
q = tf.constant([False,False,True])	
print(tf.reduce_all(p))				# tf.Tensor(False, shape=(), dtype=bool)
tf.print(tf.reduce_all(p))			# 0		且的意思
tf.print(tf.reduce_any(q))			# 1		或的意思
小结：tf.print()输出内容会将False，转换为0，将True转为1.

s = tf.foldl(lambda a,b:a+b, tf.range(10))
s = tf.foldr(lambda a,b:a+b, tf.range(10))
tf.print(s)							# 45  这个结果类似reduce_sum
解释：tf.foldl(
    fn,
    elems,
    initializer=None,
    parallel_iterations=10,
    back_prop=True,
    swap_memory=False,
    name=None
)
其实这是一个累计的过程，fn 有两个参数，即 fn(x1, x2): return calc(x1, x2)。在遍历 elems 的过程中，
x1 = 上次 calc(x1, x2) 的结果（首次为 initializer；若未指定 initializer，则取 elems 的第一个元素），x2 为新遍历到的值，如此相当于：tf.foldl(fn,elems=[x1,x2,x3,x4]) = fn(fn(fn(x1,x2),x3),x4)
而，tf.foldl 和  tf.foldr的区别是，遍历的方向分别是左到右， 右到左。
reference:  https://blog.csdn.net/loseinvain/article/details/81635711

#cum扫描累积
a = tf.range(1,10)
tf.print(tf.math.cumsum(a))		# [1 3 6 ... 28 36 45]	累加
tf.print(tf.math.cumprod(a))	# [1 2 6 ... 5040 40320 362880] 累乘
解释：1- cumsum(
    x,
    axis=0,
    exclusive=False,
    reverse=False,
    name=None
)
tf.cumsum([a, b, c]) ==> [a, a + b, a + b + c]
tf.cumsum([a, b, c], exclusive=True) ==> [0, a, a + b]
tf.cumsum([a, b, c], reverse=True) ==> [a + b + c, b + c, c]
tf.cumsum([a, b, c], exclusive=True, reverse=True) ==> [b + c, c, 0]
2- cumprod(
    x,
    axis=0,
    exclusive=False,
    reverse=False,
    name=None
)
tf.cumprod([a, b, c]) ==> [a, a * b, a * b * c]
tf.cumprod([a, b, c], exclusive=True) ==> [1, a, a * b]
tf.cumprod([a, b, c], reverse=True) ==> [a * b * c, b * c, c]
tf.cumprod([a, b, c], exclusive=True, reverse=True) ==> [b * c, c, 1]

#arg最大最小值索引
a = tf.range(1,10)
tf.print(tf.argmax(a))		# 8		获取最大值索引下标值
tf.print(tf.argmin(a))		# 0		获取最小值索引下标值

#tf.math.top_k可以用于对张量排序，并截取前多少个最大值，并且返回原下标
values,indices = tf.math.top_k(a,3,sorted=True)
tf.print(values)	# [9 8 7] 返回top3
tf.print(indices)	# [8 7 6] 返回top3的原下标

2）多维
import tensorflow as tf
a = tf.range(1,10)
b = tf.reshape(a,(3,3))
tf.print(b)							# [[1 2 3] [4 5 6]  [7 8 9]]
tf.print(tf.reduce_sum(b, axis=1, keepdims=True))	# [[6] [15] [24]]
tf.print(tf.reduce_sum(b, axis=0, keepdims=True))	# [[12 15 18]]

三、矩阵运算

矩阵必须是二维的。类似tf.constant([1,2,3])这样的不是矩阵。
矩阵运算包括：矩阵乘法，矩阵转置，矩阵逆，矩阵求迹，矩阵范数，矩阵行列式，矩阵求特征值，矩阵分解等运算。
除了一些常用的运算外，大部分和矩阵有关的运算都在tf.linalg子包中。

#矩阵乘法
a = tf.constant([[1,2],[3,4]])
b = tf.constant([[2,0],[0,2]])
tf.print(tf.matmul(a,b))	
tf.print(a@b)   		# 两种方式均可
>> [[2 4]
 	[6 8]]

#矩阵转置
a = tf.constant([[1,2],[3,4]])
tf.print(tf.transpose(a))
>> [[1 3]
 	[2 4]]

#矩阵逆，必须为tf.float32或tf.double类型
a = tf.constant([[1.0,2],[3,4]],dtype = tf.float32)
tf.print(tf.linalg.inv(a))
>> [[-2.0000002 ,  1.0000001 ],
   [ 1.5000001 , -0.50000006]]


# 矩阵主对角线元素之和,即求trace。
a = tf.constant([
    [1, 2],
    [3, 4]
], dtype=tf.float32)
tf.print(tf.linalg.trace(a))
>> 5


#矩阵求范数
a = tf.constant([[1.0,2],[3,4]])
print(tf.linalg.norm(a))
>> 5.477226
解释：
	x_norm=tf.linalg.norm(tensor, ord='euclidean', axis=None, keepdims=None, name=None)（用法与 np.linalg.norm 类似）
	①tensor: 表示矩阵（也可以是一维）
	②ord：范数类型，默认 'euclidean'，即所有元素平方和再开根号（对矩阵而言即 Frobenius 范数）
reference: https://blog.csdn.net/qq_36387683/article/details/81126848

在这里插入图片描述

#矩阵行列式
a = tf.constant([[1.0,2],[3,4]])
tf.print(tf.linalg.det(a))
>> -2
解释： 1 * 4 - 2 * 3 = -2

#矩阵特征值
a = tf.constant([[1.0,2],[-5,4]])
print(tf.linalg.eigvals(a))
>> <tf.Tensor: shape=(2,), dtype=complex64, numpy=array([2.4999995+2.7838817j, 2.5      -2.783882j ], dtype=complex64)>


#矩阵QR分解, 将一个方阵分解为一个正交矩阵q和上三角矩阵r
#QR分解实际上是对矩阵a实施Schmidt正交化得到q
a = tf.constant([[1.0,2.0],[3.0,4.0]],dtype = tf.float32)
q,r = tf.linalg.qr(a)
tf.print(q)
tf.print(r)
tf.print(q@r)


#矩阵svd分解
#svd分解可以将任意一个矩阵分解为一个正交矩阵u,一个对角阵s和一个正交矩阵v.t()的乘积
#svd常用于矩阵压缩和降维
a  = tf.constant([[1.0,2.0],[3.0,4.0],[5.0,6.0]], dtype = tf.float32)
s,u,v = tf.linalg.svd(a)

tf.print(u,"\n")
>> [[0.229847744 -0.88346082]
 [0.524744868 -0.240782902]
 [0.819642067 0.401896209]] 
 
tf.print(s,"\n")
>> [9.52551842 0.51429987] 

tf.print(v,"\n")
>> [[0.619629562 0.784894466]
 [0.784894466 -0.619629562]]
 
tf.print(u@tf.linalg.diag(s)@tf.transpose(v))
>> [[1.00000119 2]
 [3.00000095 4.00000048]
 [5.00000143 6.00000095]]
#利用svd分解可以在TensorFlow中实现主成分分析降维

四、广播机制

在这里插入图片描述

广播规则：先将两个张量的shape右对齐，维度数较少的张量在左侧补1；对齐后逐维比较，若某一维度上两者的长度相同，或其中一个的长度为1，则该维度可以广播（长度为1的一方沿该维度复制扩展）；否则无法广播。

a = tf.constant([1,2,3])		# 1 * 3  -> 3 * 3
b = tf.constant([[0,0,0],[1,1,1],[2,2,2]])	# 3 * 3
print(b + a)  	# 等价于 b + tf.broadcast_to(a,b.shape)
>> tf.Tensor(
[[1 2 3]
 [2 3 4]
 [3 4 5]], shape=(3, 3), dtype=int32)
解释： tf.broadcast_to 以显式的方式按照广播机制扩展张量的维度。

#计算广播后计算结果的形状，静态形状，TensorShape类型参数
tf.broadcast_static_shape(a.shape,b.shape)
>> TensorShape([3, 3])

#计算广播后计算结果的形状，动态形状，Tensor类型参数
c = tf.constant([1,2,3])
d = tf.constant([[1],[2],[3]])
tf.broadcast_dynamic_shape(tf.shape(c),tf.shape(d))
>> <tf.Tensor: shape=(2,), dtype=int32, numpy=array([3, 3], dtype=int32)>

#广播效果
c+d #等价于 tf.broadcast_to(c,[3,3]) + tf.broadcast_to(d,[3,3])