TensorFlow 2.0 Basic Operations (Part 1)

import tensorflow as tf

1 Tensor Sorting Example (top-k accuracy)

output = tf.random.normal([10,6])  # draw random values from a normal distribution
output
<tf.Tensor: id=5, shape=(10, 6), dtype=float32, numpy=
array([[ 0.76310456, -1.1337202 ,  1.0353428 ,  1.0621719 , -1.3435235 ,
        -0.8340364 ],
       [-0.47014472, -0.2422621 ,  0.2518393 , -0.23825932, -1.0721133 ,
         0.24922352],
       [ 0.44158378,  0.6831124 , -0.54429495,  1.1736444 , -0.26429346,
         0.7027973 ],
       [-0.7662839 ,  1.4855492 ,  0.42412958, -0.29403406,  1.043192  ,
         1.0433921 ],
       [ 0.44711986,  0.7275903 ,  0.31700605,  0.2726328 , -0.16104753,
        -1.1572416 ],
       [-0.90731895, -0.2063934 ,  0.952755  ,  0.6108949 ,  2.9908571 ,
        -1.7417939 ],
       [-0.17370664, -0.03342665,  0.5075081 ,  1.3842217 ,  0.4897528 ,
        -0.37641558],
       [-1.0750929 , -0.03694849,  1.319619  ,  1.5328622 ,  1.5641279 ,
        -0.2661516 ],
       [ 0.4573638 ,  0.8733606 ,  0.0470969 ,  1.0739747 , -0.8262296 ,
        -0.59460646],
       [ 1.420134  ,  1.216806  ,  0.7350984 , -2.035839  ,  0.675249  ,
         0.5640526 ]], dtype=float32)>
output = tf.math.softmax(output, axis = 1)  # make the six class probabilities sum to 1
output
<tf.Tensor: id=7, shape=(10, 6), dtype=float32, numpy=
array([[0.24179898, 0.03628056, 0.31745782, 0.32609025, 0.02941423,
        0.04895815],
       [0.12230478, 0.15360723, 0.25176606, 0.15422331, 0.0669903 ,
        0.25110835],
       [0.15342492, 0.19533965, 0.05724456, 0.31902578, 0.07574209,
        0.199223  ],
       [0.03621496, 0.34422845, 0.11909077, 0.05807425, 0.22117367,
        0.22121793],
       [0.20889905, 0.27653062, 0.18341242, 0.17545176, 0.11371368,
        0.04199244],
       [0.01568617, 0.03161731, 0.10077128, 0.07159271, 0.7735231 ,
        0.00680941],
       [0.08595599, 0.09890062, 0.16987287, 0.4082027 , 0.16688333,
        0.07018443],
       [0.02241746, 0.06330627, 0.24580826, 0.30423334, 0.3138957 ,
        0.05033905],
       [0.17673211, 0.26790482, 0.117257  , 0.3274207 , 0.04896186,
        0.0617235 ],
       [0.30757383, 0.2509835 , 0.15503944, 0.00970598, 0.14603263,
        0.1306646 ]], dtype=float32)>
target = tf.random.uniform([10],maxval = 6,dtype = tf.int32)  # generate random labels
target
<tf.Tensor: id=12, shape=(10,), dtype=int32, numpy=array([4, 5, 5, 5, 4, 1, 3, 3, 4, 3])>
print('prob:',output.numpy())
prob: [[0.24179898 0.03628056 0.31745782 0.32609025 0.02941423 0.04895815]
 [0.12230478 0.15360723 0.25176606 0.15422331 0.0669903  0.25110835]
 [0.15342492 0.19533965 0.05724456 0.31902578 0.07574209 0.199223  ]
 [0.03621496 0.34422845 0.11909077 0.05807425 0.22117367 0.22121793]
 [0.20889905 0.27653062 0.18341242 0.17545176 0.11371368 0.04199244]
 [0.01568617 0.03161731 0.10077128 0.07159271 0.7735231  0.00680941]
 [0.08595599 0.09890062 0.16987287 0.4082027  0.16688333 0.07018443]
 [0.02241746 0.06330627 0.24580826 0.30423334 0.3138957  0.05033905]
 [0.17673211 0.26790482 0.117257   0.3274207  0.04896186 0.0617235 ]
 [0.30757383 0.2509835  0.15503944 0.00970598 0.14603263 0.1306646 ]]
pred = tf.argmax(output,axis = 1)
print('pred:',pred.numpy())
pred: [3 2 3 1 1 4 3 4 3 0]
print('label:',target.numpy())
label: [4 5 5 5 4 1 3 3 4 3]

Computing the accuracy

topk = (1,2,3,4,5,6)
maxk = max(topk)
batch_size = target.shape[0]
print(batch_size)
10
pred = tf.math.top_k(output,maxk).indices
print(pred.numpy())
[[3 2 0 5 1 4]
 [2 5 3 1 0 4]
 [3 5 1 0 4 2]
 [1 5 4 2 3 0]
 [1 0 2 3 4 5]
 [4 2 3 1 0 5]
 [3 2 4 1 0 5]
 [4 3 2 1 5 0]
 [3 1 0 2 5 4]
 [0 1 2 4 5 3]]
pred = tf.transpose(pred,perm = [1,0])
target_ = tf.broadcast_to(target, pred.shape)
print(target_.numpy())
[[4 5 5 5 4 1 3 3 4 3]
 [4 5 5 5 4 1 3 3 4 3]
 [4 5 5 5 4 1 3 3 4 3]
 [4 5 5 5 4 1 3 3 4 3]
 [4 5 5 5 4 1 3 3 4 3]
 [4 5 5 5 4 1 3 3 4 3]]
correct = tf.equal(pred,target_)
print(correct.numpy())
[[False False False False False False  True False False False]
 [False  True  True  True False False False  True False False]
 [False False False False False False False False False False]
 [False False False False False  True False False False False]
 [False False False False  True False False False False False]
 [ True False False False False False False False  True  True]]
res = []
print(correct[:1])
#In tf.reshape(), -1 means we do not have to specify that dimension's size ourselves;
#the function computes it automatically, but at most one -1 may appear in the shape list
#(with more than one -1 the shape would be underdetermined).
correct_1 = tf.cast(tf.reshape(correct[:1],[-1]),dtype = tf.float32) 
print(correct_1.numpy())
correct_1 = tf.reduce_sum(correct_1)
print(correct_1.numpy())
acc = float(correct_1*(100.0/batch_size))
print(acc)
res.append(acc)
print(res)
tf.Tensor([[False False False False False False  True False False False]], shape=(1, 10), dtype=bool)
[0. 0. 0. 0. 0. 0. 1. 0. 0. 0.]
1.0
10.0
[10.0]
res = []
for k in topk:
    correct_k = tf.cast(tf.reshape(correct[:k],[-1]),dtype = tf.float32) 
    correct_k = tf.reduce_sum(correct_k)
    acc = float(correct_k*(100.0/batch_size))
    res.append(acc)
print(res)
[10.0, 50.0, 50.0, 60.0, 70.0, 100.0]
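
The steps above can be wrapped into a single helper. A minimal sketch, with a function name (accuracy) of my own choosing, assuming output holds per-class probabilities and target holds integer labels:

def accuracy(output, target, topk=(1,)):
    # output: [batch, classes] probabilities, target: [batch] integer labels
    maxk = max(topk)
    batch_size = target.shape[0]
    pred = tf.math.top_k(output, maxk).indices     # [batch, maxk]
    pred = tf.transpose(pred, perm=[1, 0])         # [maxk, batch]
    target_ = tf.broadcast_to(target, pred.shape)
    correct = tf.equal(pred, target_)
    res = []
    for k in topk:
        correct_k = tf.cast(tf.reshape(correct[:k], [-1]), dtype=tf.float32)
        correct_k = tf.reduce_sum(correct_k)
        res.append(float(correct_k * (100.0 / batch_size)))
    return res

print(accuracy(output, target, topk=(1, 2, 3, 4, 5, 6)))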

2 Padding and Tiling

Padding: pad
(frequently used when padding images)

a = tf.reshape(tf.range(9),[3,3])
b = tf.pad(a,[[0,0],[0,0]])
c = tf.pad(a,[[1,1],[1,1]])
print('a\n',a.numpy())
print('b\n',b.numpy())
print('\nc',c.numpy())
a
 [[0 1 2]
 [3 4 5]
 [6 7 8]]
b
 [[0 1 2]
 [3 4 5]
 [6 7 8]]

c [[0 0 0 0 0]
 [0 0 1 2 0]
 [0 3 4 5 0]
 [0 6 7 8 0]
 [0 0 0 0 0]]
a = tf.random.normal([4,28,28,3])
b = tf.pad(a,[[0,0],[2,2],[2,2],[0,0]])
print(b.shape)
(4, 32, 32, 3)

Tiling: tile
(broadcast_to also replicates data, but without actually copying it in memory)

a = [[0,1,2],[3,4,5],[5,3,3]]
b = tf.tile(a,[1,2])
c = tf.tile(a,[2,1])
d = tf.tile(a,[2,2])
print(a)
print(b)
print(c)
print(d)
[[0, 1, 2], [3, 4, 5], [5, 3, 3]]
tf.Tensor(
[[0 1 2 0 1 2]
 [3 4 5 3 4 5]
 [5 3 3 5 3 3]], shape=(3, 6), dtype=int32)
tf.Tensor(
[[0 1 2]
 [3 4 5]
 [5 3 3]
 [0 1 2]
 [3 4 5]
 [5 3 3]], shape=(6, 3), dtype=int32)
tf.Tensor(
[[0 1 2 0 1 2]
 [3 4 5 3 4 5]
 [5 3 3 5 3 3]
 [0 1 2 0 1 2]
 [3 4 5 3 4 5]
 [5 3 3 5 3 3]], shape=(6, 6), dtype=int32)

3 Clipping Tensors

a = tf.range(10)
b = tf.maximum(a,2)
c = tf.minimum(a,8)
tf.clip_by_value(a,2,8)
print(a)
print(b)
print(c)
tf.Tensor([0 1 2 3 4 5 6 7 8 9], shape=(10,), dtype=int32)
tf.Tensor([2 2 2 3 4 5 6 7 8 9], shape=(10,), dtype=int32)
tf.Tensor([0 1 2 3 4 5 6 7 8 8], shape=(10,), dtype=int32)
a = a-5
d = tf.nn.relu(a)
e = tf.maximum(a,0)
print(a)
print(d)
print(e)
tf.Tensor([-5 -4 -3 -2 -1  0  1  2  3  4], shape=(10,), dtype=int32)
tf.Tensor([0 0 0 0 0 0 1 2 3 4], shape=(10,), dtype=int32)
tf.Tensor([0 0 0 0 0 0 1 2 3 4], shape=(10,), dtype=int32)

Proportional scaling (clip_by_norm): the norm is clipped while the vector's direction is unchanged

a = tf.random.normal([2,2],mean = 10)
b = tf.norm(a)
c = tf.clip_by_norm(a,15)
d = tf.norm(c)
print(a)
print(b)
print(c)
print(d)
tf.Tensor(
[[ 9.901226 11.381033]
 [10.905721  9.152639]], shape=(2, 2), dtype=float32)
tf.Tensor(20.742897, shape=(), dtype=float32)
tf.Tensor(
[[7.1599636 8.23007  ]
 [7.8863535 6.618632 ]], shape=(2, 2), dtype=float32)
tf.Tensor(14.999999, shape=(), dtype=float32)

Gradient Clipping

 from tensorflow.keras import datasets
(x,y),_ = datasets.mnist.load_data()
x = tf.convert_to_tensor(x,dtype = tf.float32)
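
The snippet above only loads the data; the clipping itself happens inside the training loop, where the gradients are rescaled before being applied. A minimal sketch, assuming a throwaway one-layer model and SGD optimizer of my own, not part of the original notes:

from tensorflow import keras

model = keras.Sequential([keras.layers.Dense(10)])   # hypothetical toy model
optimizer = keras.optimizers.SGD(learning_rate=0.01)

x_batch = tf.reshape(x[:128], [-1, 28*28]) / 255.    # x was converted to float32 above
y_batch = tf.convert_to_tensor(y[:128], dtype=tf.int32)

with tf.GradientTape() as tape:
    logits = model(x_batch)
    loss = tf.reduce_mean(
        tf.losses.sparse_categorical_crossentropy(y_batch, logits, from_logits=True))

grads = tape.gradient(loss, model.trainable_variables)
# rescale all gradients together so that their global norm is at most 15
grads, _ = tf.clip_by_global_norm(grads, 15)
optimizer.apply_gradients(zip(grads, model.trainable_variables))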

4 Advanced Ops

where(tensor)

import tensorflow as tf
a = tf.random.normal([3,3])
mask = a>0
mask
<tf.Tensor: id=7, shape=(3, 3), dtype=bool, numpy=
array([[False,  True,  True],
       [False, False,  True],
       [ True,  True,  True]])>
tf.boolean_mask(a,mask)
<tf.Tensor: id=35, shape=(6,), dtype=float32, numpy=
array([0.52404463, 0.62450945, 0.6324052 , 0.4014356 , 1.3766853 ,
       0.8314979 ], dtype=float32)>
indices = tf.where(mask)
indices
<tf.Tensor: id=38, shape=(6, 2), dtype=int64, numpy=
array([[0, 1],
       [0, 2],
       [1, 2],
       [2, 0],
       [2, 1],
       [2, 2]], dtype=int64)>
tf.gather_nd(a,indices)
<tf.Tensor: id=40, shape=(6,), dtype=float32, numpy=
array([0.52404463, 0.62450945, 0.6324052 , 0.4014356 , 1.3766853 ,
       0.8314979 ], dtype=float32)>
A = tf.ones([3,3])
B = tf.zeros([3,3])
tf.where(mask,A,B)
<tf.Tensor: id=59, shape=(3, 3), dtype=float32, numpy=
array([[0., 1., 1.],
       [0., 0., 1.],
       [1., 1., 1.]], dtype=float32)>

scatter_nd

indices = tf.constant([[4],[3],[1],[7]])
indices
<tf.Tensor: id=62, shape=(4, 1), dtype=int32, numpy=
array([[4],
       [3],
       [1],
       [7]])>
updates = tf.constant([9,10,11,12])
updates
<tf.Tensor: id=64, shape=(4,), dtype=int32, numpy=array([ 9, 10, 11, 12])>
shape = tf.constant([8])
print(shape.numpy())
[8]
tf.scatter_nd(indices,updates,shape)
<tf.Tensor: id=68, shape=(8,), dtype=int32, numpy=array([ 0, 11,  0, 10,  9,  0,  0, 12])>
indices = tf.constant([[0],[2]])
indices
<tf.Tensor: id=71, shape=(2, 1), dtype=int32, numpy=
array([[0],
       [2]])>
updates = tf.constant([[[5,5,5,5],[6,6,6,6],[7,7,7,7],[8,8,8,8]],[[5,5,5,5],[6,6,6,6],[7,7,7,7],[8,8,8,8]]])
updates
<tf.Tensor: id=73, shape=(2, 4, 4), dtype=int32, numpy=
array([[[5, 5, 5, 5],
        [6, 6, 6, 6],
        [7, 7, 7, 7],
        [8, 8, 8, 8]],

       [[5, 5, 5, 5],
        [6, 6, 6, 6],
        [7, 7, 7, 7],
        [8, 8, 8, 8]]])>
updates.shape
TensorShape([2, 4, 4])
shape = tf.constant([4,4,4])
print(shape.numpy())
[4 4 4]
tf.scatter_nd(indices,updates,shape)
<tf.Tensor: id=78, shape=(4, 4, 4), dtype=int32, numpy=
array([[[5, 5, 5, 5],
        [6, 6, 6, 6],
        [7, 7, 7, 7],
        [8, 8, 8, 8]],

       [[0, 0, 0, 0],
        [0, 0, 0, 0],
        [0, 0, 0, 0],
        [0, 0, 0, 0]],

       [[5, 5, 5, 5],
        [6, 6, 6, 6],
        [7, 7, 7, 7],
        [8, 8, 8, 8]],

       [[0, 0, 0, 0],
        [0, 0, 0, 0],
        [0, 0, 0, 0],
        [0, 0, 0, 0]]])>

Using meshgrid to build coordinate grids for 3D plots

y = tf.linspace(-2.,2,5)
y
<tf.Tensor: id=83, shape=(5,), dtype=float32, numpy=array([-2., -1.,  0.,  1.,  2.], dtype=float32)>
x = tf.linspace(-2.,2,5)
x
<tf.Tensor: id=93, shape=(5,), dtype=float32, numpy=array([-2., -1.,  0.,  1.,  2.], dtype=float32)>
points_x,points_y = tf.meshgrid(x,y)
points_x.shape
TensorShape([5, 5])
points_x
<tf.Tensor: id=115, shape=(5, 5), dtype=float32, numpy=
array([[-2., -1.,  0.,  1.,  2.],
       [-2., -1.,  0.,  1.,  2.],
       [-2., -1.,  0.,  1.,  2.],
       [-2., -1.,  0.,  1.,  2.],
       [-2., -1.,  0.,  1.,  2.]], dtype=float32)>
points_y
<tf.Tensor: id=116, shape=(5, 5), dtype=float32, numpy=
array([[-2., -2., -2., -2., -2.],
       [-1., -1., -1., -1., -1.],
       [ 0.,  0.,  0.,  0.,  0.],
       [ 1.,  1.,  1.,  1.,  1.],
       [ 2.,  2.,  2.,  2.,  2.]], dtype=float32)>
points = tf.stack([points_x,points_y],axis = 2)
points
<tf.Tensor: id=119, shape=(5, 5, 2), dtype=float32, numpy=
array([[[-2., -2.],
        [-1., -2.],
        [ 0., -2.],
        [ 1., -2.],
        [ 2., -2.]],

       [[-2., -1.],
        [-1., -1.],
        [ 0., -1.],
        [ 1., -1.],
        [ 2., -1.]],

       [[-2.,  0.],
        [-1.,  0.],
        [ 0.,  0.],
        [ 1.,  0.],
        [ 2.,  0.]],

       [[-2.,  1.],
        [-1.,  1.],
        [ 0.,  1.],
        [ 1.,  1.],
        [ 2.,  1.]],

       [[-2.,  2.],
        [-1.,  2.],
        [ 0.,  2.],
        [ 1.,  2.],
        [ 2.,  2.]]], dtype=float32)>
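
A grid like this is typically used to evaluate a function at every (x, y) point and plot it. A minimal sketch, assuming matplotlib is installed; the function z = sin(x) + sin(y) is just an arbitrary example of mine:

import matplotlib.pyplot as plt

# evaluate z = sin(x) + sin(y) on the grid built above
z = tf.math.sin(points[..., 0]) + tf.math.sin(points[..., 1])

plt.contourf(points_x.numpy(), points_y.numpy(), z.numpy())
plt.colorbar()
plt.xlabel('x')
plt.ylabel('y')
plt.show()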

5 Loading Data

keras.datasets:

boston housing

mnist/fashion mnist

cifar10/100

imdb

1) Loading MNIST

import tensorflow as tf 
from tensorflow import keras
(x, y), (x_test, y_test) = keras.datasets.mnist.load_data()
x.shape
(60000, 28, 28)
y.shape
(60000,)
x.min(),x.max(),x.mean()
(0, 255, 33.318421449829934)
x_test.shape,y_test.shape
((10000, 28, 28), (10000,))
y[:4]
array([5, 0, 4, 1], dtype=uint8)
y_onehot = tf.one_hot(y,depth = 10)
y_onehot[:2]
<tf.Tensor: id=8, shape=(2, 10), dtype=float32, numpy=
array([[0., 0., 0., 0., 0., 1., 0., 0., 0., 0.],
       [1., 0., 0., 0., 0., 0., 0., 0., 0., 0.]], dtype=float32)>

2) Loading CIFAR-10/100

(x,y),(x_test,y_test) = keras.datasets.cifar10.load_data()
#I paused here because the download was far too slow. One small hiccup: the download URL was wrong, and I later edited the Python source file to update it.
Downloading data from https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz
  4890624/170498071 [..............................] - ETA: 9:40:41
import tensorflow as tf 
from tensorflow import keras
(x,y),(x_test,y_test) = keras.datasets.cifar10.load_data()       
#Because I had downloaded the dataset separately and copied it directly into the target folder 'C:\Users\wanfuchun\.keras\datasets', loading here is very fast
x.shape,y.shape,x_test.shape,y_test.shape
((50000, 32, 32, 3), (50000, 1), (10000, 32, 32, 3), (10000, 1))
x.min(),x.max()
(0, 255)
y[:4]
array([[6],
       [9],
       [9],
       [4]], dtype=uint8)
db = tf.data.Dataset.from_tensor_slices(x_test)
next(iter(db)).shape
TensorShape([32, 32, 3])
db = tf.data.Dataset.from_tensor_slices((x_test,y_test))
next(iter(db))[0].shape
TensorShape([32, 32, 3])
next(iter(db))[1].shape
TensorShape([1])

3) Shuffling with the Dataset API

db = db.shuffle(10000)

4) Data preprocessing with .map

def preprocess(x,y):
    x = tf.cast(x,dtype=tf.float32)/255.
    y = tf.cast(y,dtype=tf.int32)
    y = tf.one_hot(y,depth = 10)
    return x,y
db2 = db.map(preprocess)
res = next(iter(db2))
res[0].shape,res[1].shape
(TensorShape([32, 32, 3]), TensorShape([1, 10]))
res[1][:2] 
<tf.Tensor: id=221, shape=(1, 10), dtype=float32, numpy=array([[0., 0., 0., 0., 1., 0., 0., 0., 0., 0.]], dtype=float32)>
db3 = db2.batch(32)
res = next(iter(db3))
res[0].shape,res[1].shape
(TensorShape([32, 32, 32, 3]), TensorShape([32, 1, 10]))
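
In practice the shuffle, map, and batch steps are chained into one pipeline. A minimal sketch (the repeat count and batch size below are arbitrary choices for illustration):

db = tf.data.Dataset.from_tensor_slices((x_test, y_test))
db = db.shuffle(10000).map(preprocess).batch(32).repeat(2)   # roughly two epochs of data

for step, (xb, yb) in enumerate(db):
    print(step, xb.shape, yb.shape)   # (32, 32, 32, 3) and (32, 1, 10) for full batches
    break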

6 Loss Calculation

1) The concept of entropy

import tensorflow as tf
a = tf.fill([4],0.25)
a * tf.math.log(a)/tf.math.log(2.)
<tf.Tensor: id=7, shape=(4,), dtype=float32, numpy=array([-0.5, -0.5, -0.5, -0.5], dtype=float32)>
-tf.reduce_sum(a*tf.math.log(a)/tf.math.log(2.))
<tf.Tensor: id=16, shape=(), dtype=float32, numpy=2.0>

As the result shows, when the probabilities are uniform no single outcome is surprising, the distribution carries the least information, and the entropy is at its maximum.
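
Written out, the quantity computed above is the entropy in bits: $H(P) = -\sum_i P(i)\,\log_2 P(i)$. With four classes at $P(i)=0.25$ this gives $-4 \times 0.25 \times \log_2 0.25 = 2$ bits, matching the output above.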

a = tf.constant([0.01,0.01,0.01,0.97])
a * tf.math.log(a)/tf.math.log(2.)
<tf.Tensor: id=23, shape=(4,), dtype=float32, numpy=array([-0.06643856, -0.06643856, -0.06643856, -0.04262501], dtype=float32)>
-tf.reduce_sum(a*tf.math.log(a)/tf.math.log(2.))
<tf.Tensor: id=32, shape=(), dtype=float32, numpy=0.24194068>

Here the distribution is sharply peaked: an unlikely outcome would be a big surprise (and highly informative), and the entropy is small.

2) The concept of cross-entropy
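
The cross-entropy between a true distribution $p$ and a predicted distribution $q$ is $H(p, q) = -\sum_i p(i)\,\ln q(i)$ (tf.losses.categorical_crossentropy uses the natural logarithm). For a one-hot label and a uniform prediction this reduces to $-\ln 0.25 \approx 1.386$, which is exactly the first result below.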

tf.losses.categorical_crossentropy([0,1,0,0],[0.25,0.25,0.25,0.25])
<tf.Tensor: id=51, shape=(), dtype=float32, numpy=1.3862944>
tf.losses.categorical_crossentropy([0,1,0,0],[0.1,0.1,0.8,0.1])
<tf.Tensor: id=70, shape=(), dtype=float32, numpy=2.3978953>
tf.losses.categorical_crossentropy([0,1,0,0],[0.01,0.97,0.01,0.01])
<tf.Tensor: id=89, shape=(), dtype=float32, numpy=0.030459179>
tf.losses.BinaryCrossentropy()([1],[0.1])
<tf.Tensor: id=128, shape=(), dtype=float32, numpy=2.3025842>
tf.losses.binary_crossentropy([1],[0.1])
<tf.Tensor: id=155, shape=(), dtype=float32, numpy=2.3025842>

In practice cross-entropy, rather than MSE, is usually used as the classification loss: with a sigmoid activation, MSE produces vanishing gradients once the unit saturates. (This is purely an engineering-level consideration.) A small demo of the effect follows.
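
A minimal sketch of that vanishing-gradient effect, using a single saturated logit (the value -5.0 is an arbitrary choice of mine):

z = tf.constant(-5.)              # a logit deep in the sigmoid's saturated region
label = tf.constant(1.)
with tf.GradientTape(persistent = True) as tape:
    tape.watch(z)
    p = tf.sigmoid(z)
    mse_loss = (label - p) ** 2
    ce_loss = tf.losses.binary_crossentropy([label], [p])
print(tape.gradient(mse_loss, z).numpy())   # roughly -0.013: almost no learning signal
print(tape.gradient(ce_loss, z).numpy())    # roughly -0.99: still a strong signal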

7 Gradient Descent

First-order gradients

import tensorflow as tf
w = tf.constant(1.)
x = tf.constant(2.)
y = x*w
with tf.GradientTape() as tape:
    tape.watch([w])
    y = x*w
grad1 = tape.gradient(y,[w])
grad1
[<tf.Tensor: id=6, shape=(), dtype=float32, numpy=2.0>]
grad1 = tape.gradient(y,[w])
---------------------------------------------------------------------------

RuntimeError                              Traceback (most recent call last)

<ipython-input-4-b4d14a0cffac> in <module>
----> 1 grad1 = tape.gradient(y,[w])


F:\Anaconda3\envs\gpu\lib\site-packages\tensorflow\python\eager\backprop.py in gradient(self, target, sources, output_gradients, unconnected_gradients)
    918     """
    919     if self._tape is None:
--> 920       raise RuntimeError("GradientTape.gradient can only be called once on "
    921                          "non-persistent tapes.")
    922     if self._recording:


RuntimeError: GradientTape.gradient can only be called once on non-persistent tapes.
with tf.GradientTape(persistent = True) as tape:
    tape.watch([w])
    y = x*w
grad1 = tape.gradient(y,[w])
grad1
[<tf.Tensor: id=11, shape=(), dtype=float32, numpy=2.0>]
grad1 = tape.gradient(y,[w])

Second-order gradients

import tensorflow as tf

Omitted here; simply nest two GradientTape contexts, as in the sketch below.
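
A minimal sketch of the nesting, using a toy function of my own, y = x·w², so that the second derivative is non-zero:

x = tf.constant(2.)
w = tf.constant(1.)
with tf.GradientTape() as tape2:
    tape2.watch(w)
    with tf.GradientTape() as tape1:
        tape1.watch(w)
        y = x * w ** 2            # dy/dw = 2xw, d2y/dw2 = 2x
    dy_dw = tape1.gradient(y, w)
d2y_dw2 = tape2.gradient(dy_dw, w)
print(dy_dw.numpy())              # 4.0
print(d2y_dw2.numpy())            # 4.0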

8 Activation Functions and Their Gradients

sigmoid

import tensorflow as tf
a = tf.linspace(-10.,10.,10)
with tf.GradientTape() as tape:
    tape.watch(a)
    y = tf.sigmoid(a)
grads = tape.gradient(y,[a])
print(a)
print(y)
print(grads)
tf.Tensor(
[-10.         -7.7777777  -5.5555553  -3.333333   -1.1111107   1.1111116
   3.333334    5.5555563   7.7777786  10.       ], shape=(10,), dtype=float32)
tf.Tensor(
[4.5388937e-05 4.1878223e-04 3.8510561e-03 3.4445226e-02 2.4766389e-01
 7.5233626e-01 9.6555483e-01 9.9614894e-01 9.9958128e-01 9.9995458e-01], shape=(10,), dtype=float32)
[<tf.Tensor: id=12, shape=(10,), dtype=float32, numpy=
array([4.5386874e-05, 4.1860685e-04, 3.8362255e-03, 3.3258751e-02,
       1.8632649e-01, 1.8632641e-01, 3.3258699e-02, 3.8362255e-03,
       4.1854731e-04, 4.5416677e-05], dtype=float32)>]

Tanh (used mainly in recurrent neural networks)

a = tf.linspace(-10.,10.,10)
y = tf.tanh(a)
print(a)
print(y)
tf.Tensor(
[-10.         -7.7777777  -5.5555553  -3.333333   -1.1111107   1.1111116
   3.333334    5.5555563   7.7777786  10.       ], shape=(10,), dtype=float32)
tf.Tensor(
[-1.         -0.99999964 -0.99997014 -0.997458   -0.8044547   0.804455
  0.997458    0.99997014  0.99999964  1.        ], shape=(10,), dtype=float32)

Rectified Linear Unit

a = tf.linspace(-1.,1.,10)
tf.nn.relu(a).numpy()
array([0.        , 0.        , 0.        , 0.        , 0.        ,
       0.11111116, 0.33333337, 0.5555556 , 0.7777778 , 1.        ],
      dtype=float32)
tf.nn.leaky_relu(a).numpy()
array([-0.2       , -0.15555556, -0.11111112, -0.06666666, -0.02222222,
        0.11111116,  0.33333337,  0.5555556 ,  0.7777778 ,  1.        ],
      dtype=float32)
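
The gradient of ReLU can be read off with a tape exactly as for sigmoid above: 1 where the input is positive, 0 elsewhere. A minimal sketch:

a = tf.linspace(-1., 1., 10)
with tf.GradientTape() as tape:
    tape.watch(a)
    y = tf.nn.relu(a)
grads = tape.gradient(y, [a])
print(grads[0].numpy())   # 0. for the negative inputs, 1. for the positive ones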

9 Loss Functions and Their Gradients

Mean Squared Error (MSE)

x = tf.random.normal([3,4])
w = tf.random.normal([4,3])
b = tf.random.normal([3])
y = tf.constant([2,0,1])
with tf.GradientTape() as tape:
    tape.watch([w,b])                    # w and b must be watched here; otherwise they would have to be created above as tf.Variable(w), etc. One of the two is required.
    prob = tf.nn.softmax(x @ w + b, axis = 1)      # probability
    print(prob.numpy())
    loss = tf.reduce_mean(tf.losses.MSE(tf.one_hot(y,depth = 3),prob)) # one-hot encode the labels, take the per-element MSE against the softmax probabilities, then average
grads = tape.gradient(loss,[w,b])
print('grads[0]:\n',grads[0].numpy())
print('grads[1]:\n',grads[1].numpy())
[[0.6746528  0.0133454  0.31200182]
 [0.74632466 0.15744632 0.09622896]
 [0.5631675  0.4008012  0.0360313 ]]
grads[0]:
 [[ 0.0165339  -0.03772166  0.02118775]
 [ 0.03422298 -0.08465307  0.05043009]
 [ 0.01001721 -0.0136021   0.00358488]
 [ 0.05285116  0.02282622 -0.07567739]]
grads[1]:
 [ 0.10942388 -0.05007509 -0.05934878]

Cross-Entropy Loss (binary, multi-class, +softmax; the rest is left to the logistic regression part)

softmax (a "soft" version of max) makes the strong stronger and the weak weaker, a pyramid-like effect: being only slightly ahead of the others can yield a many-times larger share.
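
A small numeric illustration of that amplification (the logits 2.0 and 1.0 are arbitrary): the inputs differ by only 1, yet after softmax the larger one takes about 73% of the probability mass.

logits = tf.constant([2.0, 1.0])
print(tf.nn.softmax(logits).numpy())   # approximately [0.731, 0.269]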

Crossentropy gradient

x = tf.random.normal([2,4])
w = tf.random.normal([4,3])
b = tf.zeros([3])
y = tf.constant([2,0])
with tf.GradientTape()as tape:
    tape.watch([w,b])
    logits = x@w+b
    #feeding the logits through an activation yields probabilities; combining that with one_hot raises numerical-stability issues,
    #so here it is all folded into categorical_crossentropy, which requires passing from_logits = True
    loss = tf.reduce_mean(tf.losses.categorical_crossentropy(tf.one_hot(y,depth = 3),logits, from_logits = True))
grads = tape.gradient(loss,[w,b])

grads
[<tf.Tensor: id=579, shape=(4, 3), dtype=float32, numpy=
 array([[ 0.59179735, -0.6549072 ,  0.06310985],
        [-0.51698416,  0.70245177, -0.18546759],
        [ 0.38274866, -0.50988907,  0.12714042],
        [ 0.3166223 ,  0.03204821, -0.3486705 ]], dtype=float32)>,
 <tf.Tensor: id=577, shape=(3,), dtype=float32, numpy=array([-0.31501144,  0.7013504 , -0.38633895], dtype=float32)>]
grads[1]
<tf.Tensor: id=577, shape=(3,), dtype=float32, numpy=array([-0.31501144,  0.7013504 , -0.38633895], dtype=float32)>
grads[0]
<tf.Tensor: id=579, shape=(4, 3), dtype=float32, numpy=
array([[ 0.59179735, -0.6549072 ,  0.06310985],
       [-0.51698416,  0.70245177, -0.18546759],
       [ 0.38274866, -0.50988907,  0.12714042],
       [ 0.3166223 ,  0.03204821, -0.3486705 ]], dtype=float32)>

10 Single-Output Perceptron Gradient

x = tf.random.normal([1,3])
w = tf.ones([3,1])
b = tf.ones([1])
y = tf.constant([1])

with tf.GradientTape() as tape:
    tape.watch([w,b])
    prob = tf.sigmoid(x@w+b)
    loss = tf.reduce_mean(tf.losses.MSE(y,prob))

grads = tape.gradient(loss,[w,b])
grads[0]
<tf.Tensor: id=869, shape=(3, 1), dtype=float32, numpy=
array([[ 0.41205588],
       [-0.10961942],
       [ 0.04508047]], dtype=float32)>
grads[1]
<tf.Tensor: id=867, shape=(1,), dtype=float32, numpy=array([-0.27709824], dtype=float32)>

11 Multi-Output Perceptron Gradient

x = tf.random.normal([2,4])
w = tf.ones([4,3])
b = tf.ones([3])
y = tf.constant([2,0])

with tf.GradientTape() as tape:
    tape.watch([w,b])
    prob = tf.nn.softmax(x@w+b,axis=1)        # axis = 1: the softmax output has shape [b, 3], and we want axis 1 to hold the class probabilities
    loss = tf.reduce_mean(tf.losses.MSE(tf.one_hot(y,depth = 3),prob))

grads = tape.gradient(loss,[w,b])
grads
[<tf.Tensor: id=957, shape=(4, 3), dtype=float32, numpy=
 array([[-0.033397  ,  0.05529919, -0.02190218],
        [-0.02506173, -0.00697992,  0.03204165],
        [ 0.00255801,  0.04580436, -0.04836237],
        [-0.04055597,  0.01948298,  0.021073  ]], dtype=float32)>,
 <tf.Tensor: id=955, shape=(3,), dtype=float32, numpy=array([-0.03703704,  0.07407407, -0.03703704], dtype=float32)>]

12 Chain Rule

x = tf.constant(1.)
w1 = tf.constant(2.)
b1 = tf.constant(1.)
w2 = tf.constant(2.)
b2 = tf.constant(1.)
with tf.GradientTape(persistent = True) as tape:
    tape.watch([w1,b1,w2,b2])
    y1 = x * w1 + b1
    y2 = y1 * w2 + b2
dy2_dy1 = tape.gradient(y2,[y1])[0]
dy1_dw1 = tape.gradient(y1,[w1])[0]
dy2_dw1 = tape.gradient(y2,[w1])[0]
print(dy2_dy1.numpy())
print(dy1_dw1.numpy())
print(dy2_dw1.numpy())
2.0
1.0
2.0

13 Multi-Layer Perceptron Gradient

The derivation is omitted. Working backwards layer by layer is not as complicated as it may seem: there are only a handful of activation functions, so the whole procedure can be implemented programmatically. The only real difficulties are the matrix algebra and the optimization of the algorithm, and those are handled by the people building the low-level frameworks; we just need to understand the principle and know how to use it. A small sketch follows.
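
A minimal sketch of what that layer-by-layer backward pass amounts to in code: GradientTape propagates through a two-layer network automatically (the layer sizes here are my own choices):

x = tf.random.normal([2, 4])
w1, b1 = tf.random.normal([4, 8]), tf.zeros([8])
w2, b2 = tf.random.normal([8, 3]), tf.zeros([3])
y = tf.constant([2, 0])

with tf.GradientTape() as tape:
    tape.watch([w1, b1, w2, b2])
    h = tf.nn.relu(x @ w1 + b1)                    # hidden layer
    prob = tf.nn.softmax(h @ w2 + b2, axis = 1)    # output layer
    loss = tf.reduce_mean(tf.losses.MSE(tf.one_hot(y, depth = 3), prob))

grads = tape.gradient(loss, [w1, b1, w2, b2])
print([g.shape for g in grads])   # shapes (4, 8), (8,), (8, 3), (3,)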

14 Function Optimization in Practice (the Himmelblau Function)

import  numpy as np
from    mpl_toolkits.mplot3d import Axes3D
from    matplotlib import pyplot as plt
import  tensorflow as tf

def himmelblau(x):
    return (x[0] ** 2 + x[1] - 11) ** 2 + (x[0] + x[1] ** 2 - 7) ** 2


x = np.arange(-6, 6, 0.1)
y = np.arange(-6, 6, 0.1)
print('x,y range:', x.shape, y.shape)
X, Y = np.meshgrid(x, y)
print('X,Y maps:', X.shape, Y.shape)
Z = himmelblau([X, Y])

fig = plt.figure('himmelblau')
ax = fig.gca(projection='3d')
ax.plot_surface(X, Y, Z)
ax.view_init(60, -30)
ax.set_xlabel('x')
ax.set_ylabel('y')
plt.show()
x,y range: (120,) (120,)
X,Y maps: (120, 120) (120, 120)

[Figure: 3D surface plot of the Himmelblau function (output_171_1.png)]

import  numpy as np
from    mpl_toolkits.mplot3d import Axes3D
from    matplotlib import pyplot as plt
import  tensorflow as tf



def himmelblau(x):
    return (x[0] ** 2 + x[1] - 11) ** 2 + (x[0] + x[1] ** 2 - 7) ** 2


x = np.arange(-6, 6, 0.1)
y = np.arange(-6, 6, 0.1)
print('x,y range:', x.shape, y.shape)
X, Y = np.meshgrid(x, y)
print('X,Y maps:', X.shape, Y.shape)
Z = himmelblau([X, Y])

fig = plt.figure('himmelblau')
ax = fig.gca(projection='3d')
ax.plot_surface(X, Y, Z)
ax.view_init(60, -30)
ax.set_xlabel('x')
ax.set_ylabel('y')
plt.show()


# [1., 0.], [-4, 0.], [4, 0.]
x = tf.constant([-4., 0.])                  # note: different initial values lead to different final minima

for step in range(200):

    with tf.GradientTape() as tape:
        tape.watch([x])
        y = himmelblau(x)

    grads = tape.gradient(y, [x])[0] 
    x -= 0.01*grads

    

    if step % 20 == 0:
        print ('step {}: x = {}, f(x) = {}'
               .format(step, x.numpy(), y.numpy()))       # formatted output
x,y range: (120,) (120,)
X,Y maps: (120, 120) (120, 120)

[Figure: 3D surface plot of the Himmelblau function (output_172_1.png)]

step 0: x = [-2.98       -0.09999999], f(x) = 146.0
step 20: x = [-3.6890156 -3.1276684], f(x) = 6.054738998413086
step 40: x = [-3.7793102 -3.283186 ], f(x) = 0.0
step 60: x = [-3.7793102 -3.283186 ], f(x) = 0.0
step 80: x = [-3.7793102 -3.283186 ], f(x) = 0.0
step 100: x = [-3.7793102 -3.283186 ], f(x) = 0.0
step 120: x = [-3.7793102 -3.283186 ], f(x) = 0.0
step 140: x = [-3.7793102 -3.283186 ], f(x) = 0.0
step 160: x = [-3.7793102 -3.283186 ], f(x) = 0.0
step 180: x = [-3.7793102 -3.283186 ], f(x) = 0.0