【深度学习学习笔记】3.SoftmaxRegression：实现softmax的交叉熵损失函数

最新推荐文章于 2024-05-27 23:52:35 发布

cazenove

最新推荐文章于 2024-05-27 23:52:35 发布

阅读量1.1k

点赞数 2

本文链接：https://blog.csdn.net/cazenove/article/details/107674730

版权

Softmax Regression 可以看作 Logistic Regression 向多分类问题的推广，用于实现多分类。

Softmax函数生成每个类别的后验概率，取最大的作为最终类别。

屏蔽tensorflow的警告

import warnings
warnings.filterwarnings('ignore')

import tensorflow as tf
import matplotlib.pyplot as plt

from matplotlib import animation, rc
from IPython.display import HTML
import matplotlib.cm as cm
import numpy as np

# TensorFlow 1.x needs eager mode switched on explicitly. TensorFlow 2.x runs
# eagerly by default and no longer exposes this top-level function, so only
# call it when it exists.
if hasattr(tf, 'enable_eager_execution'):
    tf.enable_eager_execution()

# Generate the data set: three 2-D Gaussian clusters, one per class.
# Each cluster is stored as rows of (x coordinate, y coordinate, class label).
dot_num = 100

# Class 1: x ~ N(3, 1), y ~ N(6, 1), sampled independently.
x_p = np.random.normal(loc=3., scale=1, size=dot_num)
y_p = np.random.normal(loc=6., scale=1, size=dot_num)
y = np.ones(dot_num)
C1 = np.column_stack((x_p, y_p, y))

# Class 0: x ~ N(6, 1), y ~ N(3, 1), sampled independently.
x_n = np.random.normal(loc=6., scale=1, size=dot_num)
y_n = np.random.normal(loc=3., scale=1, size=dot_num)
y = np.zeros(dot_num)
C2 = np.column_stack((x_n, y_n, y))

# Class 2: x ~ N(7, 1), y ~ N(7, 1), sampled independently.
x_b = np.random.normal(loc=7., scale=1, size=dot_num)
y_b = np.random.normal(loc=7., scale=1, size=dot_num)
y = np.full(dot_num, 2.)
C3 = np.column_stack((x_b, y_b, y))

# Scatter-plot each cluster with its own colour and marker.
for _cluster, _colour, _marker in ((C1, 'b', '+'), (C2, 'g', 'o'), (C3, 'r', '*')):
    plt.scatter(_cluster[:, 0], _cluster[:, 1], c=_colour, marker=_marker)

# Stack the three clusters row-wise into a single array, then shuffle the
# rows in place so the classes are interleaved.
data_set = np.vstack((C1, C2, C3))
np.random.shuffle(data_set)

定义SoftmaxRegression

# Build the model.
# Small positive constant available for numerical-stability guards.
epsilon = 1e-12


class SoftmaxRegression():
    """Linear softmax classifier: 2 input features -> probabilities over 3 classes."""

    def __init__(self):
        """Create the trainable parameters of the model."""
        # Weight matrix, shape (2, 3), initialised uniformly in [-0.1, 0.1).
        self.W = tf.Variable(initial_value=tf.random.uniform(shape=[2, 3], minval=-0.1, maxval=0.1),
                             dtype=tf.float32)
        # One bias per class. A single shared scalar bias would shift all
        # three logits equally, which softmax cancels out, so it would have
        # no effect on the prediction and would never receive a gradient.
        self.b = tf.Variable(initial_value=tf.zeros(shape=[3]), dtype=tf.float32)

        self.trainable_variables = [self.W, self.b]

    def __call__(self, inp):
        """Return class probabilities for a batch of inputs.

        Args:
            inp: array-like convertible to a float32 array of shape (N, 2).

        Returns:
            Tensor of shape (N, 3); each row is the softmax of the logits.
        """
        inp = np.array(inp, dtype='float32')

        logits = tf.matmul(inp, self.W) + self.b  # shape (N, 3)
        pred = tf.nn.softmax(logits)
        return pred

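计算损失：$y$ 是实际值（one-hot 标签），$\hat{y}$ 是预测值（softmax 输出的概率）。对 $N$ 个样本、$K$ 个类别，交叉熵损失为
$$L = -\frac{1}{N}\sum_{i=1}^{N}\sum_{k=1}^{K} y_{ik}\,\log \hat{y}_{ik}$$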

def compute_loss(pred, label):
    """Compute the mean softmax cross-entropy loss and the accuracy of a batch.

    Args:
        pred: predicted class probabilities, shape (N, 3).
        label: class ids, shape (N,); cast to int32 before one-hot encoding.

    Returns:
        A tuple (loss, accuracy) of scalar tensors: the cross-entropy averaged
        over the batch, and the fraction of samples whose arg-max prediction
        matches the label.
    """
    # tf.cast converts the labels to integers; tf.one_hot turns each class id
    # into a one-hot row such as [1, 0, 0] or [0, 1, 0].
    label = tf.one_hot(tf.cast(label, dtype=tf.int32), dtype=tf.float32, depth=3)

    # Keep probabilities away from exactly 0: log(0) is -inf, and 0 * -inf
    # would turn the whole loss (and its gradients) into NaN.
    safe_pred = tf.clip_by_value(pred, epsilon, 1.0)

    # Per-sample cross-entropy: label and pred are (N, 3), losses is (N,).
    losses = -tf.reduce_sum(label * tf.math.log(safe_pred), axis=1)

    loss = tf.reduce_mean(losses)

    accuracy = tf.reduce_mean(tf.cast(tf.equal(tf.argmax(label, axis=1), tf.argmax(pred, axis=1)), dtype=tf.float32))
    return loss, accuracy

更新梯度：tf.GradientTape()

def train_one_step(model, optimizer, x, y):
    """Apply one gradient update to the model and report the batch metrics.

    Args:
        model: callable returning predictions for `x`; must expose
            `trainable_variables`.
        optimizer: object providing `apply_gradients`.
        x: input batch passed to the model.
        y: labels passed to `compute_loss`.

    Returns:
        A tuple (loss, accuracy) as computed by `compute_loss` on the forward
        pass that preceded the parameter update.
    """
    params = model.trainable_variables

    # Record the forward pass so the tape can differentiate the loss.
    with tf.GradientTape() as tape:
        loss, accuracy = compute_loss(model(x), y)

    gradients = tape.gradient(loss, params)
    optimizer.apply_gradients(zip(gradients, params))
    return loss, accuracy

训练模型
model = SoftmaxRegression()
opt = tf.keras.optimizers.SGD(learning_rate=0.01)

# Slice features and labels straight out of the array instead of unpacking
# every row into Python tuples, which had to be converted back on each step.
x = data_set[:, :2]   # first two columns: the 2-D coordinates
y = data_set[:, 2]    # third column: the class label

# Full-batch gradient descent; report the metrics every 50 steps.
for i in range(1000):
    loss, accuracy = train_one_step(model, opt, x, y)
    if i%50==49:
        print(f'loss: {loss.numpy():.4}\t accuracy: {accuracy.numpy():.4}')

结果展示

# Redraw the three clusters, each with its own colour and marker.
for _cluster, _colour, _marker in ((C1, 'b', '+'), (C2, 'g', 'o'), (C3, 'r', '*')):
    plt.scatter(_cluster[:, 0], _cluster[:, 1], c=_colour, marker=_marker)

# Sample the square [0, 10) x [0, 10) on a grid with step 0.1.
x = np.arange(0., 10., 0.1)
y = np.arange(0., 10., 0.1)
X, Y = np.meshgrid(x, y)    # coordinate matrices of the grid points

# Flatten the grid into one (x, y) row per point so the model can score it.
inp = np.column_stack((X.ravel(), Y.ravel())).astype(np.float32)
print(inp.shape)

# Predicted class per grid point (index of the largest probability in each
# row), folded back into the grid shape.
Z = np.argmax(model(inp), axis=1).reshape(X.shape)

plt.contour(X, Y, Z)  # contour lines between predicted-class regions
plt.show()

loss: 0.7446 accuracy: 0.8
loss: 0.6302 accuracy: 0.85
loss: 0.5656 accuracy: 0.86
loss: 0.5232 accuracy: 0.8633
loss: 0.4929 accuracy: 0.8667
loss: 0.4699 accuracy: 0.87
loss: 0.4519 accuracy: 0.8667
loss: 0.4372 accuracy: 0.8733
loss: 0.4251 accuracy: 0.8733
loss: 0.4149 accuracy: 0.88
loss: 0.4062 accuracy: 0.88
loss: 0.3986 accuracy: 0.88
loss: 0.392 accuracy: 0.88
loss: 0.3862 accuracy: 0.88
loss: 0.3811 accuracy: 0.88
loss: 0.3765 accuracy: 0.88
loss: 0.3723 accuracy: 0.88
loss: 0.3686 accuracy: 0.88
loss: 0.3652 accuracy: 0.88
loss: 0.3621 accuracy: 0.88
(10000, 2)