[AI Notes] Softmax


Reference:
CS231n course

Softmax

Softmax, also known as Multinomial Logistic Regression, is used, like the SVM, to compute a classification loss.

Softmax can be understood as taking the scores produced by the linear function of a linear classifier, normalizing them into probabilities, and then computing the loss from those probabilities; at prediction time, the predicted class is simply the one with the highest probability.

Its formula is as follows:

$$P(Y = k \mid X = x_i) = \frac{e^{s_k}}{\sum_j e^{s_j}}, \qquad s = f(x_i; W)$$

Here s is the score vector produced by the linear function, and its subscript indexes a class; P is the probability of classifying sample x_i as class k, and the expression on the right is the normalization function that turns the scores into probabilities.

The Softmax loss L_i is then the negative logarithm of the probability of assigning sample x_i its correct label y_i:

$$L_i = -\log P(Y = y_i \mid X = x_i) = -\log \frac{e^{s_{y_i}}}{\sum_j e^{s_j}}$$

The training loss is then the accumulation of L_i over the index i (in the code below, this sum is additionally divided by the number of samples, i.e. averaged).

The loss computation can be traced step by step on a small example.
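A minimal NumPy sketch of the pipeline (the three scores are made-up numbers for a single hypothetical sample whose true class is 0):

import numpy as np

scores = np.array([3.2, 5.1, -1.7])   # hypothetical scores for 3 classes
correct_class = 0                      # assume class 0 is the true label

# subtract the max for numerical stability (does not change the probabilities)
shifted = scores - np.max(scores)
probs = np.exp(shifted) / np.sum(np.exp(shifted))
print(probs)            # roughly [0.13, 0.87, 0.00]

# per-sample Softmax loss: negative log-probability of the true class
L_i = -np.log(probs[correct_class])
print(L_i)              # roughly 2.04

Averaging such L_i values over a batch, plus a regularization term, is exactly what softmax_loss_vectorized computes below.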

Code implementation

Using Softmax to classify CIFAR-10.

#SoftMax_object.py
import numpy as np
class SoftMax(object):

    def __init__(self):
        self.W = None
        self.reg = 1e-5
        self.num_classes = 10
        self.learning_rate = 1e-3

    def softmax_loss_vectorized(self, W, X, y, reg):
        # initialize the loss and the gradient
        loss = 0.0
        dW = np.zeros_like(W)
        num_train = X.shape[0]    # N
        scores = X.dot(W)         # N x num_classes
        shift_scores = scores - np.max(scores, axis=1).reshape(-1, 1)  # subtract each row's max for numerical stability
        softmax_output = np.exp(
            shift_scores)/np.sum(np.exp(shift_scores), axis=1).reshape(-1, 1)  # normalize scores into probabilities
        # loss: average negative log-likelihood of the correct class
        loss = -np.sum(np.log(softmax_output[range(num_train), list(y)]))
        loss /= num_train
        loss += 0.5 * reg * np.sum(W*W)  # L2 regularization

        # gradient: dL/dscores equals the probabilities, minus 1 at the correct class
        dS = softmax_output.copy()
        dS[range(num_train), list(y)] += -1
        dW = (X.T).dot(dS)
        dW = dW / num_train + reg * W  # average over the batch and add the regularization gradient
        return loss, dW

    def train(self, X, y, num_classes=10, learning_rate=2.0e-7, reg=9.0e+3, num_iters=1000,
              batch_size=200, verbose=False):
        """
         使用随机梯度下降来训练这个分类器
         输入:
         -X :一个numpy数组,维数为(N,D)
         -Y : 一个numpy数组,维数为(N,)
         -learning rate: float ,优化的学习率
         -reg : float,正则化强度
         -num_iters: integer, 优化时训练的步数
         -batch_size:integer, 每一步使用的训练样本数
         -ver bose : boolean, 若为真,优化时打印过程

         输出:
         一个存储每次训练的损失函数值的List
        """
        num_train, dim = X.shape
        self.num_classes = num_classes

        if self.W is None:
            # initialize W with small random values
            self.W = 0.001 * np.random.randn(dim, num_classes)

        # optimize W with stochastic gradient descent
        loss_history = []
        for it in range(num_iters):
            X_batch = None
            y_batch = None
            """
            Sample batch_size examples and their labels from the training set
            for this round of gradient descent. Store the data in X_batch and
            the corresponding labels in y_batch; after sampling, X_batch has
            shape (batch_size, dim) and y_batch has shape (batch_size,).
            """
            batch_inx = np.random.choice(num_train, batch_size)
            X_batch = X[batch_inx, :]
            y_batch = y[batch_inx]

            loss, grad = self.softmax_loss_vectorized(
                self.W, X_batch, y_batch, reg)
            loss_history.append(loss)

            """
            使用梯度和学习率更新权重
            """
            self.W = self.W - learning_rate * grad
            if verbose and it % 10 == 0:
                print('iteration %d / %d: loss %f' % (it, num_iters, loss))

        return loss_history

    def predict(self, X):

        y_pred = np.zeros(X.shape[0])
        scores = X.dot(self.W)
        y_pred = np.argmax(scores, axis=1)
        return y_pred
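One way to sanity-check the analytic gradient above (dS equals the probabilities, minus 1 at the correct class) is a numerical gradient check on a tiny random problem. This is a minimal sketch, assuming SoftMax_object.py is importable from the current directory; the file name gradient_check.py and the problem sizes are arbitrary choices for illustration:

# gradient_check.py  (illustrative sketch)
import numpy as np
from SoftMax_object import SoftMax

np.random.seed(0)
N, D, C = 5, 8, 3                       # tiny synthetic problem
X = np.random.randn(N, D)
y = np.random.randint(C, size=N)
W = 0.001 * np.random.randn(D, C)

clf = SoftMax()
loss, dW = clf.softmax_loss_vectorized(W, X, y, reg=0.0)

# compare a few entries of the analytic gradient with a centered difference
h = 1e-5
for _ in range(5):
    i, j = np.random.randint(D), np.random.randint(C)
    W_plus, W_minus = W.copy(), W.copy()
    W_plus[i, j] += h
    W_minus[i, j] -= h
    loss_plus, _ = clf.softmax_loss_vectorized(W_plus, X, y, reg=0.0)
    loss_minus, _ = clf.softmax_loss_vectorized(W_minus, X, y, reg=0.0)
    numeric = (loss_plus - loss_minus) / (2 * h)
    print('analytic: %f, numeric: %f' % (dW[i, j], numeric))

The two columns should agree to several decimal places; a large discrepancy would point to a bug in the vectorized gradient.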
#SoftMax_run.py
from SoftMax_object import SoftMax
import time
import numpy as np
#import matplotlib.pyplot as plt


def unpickle(file):
    import pickle
    with open(file, 'rb') as fo:
        dict = pickle.load(fo, encoding='bytes')
    return dict


# get the training data
dataTrain = []
labelTrain = []
for i in range(1, 6):
    #dic = unpickle("/home/project/data/ZhouZhenKang/cifar-10-batches-py/data_batch_"+str(i))
    dic = unpickle(
        "D:/Desktop/论文及算法实现/DATA_SETS/cifar-10-batches-py/data_batch_"+str(i))
    for item in dic[b"data"]:
        # print(item.shape)
        #item = item.tolist()
        # item.append(1)
        dataTrain.append(item)
    for item in dic[b"labels"]:
        labelTrain.append(item)

# get test data
dataTest = []
labelTest = []
dic = unpickle("D:/Desktop/论文及算法实现/DATA_SETS/cifar-10-batches-py/test_batch")
#dic = unpickle("/home/project/data/ZhouZhenKang/cifar-10-batches-py/test_batch")
for item in dic[b"data"]:
    #item = item.tolist()
    # item.append(1)
    dataTest.append(item)
for item in dic[b"labels"]:
    labelTest.append(item)


dataTr = np.asarray(dataTrain)
dataTs = np.asarray(dataTest)
labelTr = np.asarray(labelTrain)
labelTs = np.asarray(labelTest)
print('dataTr.shape: {}'.format(dataTr.shape))


softmax = SoftMax()
tic = time.time()
loss_hist = softmax.train(dataTr, labelTr, num_classes=10, learning_rate=2.0e-7, reg=9.0e+3,
                          num_iters=1000, batch_size=1000, verbose=True)
toc = time.time()
print('That took %fs' % (toc - tic))

# A useful debugging strategy is to plot the loss as a function of
# iteration number:
'''
plt.plot(loss_hist)
plt.xlabel('Iteration number')
plt.ylabel('Loss value')
plt.show()
'''

# Use the predict function and evaluate the performance on both the
# training set and the test set
y_train_pred = softmax.predict(dataTr)
print('training accuracy: %f' % (np.mean(labelTr == y_train_pred), ))
y_val_pred = softmax.predict(dataTs)
print('validation accuracy: %f' % (np.mean(labelTs == y_val_pred), ))
Sample output:

iteration 950 / 1000: loss 6.313303
iteration 960 / 1000: loss 6.184987
iteration 970 / 1000: loss 6.014891
iteration 980 / 1000: loss 5.860609
iteration 990 / 1000: loss 5.765472
That took 37.708521s
training accuracy: 0.346120
validation accuracy: 0.339200