import numpy as np
import math
from matplotlib import pyplot as plt
from sklearn import datasets
# Compute the softmax "relative probability" distribution, guarding against overflow in exp().
# dataset: m*(n+1)  theta: k*(n+1)  m: number of samples  n: number of features  k: number of classes
def Hypothesis(theta, dataset):
    score = np.dot(theta, dataset.T)              # k*m score matrix
    a = np.max(score, axis=0)                     # per-sample maximum score
    exp_score = np.exp(score - a)                 # shift scores so exp() cannot overflow
    sum_score = np.sum(exp_score, axis=0)
    relative_probability = exp_score / sum_score  # column-wise softmax, k*m
    return relative_probability
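Subtracting the per-column maximum leaves the softmax output unchanged, because the common factor exp(-a) cancels between numerator and denominator; it only keeps exp() in a safe range. A quick sanity check with made-up scores (the values below are purely illustrative):

scores = np.array([[1000.0, 1.0],
                   [1001.0, 2.0]])                # np.exp(1000) on its own would overflow to inf
shifted = np.exp(scores - np.max(scores, axis=0))
print(shifted / np.sum(shifted, axis=0))          # stable per-column softmax, no overflow warning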
# Compute the regularized cross-entropy loss
# theta is the k*(n+1) parameter matrix
def Cost_function(theta, dataset, labels, lamda):
    m, n = dataset.shape
    new_code = One_hot_encode(labels)
    log_probability = np.log(Hypothesis(theta, dataset))
    # L2 weight-decay penalty lamda/2 * sum(theta^2), matching the lamda * theta term in the gradient below
    cost = -1/m * np.sum(np.multiply(log_probability, new_code)) + lamda/2 * np.sum(np.square(theta))
    return cost
# One-hot encode the labels
# new_code is k*m, where k is the number of classes and m the number of samples
def One_hot_encode(labels):
    m = len(labels)
    k = len(np.unique(labels))
    new_code = np.zeros((k, m))
    for i in range(m):
        new_code[labels[i], i] = 1
    return new_code
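The loop can also be replaced by a single fancy-indexing assignment. A minimal loop-free sketch, assuming (as with the iris targets used below) that the labels are integers 0..k-1; the name One_hot_encode_vectorized is ours:

def One_hot_encode_vectorized(labels):
    labels = np.asarray(labels)
    new_code = np.zeros((len(np.unique(labels)), len(labels)))
    new_code[labels, np.arange(len(labels))] = 1  # set exactly one entry per column
    return new_code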
# Numerical gradient checking: compare the analytic gradient against
# central-difference approximations, one parameter at a time
def Gradient_checking(gradient, theta, EPSILON, eps, dataset, labels, lamda):
    gradient = np.asarray(gradient).ravel()       # accept the k*(n+1) matrix or its flattened form
    theta_vector = theta.ravel()                  # flatten the parameter matrix
    num = len(theta_vector)
    approximate_gradient = np.zeros(num)
    vector = np.zeros(num)
    for i in range(num):
        vector[i] = 1
        theta_plus = theta_vector + EPSILON * vector    # perturb one parameter upward
        theta_minus = theta_vector - EPSILON * vector   # ...and downward
        approximate_gradient[i] = (Cost_function(theta_plus.reshape(theta.shape), dataset, labels, lamda) -
                                   Cost_function(theta_minus.reshape(theta.shape), dataset, labels, lamda)) / (2 * EPSILON)
        vector[i] = 0
        if abs(approximate_gradient[i] - gradient[i]) > eps:
            return False
    # also require a small relative error between the two full gradient vectors
    if np.linalg.norm(approximate_gradient - gradient, ord=2) / np.linalg.norm(approximate_gradient, ord=2) > eps:
        return False
    return True
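The checker expects the analytic gradient in the same order as theta.ravel(). A usage sketch on a tiny random problem (the sizes, the seed, and lamda=0.1 are arbitrary choices for illustration):

rng = np.random.RandomState(0)
data = np.column_stack((rng.rand(6, 2), np.ones(6)))   # 6 samples, 2 features + bias column
labs = np.array([0, 1, 2, 0, 1, 2])
th = rng.rand(3, 3)
temp = One_hot_encode(labs) - Hypothesis(th, data)
analytic = (-1/6 * temp.dot(data) + 0.1 * th).ravel()  # same gradient formula as the training loop
print(Gradient_checking(analytic, th, 1e-4, 1e-4, data, labs, 0.1))  # expect True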
# Optimize the loss with batch gradient descent
# Stopping criteria: (1) the maximum number of iterations is reached; (2) the gradient barely changes
# between iterations; (3) the loss barely changes between iterations (only (1) and (3) are checked below)
# dataset: raw m*n data matrix  labels: class labels  lamda: regularization coefficient
# learning_rate: step size  max_iter: maximum number of iterations
# eps1: threshold on the change in loss  eps2: threshold on the change in gradient (declared but unused)
def SoftmaxRegression(dataset, labels, lamda, learning_rate, max_iter, eps1, eps2, EPS):
    loss_record = []
    m, n = dataset.shape
    k = len(np.unique(labels))
    new_code = One_hot_encode(labels)
    iter = 0
    new_cost = 0
    cost = 0
    dataset = np.column_stack((dataset, np.ones(m)))      # append a bias column -> m*(n+1)
    theta = np.random.random((k, n+1))
    while iter < max_iter:
        new_theta = theta.copy()
        temp = new_code - Hypothesis(new_theta, dataset)  # k*m residual matrix
        for j in range(k):
            grad_sum = np.zeros(n+1)
            for i in range(m):
                a = dataset[i, :]
                grad_sum += a * temp[j, i]
            j_gradient = -1/m * grad_sum + lamda * new_theta[j, :]  # gradient for class j
            new_theta[j, :] = new_theta[j, :] - learning_rate * j_gradient
        iter += 1
        print("Parameters after iteration " + str(iter) + ":")
        print(new_theta)
        new_cost = Cost_function(new_theta, dataset, labels, lamda)
        loss_record.append(new_cost)
        print("Change in loss: " + str(abs(new_cost - cost)))
        if abs(new_cost - cost) < eps1:
            break
        cost = new_cost       # remember the loss so the next convergence check is meaningful
        theta = new_theta
    return theta, loss_record
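The two inner loops compute, for every class j, the sum over samples of dataset[i, :] * temp[j, i], which is exactly the matrix product temp · dataset. A minimal vectorized sketch of the same batch update (the name SoftmaxRegressionVectorized is ours; it reuses Hypothesis, One_hot_encode, and Cost_function from above):

def SoftmaxRegressionVectorized(dataset, labels, lamda, learning_rate, max_iter, eps1):
    m, n = dataset.shape
    k = len(np.unique(labels))
    new_code = One_hot_encode(labels)                     # k*m
    dataset = np.column_stack((dataset, np.ones(m)))      # append bias column
    theta = np.random.random((k, n+1))
    loss_record = []
    cost = 0
    for it in range(max_iter):
        temp = new_code - Hypothesis(theta, dataset)      # k*m residuals
        gradient = -1/m * temp.dot(dataset) + lamda * theta  # k*(n+1), all classes at once
        theta = theta - learning_rate * gradient
        new_cost = Cost_function(theta, dataset, labels, lamda)
        loss_record.append(new_cost)
        if abs(new_cost - cost) < eps1:
            break
        cost = new_cost
    return theta, loss_record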
def Classification(theta, dataset):
    X = dataset.copy()
    X = np.column_stack((X, np.ones(X.shape[0])))         # same bias column as in training
    relative_probability = Hypothesis(theta, X)
    return np.argmax(relative_probability, axis=0)        # most probable class per sample
Test:
iris = datasets.load_iris()
X = iris.data
y = iris.target
target_names = iris.target_names
theta, loss_record = SoftmaxRegression(dataset=X, labels=y, lamda=0.1, learning_rate=1e-4, max_iter=500000, eps1=1e-6, eps2=1e-4, EPS=1e-6)
predict = Classification(theta, X)
(predict == y).astype(int).mean()  # training-set accuracy; np.int has been removed from NumPy, use the builtin int
plt.plot(np.arange(len(loss_record)),loss_record)
The loss curve over the iterations is shown below (please forgive the unlabeled axes, I didn't feel like making a proper plot):