Softmax Regression Implementation in Python (with tests)

import numpy as np
from matplotlib import pyplot as plt
from sklearn import datasets

# Compute the hypothesis ("relative probability") distribution; subtract the per-column max to avoid overflow in the exponentials.
# dataset: m*(n+1)    theta: k*(n+1)    m: number of samples    n: number of features    k: number of label classes
def Hypothesis(theta,dataset):
    score=np.dot(theta,dataset.T)
    a=np.max(score,axis=0)
    exp_score=np.exp(score-a)
    sum_score=np.sum(exp_score,axis=0)
    relative_probability=exp_score/sum_score
    return relative_probability
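
A quick sanity check (not in the original post): subtracting the column-wise maximum before exponentiating only prevents overflow, it does not change the resulting distribution. The tiny theta/data values below are made up purely for illustration.

# The max-subtraction trick leaves the probabilities unchanged.
theta_demo = np.array([[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]])    # k=3 classes, n+1=2
data_demo = np.array([[0.5, 1.0], [2.0, 1.0]])                 # m=2 samples
score = np.dot(theta_demo, data_demo.T)
p_naive = np.exp(score) / np.sum(np.exp(score), axis=0)        # naive softmax, no shift
print(np.allclose(Hypothesis(theta_demo, data_demo), p_naive))   # True
print(np.sum(Hypothesis(theta_demo, data_demo), axis=0))         # each column sums to 1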

#Compute the cost function (cross-entropy plus L2 regularization)
#theta is the k*(n+1) parameter matrix
def Cost_function(theta,dataset,labels,lamda):
    m,n=dataset.shape
    new_code=One_hot_encode(labels)
    log_probability = np.log(Hypothesis(theta,dataset))
    cost = -1/m * np.sum(np.multiply(log_probability,new_code)) + lamda/2 * np.sum(theta**2)
    return cost
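
As a rough check of the cost (again an illustrative sketch, not part of the original code): with theta set to all zeros the hypothesis is uniform, 1/k for every class, so the unregularized cost should equal log(k).

# With theta = 0 and lamda = 0 the cost is exactly log(k); the toy data below is made up.
labels_demo = np.array([0, 1, 2, 1])                 # m=4 samples, k=3 classes
data_demo = np.random.random((4, 3))                 # m*(n+1), bias column assumed appended
theta_zero = np.zeros((3, 3))                        # k*(n+1)
print(Cost_function(theta_zero, data_demo, labels_demo, lamda=0))   # ~ np.log(3) ≈ 1.0986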

#One-hot encode the labels
#new_code is k*m, where k is the number of classes and m the number of samples
def One_hot_encode(labels):
    m=len(labels)
    k=len(np.unique(labels))
    new_code=np.zeros((k,m))
    for i in range(m):
        new_code[labels[i],i]=1
    return new_code
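
Note that this encoding assumes the labels are consecutive integers starting at 0 (which holds for the iris dataset used in the test below). For example:

# One_hot_encode returns a k*m matrix with a single 1 per column.
print(One_hot_encode(np.array([0, 2, 1, 2])))
# [[1. 0. 0. 0.]
#  [0. 0. 1. 0.]
#  [0. 1. 0. 1.]]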

#Gradient checking: compare the analytic gradient against a finite-difference approximation
def Gradient_checking(gradient,theta,EPSILON,eps,dataset,labels,lamda):
    theta_vector = theta.ravel()  #flatten the parameter matrix into a vector
    num = len(theta_vector)
    vector = np.zeros(num)
    approximate_gradient = np.zeros(num)
    for i in range(num):
        vector[i] = 1
        theta_plus = theta_vector + EPSILON * vector   #perturb one parameter at a time
        theta_minus = theta_vector - EPSILON * vector  #to obtain a finite-difference approximation
        approximate_gradient[i] = (Cost_function(theta_plus.reshape(theta.shape),dataset,labels,lamda)-\
                                   Cost_function(theta_minus.reshape(theta.shape),dataset,labels,lamda))/(2*EPSILON)
        vector[i] = 0
        if abs(approximate_gradient[i]-gradient[i]) > eps:
            return False
    if np.linalg.norm(approximate_gradient-gradient,ord=2)/(np.linalg.norm(approximate_gradient,ord=2)) > eps:
        return False
    return True
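
Gradient_checking expects the analytic gradient flattened into a vector of length k*(n+1). A minimal sketch of how it could be wired up, using the same gradient formula that the training loop below computes class by class (the toy data and names here are for illustration only):

# Analytic gradient of the regularized cost: -1/m * (Y - P) X + lamda * theta,
# with Y the one-hot matrix (k*m), P the hypothesis (k*m), X the data (m*(n+1)).
labels_demo = np.array([0, 1, 2, 1, 0])
data_demo = np.column_stack((np.random.random((5, 2)), np.ones(5)))   # 5 samples, bias appended
theta_demo = np.random.random((3, 3))
lamda_demo = 0.1
Y = One_hot_encode(labels_demo)
P = Hypothesis(theta_demo, data_demo)
analytic = (-1/5 * np.dot(Y - P, data_demo) + lamda_demo * theta_demo).ravel()
print(Gradient_checking(analytic, theta_demo, EPSILON=1e-5, eps=1e-4,
                        dataset=data_demo, labels=labels_demo, lamda=lamda_demo))   # expected: True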

#Optimize the cost function with Batch Gradient Descent.
#Stopping criteria: 1) the maximum number of iterations is reached; 2) the change in the gradient between
#two iterations is below a small threshold; 3) the change in the cost function between iterations is negligible.
#dataset: raw data, m*n    labels: class labels    lamda: regularization coefficient
#learning_rate: step size    max_iter: maximum number of iterations
#eps1: threshold on the change of the cost function    eps2: threshold on the change of the gradient
def SoftmaxRegression(dataset,labels,lamda,learning_rate,max_iter,eps1,eps2,EPS):
    loss_record=[]
    m,n = dataset.shape
    k = len(np.unique(labels))
    new_code = One_hot_encode(labels)
    iter = 0
    new_cost = 0
    cost = 0
    dataset=np.column_stack((dataset,np.ones(m)))  #append a bias column
    theta = np.random.random((k,n+1))
    while iter < max_iter:
        new_theta = theta.copy()
        temp = new_code - Hypothesis(new_theta,dataset)
        for j in range(k):
            grad_sum = np.zeros(n+1)
            for i in range(m):
                grad_sum += dataset[i,:] * temp[j,i]
            j_gradient=-1/m * grad_sum + lamda * new_theta[j,:] #gradient vector for class j
            new_theta[j,:] = new_theta[j,:] - learning_rate * j_gradient
        iter += 1
        print("Parameters after iteration " + str(iter) + ":")
        print(new_theta)
        new_cost = Cost_function(new_theta,dataset,labels,lamda)
        loss_record.append(new_cost)
        print("Change in the cost function: " + str(abs(new_cost-cost)))
        theta = new_theta
        if abs(new_cost-cost) < eps1:
            break
        cost = new_cost
    return theta,loss_record
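
The two inner loops over classes and samples can be collapsed into a single matrix product; the update is identical but far faster. A minimal vectorized sketch (the function name is mine, and it is not called by the test below):

# Vectorized variant of the same batch-gradient-descent update.
def SoftmaxRegressionVectorized(dataset, labels, lamda, learning_rate, max_iter, eps1):
    m, n = dataset.shape
    k = len(np.unique(labels))
    Y = One_hot_encode(labels)                               # k*m
    X = np.column_stack((dataset, np.ones(m)))               # m*(n+1), bias appended
    theta = np.random.random((k, n + 1))
    loss_record = []
    cost = 0
    for it in range(max_iter):
        P = Hypothesis(theta, X)                             # k*m
        gradient = -1/m * np.dot(Y - P, X) + lamda * theta   # same formula as the per-class loop
        theta = theta - learning_rate * gradient
        new_cost = Cost_function(theta, X, labels, lamda)
        loss_record.append(new_cost)
        if abs(new_cost - cost) < eps1:
            break
        cost = new_cost
    return theta, loss_record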


#Classification: append the bias column and return the index of the class with the highest relative probability
def Classification(theta,dataset):
    X=dataset.copy()
    X=np.column_stack((X,np.ones(X.shape[0])))
    relative_probability=Hypothesis(theta,X)
    return np.argmax(relative_probability,axis=0)

Test:

iris= datasets.load_iris()
X=iris.data
y = iris.target
target_names = iris.target_names

theta,loss_record=SoftmaxRegression(dataset=X,labels=y,lamda=0.1,learning_rate=1e-4,max_iter=500000,eps1=1e-6,eps2=1e-4,EPS=1e-6)
predict=Classification(theta,X)
(predict==y).astype(int).mean()  #accuracy on the training set
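
As an optional cross-check (not part of the original post), the training accuracy can be compared against scikit-learn's multinomial logistic regression on the same data; the two should land in a similar range on iris.

# Rough reference point only; hyperparameters differ from the implementation above.
from sklearn.linear_model import LogisticRegression
clf = LogisticRegression(max_iter=1000).fit(X, y)
print("sklearn training accuracy:", clf.score(X, y))
print("this implementation:", (predict == y).astype(int).mean())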
plt.plot(np.arange(len(loss_record)),loss_record)

The loss-function iteration curve is shown below (apologies for the unlabeled axes; I didn't bother tidying up the plot):

[Figure: cost function value versus iteration number]
