Introduction to Deep Learning --- Softmax Regression in Python

In the earlier post on softmax regression (深度学习入门 — softmax回归), I explained the algorithm and its use cases in detail. This post implements softmax regression in Python, in three parts:

  1. The cost function J(), which returns the cost and its gradient with respect to w
  2. The gradient-checking function check_gradient(), which prints, for each w_ij, the difference between the analytical partial derivative and its numerical estimate
  3. The training function train() and the prediction function predict()
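
For reference (the full derivation is in the earlier post), the quantities computed by J() below are the standard softmax-regression cost with weight decay and its gradient. Restated in the notation of the code (m samples, k classes, theta of shape (k, n+1), λ corresponding to the lenda variable):

$$p(y^{(i)}=j \mid x^{(i)};\theta)=\frac{e^{\theta_j^{T}x^{(i)}}}{\sum_{l=1}^{k}e^{\theta_l^{T}x^{(i)}}}$$

$$J(\theta)=-\frac{1}{m}\sum_{i=1}^{m}\sum_{j=1}^{k}1\{y^{(i)}=j\}\log p(y^{(i)}=j\mid x^{(i)};\theta)+\frac{\lambda}{2}\sum_{i,j}\theta_{ij}^{2}$$

$$\nabla_{\theta_j}J(\theta)=-\frac{1}{m}\sum_{i=1}^{m}x^{(i)}\Big(1\{y^{(i)}=j\}-p(y^{(i)}=j\mid x^{(i)};\theta)\Big)+\lambda\,\theta_j$$

The indicator 1{·} corresponds to the one-hot labels produced by label_binarize, and the bias term is handled by appending a column of ones to the data.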

from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
import numpy as np
import random
from sklearn.utils import shuffle
from sklearn.preprocessing import label_binarize
from scipy.optimize import minimize

data=load_iris()
dataSet=data['data']
classLabels=data['target']
m,n=dataSet.shape
k=len(np.unique(classLabels))
#shuffle the samples (shuffle() returns a permuted copy of the index array)
listt=shuffle(np.arange(dataSet.shape[0]))
dataSet=dataSet[listt]
classLabels=classLabels[listt]

def sigmoid(X):    #defined here but not actually used by the softmax code below
    return 1/(1+np.exp(-X))

#theta.shape==(k,n+1)
#lenda is the regularization (weight-decay) coefficient; alpha is the learning rate (not used inside J)
def J(X,classLabels,theta,alpha,lenda):
    bin_classLabels=label_binarize(classLabels,classes=np.unique(classLabels).tolist()).reshape((m,k))  #one-hot labels, (m, k)
    dataSet=np.concatenate((X,np.ones((m,1))),axis=1).reshape((m,n+1)).T   #append the bias column and transpose to (n+1, m)
    theta_data=theta.dot(dataSet)  #(k, m)
    theta_data = theta_data - np.max(theta_data, axis=0)   #subtract the per-sample max for numerical stability, (k, m)
    prob_data = np.exp(theta_data) / np.sum(np.exp(theta_data), axis=0)  #softmax probabilities, (k, m)
    cost = (-1 / m) * np.sum(np.multiply(bin_classLabels,np.log(prob_data).T)) + (lenda / 2) * np.sum(np.square(theta))  #scalar
    grad = (-1 / m) * (dataSet.dot(bin_classLabels - prob_data.T)).T + lenda * theta  #(k, n+1)

    return cost,grad

def train(X,classLabels,theta,alpha=0.1,lenda=1e-4,maxiter=1000):
    #plain batch gradient descent; an L-BFGS-B alternative via scipy.optimize.minimize is sketched below
    for i in range(maxiter):
        cost,grad=J(X,classLabels,theta,alpha,lenda)
        theta=theta-alpha*grad
    return theta
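
# Not from the original post: a sketch of the L-BFGS-B alternative hinted at
# in train(), assuming the same J() defined above. scipy.optimize.minimize
# needs a flat parameter vector, so this wrapper reshapes theta on the way in
# and flattens the gradient on the way out.
def train_lbfgs(X,classLabels,theta,lenda=1e-4,maxiter=400):
    shape=theta.shape
    def fun(t):
        cost,grad=J(X,classLabels,t.reshape(shape),0.0,lenda)   #alpha is unused inside J
        return cost,grad.ravel()
    result=minimize(fun,theta.ravel(),method='L-BFGS-B',jac=True,
                    options={'maxiter':maxiter,'disp':True})
    return result.x.reshape(shape)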

def predict(theta,testSet,testClass):  #testSet already has the bias column appended, (m, n+1)
    prod = theta.dot(testSet.T)                          #class scores, (k, m)
    pred = np.exp(prod) / np.sum(np.exp(prod), axis=0)   #softmax probabilities
    pred = pred.argmax(axis=0)                           #predicted class index for each sample
    accuracy = float(np.mean(pred == testClass))         #fraction of correct predictions

    return pred,accuracy

def check_gradient(X,classLabels,theta,alpha,lenda,eplison=1e-4):    # gradient check
    cost= lambda theta:J(X,classLabels,theta,alpha,lenda)
    print("Norm of the difference between numerical and analytical num_grad (should be < 1e-9)\n")
    print(theta.shape)
    _,grad=cost(theta)    #analytical gradient, computed once
    for i in range(theta.shape[0]):
        for j in range(theta.shape[1]):
            #perturb theta[i,j] by +/- eplison and estimate the partial derivative numerically
            theta_1=np.array(theta)
            theta_2=np.array(theta)
            theta_1[i,j]=theta[i,j]+eplison
            theta_2[i,j]=theta[i,j]-eplison
            num_cost_1,_=cost(theta_1)
            num_cost_2,_=cost(theta_2)
            num_grad=(num_cost_1-num_cost_2)/(2*eplison)
            #relative difference between the numerical estimate and the analytical entry
            diff = np.linalg.norm(num_grad - grad[i,j]) / np.linalg.norm(num_grad + grad[i,j])
            print("the difference of the grad and num_grad: ",diff)

theta=np.random.random((k,n+1))   #initialize theta randomly, shape (k, n+1)
check_gradient(dataSet,classLabels,theta,alpha=0.1,lenda=1e-4,eplison=1e-4)  #run the gradient check first
theta_train=train(dataSet,classLabels,theta,alpha=0.1)    #train; I am not being strict here and use all of the data for training
testSet=np.concatenate((dataSet,np.ones((m,1))),axis=1).reshape((m,n+1))    #the training data doubles as the test data, which is not recommended; I do it only for convenience

pred,accuracy=predict(theta_train,testSet,classLabels)  #predict
print("accuracy: ",accuracy)   #final accuracy is around 98%

An accuracy of 98% suggests the implementation is correct, but since this is the very standard iris dataset it should be possible to get close to 100% accuracy, which shows the code still has room for optimization.
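
One easy improvement is to actually hold out a test set: train_test_split is imported at the top but never used. Below is a minimal sketch of how the same functions could be driven with a proper split; the variable names are my own rather than from the original code, and the global m has to be updated because J() reads it.

X_train, X_test, y_train, y_test = train_test_split(dataSet, classLabels, test_size=0.3,
                                                    random_state=0, stratify=classLabels)  #stratify so every class appears in both splits

m = X_train.shape[0]                      #J() uses the global m, so it must match the training set size
theta = np.random.random((k, n + 1))
theta_trained = train(X_train, y_train, theta, alpha=0.1)

X_test_bias = np.concatenate((X_test, np.ones((X_test.shape[0], 1))), axis=1)  #predict() expects the bias column appended
pred, acc = predict(theta_trained, X_test_bias, y_test)
print("held-out accuracy:", acc)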
