1. Introduction
This section assumes the reader has a theoretical grounding in logistic regression; if needed, see the earlier walkthrough of the principles of the logistic regression algorithm.
2. Multiclass Logistic Regression
A multiclass task is, in essence, a collection of binary classification tasks (one-vs-all).
Suppose the samples fall into three classes: blue, red, and yellow. To separate blue from the other colors, label every blue point 1 and every other point 0, then train an ordinary binary classifier on the relabeled data. The same relabeling is done for red and for yellow.
When a new sample has to be classified, each of the three classifiers outputs the probability that it is blue, red, or yellow, and the sample is assigned to the class with the highest probability. For example, if those probabilities were 0.8, 0.15, and 0.05, the sample would be judged to be blue.
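In code, this relabeling is one line per class; a tiny illustration (the labels here are made up):

import numpy as np

labels = np.array(['blue', 'red', 'yellow', 'blue'])
# 1 for the target class, 0 for everything else (one-vs-all)
blue_vs_rest = (labels == 'blue').astype(float)  # array([1., 0., 0., 1.])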
Suppose there are 100 samples, each with 3 feature values, so the data matrix has shape data: 100×3. With 3 classes, the weight matrix has shape theta: 3×3, and sigmoid(data · theta) yields the probability matrix P: 100×3; the predicted class for each sample is then read off from its row of probabilities.
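A NumPy sketch of this shape arithmetic, with random placeholder values standing in for trained weights:

import numpy as np

np.random.seed(0)
data = np.random.rand(100, 3)   # 100 samples, 3 features
theta = np.random.rand(3, 3)    # one column of weights per class

scores = data.dot(theta)                            # shape (100, 3)
probabilities = 1 / (1 + np.exp(-scores))           # sigmoid: each entry in (0, 1)
predicted_class = np.argmax(probabilities, axis=1)  # most probable class per sample
print(probabilities.shape, predicted_class.shape)   # (100, 3) (100,)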
Computing the loss: a classifier ultimately outputs a probability. The loss is obtained by feeding that probability into the logarithm and negating the result, so the loss is positive.
When the true label is 1 and the classification is correct, the sigmoid output is close to 1; the closer to 1, the better the classification and the smaller the loss.
When the true label is 0 and the classification is correct, the sigmoid output is close to 0; the classification is better, yet -log(sigmoid) would then be large, so the probability is replaced by 1 - sigmoid, which again makes the loss small when the output is near 0.
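Written out, this is the standard binary cross-entropy loss over m samples, with predictions h_θ(x) = sigmoid(θᵀx); its gradient is exactly what gradient_step in the code below computes:

$$J(\theta) = -\frac{1}{m}\sum_{i=1}^{m}\left[y^{(i)}\log h_\theta(x^{(i)}) + \left(1 - y^{(i)}\right)\log\left(1 - h_\theta(x^{(i)})\right)\right]$$

$$\frac{\partial J(\theta)}{\partial \theta} = \frac{1}{m}X^{T}\left(h_\theta(X) - y\right)$$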
3. Code
Utility modules to implement first.
Preprocessing module:
"""Prepares the dataset for training"""
import numpy as np
from .normalize import normalize
from .generate_sinusoids import generate_sinusoids
from .generate_polynomials import generate_polynomials
def prepare_for_training(data, polynomial_degree=0, sinusoid_degree=0, normalize_data=True):
# 计算样本总数
num_examples = data.shape[0]
data_processed = np.copy(data)
# 预处理
features_mean = 0
features_deviation = 0
data_normalized = data_processed
if normalize_data:
(
data_normalized,
features_mean,
features_deviation
) = normalize(data_processed)
data_processed = data_normalized
# 特征变换sinusoidal
if sinusoid_degree > 0:
sinusoids = generate_sinusoids(data_normalized, sinusoid_degree)
data_processed = np.concatenate((data_processed, sinusoids), axis=1)
# 特征变换polynomial
if polynomial_degree > 0:
polynomials = generate_polynomials(data_normalized, polynomial_degree, normalize_data)
data_processed = np.concatenate((data_processed, polynomials), axis=1)
# 加一列1
data_processed = np.hstack((np.ones((num_examples, 1)), data_processed))
return data_processed, features_mean, features_deviation
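The three helpers imported at the top of this module (normalize, generate_sinusoids, generate_polynomials) are not listed here. The sketches below are minimal plausible implementations consistent with how they are called above, assuming z-score normalization and simple sin/power feature expansions; treat them as assumptions, not the original modules:

"""Minimal sketches of the helper modules (assumptions)"""
import numpy as np


def normalize(features):
    """Z-score normalization: returns normalized data plus per-feature mean and std"""
    features = np.copy(features).astype(float)
    features_mean = np.mean(features, axis=0)
    features_deviation = np.std(features, axis=0)
    features_deviation[features_deviation == 0] = 1  # guard against constant columns
    features_normalized = (features - features_mean) / features_deviation
    return features_normalized, features_mean, features_deviation


def generate_sinusoids(dataset, sinusoid_degree):
    """Stacks sin(k * x) columns for k = 1..sinusoid_degree"""
    num_examples = dataset.shape[0]
    sinusoids = np.empty((num_examples, 0))
    for degree in range(1, sinusoid_degree + 1):
        sinusoids = np.concatenate((sinusoids, np.sin(degree * dataset)), axis=1)
    return sinusoids


def generate_polynomials(dataset, polynomial_degree, normalize_data=False):
    """Simplified polynomial features: element-wise powers x**2 .. x**degree"""
    num_examples = dataset.shape[0]
    polynomials = np.empty((num_examples, 0))
    for degree in range(2, polynomial_degree + 1):
        polynomials = np.concatenate((polynomials, dataset ** degree), axis=1)
    if normalize_data:
        polynomials = normalize(polynomials)[0]
    return polynomials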
Sigmoid function module:
"""Sigmoid function"""
import numpy as np
def sigmoid(matrix):
"""Applies sigmoid function to NumPy matrix"""
return 1 / (1 + np.exp(-matrix))
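A quick sanity check of the expected behavior (values shown are approximate):

import numpy as np
# sigmoid maps scores into (0, 1): 0 -> 0.5, large positive -> ~1, large negative -> ~0
print(sigmoid(np.array([-10.0, 0.0, 10.0])))  # approx [4.54e-05, 0.5, 0.99995]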
Logistic regression module:
import numpy as np
from scipy.optimize import minimize

from features import prepare_for_training
from hypothesis import sigmoid


class LogisticRegression:
    def __init__(self, data, labels, polynomial_degree=0, sinusoid_degree=0, normalize_data=False):
        """
        1. Preprocess the data
        2. Get the number of features
        3. Initialize the parameter matrix
        """
        (data_processed,
         features_mean,
         features_deviation) = prepare_for_training(data, polynomial_degree, sinusoid_degree,
                                                    normalize_data=normalize_data)

        self.data = data_processed
        self.labels = labels
        self.unique_labels = np.unique(labels)
        self.features_mean = features_mean
        self.features_deviation = features_deviation
        self.polynomial_degree = polynomial_degree
        self.sinusoid_degree = sinusoid_degree
        self.normalize_data = normalize_data

        num_features = self.data.shape[1]
        num_unique_labels = self.unique_labels.shape[0]
        # One row of weights per class (one-vs-all)
        self.theta = np.zeros((num_unique_labels, num_features))

    def train(self, max_iterations=1000):
        cost_histories = []
        num_features = self.data.shape[1]  # number of features
        for label_index, unique_label in enumerate(self.unique_labels):  # train one binary classifier per class
            current_initial_theta = np.copy(self.theta[label_index].reshape(num_features, 1))  # weights for this class
            current_labels = (self.labels == unique_label).astype(float)  # binary labels for this class
            (current_theta, cost_history) = LogisticRegression.gradient_descent(self.data, current_labels,
                                                                                current_initial_theta,
                                                                                max_iterations)  # run the optimizer
            self.theta[label_index] = current_theta.T  # store the optimized weights
            cost_histories.append(cost_history)
        return self.theta, cost_histories

    @staticmethod
    def gradient_descent(data, labels, current_initial_theta, max_iterations):
        cost_history = []
        num_features = data.shape[1]
        result = minimize(
            # Objective to minimize: the cost function
            lambda current_theta: LogisticRegression.cost_function(data, labels,
                                                                   current_theta.reshape(num_features, 1)),
            # Initial weights
            current_initial_theta,
            # Optimization strategy
            method='CG',  # conjugate gradient
            # Gradient of the objective
            jac=lambda current_theta: LogisticRegression.gradient_step(data, labels,
                                                                       current_theta.reshape(num_features, 1)),
            # Record the cost after each iteration
            callback=lambda current_theta: cost_history.append(
                LogisticRegression.cost_function(data, labels, current_theta.reshape((num_features, 1)))),
            # Iteration limit
            options={'maxiter': max_iterations}
        )
        if not result.success:
            raise ArithmeticError('Cannot minimize cost function: ' + result.message)
        optimized_theta = result.x.reshape(num_features, 1)  # final weights
        return optimized_theta, cost_history

    # Compute the loss
    @staticmethod
    def cost_function(data, labels, theta):
        num_examples = data.shape[0]
        predictions = LogisticRegression.hypothesis(data, theta)
        y_is_set_cost = np.dot(labels[labels == 1].T, np.log(predictions[labels == 1]))  # loss on positive samples
        y_is_not_set_cost = np.dot((1 - labels[labels == 0]).T, np.log(1 - predictions[labels == 0]))  # loss on negative samples
        cost = (-1 / num_examples) * (y_is_set_cost + y_is_not_set_cost)
        return cost

    @staticmethod
    def hypothesis(data, theta):
        predictions = sigmoid(np.dot(data, theta))
        return predictions

    # Compute the gradient
    @staticmethod
    def gradient_step(data, labels, theta):
        num_examples = labels.shape[0]
        predictions = LogisticRegression.hypothesis(data, theta)
        label_diff = predictions - labels
        gradients = (1 / num_examples) * np.dot(data.T, label_diff)
        return gradients.T.flatten()

    def predict(self, data):
        num_examples = data.shape[0]
        data_processed = prepare_for_training(data, self.polynomial_degree, self.sinusoid_degree,
                                              self.normalize_data)[0]
        prob = LogisticRegression.hypothesis(data_processed, self.theta.T)  # shape (num_examples, num_classes)
        max_prob_index = np.argmax(prob, axis=1)  # index of the most probable class per sample
        class_prediction = np.empty(max_prob_index.shape, dtype=object)
        for index, label in enumerate(self.unique_labels):
            class_prediction[max_prob_index == index] = label
        return class_prediction.reshape((num_examples, 1))
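A minimal end-to-end usage sketch on synthetic data. The three-cluster data, the module layout, and the hyperparameters are illustrative assumptions:

import numpy as np
# Assumes the class above is importable, e.g. from logistic_regression import LogisticRegression

np.random.seed(42)
# Three Gaussian clusters with 3 features each, shifted so they are largely separable
data = np.vstack([np.random.randn(30, 3) + offset for offset in (0.0, 2.5, 5.0)])
labels = np.array(['blue'] * 30 + ['red'] * 30 + ['yellow'] * 30).reshape(-1, 1)

model = LogisticRegression(data, labels, normalize_data=True)
theta, cost_histories = model.train(max_iterations=1000)

predictions = model.predict(data)
print('training accuracy:', np.mean(predictions == labels))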