# Required imports ("the big three" plus sklearn helpers)
import time

import matplotlib.pyplot as plt
import numpy as np
import numpy.random
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
# 定义损失函数--目标函数-->输出损失值defcost(self, X, y, theta):
left = np.multiply(-y, np.log(self.model(X, theta)))
right = np.multiply(1- y, np.log(1- self.model(X, theta)))return np.sum(left - right)/(len(X))
# 4. Define the gradient computation function
# 定义梯度的计算公式defgradient(self, X, y, theta):
grad = np.zeros(theta.shape)# 对应theta的形状
error =(self.model(X, theta)- y).ravel()for j inrange(len(theta.ravel())):# for each parmeter
term = np.multiply(error, X[:, j])# 分三次对不同的theta进行梯度计算
grad[0, j]= np.sum(term)/len(X)# 累计和取均值return grad # 返回的是一个更新的方向
# 对输出的softmax概率进行二分类处理defpredict(self, X, theta):
X = self.data_matrix(X)# print(self.model(X, theta)) # 为什么全他妈大于0.5return[1if x >=0.5else0for x in self.model(X, theta)]
# 7. Training driver: gradient computation plus loss-curve visualization
defdescent(self, data, theta, batchSize, stopType, thresh, alpha):# 梯度下降求解
init_time = time.time()
i =0# 迭代次数
k =0# batch
X, y = self.shuffleData(data)
grad = np.zeros(theta.shape)# 计算的梯度
costs =[self.cost(X, y, theta)]# 损失值whileTrue:
grad = self.gradient(X[k:k + batchSize], y[k:k + batchSize], theta)# 给的数据越多梯度进行计算时考虑的样本也就越多
k += batchSize # 取batch数量个数据if k >=len(data):# 如果取得数据范围大于了原有数据的长度,就重新从第一个开始,每次都打乱顺序,综合考虑数据
k =0
X, y = self.shuffleData(data)# 重新洗牌
theta = theta - alpha * grad # 参数更新, alpha是学习率,grad代表的是更新方向
costs.append(self.cost(X, y, theta))# 计算新的损失,将损失放入列表中,便于后期进行变化大小判别
i +=1# 迭代次数加一if stopType ==0:
value = i
elif stopType ==1:
value = costs
elif stopType ==2:
value = grad
if self.stopCriterion(stopType, value, thresh):breakreturn theta, i -1, costs, grad, time.time()- init_time
defrunExpe(self, data, theta, batchSize, stopType, thresh, alpha):# 核心代码
theta,iter, costs, grad, dur = self.descent(data, theta, batchSize, stopType, thresh, alpha)# 进行显示代码
name ="Original"if(data[:,1]>2).sum()>1else"Scaled"
name +=" data - learning rate: {} - ".format(alpha)if batchSize ==len(data):
strDescType ="Gradient"elif batchSize ==1:
strDescType ="Stochastic"else:
strDescType ="Mini-batch ({})".format(batchSize)
name += strDescType +" descent - Stop: "if stopType ==0:
strStop ="{} iterations".format(thresh)elif stopType ==1:
strStop ="costs change < {}".format(thresh)else:
strStop ="gradient norm < {}".format(thresh)
name += strStop
print("***{}\nTheta: {} - Iter: {} - Last cost: {:03.2f} - Duration: {:03.2f}s".format(
name, theta,iter, costs[-1], dur))
fig, ax = plt.subplots(figsize=(12,4))
ax.plot(np.arange(len(costs)), costs,'r')
ax.set_xlabel('Iterations')
ax.set_ylabel('Cost')
ax.set_title(name.upper()+' - Error vs. Iteration')
plt.show()return theta