理论部分
- 逻辑回归与线性回归的联系与区别
- 模型建立:逻辑回归原理、逻辑回归模型
- 学习策略:逻辑回归损失函数、推导及优化
- 算法求解:批量梯度下降
- 正则化与模型评估指标
- 逻辑回归的优缺点
- 样本不均衡问题
- sklearn参数详解
- 案例:
"""Logistic regression demo: sklearn baseline, batch gradient descent, Newton's method.

Loads 2-D points from logistic_x.txt / logistic_y.txt, fits a logistic
regression three ways, and plots each decision boundary.
"""
import numpy as np
import pandas as pd


def _sigmoid(z):
    """Logistic function 1 / (1 + exp(-z))."""
    return 1.0 / (1.0 + np.exp(-z))


class LGR_GD:
    """Logistic regression fitted with batch gradient descent."""

    def __init__(self):
        self.w = None        # learned weights, shape (1, d)
        self.n_iters = None  # number of iterations actually run

    def fit(self, X, y, alpha=0.03, loss=1e-10, max_iters=100000):
        """Fit weights by batch gradient ascent on the mean log-likelihood.

        Parameters
        ----------
        X : ndarray, shape (m, d) -- rows already include the intercept column.
        y : ndarray, shape (m,) -- 0/1 class labels.
        alpha : float -- learning rate (step size).
        loss : float -- convergence threshold on the gradient norm.
        max_iters : int -- safety cap so a non-converging run still terminates.
        """
        y = y.reshape(-1, 1)            # column vector for matrix arithmetic
        m, d = np.shape(X)
        self.w = np.zeros((1, d))       # start from the zero vector
        tol = 1e5
        self.n_iters = 0
        while tol > loss and self.n_iters < max_iters:
            # Gradient of the mean log-likelihood: X^T (y - sigmoid(X w)) / m.
            # The original code (a) omitted the sigmoid and (b) updated via
            # self.w[i], which indexes a *row* of the (1, d) weight matrix,
            # and X.dot(self.w) was a (m,d)x(1,d) shape mismatch.
            error = y - _sigmoid(X.dot(self.w.T))   # (m, 1) residuals
            grad = X.T.dot(error) / m               # (d, 1)
            self.w += alpha * grad.T
            tol = np.linalg.norm(grad)              # converged when gradient ~ 0
            self.n_iters += 1
        return self

    def predict(self, X):
        """Return P(y=1 | x) for each row of X, shape (m, 1)."""
        return _sigmoid(X.dot(self.w.T))


class LGR_NT:
    """Logistic regression fitted with Newton's method."""

    def __init__(self):
        self.w = None        # learned weights, shape (1, d)
        self.n_iters = None  # number of Newton steps taken

    def fit(self, X, y, loss=1e-10, max_iters=100):
        """Fit weights by Newton-Raphson on the log-likelihood.

        Parameters
        ----------
        X : ndarray, shape (m, d) -- rows already include the intercept column.
        y : ndarray, shape (m,) -- 0/1 class labels.
        loss : float -- convergence threshold on the gradient norm.
        max_iters : int -- safety cap (Newton diverges on separable data).
        """
        y = y.reshape(-1, 1)
        m, d = np.shape(X)
        theta = np.zeros((d, 1))        # the original left theta undefined
        tol = 1e5
        n_iters = 0
        while tol > loss and n_iters < max_iters:
            p = _sigmoid(X.dot(theta))               # (m, 1) predicted probs
            grad = X.T.dot(y - p) / m                # (d, 1) gradient
            weights = (p * (1.0 - p)).ravel()        # diag of S = p(1-p)
            # Hessian of the mean log-likelihood: -X^T S X / m (negative
            # definite); a tiny ridge keeps solve() stable near separation.
            Hessian = -(X.T * weights).dot(X) / m - 1e-10 * np.eye(d)
            theta = theta - np.linalg.solve(Hessian, grad)
            tol = np.linalg.norm(grad)
            n_iters += 1
        self.w = theta.T                 # keep the (1, d) layout the plots use
        self.n_iters = n_iters
        return self

    def predict(self, X):
        """Return P(y=1 | x) for each row of X, shape (m, 1)."""
        return _sigmoid(X.dot(self.w.T))


if __name__ == "__main__":
    # Plotting / sklearn imports live in the script section so the classes
    # above are importable without these optional dependencies.
    import matplotlib.pyplot as plt
    from sklearn.linear_model import LogisticRegression

    # r'\ +' keeps the original separator regex but as a valid raw string
    # ('\ ' is an invalid escape sequence in modern Python).
    df_X = pd.read_csv('./logistic_x.txt', sep=r'\ +', header=None, engine='python')  # X values
    ys = pd.read_csv('./logistic_y.txt', sep=r'\ +', header=None, engine='python')    # y values
    ys = ys.astype(int)
    df_X['label'] = ys[0].values  # tag each X row with its class label

    # Scatter the raw points to eyeball the class separation.
    ax = plt.axes()
    df_X.query('label == 0').plot.scatter(x=0, y=1, ax=ax, color='blue')
    df_X.query('label == 1').plot.scatter(x=0, y=1, ax=ax, color='red')

    # Build the design matrix with an explicit intercept column of ones.
    Xs = df_X[[0, 1]].values
    Xs = np.hstack([np.ones((Xs.shape[0], 1)), Xs])
    ys = df_X['label'].values

    # --- sklearn baseline --------------------------------------------------
    # fit_intercept=False because the intercept is already a column of Xs.
    lr = LogisticRegression(fit_intercept=False)
    lr.fit(Xs, ys)
    score = lr.score(Xs, ys)
    print("Coefficient: %s" % lr.coef_)
    print("Score: %s" % score)

    ax = plt.axes()
    df_X.query('label == 0').plot.scatter(x=0, y=1, ax=ax, color='blue')
    df_X.query('label == 1').plot.scatter(x=0, y=1, ax=ax, color='red')
    _xs = np.array([np.min(Xs[:, 1]), np.max(Xs[:, 1])])
    # Decision boundary: w0 + w1*x1 + w2*x2 = 0  =>  x2 = -(w0 + w1*x1)/w2.
    _ys = (lr.coef_[0][0] + lr.coef_[0][1] * _xs) / (-lr.coef_[0][2])
    plt.plot(_xs, _ys, lw=1)

    # --- hand-rolled batch gradient descent ---------------------------------
    lr_gd = LGR_GD()
    lr_gd.fit(Xs, ys)
    ax = plt.axes()
    df_X.query('label == 0').plot.scatter(x=0, y=1, ax=ax, color='blue')
    df_X.query('label == 1').plot.scatter(x=0, y=1, ax=ax, color='red')
    _xs = np.array([np.min(Xs[:, 1]), np.max(Xs[:, 1])])
    _ys = (lr_gd.w[0][0] + lr_gd.w[0][1] * _xs) / (-lr_gd.w[0][2])
    plt.plot(_xs, _ys, lw=1)

    # --- Newton's method -----------------------------------------------------
    lgr_nt = LGR_NT()
    lgr_nt.fit(Xs, ys)
    # The original printed lgr_gd.* (undefined name); the GD model is lr_gd.
    print("梯度下降法结果参数:%s;梯度下降法迭代次数:%s" % (lr_gd.w, lr_gd.n_iters))
    print("牛顿法结果参数:%s;牛顿法迭代次数:%s" % (lgr_nt.w, lgr_nt.n_iters))