风火编程 -- Machine Learning: Logistic Regression (LogisticRegression)

Logistic Regression

Description
Logistic regression estimates, from a sample's features, the probability that the sample belongs to the positive class: a linear combination of the features is passed through the sigmoid function, and the resulting probability is thresholded at 0.5 (greater than 0.5 → 1, less than 0.5 → 0).
Logistic regression itself is a binary-classification algorithm; multi-class problems can be handled with OVR or OVO.
OVR (one vs rest): trains one binary classifier per class, n in total, so the cost scales as O(n).
OVO (one vs one): trains one classifier per pair of classes, n(n-1)/2 in total, so the cost scales as O(n²).
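For reference, the model, the cross-entropy loss, and its gradient implemented in the next section can be written in their standard textbook form, with x_b denoting the feature vector prefixed by a constant 1 for the intercept:

\[
\hat{p} = \sigma(\theta^{T} x_b), \qquad \sigma(t) = \frac{1}{1 + e^{-t}}
\]
\[
J(\theta) = -\frac{1}{m} \sum_{i=1}^{m} \left[ y^{(i)} \log \hat{p}^{(i)} + \left(1 - y^{(i)}\right) \log\left(1 - \hat{p}^{(i)}\right) \right]
\]
\[
\nabla J(\theta) = \frac{1}{m} X_b^{T} \left( \sigma(X_b \theta) - y \right)
\]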

Key implementation

import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.model_selection import train_test_split

iris = datasets.load_iris()
X = iris.data
y = iris.target
X = X[y < 2, :2]  # keep only classes 0 and 1, and the first two features (binary problem)
y = y[y < 2]

X_train, X_test, y_train, y_test = train_test_split(X, y)

sigmoid = lambda t: 1 / (1 + np.exp(-t))  # maps any real score into (0, 1)


def J(theta, X_b, y):
    """Cross-entropy loss (negative mean log-likelihood)."""
    y_hat = sigmoid(X_b.dot(theta))
    try:
        return -np.sum(y * np.log(y_hat) + (1 - y) * np.log(1 - y_hat)) / len(y)
    except Exception:
        return float('inf')


def dJ(theta, X_b, y):
    """Gradient of the loss with respect to theta."""
    return X_b.T.dot(sigmoid(X_b.dot(theta)) - y) / len(X_b)


def gradient_descent(X, y, initial_theta, eta=0.001, iters=1e4, epsilon=1e-8):
    """Batch gradient descent; stops early once the loss change drops below epsilon."""
    X_b = np.hstack([np.ones([len(X), 1]), X])  # prepend a column of ones for the intercept

    theta = initial_theta
    i = 0
    while i < iters:
        i += 1
        gradient = dJ(theta, X_b, y)
        last_theta = theta
        theta = theta - eta * gradient

        if abs(J(theta, X_b, y) - J(last_theta, X_b, y)) < epsilon:
            break
    return theta  # theta[0] is the intercept, theta[1:] are the feature weights

def predict(X, theta):
    """Binary prediction: threshold the sigmoid probability at 0.5."""
    X_b = np.hstack([np.ones([len(X), 1]), X])
    proba = sigmoid(X_b.dot(theta))  # probability of the positive class
    return np.array(proba > 0.5, dtype='int')

if __name__ == '__main__':
    initial_theta = np.random.random(X_train.shape[1] + 1)  # one weight per feature, plus the intercept

    theta = gradient_descent(X_train, y_train, initial_theta)
    y_predict = predict(X_test, theta)
    print(y_predict)
    print(y_test)
    
    # decision boundary: theta[0] + theta[1]*x1 + theta[2]*x2 = 0, solved for x2
    x2 = lambda x1: (-theta[1] * x1 - theta[0]) / theta[2]
    plt.scatter(X[y == 0, 0], X[y == 0, 1], alpha=0.5)
    plt.scatter(X[y == 1, 0], X[y == 1, 1], alpha=0.5)
    x1_plot = np.linspace(4, 8, 1000)
    x2_plot = x2(x1_plot)
    plt.plot(x1_plot, x2_plot)
    plt.show()
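
As a quick sanity check (a minimal sketch, assuming the functions defined above are in scope), the classification accuracy on the held-out split can be computed directly:

# fraction of test samples classified correctly
accuracy = np.mean(predict(X_test, theta) == y_test)
print(accuracy)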

scikit-learn interface

from sklearn import datasets
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split

iris = datasets.load_iris()
X = iris.data
y = iris.target
X = X[y < 2, :2]  # same binary subset: classes 0 and 1, first two features
y = y[y < 2]

X_train, X_test, y_train, y_test = train_test_split(X, y)
# C is the inverse of the regularization strength: a larger C means weaker regularization.
# penalty chooses the regularization type ('l1' or 'l2').
# For multi-class data, multi_class='multinomial' with solver='newton-cg' fits a softmax model;
# the default multi_class='ovr' trains one one-vs-rest classifier per class.

clf = LogisticRegression(C=1.0, penalty='l2', multi_class='ovr')
clf.fit(X_train, y_train)
score = clf.score(X_test, y_test)
print(score)
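
For the full three-class iris problem, here is a minimal sketch of the two multi-class strategies mentioned above (assuming the same imports, plus sklearn.multiclass for the OVO wrapper; the multi_class parameter may be deprecated in recent scikit-learn versions):

from sklearn.multiclass import OneVsOneClassifier

iris = datasets.load_iris()
X_train, X_test, y_train, y_test = train_test_split(iris.data, iris.target)

# softmax (multinomial) logistic regression over all three classes
softmax_clf = LogisticRegression(C=1.0, penalty='l2', multi_class='multinomial', solver='newton-cg')
softmax_clf.fit(X_train, y_train)
print(softmax_clf.score(X_test, y_test))

# one-vs-one: trains n(n-1)/2 binary classifiers, one per pair of classes
ovo_clf = OneVsOneClassifier(LogisticRegression(C=1.0, penalty='l2'))
ovo_clf.fit(X_train, y_train)
print(ovo_clf.score(X_test, y_test))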
