逻辑回归(logistic regression)
逻辑回归 = 线性回归 + sigmoid函数
sigmoid函数表达的是将样本分为1类的概率(二分类问题)
通过sigmoid函数表示p(y|x)的概率,通过极大似然估计将参数估计变成最优化求解问题,进而采用梯度下降法或者拟牛顿法解出最优解。
代码实现如下:
1.logistic regression底层代码实现
# logistic回归梯度上升优化算法底层代码实现
import numpy as np
# Load a tab-separated data file: two float features plus a class label per row.
def loadDataSet(filname):
    """Read `filname`, a text file with one sample per line: feat1<TAB>feat2<TAB>label.

    Returns:
        dataSet: list of [float, float] feature rows.
        labelSet: list of int class labels (e.g. 0/1).
    """
    dataSet = []
    labelSet = []
    # `with` guarantees the file handle is closed even if parsing raises.
    with open(filname) as fr:
        for row in fr:
            # BUGFIX: fields are tab-separated — the original split on the
            # literal string '/t', which never matches, so line[1]/line[2]
            # raised IndexError on real data.
            line = row.strip().split('\t')
            dataSet.append([float(line[0]), float(line[1])])
            # BUGFIX: labels must be numeric so matrix arithmetic downstream
            # (labelMat - h in gradAscent) works; originally kept as strings.
            labelSet.append(int(line[2]))
    return dataSet, labelSet
# Logistic (sigmoid) activation: squashes any real value into (0, 1).
def sigmoid(inX):
    """Return 1 / (1 + e^(-inX)); applies element-wise to numpy arrays."""
    return 1.0 / (np.exp(-inX) + 1)
# Full-batch gradient ascent on the logistic-regression log-likelihood.
def gradAscent(dataSet, labelSet):
    """Fit logistic-regression weights by batch gradient ascent.

    Args:
        dataSet: m x n list/array of numeric feature rows.
        labelSet: length-m sequence of numeric 0/1 labels.

    Returns:
        numpy matrix of shape (n, 1) holding the learned weights.
    """
    X = np.mat(dataSet)                  # m x n design matrix
    y = np.mat(labelSet).T               # m x 1 column vector of labels
    n_features = X.shape[1]
    step_size = 0.001                    # learning rate
    n_iters = 500                        # fixed iteration budget
    weights = np.ones((n_features, 1))   # start from all-ones weight vector
    for _ in range(n_iters):
        # P(y=1 | x) for every sample (sigmoid of the linear score, inlined)
        prediction = 1.0 / (1.0 + np.exp(-(X * weights)))
        residual = y - prediction        # gradient direction of the log-likelihood
        weights = weights + step_size * X.T * residual
    return weights
2.调用python包实现logistic regression
#直接调用python包
import pandas as pd
import numpy as np
import matplotlib as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
#1. Load the data (hard-coded local path; header row present)
data = pd.read_csv('E:\\pythonProject\\iris.csv',header = 0)
data.drop_duplicates(inplace=True)
data.dropna(inplace=True)
#2. Build the logistic-regression model
#(1) Build x (feature matrix) and y (label column)
# Encode the three iris species as integers 0/1/2.
data['Species'] = data['Species'].map({"setosa": 0,"versicolor":1 ,"virginica": 2})
# Drop class 2 so the remaining data is a binary-classification problem.
label = data[data['Species'] != 2]
# NOTE(review): iloc[:,1:3] picks the 2nd and 3rd columns as features —
# presumably sepal width / petal length; verify against the CSV layout.
feature_cols = label.iloc[:,1:3]
# print(feature_cols)
# print(label)
x = feature_cols
y = label['Species']
#(2) Split into training and test sets (fixed seed for reproducibility)
x_train,x_test,y_train,y_test = train_test_split(x,y,random_state=1)
#(3) Build and fit the logistic-regression model
model = LogisticRegression().fit(x_train,y_train)
#(4) Print the fitted parameters (intercept and coefficients)
print('截距:',model.intercept_)
print('回归系数:',model.coef_)
#(5) Report mean accuracy on the held-out test set
print(model.score(x_test,y_test))
#参考:
1、《机器学习实战》
2、https://blog.csdn.net/weixin_42837961/article/details/104081627?ops_request_misc=%257B%2522request%255Fid%2522%253A%2522161641645216780262586870%2522%252C%2522scm%2522%253A%252220140713.130102334.pc%255Fall.%2522%257D&request_id=161641645216780262586870&biz_id=0&utm_medium=distribute.pc_search_result.none-task-blog-2allfirst_rank_v2~rank_v29-1-104081627.pc_search_positive&utm_term=%E5%9B%9E%E5%BD%92%E5%88%86%E6%9E%90%E8%AF%A6%E8%A7%A3%E4%B8%8Epython%E5%AE%9E%E7%8E%B0