机器学习4：逻辑回归

最新推荐文章于 2022-01-05 19:13:12 发布

马踏飞燕&lin_li

最新推荐文章于 2022-01-05 19:13:12 发布

阅读量318

点赞数 1

分类专栏：机器学习文章标签：机器学习逻辑回归

本文链接：https://blog.csdn.net/matafeiyanll/article/details/104139781

版权

机器学习专栏收录该内容

20 篇文章 2 订阅

订阅专栏

机器学习4：逻辑回归

逻辑回归

原理

决策边界

逻辑回归的代价函数

梯度下降法

多分类问题

逻辑回归正则化

正确率、召回率、F1

算法实现

梯度下降—逻辑回归手工实现

import matplotlib.pyplot as plt
import numpy as np
from sklearn.metrics import classification_report   #计算准确率召回率F1
from sklearn import preprocessing
#数据是否需要标准化
scale=False

#载入数据
data=np.genfromtxt("LR-testSet.csv",delimiter=',')
x_data=data[:,:-1]
y_data=data[:,-1]

def plot():
    x0=[]
    x1=[]
    y0=[]
    y1=[]
    #切分不同类别的数据
    for i in range(len(x_data)):
        if y_data[i]==0:
            x0.append(x_data[i,0])
            y0.append(x_data[i,1])
        else:
            x1.append(x_data[i,0])
            y1.append(x_data[i,1])
    #画图
    scatter0=plt.scatter(x0,y0,c='b',marker='o')
    scatter1=plt.scatter(x1,y1,c='r',marker='x')
    #画图例
    plt.legend(handles=[scatter0,scatter1],labels=['label0','label1'],loc='best')
    
plot()
plt.show()

在这里插入图片描述

#数据处理，添加偏置项
x_data=data[:,:-1]
y_data=data[:,-1,np.newaxis]

print(np.mat(x_data).shape)
print(np.mat(y_data).shape)
#给样本添加偏置项
X_data=np.concatenate((np.ones((100,1)),x_data),axis=1)
print(X_data.shape)

(100, 2)
(100, 1)
(100, 3)

def sigmoid(x):
    return 1.0/(1+np.exp(-x))

def cost(xMat,yMat,ws):
    left = np.multiply(yMat, np.log(sigmoid(xMat*ws)))   #按位相乘
    right = np.multiply(1 - yMat, np.log(1 - sigmoid(xMat*ws)))
    return np.sum(left + right) / -(len(xMat))

def gradAscent(xArr,yArr):
    if scale==True:
        xArr=preprocessing.scale(xArr)
    xMat=np.mat(xArr)
    yMat=np.mat(yArr)
    
    lr=0.001
    epochs = 10000
    costList = []
    # 计算数据行列数
    # 行代表数据个数，列代表权值个数
    m,n = np.shape(xMat)
    # 初始化权值
    ws = np.mat(np.ones((n,1)))
    
    for i in range(epochs+1):             
        # xMat和weights矩阵相乘
        h = sigmoid(xMat*ws)   
        # 计算误差
        ws_grad = xMat.T*(h - yMat)/m
        ws = ws - lr*ws_grad 
        
        if i % 50 == 0:
            costList.append(cost(xMat,yMat,ws))
    return ws,costList

# 训练模型，得到权值和cost值的变化
ws,costList = gradAscent(X_data, y_data)
print(ws)

[[ 2.05836354]
[ 0.3510579 ]
[-0.36341304]]

if scale == False:
    # 画图决策边界
    plot()
    x_test = [[-4],[3]]
    y_test = (-ws[0] - x_test*ws[1])/ws[2]
    plt.plot(x_test, y_test, 'k')
    plt.show()

在这里插入图片描述

# 画图 loss值的变化
x = np.linspace(0,10000,201)   #10000/50
plt.plot(x, costList, c='r')
plt.title('Train')
plt.xlabel('Epochs')
plt.ylabel('Cost')
plt.show()

在这里插入图片描述

# 预测
def predict(x_data, ws):
    if scale == True:
        x_data = preprocessing.scale(x_data)
    xMat = np.mat(x_data)
    ws = np.mat(ws)
    return [1 if x >= 0.5 else 0 for x in sigmoid(xMat*ws)]

predictions = predict(X_data, ws)

print(classification_report(y_data, predictions))

在这里插入图片描述

逻辑回归-sklearn

import matplotlib.pyplot as plt
import numpy as np
from sklearn.metrics import classification_report
from sklearn import preprocessing
from sklearn import linear_model
# 数据是否需要标准化
scale = False

# 载入数据
data = np.genfromtxt("LR-testSet.csv", delimiter=",")
x_data = data[:,:-1]
y_data = data[:,-1]
    
def plot():
    x0 = []
    x1 = []
    y0 = []
    y1 = []
    # 切分不同类别的数据
    for i in range(len(x_data)):
        if y_data[i]==0:
            x0.append(x_data[i,0])
            y0.append(x_data[i,1])
        else:
            x1.append(x_data[i,0])
            y1.append(x_data[i,1])

    # 画图
    scatter0 = plt.scatter(x0, y0, c='b', marker='o')
    scatter1 = plt.scatter(x1, y1, c='r', marker='x')
    #画图例
    plt.legend(handles=[scatter0,scatter1],labels=['label0','label1'],loc='best')
    
plot()
plt.show()

在这里插入图片描述

logistic = linear_model.LogisticRegression()
logistic.fit(x_data, y_data)

if scale == False:
    # 画图决策边界
    plot()
    x_test = np.array([[-4],[3]])
    y_test = (-logistic.intercept_ - x_test*logistic.coef_[0][0])/logistic.coef_[0][1]
    plt.plot(x_test, y_test, 'k')
    plt.show()

在这里插入图片描述

predictions = logistic.predict(x_data)
print(classification_report(y_data, predictions))

在这里插入图片描述

梯度下降法-非线性逻辑回归

import matplotlib.pyplot as plt
import numpy as np
from sklearn.metrics import classification_report
from sklearn import preprocessing
from sklearn.preprocessing import PolynomialFeatures  #特征处理
# 数据是否需要标准化
scale = False

# 载入数据
data = np.genfromtxt("LR-testSet2.txt", delimiter=",")
x_data = data[:,:-1]
y_data = data[:,-1,np.newaxis]
    
def plot():
    x0 = []
    x1 = []
    y0 = []
    y1 = []
    # 切分不同类别的数据
    for i in range(len(x_data)):
        if y_data[i]==0:
            x0.append(x_data[i,0])
            y0.append(x_data[i,1])
        else:
            x1.append(x_data[i,0])
            y1.append(x_data[i,1])

    # 画图
    scatter0 = plt.scatter(x0, y0, c='b', marker='o')
    scatter1 = plt.scatter(x1, y1, c='r', marker='x')
    #画图例
    plt.legend(handles=[scatter0,scatter1],labels=['label0','label1'],loc='best')
    
plot()
plt.show()

在这里插入图片描述

# 定义多项式回归,degree的值可以调节多项式的特征
poly_reg  = PolynomialFeatures(degree=3) 
# 特征处理
x_poly = poly_reg.fit_transform(x_data)

def sigmoid(x):
    return 1.0/(1+np.exp(-x))

def cost(xMat, yMat, ws):
    left = np.multiply(yMat, np.log(sigmoid(xMat*ws)))
    right = np.multiply(1 - yMat, np.log(1 - sigmoid(xMat*ws)))
    return np.sum(left + right) / -(len(xMat))

def gradAscent(xArr, yArr):
    
    if scale == True:
        xArr = preprocessing.scale(xArr)
    xMat = np.mat(xArr)
    yMat = np.mat(yArr)
    
    lr = 0.03
    epochs = 50000
    costList = []
    # 计算数据列数，有几列就有几个权值
    m,n = np.shape(xMat)
    # 初始化权值
    ws = np.mat(np.ones((n,1)))
    
    for i in range(epochs+1):             
        # xMat和weights矩阵相乘
        h = sigmoid(xMat*ws)   
        # 计算误差
        ws_grad = xMat.T*(h - yMat)/m
        ws = ws - lr*ws_grad 
        
        if i % 50 == 0:
            costList.append(cost(xMat,yMat,ws))
    return ws,costList

# 训练模型，得到权值和cost值的变化
ws,costList = gradAscent(x_poly, y_data)
print(ws)

[[ 4.16787292]
[ 2.72213524]
[ 4.55120018]
[-9.76109006]
[-5.34880198]
[-8.51458023]
[-0.55950401]
[-1.55418165]
[-0.75929829]
[-2.88573877]]

# 获取数据值所在的范围
x_min, x_max = x_data[:, 0].min() - 1, x_data[:, 0].max() + 1
y_min, y_max = x_data[:, 1].min() - 1, x_data[:, 1].max() + 1

# 生成网格矩阵
xx, yy = np.meshgrid(np.arange(x_min, x_max, 0.02),
                     np.arange(y_min, y_max, 0.02))

# np.r_按row来组合array， 
# np.c_按colunm来组合array
# >>> a = np.array([1,2,3])
# >>> b = np.array([5,2,5])
# >>> np.r_[a,b]
# array([1, 2, 3, 5, 2, 5])
# >>> np.c_[a,b]
# array([[1, 5],
#        [2, 2],
#        [3, 5]])
# >>> np.c_[a,[0,0,0],b]
# array([[1, 0, 5],
#        [2, 0, 2],
#        [3, 0, 5]])
z = sigmoid(poly_reg.fit_transform(np.c_[xx.ravel(), yy.ravel()]).dot(np.array(ws)))# ravel与flatten类似，多维数据转一维。flatten不会改变原始数据，ravel会改变原始数据
for i in range(len(z)):
    if z[i] > 0.5:
        z[i] = 1
    else:
        z[i] = 0
z = z.reshape(xx.shape)

# 等高线图
cs = plt.contourf(xx, yy, z)
plot() 
plt.show()

在这里插入图片描述

# 预测
def predict(x_data, ws):
#     if scale == True:
#         x_data = preprocessing.scale(x_data)
    xMat = np.mat(x_data)
    ws = np.mat(ws)
    return [1 if x >= 0.5 else 0 for x in sigmoid(xMat*ws)]

predictions = predict(x_poly, ws)

print(classification_report(y_data, predictions))

在这里插入图片描述

sklearn-非线性逻辑回归

import numpy as np
import matplotlib.pyplot as plt
from sklearn import linear_model
from sklearn.datasets import make_gaussian_quantiles  #生成数据
from sklearn.preprocessing import PolynomialFeatures

# 生成2维正态分布，生成的数据按分位数分为两类，500个样本,2个样本特征
# 可以生成两类或多类数据
x_data, y_data = make_gaussian_quantiles(n_samples=500, n_features=2,n_classes=2)

plt.scatter(x_data[:, 0], x_data[:, 1], c=y_data)  #把分类传到颜色中去,代码简单很多
plt.show()

在这里插入图片描述

logistic = linear_model.LogisticRegression()
logistic.fit(x_data, y_data)

# 定义多项式回归,degree的值可以调节多项式的特征
poly_reg  = PolynomialFeatures(degree=5) 
# 特征处理
x_poly = poly_reg.fit_transform(x_data)
# 定义逻辑回归模型
logistic = linear_model.LogisticRegression()
# 训练模型
logistic.fit(x_poly, y_data)

# 获取数据值所在的范围
x_min, x_max = x_data[:, 0].min() - 1, x_data[:, 0].max() + 1
y_min, y_max = x_data[:, 1].min() - 1, x_data[:, 1].max() + 1

# 生成网格矩阵
xx, yy = np.meshgrid(np.arange(x_min, x_max, 0.02),
                     np.arange(y_min, y_max, 0.02))

# np.r_按row来组合array， 
# np.c_按colunm来组合array
# >>> a = np.array([1,2,3])
# >>> b = np.array([5,2,5])
# >>> np.r_[a,b]
# array([1, 2, 3, 5, 2, 5])
# >>> np.c_[a,b]
# array([[1, 5],
#        [2, 2],
#        [3, 5]])
# >>> np.c_[a,[0,0,0],b]
# array([[1, 0, 5],
#        [2, 0, 2],
#        [3, 0, 5]])
z = logistic.predict(poly_reg.fit_transform(np.c_[xx.ravel(), yy.ravel()]))# ravel与flatten类似，多维数据转一维。flatten不会改变原始数据，ravel会改变原始数据
z = z.reshape(xx.shape)
# 等高线图
cs = plt.contourf(xx, yy, z)
# 样本散点图
plt.scatter(x_data[:, 0], x_data[:, 1], c=y_data)
plt.show()

print('score:',logistic.score(x_poly,y_data))

在这里插入图片描述

马踏飞燕&lin_li

关注

1
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
机器学习4：逻辑回归

机器学习4：逻辑回归逻辑回归原理决策边界逻辑回归的代价函数梯度下降法多分类问题逻辑回归正则化正确率、召回率、F1算法实现梯度下降—逻辑回归手工实现import matplotlib.pyplot as pltimport numpy as npfrom sklearn.metrics import classification_report ...
复制链接

扫一扫

专栏目录