逻辑回归

最新推荐文章于 2023-08-20 10:53:11 发布

Jim1235

最新推荐文章于 2023-08-20 10:53:11 发布

阅读量144

点赞数

文章标签： python 机器学习

本文链接：https://blog.csdn.net/Jim1235/article/details/103980698

版权

逻辑回归

使用三种方法实现：
1、使用sklearn

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

%matplotlib inline

# Load the feature columns and the labels. Both files are split on runs of
# spaces; the separator is written as a raw string so that the backslash is
# not parsed as an (invalid) string escape sequence.
df_X = pd.read_csv('./logistic_x.txt', sep=r'\ +', header=None, engine='python')  # X values
ys = pd.read_csv('./logistic_y.txt', sep=r'\ +', header=None, engine='python')  # y values
ys = ys.astype(int)
df_X['label'] = ys[0].values  # tag every X row with its corresponding y value
ax = plt.axes()
# Scatter the points in 2-D to get a visual impression of how the two
# classes are distributed (label 0 in blue, label 1 in red).
df_X.query('label == 0').plot.scatter(x=0, y=1, ax=ax, color='blue')
df_X.query('label == 1').plot.scatter(x=0, y=1, ax=ax, color='red')
# Extract the arrays used for learning.
Xs = df_X[[0, 1]].values
# Prepend a column of ones so the intercept is learned as an ordinary weight.
Xs = np.hstack([np.ones((Xs.shape[0], 1)), Xs])
ys = df_X['label'].values

from __future__ import print_function
import numpy as np
from sklearn.linear_model import LogisticRegression

# Xs already contains an explicit column of ones for the intercept, so the
# estimator is told not to add an intercept term of its own.
lr = LogisticRegression(fit_intercept=False).fit(Xs, ys)
# Evaluate the fitted model on the same data it was trained on.
score = lr.score(Xs, ys)
print("Coefficient: %s" % (lr.coef_,))
print("Score: %s" % (score,))

# Redraw both classes on fresh axes: label 0 in blue, label 1 in red.
ax = plt.axes()
for selector, colour in (('label == 0', 'blue'), ('label == 1', 'red')):
    df_X.query(selector).plot.scatter(x=0, y=1, ax=ax, color=colour)

# The learned parameters define the separating line
#     bias + w_1 * x1 + w_2 * x2 = 0,
# which is solved for x2 at the smallest and largest x1 found in the data
# (column 1 of Xs; column 0 is the constant bias column).
feature_1 = Xs[:, 1]
_xs = np.array([feature_1.min(), feature_1.max()])
bias, w_1, w_2 = lr.coef_[0]
_ys = (bias + w_1 * _xs) / (-w_2)
plt.plot(_xs, _ys, lw=1)

2、用梯度下降法将相同的数据分类，观察与sklearn的不同

class LGR_GD():
    """Binary logistic regression fitted with batch gradient descent."""

    def __init__(self):
        self.w = None  # fitted weights, shape (1, d); None until fit() has run
        self.n_iters = None  # number of gradient steps taken by the last fit()

    @staticmethod
    def _sigmoid(z):
        """Return 1 / (1 + exp(-z)), evaluated without overflow for large |z|."""
        # log(1 + exp(-z)) is computed stably by logaddexp(0, -z).
        return np.exp(-np.logaddexp(0, -z))

    def fit(self, X, y, alpha=0.03, loss=1e-10, max_iter=None):
        """Estimate the weights by gradient descent on the mean log-loss.

        Args:
            X: array-like of shape (m, d). No intercept is added here, so
                include a column of ones in X if a bias term is wanted.
            y: array-like of m labels; the update treats them as 0/1 targets.
            alpha: step size (default 0.03).
            loss: convergence threshold (default 1e-10). Iteration stops once
                the summed absolute change of the weights in one step is not
                greater than this value.
            max_iter: optional cap on the number of steps; None (the default)
                means iterate until the convergence threshold is met.

        Raises:
            ValueError: if X is not 2-D, has no rows, or y does not supply
                exactly one label per row of X.
        """
        X = np.asarray(X, dtype=float)
        if X.ndim != 2 or X.shape[0] == 0:
            raise ValueError("X must be a non-empty 2-D array")
        y = np.asarray(y, dtype=float).reshape(-1, 1)  # column vector for matrix algebra
        m, d = X.shape  # number of samples, number of features
        if y.shape[0] != m:
            raise ValueError("X and y must contain the same number of samples")

        self.w = np.zeros((1, d))  # start from all-zero weights
        tol = 1e5  # large initial value so the loop is entered at least once
        self.n_iters = 0

        while tol > loss and (max_iter is None or self.n_iters < max_iter):
            probs = self._sigmoid(X.dot(self.w.T))  # (m, 1) predicted P(y = 1)
            grad = (probs - y).T.dot(X) / m  # (1, d) gradient of the mean log-loss
            new_w = self.w - alpha * grad
            # Size of the step just taken; this is what drives termination.
            tol = np.sum(np.abs(new_w - self.w))
            self.w = new_w
            self.n_iters += 1  # count the completed iteration

    def predict(self, X):
        """Return the linear scores X . w^T for new samples, shape (m, 1).

        A positive score corresponds to a predicted probability above 0.5
        for class 1.

        Raises:
            ValueError: if called before fit().
        """
        if self.w is None:
            raise ValueError("fit() must be called before predict()")
        # self.w is stored as a (1, d) row, so it is transposed to line up
        # with the (m, d) sample matrix.
        y_pred = np.asarray(X).dot(self.w.T)
        return y_pred

if __name__ == "__main__":
    # Fit the gradient-descent model on the same design matrix and labels
    # that were used for the sklearn model.
    lr_gd = LGR_GD()
    lr_gd.fit(Xs, ys)

    ax = plt.axes()

    # Scatter the two classes: label 0 in blue, label 1 in red.
    df_X.query('label == 0').plot.scatter(x=0, y=1, ax=ax, color='blue')
    df_X.query('label == 1').plot.scatter(x=0, y=1, ax=ax, color='red')

    # Draw the decision boundary w0 + w1 * x1 + w2 * x2 = 0, solved for x2 at
    # the smallest and largest x1 in the data (column 1 of Xs; column 0 is
    # the constant bias column).
    _xs = np.array([np.min(Xs[:,1]), np.max(Xs[:,1])])
    _ys = (lr_gd.w[0][0] + lr_gd.w[0][1] * _xs) / (- lr_gd.w[0][2])
    plt.plot(_xs, _ys, lw=1)

3、用牛顿法实现

import numpy as np
import matplotlib.pyplot as plt
from file2matrix import file2matrix
from sigmoid import sigmoid
from compute_loss import compute_loss

# Small demonstration of np.diag: a 1-D input becomes the main diagonal of a
# square matrix, here [[1, 0], [0, 2]].
a = np.diag(np.array([1, 2]))
print(a)

def nt(x, y, theta, iterations=100):
    """Fit logistic-regression parameters with Newton's method.

    Relies on the imported helpers ``sigmoid`` and ``compute_loss``, whose
    definitions are not part of this snippet (presumably the logistic
    function and the training loss -- verify against those modules).

    Args:
        x: design matrix of shape (n, m).
        y: targets, expected to broadcast against ``sigmoid(x @ theta)``
            (an (n, 1) column when theta is (m, 1)).
        theta: initial parameter vector.
        iterations: maximum number of Newton steps.

    Returns:
        A tuple ``(theta, J_loss, real_iter)``: the updated parameters, the
        loss recorded at the start of every iteration, and the number of
        iterations actually performed (always equal to ``len(J_loss)``).
    """
    n = x.shape[0]
    J_loss = []
    orig_loss = np.inf
    for _ in range(iterations):
        l = compute_loss(x, y, theta)
        J_loss.append(l)
        h = sigmoid(np.dot(x, theta))
        # First derivative of the loss with respect to theta.
        j_first_order = np.dot(x.T, h - y) / n
        # Second derivative x^T diag(h * (1 - h)) x / n, shape (m, m). The
        # per-sample weights are applied row-wise rather than through dense
        # n-by-n diagonal matrices.
        weights = (h * (1 - h)).reshape(n, 1)
        j_second_order = np.dot(x.T, weights * x) / n
        # Newton step: solve H * step = g instead of forming the inverse of H.
        theta = theta - np.linalg.solve(j_second_order, j_first_order)
        # Stop once the loss improved by less than 0.001 since the previous
        # iteration; the first pass never stops because orig_loss is inf.
        if orig_loss - l < 0.001:
            break
        orig_loss = l
    # Report the iteration count whether the loop stopped early or ran to the
    # limit, so it always matches the length of the loss history.
    real_iter = len(J_loss)
    return theta, J_loss, real_iter


if __name__ == "__main__":
    X = file2matrix('./ex4x.dat')
    y = file2matrix('./ex4y.dat', 1)

    n, m = X.shape
    X = np.column_stack((np.ones(n), X))  # prepend the bias column of ones
    m = m + 1  # account for the added bias column
    theta = np.zeros((m, 1))

    theta, J_his, real_iter = nt(X, y, theta)
    print(real_iter)

    print("theta", theta)
    print("J", J_his)
    # Loss recorded at each iteration.
    plt.xlabel("iteration")
    plt.ylabel("J")
    plt.plot(np.arange(real_iter), J_his)
    plt.show()

    # Split the samples by label and plot the two feature columns.
    pos = list(np.where(y == 1.0)[0])
    X_pos = X[pos, 1:3]
    neg = list(np.where(y == 0.0)[0])
    X_neg = X[neg, 1:3]
    plt.plot(X_pos[:, 0], X_pos[:, 1], '+', label='admitted')
    plt.plot(X_neg[:, 0], X_neg[:, 1], 'o', label='Not admitted')
    plt.xlabel("exam1 score")
    plt.ylabel("exam2 score")
    plt.legend()

    # Decision boundary theta0 + theta1 * x1 + theta2 * x2 = 0, solved for x2
    # over six evenly spaced x1 values between 20 and 70.
    xx = np.linspace(20, 70, 6)
    yy = (xx * -(theta[1][0]) - (theta[0][0])) / (theta[2][0])
    plt.plot(xx, yy)
    plt.show()

参考文献

1、https://www.cnblogs.com/danning13/articles/7410470.html
2、https://blog.csdn.net/u010953266/article/details/78551348
3、https://blog.csdn.net/m0_37393514/article/details/82708285