机器学习之逻辑回归--实战(三)

*逻辑回归预测是否购买*
1)基于HomeWork_LogisticRegression.csv数据,以Age和EstimatedSalary为变量,建立逻辑回归模型(二阶边界),评估模型表现
2)以函数形式求解边界曲线
3)描绘出完整的决策边界曲线

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import mean_squared_error,r2_score
from sklearn.metrics import accuracy_score

# Load the dataset and show the two outcome classes as a scatter chart.
data = pd.read_csv(r'D:/tencent/qicq/HomeWork_LogisticRegression.csv')
data.head()

# Boolean mask: True for rows where a purchase was made.
mask = data['Purchased'].eq(1)

fig1 = plt.figure()
passed = plt.scatter(data['Age'][mask], data['EstimatedSalary'][mask])
failed = plt.scatter(data['Age'][~mask], data['EstimatedSalary'][~mask])
plt.title('Decision Boundary')
plt.xlabel('Age')
plt.ylabel('EstimatedSalary')
plt.legend((passed, failed), ('passed', 'failed'))
plt.show()
# Define the target and the raw predictors.
x = data.drop(labels=['Purchased', 'User ID', 'Gender'], axis=1)
y = data['Purchased']
X1 = data['Age']
X2 = data['EstimatedSalary']

# Second-order feature set — squared and cross terms let the logistic
# model learn a curved (second-order) decision boundary.
X1_2 = X1 ** 2
X2_2 = X2 ** 2
X1_X2 = X1 * X2
X_new = pd.DataFrame({
    'X1': X1,
    'X2': X2,
    'X1_2': X1_2,
    'X2_2': X2_2,
    'X1_X2': X1_X2,
})
X_new.head()
print(X_new)

# Train a logistic regression on the second-order features.
# max_iter is raised so the solver converges on the un-scaled salary values.
LR2 = LogisticRegression(max_iter=10000)
LR2.fit(X_new, y)

# Evaluate on the training set (no hold-out split in this exercise).
# Fixed misspelling: "accuarcy2" -> "accuracy2" in both the variable
# name and the printed label.
y2_predict = LR2.predict(X_new)
accuracy2 = accuracy_score(y, y2_predict)
print(f'accuracy2 : {accuracy2}')
# Solve the decision boundary analytically. On the boundary the model's
# linear score is zero:
#   theta0 + theta1*x1 + theta2*x2 + theta3*x1^2 + theta4*x2^2 + theta5*x1*x2 = 0
# which, for a fixed x1 (Age), is a quadratic in x2 (EstimatedSalary):
#   a*x2^2 + b*x2 + c = 0
X1_new = X1.sort_values()
print(X1, X1_new)
theta0 = LR2.intercept_
theta1, theta2, theta3, theta4, theta5 = LR2.coef_[0]
print(theta0, theta1, theta2, theta3, theta4, theta5)
a = theta4
b = theta5 * X1_new + theta2
c = theta0 + theta1 * X1_new + theta3 * X1_new * X1_new
# Quadratic formula gives the two branches of the boundary.
# NOTE(review): np.sqrt yields NaN where the discriminant is negative
# (no real boundary at that Age) — confirm that is acceptable for plotting.
discriminant = np.sqrt(b * b - 4 * a * c)
X2_new_boundary = (-b + discriminant) / (2 * a)
X2_new_boundary2 = (-b - discriminant) / (2 * a)
print(f'X2_new_boundary : {X2_new_boundary}')

# Plot the raw classes together with both branches of the boundary curve.
# Fixed: the original plotted the upper branch twice (once plain, then
# again in magenta at the end); each branch is now drawn exactly once.
fig2 = plt.figure()
passed = plt.scatter(data.loc[:, 'Age'][mask], data.loc[:, 'EstimatedSalary'][mask])
failed = plt.scatter(data.loc[:, 'Age'][~mask], data.loc[:, 'EstimatedSalary'][~mask])
plt.plot(X1_new, X2_new_boundary, '-m')
plt.plot(X1_new, X2_new_boundary2)
plt.title('Decision Boundary')
plt.xlabel('Age')
plt.ylabel('EstimatedSalary')
plt.legend((passed, failed), ('passed', 'failed'))
plt.show()
# define f(x)
def f(x):
    """Return the two EstimatedSalary boundary values for a given Age x.

    Solves a*x2^2 + b*x2 + c = 0 — the zero of the trained model's
    second-order score — for x2 via the quadratic formula.

    Returns:
        (upper_root, lower_root): scalars; NaN where the discriminant is
        negative (no real boundary exists at that Age).
    """
    # intercept_ is a length-1 array; take the scalar so the roots are
    # plain floats rather than 1-element arrays (original kept the array).
    theta0 = LR2.intercept_[0]
    theta1, theta2, theta3, theta4, theta5 = LR2.coef_[0]
    a = theta4
    b = theta5 * x + theta2
    c = theta0 + theta1 * x + theta3 * x * x
    root = np.sqrt(b * b - 4 * a * c)
    return (-b + root) / (2 * a), (-b - root) / (2 * a)

# Sample both branches along the sorted Age values.
# Fixed: the original called f(x) twice per iteration (f(x)[0], f(x)[1]);
# evaluate once and unpack.
X2_boundary1 = []
X2_boundary2 = []
for x in X1_new:
    upper, lower = f(x)
    X2_boundary1.append(upper)
    X2_boundary2.append(lower)

# Full decision boundary: evaluate on a dense Age grid extending one unit
# past the observed range so the curve is drawn end to end.
X1_range = np.linspace(start=X1.min() - 1, stop=X1.max() + 1, num=10000)
X2_boundary3 = []
X2_boundary4 = []
# Fixed: the original called f(x) twice per iteration; evaluate once
# per grid point and unpack both branches.
for x in X1_range:
    upper, lower = f(x)
    X2_boundary3.append(upper)
    X2_boundary4.append(lower)

fig4 = plt.figure()
passed = plt.scatter(data.loc[:, 'Age'][mask], data.loc[:, 'EstimatedSalary'][mask])
failed = plt.scatter(data.loc[:, 'Age'][~mask], data.loc[:, 'EstimatedSalary'][~mask])
plt.plot(X1_range, X2_boundary3)
plt.plot(X1_range, X2_boundary4)
plt.title('Decision Boundary')
plt.xlabel('Age')
plt.ylabel('EstimatedSalary')
plt.legend((passed, failed), ('Purchased', 'Not Purchased'))
plt.show()

 

 

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值