步骤与单项式(线性边界)逻辑回归相同,不同之处在于边界函数发生了改变。本文将它设定为二阶多项式边界函数:$\theta_0+\theta_1 x_1+\theta_2 x_2+\theta_3 x_1^2+\theta_4 x_2^2+\theta_5 x_1 x_2=0$
#加载数据
import pandas as pd
import numpy as np
# Read the exam records; the code below uses its 'Exam1' and 'Exam2' columns.
data = pd.read_csv('examdata.csv')
# Preview of the first rows (the value is discarded when run as a script).
data.head()
# Scatter plot of the raw, unlabelled scores.
from matplotlib import pyplot as plt
fig1 = plt.figure()
plt.scatter(data['Exam1'], data['Exam2'])
plt.xlabel('Exam1')
plt.ylabel('Exam2')
plt.title('Exam1-Exam2')
plt.show()
# Label mask: True for every row whose 'Pass' value equals 1.
mask = data['Pass'].eq(1)
print(mask)
fig2 = plt.figure()
# Draw the two classes as separate scatter series so that each one gets its
# own legend entry.
passed = plt.scatter(data.loc[mask, 'Exam1'], data.loc[mask, 'Exam2'])
failed = plt.scatter(data.loc[~mask, 'Exam1'], data.loc[~mask, 'Exam2'])
plt.xlabel('Exam1')
plt.ylabel('Exam2')
plt.title('Exam1-Exam2')
plt.legend((passed, failed), ('passed', 'failed'))
plt.show()
# Separate the 'Pass' label from the remaining columns.
y = data['Pass']
X = data.drop(columns=['Pass'])
x1 = data['Exam1']
x2 = data['Exam2']
X.head()
# Second-order features for the polynomial boundary function
#   theta0 + theta1*x1 + theta2*x2 + theta3*x1^2 + theta4*x2^2 + theta5*x1*x2
x1_2 = x1 * x1
x2_2 = x2 * x2
x1_x2 = x1 * x2
# Collect the features into a two-dimensional table. The column order
# (x1, x2, x1^2, x2^2, x1*x2) fixes the order of the fitted coefficients.
x_new = pd.DataFrame(
    {
        'x1': x1,
        'x2': x2,
        'x1_2': x1_2,
        'x2_2': x2_2,
        'x1_x2': x1_x2,
    }
)
x_new.head()
# Model: logistic regression trained on the second-order feature table.
from sklearn.linear_model import LogisticRegression
LR = LogisticRegression().fit(x_new, y)
# Prediction on the same samples the model was fitted on.
y_predict = LR.predict(x_new)
# Evaluation metric: accuracy of the predictions against the true labels.
from sklearn.metrics import accuracy_score
accuracy = accuracy_score(y_true=y, y_pred=y_predict)
print(accuracy)
# For a second-order boundary x1 must be sorted first; otherwise the curve is
# drawn in row order and the line jumps back and forth.
x1_new = x1.sort_values()
print(x1_new)
# Fitted coefficients. intercept_ is an array (a single element for this
# binary model), so take that element to keep theta0 a plain scalar instead
# of relying on length-1 broadcasting in the arithmetic below.
theta0 = LR.intercept_[0]
# Coefficient order follows the x_new columns: x1, x2, x1^2, x2^2, x1*x2.
theta1, theta2, theta3, theta4, theta5 = LR.coef_[0]
# Rewrite the boundary
#   theta0 + theta1*x1 + theta2*x2 + theta3*x1^2 + theta4*x2^2 + theta5*x1*x2 = 0
# as a quadratic in x2, a*x2^2 + b*x2 + c = 0, for each sorted x1 value.
a = theta4
b = theta5 * x1_new + theta2
c = theta0 + theta1 * x1_new + theta3 * x1_new * x1_new
discriminant = b * b - 4 * a * c
# A negative discriminant means there is no real boundary point at that x1.
# Mask those entries to NaN before the square root: the result is the same
# NaN as before, but without numpy's invalid-value RuntimeWarning.
# Only the "+" root of the quadratic formula is used.
x2_new_boubdary = (-b + np.sqrt(discriminant.where(discriminant >= 0))) / (2 * a)
print(x2_new_boubdary)
# Final figure: the labelled samples with the fitted boundary drawn on top.
plt.figure(figsize=(10, 5))
# Scatter of the original data, one series per class.
passed = plt.scatter(data.loc[mask, 'Exam1'], data.loc[mask, 'Exam2'])
failed = plt.scatter(data.loc[~mask, 'Exam1'], data.loc[~mask, 'Exam2'])
# Boundary curve: x2 on the decision boundary for each sorted x1 value.
plt.plot(x1_new, x2_new_boubdary)
plt.xlabel('Exam1')
plt.ylabel('Exam2')
plt.title('Exam1-Exam2')
plt.legend((passed, failed), ('passed', 'failed'))
plt.show()
运行结果:
预测准确率(accuracy):
模型对比图: