导入数据
处理数据
训练模型 一阶函数
训练模型 二阶边界函数
二阶模型的预测准确率为1，画图也看得出拟合得比一阶的好
#%%
import pandas as pd
import numpy as np
# Load the exam records from the CSV file (path is relative to the working directory).
data = pd.read_csv(filepath_or_buffer="examdata.csv")
# Preview the first five rows as a quick sanity check.
data.head(n=5)
#%%
%matplotlib inline
from matplotlib import pyplot as plt
# Unlabelled scatter of the two exam-score columns, one point per row of `data`.
fig1 = plt.figure()
plt.scatter(x=data['Exam1'], y=data['Exam2'])
plt.xlabel("Exam1")
plt.ylabel("Exam2")
plt.title("Exam1-Exam2")
plt.show()
#%%
# Add a label mask: a boolean Series that is True for rows whose 'Pass'
# column equals 1 (the students who passed) and False otherwise.
mask = data['Pass'].eq(1)
print(mask)
#%%
# Plot the two classes as separate scatter series so each gets its own
# colour and legend entry: rows selected by `mask` vs. the remaining rows.
from matplotlib import pyplot as plt
fig2 = plt.figure()
passed = plt.scatter(data.loc[mask, 'Exam1'], data.loc[mask, 'Exam2'])
failed = plt.scatter(data.loc[~mask, 'Exam1'], data.loc[~mask, 'Exam2'])
plt.xlabel("Exam1")
plt.ylabel("Exam2")
plt.title("Exam1-Exam2")
plt.legend([passed, failed], ['passed', 'failed'])
plt.show()
#%% define X.y
# Feature matrix: every column of `data` except the 'Pass' label.
X = data.drop(columns=['Pass'])
# Target vector: the 'Pass' label column.
y = data['Pass']
# Individual score columns, kept separately for the boundary computations below.
X1 = data['Exam1']
X2 = data['Exam2']
#%%
# Size of the feature matrix and the target vector.
print(X.shape, y.shape)
#%%
# Train the model: logistic regression on the raw (first-order) features.
from sklearn.linear_model import LogisticRegression
LR = LogisticRegression()
LR.fit(X=X, y=y)
#%%
# Predict on the same data the model was fitted on.
y_predict = LR.predict(X=X)
y_predict
#%%
# Evaluate model performance (accuracy) against the true labels.
from sklearn.metrics import accuracy_score
accuracy = accuracy_score(y_true=y, y_pred=y_predict)
print(accuracy)
# The closer to 1, the better.
#%%
# Predict the outcome for one new student: Exam1 = 70, Exam2 = 65.
# LR was fitted on a DataFrame, so the sample is wrapped in a DataFrame that
# reuses the training columns; passing a bare list makes scikit-learn (>= 1.0)
# warn that "X does not have valid feature names".
# Assumes X's columns are Exam1 then Exam2, exactly as the positional list did.
y_test = LR.predict(pd.DataFrame([[70, 65]], columns=X.columns))
# predict returns a one-element array; index it so the comparison is a plain
# scalar instead of relying on the truthiness of an array.
print('passed' if y_test[0] == 1 else 'failed')
#%%
# Second way to evaluate the model: derive the decision boundary so it can be
# inspected visually on a plot.
# The boundary is where theta0 + theta1*X1 + theta2*X2 = 0; solve it for X2.
theta0 = LR.intercept_
theta1, theta2 = LR.coef_[0, 0], LR.coef_[0, 1]
X2_new = -(theta0 + theta1 * X1) / theta2
print(X2_new)
#%%
# Plot both classes together with the first-order (straight-line) boundary.
fig3 = plt.figure()
passed = plt.scatter(data.loc[mask, 'Exam1'], data.loc[mask, 'Exam2'])
failed = plt.scatter(data.loc[~mask, 'Exam1'], data.loc[~mask, 'Exam2'])
plt.plot(X1, X2_new)
plt.xlabel("Exam1")
plt.ylabel("Exam2")
plt.title("Exam1-Exam2")
plt.legend([passed, failed], ['passed', 'failed'])
plt.show()
#%% md
# 用二阶边界函数
#%%
# Second-order terms: the two squares and the cross product of the scores.
X1_2 = X1 * X1
X2_2 = X2 * X2
X1_X2 = X1 * X2
#%%
# Assemble the data: first- and second-order terms as one feature table.
# Column order matters; the boundary computation reads coefficients by position.
X_new = pd.DataFrame(
    dict(X1=X1, X2=X2, X1_2=X1_2, X2_2=X2_2, X1_X2=X1_X2)
)
#%%
# Train a new model on the second-order feature table.
LR2 = LogisticRegression()
LR2.fit(X=X_new, y=y)
#%%
# Predict on the same data the model was fitted on and compute the accuracy.
y2_predict = LR2.predict(X=X_new)
accuracy2 = accuracy_score(y_true=y, y_pred=y2_predict)
accuracy2
#%%
# Sort the Exam1 values first; otherwise the line plot would connect the
# boundary points in row order and zig-zag across the figure.
X1_new = X1.sort_values(ascending=True)
X1_new
#%%
# Decision boundary of the second-order model.
# Coefficients follow X_new's column order: X1, X2, X1^2, X2^2, X1*X2.
theta0 = LR2.intercept_
theta1, theta2, theta3, theta4, theta5 = LR2.coef_[0]
print(theta0)
# The boundary
#   theta0 + theta1*X1 + theta2*X2 + theta3*X1^2 + theta4*X2^2 + theta5*X1*X2 = 0
# is a quadratic a*X2^2 + b*X2 + c = 0 in X2 for each X1 value.
a = theta4
b = theta5 * X1_new + theta2
c = theta0 + theta1 * X1_new + theta3 * X1_new * X1_new
# Only the '+' root of the quadratic formula is taken.
X2_new_boundary = (-b + np.sqrt(b * b - 4 * a * c)) / (2 * a)
X2_new_boundary
#%%
# Plot both classes together with the second-order (curved) boundary.
fig4 = plt.figure()
passed = plt.scatter(data.loc[mask, 'Exam1'], data.loc[mask, 'Exam2'])
failed = plt.scatter(data.loc[~mask, 'Exam1'], data.loc[~mask, 'Exam2'])
plt.plot(X1_new, X2_new_boundary)
plt.xlabel("Exam1")
plt.ylabel("Exam2")
plt.title("Exam1-Exam2")
plt.legend([passed, failed], ['passed', 'failed'])
plt.show()
#%%