机器学习之逻辑回归 3.4实战（一）

最新推荐文章于 2023-06-27 15:01:08 发布

jy_mx

最新推荐文章于 2023-06-27 15:01:08 发布

阅读量206

点赞数

分类专栏：人工智能程序设计　文章标签：机器学习、逻辑回归、人工智能

本文链接：https://blog.csdn.net/jy_mx/article/details/129923251

版权

人工智能程序设计专栏收录该内容

4 篇文章 0 订阅

订阅专栏

实战（一）：考试通过预测

1、基于exam_data.csv数据，建立逻辑回归模型，评估模型表现；
2、预测Exam1=75、Exam2=60时，该同学能否通过Exam3；
3、建立二阶边界函数，重复任务1、2。

import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import mean_squared_error,r2_score
from sklearn.metrics import accuracy_score
# Load the exam data set into a DataFrame.
data = pd.read_csv(r'D:/tencent/qicq/exam_data.csv')
data.head()

# Unlabelled scatter plot of the two exam scores.
fig1 = plt.figure()
exam1_scores = data['Exam1']
exam2_scores = data['Exam2']
plt.scatter(exam1_scores, exam2_scores)
plt.xlabel('Exam1')
plt.ylabel('Exam2')
plt.title('Exam1-Exam2')
plt.show()

# Boolean mask selecting the rows whose 'Pass' label equals 1.
mask = data['Pass'].eq(1)
print(mask)

# Scatter plot with the two classes drawn as separate series:
# rows with Pass == 1 first, all remaining rows second.
fig2 = plt.figure()
plt.scatter(data.loc[mask, 'Exam1'], data.loc[mask, 'Exam2'])
plt.scatter(data.loc[~mask, 'Exam1'], data.loc[~mask, 'Exam2'])
plt.xlabel('Exam1')
plt.ylabel('Exam2')
plt.title('Exam1-Exam2')
plt.show()

# The label is the 'Pass' column; the features are every other column.
y = data['Pass']
X = data.drop(columns=['Pass'])

y.head()
X.head()
print(X.shape, y.shape, type(X), type(y))

# Create the logistic regression model and fit it on the full data set.
LR = LogisticRegression().fit(X, y)

# Predict on the same data the model was trained on.
y_predict = LR.predict(X)
print(y_predict)

# Training accuracy: fraction of rows whose prediction matches the label.
accuracy = accuracy_score(y, y_predict)
print(f'accuracy:{accuracy}\n')

# Task 2: predict whether a student with Exam1 = 75 and Exam2 = 60 passes.
# LR was fitted on the DataFrame X, so the sample is wrapped in a DataFrame
# with the same column labels; passing a bare list makes scikit-learn warn
# that the input has no valid feature names.
y_test = LR.predict(pd.DataFrame([[75, 60]], columns=X.columns))
print(y_test)
# predict returns an array with one entry per sample; index the single entry
# explicitly instead of relying on the truth value of a one-element array.
print('passed' if y_test[0] == 1 else 'failed')

# Parameters of the fitted linear decision boundary
#     theta0 + theta1 * Exam1 + theta2 * Exam2 = 0
theta0 = LR.intercept_
theta1, theta2 = LR.coef_[0, 0], LR.coef_[0, 1]
print(theta0, theta1, theta2)

X1 = data['Exam1']
X2 = data['Exam2']
X1.head()

# Solve the boundary equation for Exam2 at every observed Exam1 value.
X2_new = -(theta0+theta1*X1)/theta2
print(X2_new)

# The boundary line on its own.
fig3 = plt.figure()
plt.plot(X1, X2_new)
plt.show()

# The boundary line drawn over the labelled scatter plot.
fig4 = plt.figure()
plt.scatter(X1[mask], X2[mask])
plt.scatter(X1[~mask], X2[~mask])
plt.plot(X1, X2_new)
plt.xlabel('Exam1')
plt.ylabel('Exam2')
plt.title('Exam1-Exam2')
plt.show()

二阶边界函数

import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import mean_squared_error,r2_score
from sklearn.metrics import accuracy_score
# Load the exam data set and pull out the two scores and the label.
data = pd.read_csv(r'D:/tencent/qicq/exam_data.csv')
data.head()
X1, X2, y = data['Exam1'], data['Exam2'], data['Pass']

# Second-order terms: the two squares and the cross product.
X1_2 = X1*X1
X2_2 = X2*X2
X1_X2 = X1*X2

# Feature table in the column order X1, X2, X1_2, X2_2, X1_X2.
X_new = pd.DataFrame(
    {'X1': X1, 'X2': X2, 'X1_2': X1_2, 'X2_2': X2_2, 'X1_X2': X1_X2}
)
print(X_new)

# Create the second-order model and fit it on the extended feature table.
LR2 = LogisticRegression()
LR2.fit(X_new,y)

# Task 1 (repeated): accuracy of the second-order model on the training data.
y2_predict = LR2.predict(X_new)
accuarcy2 = accuracy_score(y,y2_predict)
print(f'accuarcy2 : {accuarcy2}')

# Task 2 (repeated): predict the Exam1 = 75, Exam2 = 60 student with the
# second-order model. The sample needs the same five features, in the same
# column order as X_new: X1, X2, X1^2, X2^2, X1*X2.
y2_test = LR2.predict(
    pd.DataFrame([[75, 60, 75*75, 60*60, 75*60]], columns=X_new.columns)
)
print(y2_test)
print('passed' if y2_test[0] == 1 else 'failed')

# Fitted intercept and the five coefficients, in X_new column order.
# The names (theat*, accuarcy2) are kept as spelled because the boundary
# computation that follows refers to them.
theat0 = LR2.intercept_
theat1,theat2,theat3,theat4,theat5 = LR2.coef_[0][0],LR2.coef_[0][1],LR2.coef_[0][2],LR2.coef_[0][3],LR2.coef_[0][4]
print(theat0,theat1,theat2,theat3,theat4,theat5)

# Sort Exam1 so the boundary curve is drawn left to right as one line.
X1_new = X1.sort_values()
print(X1,X1_new)

# The second-order boundary
#     theat0 + theat1*X1 + theat2*X2 + theat3*X1^2 + theat4*X2^2 + theat5*X1*X2 = 0
# is a quadratic in X2, a*X2^2 + b*X2 + c = 0, with the coefficients below.
c = theat0 + theat1*X1_new+theat3*X1_new*X1_new
b = theat5*X1_new+theat2
a = theat4

# Only the '+' root of the quadratic formula is used; np.sqrt gives NaN
# wherever the discriminant is negative.
discriminant = b*b-4*a*c
X2_new_boundary = (-b+np.sqrt(discriminant))/(2*a)
print(f'X2_new_boundary : {X2_new_boundary}')

# The boundary curve on its own, as a green line.
fig4 = plt.figure()
plt.plot(X1_new, X2_new_boundary, '-g')
plt.show()

# Labelled scatter plot with the second-order decision boundary on top.
mask = y == 1
fig5 = plt.figure()
passed = plt.scatter(data.loc[:,'Exam1'][mask],data.loc[:,'Exam2'][mask])
failed = plt.scatter(data.loc[:,'Exam1'][~mask],data.loc[:,'Exam2'][~mask])
# Draw the boundary once, as a magenta line. The original plotted the same
# curve twice, and the first copy was hidden under this one.
plt.plot(X1_new,X2_new_boundary,'-m')
plt.title('Exam1-Exam2')
plt.xlabel('Exam1')
plt.ylabel('Exam2')
# Explicit handles keep the boundary line out of the legend.
plt.legend((passed,failed),('passed','failed'))
plt.show()