机器学习-逻辑回归（二阶）-考试通过问题

Yu_Chen__

已于 2023-06-28 14:45:58 修改

阅读量137

点赞数

文章标签：机器学习逻辑回归 python Powered by 金山文档

于 2023-02-28 21:26:17 首次发布

本文链接：https://blog.csdn.net/Yu_Chen__/article/details/129270461

版权

import pandas as  pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

if __name__ == '__main__':
    data = pd.read_csv("D:\\GoogleWebDown\\testdata.csv")
    # print(data)
    # figure = plt.figure()
    # plt.scatter(data.loc[:, 'Exam1'], data.loc[:, 'Exam2'])
    # plt.title('Exam1-Exam2')
    # plt.xlabel('Exam1')
    # plt.ylabel('Exam2')
    # # plt.show()
    # # pass=1赋值给PASS变量
    mask = data.loc[:, 'Pass'] == 1
    # # print(mask)
    # figure2 = plt.figure()
    # passed = plt.scatter(data.loc[:, 'Exam1'][mask], data.loc[:, 'Exam2'][mask])
    # failed = plt.scatter(data.loc[:, 'Exam1'][~mask], data.loc[:, 'Exam2'][~mask])
    # plt.title('first-Exam1-Exam2')
    # plt.xlabel('Exam1')
    # plt.ylabel('Exam2')
    # plt.legend((passed, failed), ('passed', 'failed'))
    #plt.show()

    # 将Exam1和Exam2作为x变量，Pass作为y变量，调用sklearn的逻辑回归模型，训练数据
    x = data.drop(['Pass'], axis=1)
    y = data.loc[:, 'Pass']

    x1 = data.loc[:, 'Exam1']
    x2 = data.loc[:, 'Exam2']

    LG = LogisticRegression()
    LG.fit(x,y)
    y_predict = LG.predict(x)
    #print(y_predict)
    accuracy_score_1 = accuracy_score(y,y_predict)
    print('accuracy_score_1:\n',accuracy_score_1)
    # theta0 + theta1 * x1 + theta2 * x2 = 0
    theta0 = LG.intercept_
    theta1,theta2 = LG.coef_[0][0],LG.coef_[0][1]
    #print(theta0,theta1,theta2)
    x2_new = -(theta0+theta1*x1) / theta2
    print(x2_new)
    fih3 = plt.figure()
    passed = plt.scatter(data.loc[:, 'Exam1'][mask], data.loc[:, 'Exam2'][mask])
    failed = plt.scatter(data.loc[:, 'Exam1'][~mask], data.loc[:, 'Exam2'][~mask])
    plt.plot(x1,x2_new)
    plt.title('Exam1-Exam2')
    plt.xlabel('Exam1')
    plt.ylabel('Exam2')
    plt.legend((passed, failed), ('passed', 'failed'))
    plt.show()

    # 预测Exam1 = 75，Exam2 = 60时，该同学能否通过Exam3
    # y_test = LG.predict([[75,50]])
    # print('Pass' if y_test==1 else 'Failed')
    # 二阶边界函数
    X1_X1 = x1 * x1
    # print(X1_X1)
    X2_X2 = x2 * x2
    X1_X2 = x1 * x2
    X_new = {'X1': x1, 'X2': x2, 'X1_X1': X1_X1, 'X2_X2': X2_X2, 'X1_X2': X1_X2}
    X_new = pd.DataFrame(X_new)
    # print(X_new)
    LR2 = LogisticRegression()
    LR2.fit(X_new, y)
    y_predict2 = LR2.predict(X_new)
    score = accuracy_score(y, y_predict2)
    print('二阶score:\n',score)
    print('before sort X1_new:\n',x1)
    X1_new = np.sort(x1)
    print('after sort X1_new:\n', X1_new)
    theta0 = LR2.intercept_
    theta1, theta2, theta3, theta4, theta5 = LR2.coef_[0][0], LR2.coef_[0][1], LR2.coef_[0][2], LR2.coef_[0][3], \
                                             LR2.coef_[0][4]
    a = theta4
    b = theta5 * X1_new + theta2
    c = theta0 + theta1 * X1_new + theta3 * X1_new*X1_new
    X2_new_boundary = (-b + np.sqrt(b * b - 4 * a * c)) / (2 * a)
    print(X2_new_boundary)

    figure11 = plt.figure()
    passed = plt.scatter(data.loc[:, 'Exam1'][mask], data.loc[:, 'Exam2'][mask])
    failed = plt.scatter(data.loc[:, 'Exam1'][~mask], data.loc[:, 'Exam2'][~mask])
    plt.plot(X1_new, X2_new_boundary)
    plt.title('Exam1-Exam2-2阶')
    # plt.xlabel('Exam1')
    plt.ylabel('Exam2')
    plt.legend((passed, failed), ('passed', 'failed'))
    plt.show()

    #预测
    x11 = 75
    x22 = 50
    X11_X11 = x11 * x11
    X22_X22 = x22 * x22
    X11_X22 = x11 * x22
    X_new2 = {'X1': x11, 'X2': x22, 'X1_X1': X11_X11, 'X2_X2': X22_X22, 'X1_X2': X11_X22}
    X_new2 = pd.DataFrame(X_new2,index=[0])
    print('X_new2:::',X_new2)

    y_test = LR2.predict(X_new2)
    print('Pass' if y_test==1 else 'Failed')