python读取mysql实现一元和多元的线性拟合

最新推荐文章于 2021-10-26 07:42:06 发布

安然烟火

最新推荐文章于 2021-10-26 07:42:06 发布

阅读量419

点赞数 1

分类专栏：数据挖掘

本文链接：https://blog.csdn.net/qq_30868737/article/details/104041600

版权

数据挖掘专栏收录该内容

27 篇文章 19 订阅

订阅专栏

python读取mysql实现一元和多元的线性拟合

一元线性方程拟合

# -*- coding:utf-8 -*-
# __author__ = "LQ"
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from DataAnalysis.TeachingModel.dbc import dbcConnect

def main(examDf,colsName):

    # 绘制散点图,examDf.jt为X轴，examDf.hk为Y轴
    plt.scatter(examDf[colsName[0]], examDf[colsName[1]], color='darkgreen', label="Exam Data")

    # 添加图的标签（x轴，y轴）
    plt.xlabel(colsName[0])  # 设置X轴标签
    plt.ylabel(colsName[1])  # 设置Y轴标签
    plt.show()  # 显示图像

    rDf = examDf.corr()  # 查看数据间的相关系数
    print("相关系数:", rDf)

    # 拆分训练集和测试集（train_test_split是存在与sklearn中的函数）
    X_train, X_test, Y_train, Y_test = train_test_split(examDf[colsName[0]], examDf[colsName[1]], train_size=0.8)
    # train为训练数据,test为测试数据,examDf为源数据,train_size 规定了训练数据的占比

    print("自变量---源数据:", examDf[colsName[0]].shape, "；  训练集:", X_train.shape, "；  测试集:", X_test.shape)
    print("因变量---源数据:", examDf[colsName[1]].shape, "；  训练集:", Y_train.shape, "；  测试集:", Y_test.shape)

    # 调用线性规划包
    model = LinearRegression()

    # 在这里加一段
    X_train = X_train.values.reshape(-1, 1)
    X_test = X_test.values.reshape(-1, 1)

    # 线性回归训练
    model.fit(X_train, Y_train)  # 调用线性回归包

    a = model.intercept_  # 截距
    b = model.coef_  # 回归系数

    # 训练数据的预测值
    y_train_pred = model.predict(X_train)
    # 绘制最佳拟合线：标签用的是训练数据的预测值y_train_pred
    plt.plot(X_train, y_train_pred, color='blue', linewidth=2, label="best line")

    # 测试数据散点图
    plt.scatter(X_train, Y_train, color='darkgreen', label="train data")
    plt.scatter(X_test, Y_test, color='red', label="test data")

    # 添加图标标签
    plt.legend(loc=2)  # 图标位于左上角，即第2象限，类似的，1为右上角，3为左下角，4为右下角
    plt.xlabel(colsName[0])  # 添加 X 轴名称
    plt.ylabel(colsName[1])  # 添加 Y 轴名称
    plt.show()  # 显示图像

    print("拟合参数:截距", a, ",回归系数：", b)
    print("最佳拟合线: Y = ", round(a, 2), "+", round(b[0], 2), "* X")  # 显示线性方程，并限制参数的小数位为两位

if __name__ == '__main__':
    sql = '''SELECT
    	total_log_num,
    	getscore
    FROM
    	t_study_behavior_bak
    	'''
    df = dbcConnect.selectDf(sql)
    colsName=['total_log_num','getscore']
    main(df,colsName)

多元线性方程拟合

# -*- coding:utf-8 -*-
# __author__ = "LQ"
import pymysql
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from pandas import DataFrame, Series
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
from DataAnalysis.TeachingModel.dbc import dbcConnect
from sklearn import preprocessing

def main(d,yName):
      # 检验数据
      print(d.describe())  # 数据描述，会显示最值，平均数等信息，可以简单判断数据中是否有异常值
      print(d[d.isnull() == True].count())  # 检验缺失值，若输出为0，说明该列没有缺失值

      # 输出相关系数，判断是否值得做线性回归模型
      print(d.corr())  # 0-0.3弱相关；0.3-0.6中相关；0.6-1强相关；

      # 拆分训练集和测试集
      X_train, X_test, Y_train, Y_test = train_test_split(d.ix[:, :5], d.getscore, train_size=0.8)
      # new_examDf.ix[:,:2]取了数据中的前两列为自变量，此处与单变量的不同

      print("自变量---源数据:", d.ix[:, :5].shape, "；  训练集:", X_train.shape, "；  测试集:", X_test.shape)
      print("因变量---源数据:", d[yName].shape, "；  训练集:", Y_train.shape, "；  测试集:", Y_test.shape)

      # 调用线性规划包
      model = LinearRegression()

      model.fit(X_train, Y_train)  # 线性回归训练

      a = model.intercept_  # 截距
      b = model.coef_  # 回归系数
      print("拟合参数:截距", a, ",回归系数：", b)
      for b0 in b:
            print(b0)
      # 显示线性方程，并限制参数的小数位为两位
      print("最佳拟合线: Y = ", round(a, 2), "+", round(b[0], 2), "* X1", "+", round(b[1], 2), "* X2", "+", round(b[2], 2), "* X3", "+", round(b[3], 2), "* X4", "+", round(b[4], 2), "* X5")

      Y_pred = model.predict(X_test)  # 对测试集数据，用predict函数预测

      plt.plot(range(len(Y_pred)), Y_pred, 'red', linewidth=2.5, label="predict data")
      plt.plot(range(len(Y_test)), Y_test, 'green', label="test data")
      plt.legend(loc=2)
      plt.show()  # 显示预测值与测试值曲线

      # 均方差
      # 查看残差平方的均值(mean square error,MSE)
      print("Mean squared error: %.2f"
            % mean_squared_error(Y_test, Y_pred))

      # Explained variance score: 1 is perfect prediction
      #  R2 决定系数（拟合优度）
      # 模型越好：r2→1
      # 模型越差：r2→0
      print('Variance score: %.2f' % r2_score(Y_test, Y_pred))

      # Plot outputs
      plt.scatter(range(len(Y_pred)), Y_test, color='black')
      plt.plot(range(len(Y_test)), Y_pred, color='blue', linewidth=3)

      plt.xticks(())
      plt.yticks(())

      plt.show()
if __name__ == '__main__':
      sql = '''SELECT
	study_document_num,
	actual_brain_num,
	actual_discuss_num,
	actual_quiz_num,
	actual_homework_num,
	getscore
	FROM
		t_study_behavior_rate
      '''
      conn=dbcConnect.dbcconnect()
      cursor = conn.cursor()
      cursor.execute(sql)
      # 获取剩余结果所有数据
      results = cursor.fetchall()
      # 获取列名
      cols = [i[0] for i in cursor.description]
      # sql内表转换pandas的DF
      df = pd.DataFrame(np.array(results), columns=cols).astype(float)
      conn.commit()
      cursor.close()
      # 归一化处理
      min_max_scaler = preprocessing.MinMaxScaler()
      df_minMax = min_max_scaler.fit_transform(df)
      examDf = pd.DataFrame(df_minMax, columns=cols).astype(float)
      yName = 'getscore'
      main(examDf,yName)

自定义方法dbcConnect
在下面博客中
https://blog.csdn.net/qq_30868737/article/details/103995174

安然烟火

关注

1
点赞
踩
1

收藏

觉得还不错? 一键收藏
0
评论
python读取mysql实现一元和多元的线性拟合

python读取mysql实现一元和多元的线性拟合一元线性方程拟合# -*- coding:utf-8 -*-# __author__ = "LQ"import matplotlib.pyplot as pltfrom sklearn.linear_model import LinearRegressionfrom sklearn.model_selection import trai...
复制链接

扫一扫

专栏目录