Python实现一元线性回归

总目录:Python数据分析整理

本文基本上是对原作者文章的整理:修改了其中在我当前版本环境下无法运行的部分,并额外增加了模型保存部分,整理后便于日后使用。

原作者大佬文章地址:Python实现多元线性回归


数据分析

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from pandas import DataFrame, Series
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
import pandas as pd

# Select the SimHei font for sans-serif text; the figure title below is Chinese.
plt.rcParams['font.sans-serif'] = ['SimHei']

# Sample data: 20 observations of study time and the exam score obtained.
study_hours = [0.50, 0.75, 1.00, 1.25, 1.50, 1.75, 1.75,
               2.00, 2.25, 2.50, 2.75, 3.00, 3.25, 3.50,
               4.00, 4.25, 4.50, 4.75, 5.00, 5.50]
exam_scores = [10, 22, 13, 43, 20, 22, 33, 50, 62,
               48, 55, 75, 62, 73, 81, 76, 64, 82, 90, 93]
examDict = {'学习时间': study_hours, '分数': exam_scores}

# Hold the sample as a DataFrame so columns can be addressed by name.
examDf = DataFrame(examDict)
hours_col = examDf['学习时间']
score_col = examDf['分数']

# Scatter plot of the whole data set.
plt.scatter(hours_col, score_col, color='b', label="Exam Data")
plt.title('整体展示')
plt.xlabel("Hours")
plt.ylabel("Score")
# 'upper left' is the named form of legend location code 2.
plt.legend(loc='upper left')
# Save before show(): the figure is written to disk, then displayed.
plt.savefig('整体展示.jpg')
plt.show()

# Pairwise correlation matrix of the two columns.
# Rule of thumb used by the article for the coefficient's magnitude:
#   0   ~ 0.3  weak correlation
#   0.3 ~ 0.6  moderate correlation
#   0.6 ~ 1    strong correlation
rDf = examDf.corr()
print(rDf)

在这里插入图片描述

数据拆分

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from pandas import DataFrame, Series
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
import pandas as pd

# Select the SimHei font for sans-serif text; the figure title below is Chinese.
plt.rcParams['font.sans-serif'] = ['SimHei']

examDict = {'学习时间': [0.50, 0.75, 1.00, 1.25, 1.50, 1.75, 1.75,
                     2.00, 2.25, 2.50, 2.75, 3.00, 3.25, 3.50, 4.00, 4.25, 4.50, 4.75, 5.00, 5.50],
            '分数': [10, 22, 13, 43, 20, 22, 33, 50, 62,
                   48, 55, 75, 62, 73, 81, 76, 64, 82, 90, 93]}

# Convert the dict into a DataFrame.
examDf = DataFrame(examDict)

# exam_X is the sample feature (study time), exam_Y the sample label (score).
exam_X = examDf['学习时间']
exam_Y = examDf['分数']

# Split the data set into a training part and a test part.
# X_train / X_test are the training / test features and Y_train / Y_test the
# matching labels. train_size=0.8 is the fraction kept for training;
# random_state=5 is passed so the split is seeded.
X_train, X_test, Y_train, Y_test = train_test_split(exam_X, exam_Y, train_size=0.8, random_state=5)

# Report the shapes of the original, training and test features.
print("X:原始数据特征:", exam_X.shape,
      ",训练数据特征:", X_train.shape,
      ",测试数据特征:", X_test.shape)

# Report the label shapes; the "Y" prefix mirrors the "X" prefix used above
# (the original message was missing it).
print("Y:原始数据标签:", exam_Y.shape,
      ",训练数据标签:", Y_train.shape,
      ",测试数据标签:", Y_test.shape)

# Scatter plot: training points in blue, test points in red.
plt.scatter(X_train, Y_train, color="blue", label="train data")
plt.scatter(X_test, Y_test, color="red", label="test data")

# Legend (location code 2), axis labels and title.
plt.legend(loc=2)
plt.xlabel("Hours")
plt.ylabel("Score")
plt.title("数据拆分")
plt.savefig("数据拆分.jpg")
# Display the figure.
plt.show()

在这里插入图片描述

数据建模

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from pandas import DataFrame, Series
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
import pandas as pd

# Select the SimHei font for sans-serif text; the figure title below is Chinese.
plt.rcParams['font.sans-serif'] = ['SimHei']

examDict = {'学习时间': [0.50, 0.75, 1.00, 1.25, 1.50, 1.75, 1.75,
                     2.00, 2.25, 2.50, 2.75, 3.00, 3.25, 3.50, 4.00, 4.25, 4.50, 4.75, 5.00, 5.50],
            '分数': [10, 22, 13, 43, 20, 22, 33, 50, 62,
                   48, 55, 75, 62, 73, 81, 76, 64, 82, 90, 93]}

# Convert the dict into a DataFrame.
examDf = DataFrame(examDict)

# Feature column (study time) and label column (score).
exam_X = examDf['学习时间']
exam_Y = examDf['分数']

# Split the data set into a training part and a test part.
X_train, X_test, Y_train, Y_test = train_test_split(exam_X, exam_Y, train_size=0.8, random_state=5)

model = LinearRegression()

# The model needs a two-dimensional feature array to fit, but there is only
# one feature here, so fitting the 1-D Series directly fails. reshape(-1, 1)
# produces a single column and lets the row count be inferred from the
# array's size.
X_train = X_train.values.reshape(-1, 1)
X_test = X_test.values.reshape(-1, 1)

# Fit the model on the training data.
model.fit(X_train, Y_train)

a = model.intercept_  # intercept
b = model.coef_  # regression coefficients (array with one entry per feature)
print("最佳拟合线:截距", a, ",回归系数:", b)
# Use the scalar slope b[0] so the equation is not printed with the array's
# square brackets (previously "Y = [slope] X + intercept").
print(f'Y = {b[0]} X + {a}')

# Predictions of the fitted model on the training features.
y_train_pred = model.predict(X_train)

# Draw the best-fit line through the training predictions.
plt.plot(X_train, y_train_pred, color='black', linewidth=1, label="best line")

# Scatter the training points (blue) and the test points (red).
plt.scatter(X_train, Y_train, color='b', label="train data")
plt.scatter(X_test, Y_test, color='r', label="test data")

# Legend placed with location code 2.
plt.legend(loc=2)

# Axis labels and title.
plt.xlabel("Hours")
plt.ylabel("Score")
plt.title('线性回归')

# Save the figure, then display it.
plt.savefig("best_line.jpg")
plt.show()

# Score on the held-out test data: the closer to 1, the better the fit;
# the closer to 0, the worse.
score = model.score(X_test, Y_test)
print(score)

在这里插入图片描述

模型保存

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from pandas import DataFrame, Series
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
import pandas as pd
import pickle

# Sample data: 20 observations of study time and the exam score obtained.
study_hours = [0.50, 0.75, 1.00, 1.25, 1.50, 1.75, 1.75,
               2.00, 2.25, 2.50, 2.75, 3.00, 3.25, 3.50,
               4.00, 4.25, 4.50, 4.75, 5.00, 5.50]
exam_scores = [10, 22, 13, 43, 20, 22, 33, 50, 62,
               48, 55, 75, 62, 73, 81, 76, 64, 82, 90, 93]
examDict = {'学习时间': study_hours, '分数': exam_scores}

# Hold the sample as a DataFrame and pick out the feature / label columns.
examDf = DataFrame(examDict)
exam_X = examDf['学习时间']
exam_Y = examDf['分数']

# Seeded 80/20 split into training and test parts.
X_train, X_test, Y_train, Y_test = train_test_split(
    exam_X, exam_Y, train_size=0.8, random_state=5
)

# Turn the single feature into a one-column 2-D array, as required for fitting.
X_train = X_train.values.reshape(-1, 1)
X_test = X_test.values.reshape(-1, 1)

# Create the estimator and fit it in one step (fit() returns the estimator).
model = LinearRegression().fit(X_train, Y_train)

# Persist the fitted model to disk with pickle.
with open('yy_model.pickle', 'wb') as model_file:
    pickle.dump(model, model_file)

模型调用

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from pandas import DataFrame, Series
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
import pandas as pd
import pickle

# Select the SimHei font for sans-serif text; the figure title below is Chinese.
plt.rcParams['font.sans-serif'] = ['SimHei']

examDict = {'学习时间': [0.50, 0.75, 1.00, 1.25, 1.50, 1.75, 1.75,
                     2.00, 2.25, 2.50, 2.75, 3.00, 3.25, 3.50, 4.00, 4.25, 4.50, 4.75, 5.00, 5.50],
            '分数': [10, 22, 13, 43, 20, 22, 33, 50, 62,
                   48, 55, 75, 62, 73, 81, 76, 64, 82, 90, 93]}

# Convert the dict into a DataFrame.
examDf = DataFrame(examDict)


# Load the previously saved model. The context manager closes the file once
# unpickling finishes (the original left the handle open).
# NOTE: pickle.load can execute arbitrary code; only load files you trust.
with open('yy_model.pickle', 'rb') as model_file:
    model = pickle.load(model_file)

# Predict on the whole data set; the feature is reshaped into one column.
x_test = examDf['学习时间'].values.reshape(-1, 1)
y_test = examDf['分数']
y_pred = model.predict(x_test)

# Predicted scores in red, actual scores in blue.
plt.scatter(examDf['学习时间'], y_pred, color='r', label="pred data")
plt.scatter(examDf['学习时间'], y_test, color='b', label="test data")

# Line through the predictions, i.e. the fitted regression line.
plt.plot(examDf['学习时间'], y_pred, color='black', linewidth=1, label="best line")

# Legend placed with location code 2.
plt.legend(loc=2)

# Axis labels and title.
plt.xlabel("Hours")
plt.ylabel("Score")
plt.title('模型调用展示')

# Save the figure before displaying it.
plt.savefig("use_model.jpg")

# Score of the loaded model on the full data set.
score = model.score(x_test, y_test)
print(score)
plt.show()

在这里插入图片描述

  • 12
    点赞
  • 113
    收藏
    觉得还不错? 一键收藏
  • 1
    评论
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值