总目录:Python数据分析整理
本文基本是对原作者文章的整理:修改了一些在我当前环境版本下跑不通的代码,并额外增加了模型保存部分,整理后供日后查阅使用。原作者大佬文章地址:《Python实现多元线性回归》
数据分析
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from pandas import DataFrame, Series
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
import pandas as pd
# Select SimHei as the sans-serif font (the figure title below is Chinese text).
plt.rcParams.update({'font.sans-serif': ['SimHei']})

# Sample data set: two equally long columns, paired by position.
examDict = {
    '学习时间': [
        0.50, 0.75, 1.00, 1.25, 1.50, 1.75, 1.75, 2.00, 2.25, 2.50,
        2.75, 3.00, 3.25, 3.50, 4.00, 4.25, 4.50, 4.75, 5.00, 5.50,
    ],
    '分数': [
        10, 22, 13, 43, 20, 22, 33, 50, 62, 48,
        55, 75, 62, 73, 81, 76, 64, 82, 90, 93,
    ],
}

# Wrap the raw dict in a DataFrame so columns can be addressed by name.
examDf = DataFrame(data=examDict)

# Scatter plot of the first column (x axis) against the second (y axis).
plt.scatter(
    x=examDf['学习时间'],
    y=examDf['分数'],
    color='b',
    label="Exam Data",
)

# Axis labels.
plt.xlabel("Hours")
plt.ylabel("Score")

# Legend, title; write the figure to disk before showing it on screen.
plt.legend(loc=2)
plt.title('整体展示')
plt.savefig('整体展示.jpg')
plt.show()

# Pairwise correlation coefficients between the columns.
# Rule of thumb used by the author for the absolute value:
#   0   ~ 0.3  weak correlation
#   0.3 ~ 0.6  moderate correlation
#   0.6 ~ 1    strong correlation
rDf = examDf.corr()
print(rDf)
数据拆分
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from pandas import DataFrame, Series
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
import pandas as pd
plt.rcParams['font.sans-serif'] = ['SimHei']
examDict = {
'学习时间': [0.50,