# 演示建模过程 (demonstration of the modeling process):
import collections # collections 是python内建的一个集合模块
import pandas as pd
import matplotlib.pyplot as plt
# 1. Build the data set
# Two parallel 20-element columns: working time and the matching performance
# value (the plot further down labels them 'Hours' and 'Scores').
JobDict = {
    '工作时间': [
        0.5, 0.75, 1.0, 1.25, 1.5, 1.75, 1.75, 2.0, 2.25, 2.5,
        2.75, 3.0, 3.25, 3.5, 4.0, 4.25, 4.5, 4.75, 5.0, 5.5,
    ],
    '业绩': [
        10, 22, 13, 43, 20, 22, 33, 50, 62, 48,
        55, 75, 62, 73, 81, 76, 64, 82, 90, 93,
    ],
}
# Wrap the mapping in an OrderedDict so the column order is explicit when the
# DataFrame is constructed from it.
Job_orderDict = collections.OrderedDict(JobDict.items())
Job_Df = pd.DataFrame(data=Job_orderDict)
Job_Df.head()  # preview of the first rows; only rendered in an interactive session

# 2. Prepare the data
# Pull the feature column and the label column out of the frame as Series
# (.loc selects by label: every row, one named column).
Job_X, Job_Y = (Job_Df.loc[:, column] for column in ('工作时间', '业绩'))
# 3. Analyse the data
# Draw a blue scatter plot of the feature (x axis) against the label (y axis)
# and name both axes.
plt.scatter(
    x=Job_X,
    y=Job_Y,
    color='b',
)
plt.xlabel(xlabel='Hours')
plt.ylabel(ylabel='Scores')
# 4 构建训练模型
from sklearn.model_selection import train_test_split # train_test_split(),随机划分训练集和测试集的函数
# (1)建立训练数据和测试数据
x_train,x_test,y_train,y_test = train_test_split(Job_X,Job_Y,train_