‘2019/Jan/15/Tue 09:24:50’
机器学习100天——第二天:简单线性回归
第一步:数据预处理
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# Load the study-hours / exam-score table. The path is relative, so it is
# resolved against the directory the notebook is run from.
datasets = pd.read_csv("../datasets/studentscores.csv")
# Preview the first rows (rendered as cell output in a notebook).
datasets.head()
Hours Scores
0 2.5 21
1 5.1 47
2 3.2 27
3 8.5 75
4 3.5 30
# Feature matrix: slice the first column (Hours) with a range so the result
# stays two-dimensional, one row per sample.
X = datasets.iloc[:, :1].to_numpy()
# Target vector: the second column (Scores) as a one-dimensional array.
Y = datasets.iloc[:, 1].to_numpy()
# Echo both arrays (rendered as cell output in a notebook).
X, Y
(array([[2.5],
[5.1],
[3.2],
[8.5],
[3.5],
[1.5],
[9.2],
[5.5],
[8.3],
[2.7],
[7.7],
[5.9],
[4.5],
[3.3],
[1.1],
[8.9],
[2.5],
[1.9],
[6.1],
[7.4],
[2.7],
[4.8],
[3.8],
[6.9],
[7.8]]),
array([21, 47, 27, 75, 30, 20, 88, 60, 81, 25, 85, 62, 41, 42, 17, 95, 30,
24, 67, 69, 30, 54, 35, 76, 86], dtype=int64))
from sklearn.model_selection import train_test_split
# Hold out a quarter of the samples for testing; the fixed random_state
# keeps the split reproducible from run to run.
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.25,
    random_state=0,
)
第二步:训练简单线性回归模型并预测结果
from sklearn.linear_model import LinearRegression
# Fit the model on the training split. fit() returns the estimator itself,
# so construction and training can be chained into one expression.
regressor = LinearRegression().fit(X_train, Y_train)
# Predict the targets for the held-out test inputs.
Y_pred = regressor.predict(X_test)
第三步:结果可视化
训练集结果可视化
# Training-set figure: observed targets against the fitted model.
# Scatter plot of the observed training samples.
plt.scatter(X_train, Y_train, color='red', label='Observed')
# Line plot of the model's predictions for the same training inputs.
plt.plot(X_train, regressor.predict(X_train), 'bo-', label='Predicted')
# Title, axis labels and legend: without them this figure cannot be told
# apart from the test-set figure.
plt.title('Hours vs. Scores (training set)')
plt.xlabel('Hours')
plt.ylabel('Scores')
plt.legend()
plt.show()
测试集结果可视化
# Test-set figure: observed targets against the model's predictions.
# Scatter plot of the observed test samples.
plt.scatter(X_test, Y_test, color='red', label='Observed')
# Line plot of the predictions made for the test inputs.
plt.plot(X_test, Y_pred, 'bo-', label='Predicted')
# Title, axis labels and legend: without them this figure cannot be told
# apart from the training-set figure.
plt.title('Hours vs. Scores (test set)')
plt.xlabel('Hours')
plt.ylabel('Scores')
plt.legend()
plt.show()