线性回归公式: $\hat{y} = w x + b$ (本例中 $x$ 为 weight, $\hat{y}$ 为预测的 mpg)
import pandas as pd
import matplotlib.pyplot as plt
# The raw data file contains values only (no header row), so the column
# names are supplied explicitly.
columns = [
    "mpg",
    "cylinders",
    "displacement",
    "horsepower",
    "weight",
    "acceleration",
    "model year",
    "origin",
    "car name",
]
# The path is a raw string: the previous literal contained "\m", an invalid
# escape sequence that newer Python versions warn about. The resulting path
# is unchanged.
# sep=r"\s+" splits fields on runs of whitespace; it is the documented
# replacement for the deprecated delim_whitespace=True.
cars = pd.read_table(
    r"D:\test\machineLearning\auto-mpg.data",
    sep=r"\s+",
    names=columns,
)
# Preview the first two rows (displayed when run as a notebook cell).
cars.head(2)
| | mpg | cylinders | displacement | horsepower | weight | acceleration | model year | origin | car name |
---|---|---|---|---|---|---|---|---|---|
0 | 18.0 | 8 | 307.0 | 130.0 | 3504.0 | 12.0 | 70 | 1 | chevrolet chevelle malibu |
1 | 15.0 | 8 | 350.0 | 165.0 | 3693.0 | 11.5 | 70 | 1 | buick skylark 320 |
# Scatter plot of mpg against vehicle weight, drawn in the first slot of a
# 2-row, 1-column subplot grid.
fig = plt.figure()
ax1 = fig.add_subplot(211)
cars.plot.scatter(x="weight", y="mpg", ax=ax1)
plt.show()
import sklearn
from sklearn.linear_model import LinearRegression
# Train a linear regression model: the input feature is vehicle weight and
# the target is mpg (distance travelled per gallon).
features = cars[["weight"]]
target = cars["mpg"]
lr = LinearRegression()
lr.fit(features, target)
# For convenience the model is evaluated on the same rows it was trained
# on, rather than on separate test data.
prediction = lr.predict(features)
# Show the first five predictions next to the first five observed values.
print(prediction[:5])
print(target[:5])
[ 19.41852276 17.96764345 19.94053224 19.96356207 19.84073631]
0 18.0
1 15.0
2 18.0
3 16.0
4 17.0
Name: mpg, dtype: float64
# Overlay the observed mpg values (red) and the model's predictions (blue),
# both plotted against vehicle weight.
weight = cars["weight"]
plt.scatter(weight, cars["mpg"], c="red")
plt.scatter(weight, prediction, c="blue")
plt.show()
from sklearn.metrics import mean_squared_error
# Mean squared error between the observed mpg values and the predictions.
mse = mean_squared_error(y_true=cars["mpg"], y_pred=prediction)
print(mse)
18.7809397346
mse ** 0.5  # square root of the MSE, i.e. the root-mean-square error
4.3336981591509574