简介
读取葡萄酒质量的 CSV 文件(linear.csv),用线性回归探究葡萄酒质量(Quality)与酒龄(Age)之间的关系
代码
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
# Load the comma-separated data set into a 2-D float array.
data = np.genfromtxt('linear.csv', delimiter=',')

# Overview scatter plot of every sample before any train/test split.
# Row 0 is skipped everywhere via data[1:] -- presumably a header row that
# genfromtxt could not parse as numbers (confirm against linear.csv).
# Column 0 is plotted on the "Age" axis and column 1 on the "Quality" axis.
plt.scatter(data[1:, 0], data[1:, 1])
# The original title said "(test set)", but this figure shows the full data
# set: the very same slices are only split into train/test afterwards.
plt.title('Age VS Quality (all data)')
plt.xlabel('Age')
plt.ylabel('Quality')
plt.show()
# Randomly hold out 30% of the samples for testing; the rest is used to fit.
x_train, x_test, y_train, y_test = train_test_split(
    data[1:, 0],
    data[1:, 1],
    test_size=0.3,
)

# LinearRegression needs a 2-D feature matrix, so turn each 1-D feature
# vector into a single-column array (n rows, 1 column).
x_train = x_train.reshape(-1, 1)
x_test = x_test.reshape(-1, 1)

# Fit an ordinary least-squares line on the training samples.
model = LinearRegression().fit(x_train, y_train)
# Model predictions for the training inputs (drawn as the regression line).
train_predictions = model.predict(x_train)

# Training samples in blue with the fitted line overlaid in red.
plt.scatter(x_train, y_train, c='b')
plt.plot(x_train, train_predictions, color='r', lw=5)
plt.xlabel('Age')
plt.ylabel('Quality')
plt.title('Age VS Quality(Training)')
plt.show()
# Scatter plot of the held-out test samples (blue).
# Fix: the original call was missing its closing parenthesis, which made the
# whole file fail to parse with a SyntaxError.
plt.scatter(x_test, y_test, c='b')
# Overlay the model's predictions for the test inputs as a red line.
plt.plot(x_test, model.predict(x_test), c='r', linewidth=5)
plt.title('Age VS Quality(Testing)')
plt.xlabel('Age')
plt.ylabel('Quality')
plt.show()