# 自变量 —— TV:电视类广告支出;radio:广播类广告支出;newspaper:报纸类广告支出
# 因变量 —— sales:商品的销量
# 求解:以上三个因素对于商品销量的回归模型
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
# Load the advertising dataset; header=0 takes the column names from the
# first row of the CSV.
csv_path = "Advertising.csv"
data = pd.read_csv(csv_path, header=0)
# Preview of the first rows (only displayed when run interactively).
data.head()
# 数据理解:
# Plot sales against each of the three advertising-spend columns, with a
# fitted regression line per panel (kind='reg').
# `height` is used instead of `size`: seaborn renamed the keyword in 0.9 and
# later removed `size`, so passing it raises TypeError on current releases.
sns.pairplot(
    data,
    x_vars=['TV', 'radio', 'newspaper'],
    y_vars='sales',
    height=7,
    aspect=0.8,
    kind='reg',
)
plt.show()
# Build the dataset.
# Features: every column except 'Number' and the target column 'sales'.
X = data.drop(columns=['Number', 'sales'])
X.head()
# Target: the 'sales' column.
sales = data['sales']
type(sales)
# Split into training and test sets, then fit an ordinary linear regression.
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression

# 30% of the rows are held out for testing; random_state pins the shuffle so
# the split is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    sales,
    test_size=0.3,
    random_state=1,
)

linreg = LinearRegression()
linreg.fit(X_train, y_train)

# Fitted intercept, then one coefficient per feature column.
print(linreg.intercept_)
print(linreg.coef_)

# R^2 on the test set (value is only shown when run interactively).
linreg.score(X_test, y_test)
# 模型预测
# Predict on the held-out test set. The metrics below compare these
# predictions with y_test; previously `y_pred` was never assigned, so the
# script stopped with a NameError at the first metric call.
y_pred = linreg.predict(X_test)

# Coefficient of determination (R^2) via the estimator's own score method.
print("测试集判定系数:", linreg.score(X_test, y_test))

# Mean squared error between the true and predicted sales.
from sklearn.metrics import mean_squared_error
print("测试集均方误差:", mean_squared_error(y_test, y_pred))

# Coefficient of determination again, this time via sklearn.metrics.
# The printed label is corrected: it used to say "mean squared error" while
# the value printed is R^2.
from sklearn import metrics
print("测试集判定系数:", metrics.r2_score(y_test, y_pred))