# 一元线性回归 — Simple (one-variable) linear regression
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score
import statsmodels.api as sm
# Load the advertising dataset and keep handles on the two columns used below.
data = pd.read_csv(r"C:\Users\yjr\Desktop\Advertising.csv")
tv_col = data['TV']
sales_col = data['sales']

# Plot the raw data first: TV spend against sales.
plt.figure(figsize=(16, 8))
plt.scatter(tv_col, sales_col, c='black')
plt.xlabel("Money spent on TV ads ")
plt.ylabel("Sales")

# Fit the linear approximation.
# reshape() gives an array with the requested shape; -1 means "infer this
# dimension from the array size":
#   reshape(1, -1) -> one row       reshape(2, -1) -> two rows
#   reshape(-1, 1) -> one column    reshape(-1, 2) -> two columns
X = tv_col.values.reshape(-1, 1)
Y = sales_col.values.reshape(-1, 1)

# Build the model; fit() returns the estimator itself, so construction and
# fitting are chained.
reg = LinearRegression().fit(X, Y)

# Y is a single-column 2-D array, so intercept_ is indexed once and coef_
# twice to reach the scalar values.
intercept = reg.intercept_[0]
slope = reg.coef_[0, 0]
print("The linear model is: Y = {:.5} + {:.5}X".format(intercept, slope))

# Visualise the fitted line on top of the observations.
# scatter() draws the individual points; plot() draws the line through the
# predicted values.
predictions = reg.predict(X)
plt.figure(figsize=(16, 8))
plt.scatter(tv_col, sales_col, c='black')
plt.plot(tv_col, predictions, c='blue', linewidth=2)
plt.xlabel("Money spent on TV ads ($)")
plt.ylabel("Sales ($)")
plt.show()
# Reference: https://www.cnblogs.com/learnbydoing/p/12190168.html
# Reference: https://zhuanlan.zhihu.com/p/107195201