# 首先建立一元线性回归模型并对其进行评估,最后绘制线性回归的拟合图形。
import matplotlib.pyplot as plt
import numpy as np
from sklearn.linear_model import LinearRegression
#%% Low-noise case: univariate linear regression on synthetic data
# Seeded generator so the synthetic data set is reproducible across runs.
rng = np.random.RandomState(10)
# 50 samples from a normal distribution (mean 100, std 100), reshaped into a
# (50, 1) column so that each row is one sample.
x = rng.normal(100, 100, 50)
x = x[:, np.newaxis]
# Ground truth is y = 1.25 * x - 20, perturbed by Gaussian noise scaled by 5.
# randn samples the standard normal distribution (mean 0, std 1).
y = 1.25 * x - 20 + 5 * rng.randn(50, 1)
model = LinearRegression(fit_intercept=True)
model.fit(x, y)
# Evaluate the fitted line on 1000 evenly spaced points spanning the data.
# ndarray.min()/max() yield scalars; the builtin min()/max() would iterate the
# 2-D array row by row and hand 1-element arrays to linspace, so the column
# shape is made explicit here instead of relying on endpoint broadcasting.
x_fit = np.linspace(x.min(), x.max(), 1000)[:, np.newaxis]
y_fit = model.predict(x_fit)
# y was passed as a 2-D column, so coef_ is 2-D and row 0 holds the slope.
print("Model slope: ", model.coef_[0])
print("Model intercept:", model.intercept_)
# score() reports the coefficient of determination (R^2) on the training data.
print('方程的判定系数(R^2): %.6f' % model.score(x, y))
plt.figure(figsize=(16, 12))
plt.scatter(x, y, s=10, c='k', marker='.')
plt.plot(x_fit, y_fit)
ax = plt.gca()
ax.set_aspect("equal")  # same unit length on both axes
plt.grid(True)
plt.xlabel('x')
plt.ylabel('y')
plt.title('noise is small')  # fixed typo: was 'samll'
plt.show()
# 使用Python实现多元线性回归,以三元为例。由于含三个自变量的回归结果无法在二维平面上直接绘图,因此省去绘图部分,具体实现如下:
import numpy as np
from sklearn.linear_model import LinearRegression
#%% Low-noise case: linear regression with three features
print("噪声较小情况:")
rng = np.random.RandomState(10)  # fixed seed for reproducible data
# Draw the three feature columns one after another (x1, then x2, then x3),
# each as a (50, 1) column from a normal distribution (mean 100, std 100).
x1, x2, x3 = [rng.normal(100, 100, 50).reshape(-1, 1) for _ in range(3)]
# Design matrix: the three columns side by side, one row per sample.
x = np.concatenate((x1, x2, x3), axis=1)
# Gaussian perturbation: standard-normal samples scaled by 10.
noise = 10 * rng.randn(50, 1)
y = 1.25 * x1 + 20 * x2 + 10 * x3 - 5 + noise
# Ordinary least squares with an intercept term.
model = LinearRegression(fit_intercept=True)
model.fit(x, y)
# Prediction inputs: 1000 evenly spaced values in [0, 50], with the same
# value used for all three features.
a = np.linspace(0, 50, 1000)
x1_fit, x2_fit, x3_fit = (a.reshape(-1, 1) for _ in range(3))
x_fit = np.concatenate((x1_fit, x2_fit, x3_fit), axis=1)
y_fit = model.predict(x_fit)
print("Model slope: ", model.coef_[0])
print("Model intercept:", model.intercept_)
# Coefficient of determination (R^2) measured on the training data.
r_squared = model.score(x, y)
print('方程的判定系数(R^2): %.6f' % r_squared)
print("\n")
#%% High-noise case: linear regression with three features
print("噪声较大情况:")
rng = np.random.RandomState(10)  # same seed as the low-noise cell
# Sample the feature columns in order x1, x2, x3; each becomes a (50, 1)
# column drawn from a normal distribution (mean 100, std 100).
x1, x2, x3 = [rng.normal(100, 100, 50).reshape(-1, 1) for _ in range(3)]
x = np.concatenate((x1, x2, x3), axis=1)
# Same linear relation as the low-noise cell, but the standard-normal
# perturbation is scaled by 200 instead of 10.
noise = 200 * rng.randn(50, 1)
y = 1.25 * x1 + 20 * x2 + 10 * x3 - 5 + noise
model = LinearRegression(fit_intercept=True)
model.fit(x, y)
# Prediction inputs: 1000 evenly spaced values in [0, 50], repeated for each
# of the three features.
a = np.linspace(0, 50, 1000)
x1_fit, x2_fit, x3_fit = (a.reshape(-1, 1) for _ in range(3))
x_fit = np.concatenate((x1_fit, x2_fit, x3_fit), axis=1)
y_fit = model.predict(x_fit)
print("Model slope: ", model.coef_[0])
print("Model intercept:", model.intercept_)
# Coefficient of determination (R^2) measured on the training data.
r_squared = model.score(x, y)
print('方程的判定系数(R^2): %.6f' % r_squared)