# 第1关:一元线性回归拟合模型 (Stage 1: fitting a simple linear regression model)
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
# Training data: one input value and one target value per sample.
X = [[6], [8], [10], [14], [18]]
y = [[7], [9], [13], [17.5], [18]]

# Fit an ordinary least-squares line; fit() returns the estimator itself.
model = LinearRegression().fit(X, y)

# Evaluate the fitted line on 100 evenly spaced inputs from 0 to 25,
# shaped as a single-feature column for predict().
X_range = np.linspace(0, 25, 100)[:, np.newaxis]
y_range = model.predict(X_range)

# Draw the training points in black, then the fitted line in green.
plt.figure()
plt.scatter(X, y, color='black')
plt.plot(X_range, y_range, color='green')

# Tick positions: x at 0, 5, ..., 25 and y at 1, 6, ..., 21.
plt.xticks(np.arange(0, 26, 5))
plt.yticks(np.arange(1, 26, 5))

# Write the figure to disk.
plt.savefig('src/step1/stu_img/filename.png')
# 第2关:成本函数的模型拟合评估 (Stage 2: evaluating model fit with a cost function)
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
import numpy as np
# Select a font that can render the Chinese title and axis labels, and
# disable the Unicode minus glyph so negative tick labels still display.
plt.rcParams['font.sans-serif'] = ['SimHei']
plt.rcParams['axes.unicode_minus'] = False

# Training data; per the labels printed below, X is pizza diameter in inches
# and y is pizza price in dollars.
X = np.array([[6], [8], [10], [14], [18]])
y = np.array([[7], [9], [13], [17.5], [18]])
# Inputs used only to draw the fitted line across the plot.
X2 = np.array([[0], [10], [14], [25]])

# Fit an ordinary least-squares line; fit() returns the estimator itself.
model = LinearRegression().fit(X, y)
y2 = model.predict(X2)

# Plot the training points (black dots) and the fitted line (green).
plt.figure()
plt.plot(X, y, 'k.')
plt.plot(X2, y2, 'g-')
plt.title('匹萨价格与直径数据')
plt.xlabel('直径(英寸)')
plt.ylabel('价格(美元)')
plt.savefig('src/step2/stu_img/filename.png')
plt.show()

# Difference between each observed target and the model's prediction for it.
residuals = y - model.predict(X)

# Echo the training data.
print("匹萨直径(英寸):", X.flatten())
print("匹萨价格(美元):", y.flatten())

# NOTE(review): the printed label reads "residual sum of squares", but the
# value is the sum of squared residuals divided by the number of samples,
# i.e. their mean. Output is kept unchanged here; confirm which is intended.
squared_residuals = residuals ** 2
mean_squared_residual = squared_residuals.sum() / len(squared_residuals)
print("残差平方和:{:.2f}".format(mean_squared_residual))
# 第3关:多元线性回归拟合模型 (Stage 3: fitting a multiple linear regression model)
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score
import numpy as np
# Training set: five samples with two features each, and one target per
# sample stored as a column vector.
X = np.array([[6, 2], [8, 1], [10, 0], [14, 2], [18, 0]])
y = np.array([[7], [9], [13], [17.5], [18]])

# Fit an ordinary least-squares model on both features.
model = LinearRegression()
model.fit(X, y)

# Separate test set used only for evaluation.
X_test = np.array([[8, 2], [9, 0], [11, 2], [16, 2], [12, 0]])
y_test = np.array([11, 8.5, 15, 18, 11])
predictions = model.predict(X_test)

# Coefficient of determination of the predictions against the test targets.
r_squared = r2_score(y_test, predictions)

# Report every prediction next to its target, then the overall score.
for prediction, target in zip(predictions, y_test):
    # Whole-number targets are shown without a trailing ".0" (e.g. "[11]");
    # fractional targets are shown as-is (e.g. "[8.5]"). The choice is made
    # from the value itself rather than from a hard-coded sample index.
    shown_target = int(target) if float(target).is_integer() else target
    print("Predicted: {}, Target: [{}]".format(prediction, shown_target))
print("R-squared: {:.2f}".format(r_squared))
# 第4关:多项式的曲线关系 (Stage 4: polynomial curve relationships)
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures
import matplotlib.pyplot as plt
# Select a font that can render the Chinese title, labels and legend, and
# disable the Unicode minus glyph so negative tick labels still display.
plt.rcParams['font.sans-serif'] = ['SimHei']
plt.rcParams['axes.unicode_minus'] = False

# Training and test data: one input value and one target value per sample.
X_train = [[6], [8], [10], [14], [18]]
y_train = [[7], [9], [13], [17.5], [18]]
X_test = [[6], [8], [11], [16]]
y_test = [[8], [12], [15], [18]]

# Quadratic model: expand the single input into degree-2 polynomial features,
# then fit a linear regression on the expanded features.
quadratic_featurizer = PolynomialFeatures(degree=2)
X_train_quadratic = quadratic_featurizer.fit_transform(X_train)
X_test_quadratic = quadratic_featurizer.transform(X_test)
regressor_quadratic = LinearRegression()
regressor_quadratic.fit(X_train_quadratic, y_train)

# Degree-7 model, built the same way.
seventh_featurizer = PolynomialFeatures(degree=7)
X_train_seventh = seventh_featurizer.fit_transform(X_train)
X_test_seventh = seventh_featurizer.transform(X_test)
regressor_seventh = LinearRegression()
regressor_seventh.fit(X_train_seventh, y_train)

# Plot the training points.
plt.figure()
plt.plot(X_train, y_train, 'k.')
plt.title('匹萨价格与直径数据')
plt.xlabel('直径(英寸)')
plt.ylabel('价格(美元)')

# 100 evenly spaced inputs from 0 to 26 for drawing both curves; the column
# form is what the featurizers expect, so it is built once and reused.
xx = np.linspace(0, 26, 100)
xx_column = xx.reshape(xx.shape[0], 1)

# Quadratic curve in red.
xx_quadratic = quadratic_featurizer.transform(xx_column)
plt.plot(xx, regressor_quadratic.predict(xx_quadratic), 'r-', label='二次回归')

# Degree-7 curve in blue.
xx_seventh = seventh_featurizer.transform(xx_column)
plt.plot(xx, regressor_seventh.predict(xx_seventh), 'b-', label='七次回归')

plt.legend(loc='upper left')
plt.ylim(0, 25)
plt.savefig('src/step4/stu_img/filename.png')

# R-squared of each model on the test set.
r_squared_quadratic = regressor_quadratic.score(X_test_quadratic, y_test)
r_squared_seventh = regressor_seventh.score(X_test_seventh, y_test)
print("二次回归 r-squared", r_squared_quadratic)
# Print the score exactly as computed. A hand-tuned offset must not be added
# to a reported metric: the last digits of this fit depend on floating-point
# round-off and are not a stable value to match.
print("七次回归 r-squared", r_squared_seventh)