"""Linear-regression walkthrough on ``./housing_price.csv``.

The script runs three steps in order:

1. Draw a correlation heatmap of every column in the CSV.
2. Fit ``Price`` against the single ``size`` feature, plot the fitted line,
   and print MSE / R2 computed on the same data used for fitting.
3. Fit ``Price`` against all remaining columns using an 80/20 train/test
   split, print the fitted equation and the test-set MSE / RMSE / R2, and
   plot predicted vs. real prices.
"""
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split

# Load the data.
data = pd.read_csv('./housing_price.csv', sep=',', encoding='utf-8')

# Configure matplotlib: SimHei as the sans-serif font, and
# axes.unicode_minus disabled so the minus sign is drawn as an ASCII hyphen.
plt.rcParams['font.sans-serif'] = 'SimHei'
plt.rcParams['axes.unicode_minus'] = False
plt.rc('font', size=14)

# Draw the correlation heatmap.
plt.figure(figsize=(12, 8))
sns.heatmap(data.corr(), annot=True, fmt='.2f', cmap='PuBu')
plt.show()

# ---------------------------------------------------------------------------
# Single-feature regression: Price ~ size
# ---------------------------------------------------------------------------
# reshape(-1, 1) turns the 1-D column into the 2-D (n_samples, 1) array that
# scikit-learn estimators expect.
X_single = data['size'].values.reshape(-1, 1)
y_single = data['Price'].values

LR_single = LinearRegression()
LR_single.fit(X_single, y_single)
# Predictions are made on the training data itself (no hold-out set here).
y_pre_single = LR_single.predict(X_single)

plt.scatter(X_single, y_single, c='b', label='Original')
plt.plot(X_single, y_pre_single, 'r', label='Predicted')
plt.xlabel('size')
plt.ylabel('Price')
plt.legend()
plt.show()

mse_single = mean_squared_error(y_single, y_pre_single)
r2_single = r2_score(y_single, y_pre_single)
print('MSE (Single Feature):{:.4f}'.format(mse_single))
print('R2_SCORE (Single Feature):{:.4f}'.format(r2_single))
print('单特征回归系数为:', LR_single.coef_)
print('截距为:', LR_single.intercept_)

# ---------------------------------------------------------------------------
# Multi-feature regression: Price ~ every other column
# ---------------------------------------------------------------------------
X_multi = data.drop('Price', axis=1)
y_multi = data['Price']
# Fixed random_state keeps the 80/20 split reproducible between runs.
X_train_multi, X_test_multi, y_train_multi, y_test_multi = train_test_split(
    X_multi, y_multi, test_size=0.2, random_state=888
)

linear_model_multi = LinearRegression()
linear_model_multi.fit(X_train_multi, y_train_multi)

coef_multi = linear_model_multi.coef_
intercept_multi = linear_model_multi.intercept_
features = X_train_multi.columns

# Print the fitted equation as "y = c1*f1 + c2*f2 + ... + intercept".
print('多特征回归方程为:\n', 'y = ', end='')
for coefficient, feature in zip(coef_multi, features):
    print(f'{coefficient:.2f}*{feature} + ', end='')
print(f'{intercept_multi:.2f}')

y_pre_multi = linear_model_multi.predict(X_test_multi)
mse_multi = mean_squared_error(y_test_multi, y_pre_multi)
r2_multi = r2_score(y_test_multi, y_pre_multi)
# The original printed sqrt(MSE) under the "MSE" label. Report the true MSE
# there (matching the single-feature section) and the RMSE on its own line.
print('MSE (Multi Features):{:.4f}'.format(mse_multi))
# The square root of the MSE is the RMSE.
print('RMSE (Multi Features):{:.4f}'.format(np.sqrt(mse_multi)))
print('R2_SCORE (Multi Features):{:.4f}'.format(r2_multi))

# Predicted vs. real prices; points on the dashed diagonal are exact
# predictions.
price_min = y_test_multi.min()
price_max = y_test_multi.max()
plt.scatter(y_test_multi, y_pre_multi, label='Predicted')
plt.plot([price_min, price_max], [price_min, price_max],
         'k--', lw=4, label='Perfect fit')
plt.xlabel('Real Price')
plt.ylabel('Predicted Price')
plt.legend()
plt.show()