import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
import seaborn as sns
# 加载数据集
data = pd.read_csv('housing_price.csv')
# 设置中文字体
plt.rcParams['font.sans-serif'] = ['SimHei'] # 指定默认字体
plt.rcParams['axes.unicode_minus'] = False # 解决保存图像是负号'-'显示为方块的问题
# 计算属性之间的相关系数矩阵
correlation_matrix = data[['Avg. Area Income', 'Avg. Area House Age', 'Avg. Area Number of Rooms', 'Area Population', 'size', 'Price']].corr()
# 绘制热力图
sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm')
plt.title('属性关联矩阵', fontproperties='