热力图和子图的绘制
知识点:
- 介绍了热力图的绘制方法
- 介绍了enumerate()函数
- 介绍了子图的绘制方法
import pandas as pd
# Load the raw dataset; the path is resolved relative to the working directory.
data = pd.read_csv('data.csv')

# Lookup tables used to turn text categories into integer codes.
# Outer key: column name. Inner dict: original label -> numeric code.
mappings = {
    "Years in current job": {
        "10+ years": 10,
        "2 years": 2,
        "3 years": 3,
        "< 1 year": 0,
        "5 years": 5,
        "1 year": 1,
        "4 years": 4,
        "6 years": 6,
        "7 years": 7,
        "8 years": 8,
        "9 years": 9,
    },
    "Home Ownership": {
        "Home Mortgage": 0,
        "Rent": 1,
        "Own Home": 2,
        "Have Mortgage": 3,
    },
}

# Replace every mapped column with its numeric codes. Series.map with a plain
# dict turns labels that are absent from the table into NaN.
for column_name, lookup in mappings.items():
    data[column_name] = data[column_name].map(lookup)
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
# Columns treated as continuous (numeric) features for the correlation study.
continuous_features = [
    'Annual Income',
    'Years in current job',
    'Tax Liens',
    'Number of Open Accounts',
    'Years of Credit History',
    'Maximum Open Credit',
    'Number of Credit Problems',
    'Months since last delinquent',
    'Bankruptcies',
    'Current Loan Amount',
    'Current Credit Balance',
    'Monthly Debt',
    'Credit Score',
]

# Pairwise correlation coefficients between the selected columns.
correlation_matrix = data.loc[:, continuous_features].corr()

# Render figures at 300 DPI for sharper output.
plt.rcParams['figure.dpi'] = 300

# Draw the heatmap on an explicitly created axes inside a 12x10 inch figure.
heatmap_ax = plt.figure(figsize=(12, 10)).add_subplot()
sns.heatmap(
    correlation_matrix,
    annot=True,        # write each coefficient inside its cell
    cmap='coolwarm',
    vmin=-1,           # pin the colour scale to the full [-1, 1] range
    vmax=1,
    ax=heatmap_ax,
)
heatmap_ax.set_title('Correlation Heatmap of Continuous Features')
plt.show()
import pandas as pd
import matplotlib.pyplot as plt
# Features to plot.
# NOTE: these four were picked arbitrarily for the demonstration.
features = ['Annual Income', 'Years in current job', 'Tax Liens', 'Number of Open Accounts']

# Render figures at 300 DPI for sharper output.
plt.rcParams['figure.dpi'] = 300

# Create a 2x2 grid of subplots; `axes` is indexed as axes[row, col].
fig, axes = plt.subplots(2, 2, figsize=(12, 8))

# Select every feature by hand so the (row, col) coordinate of each panel is
# spelled out explicitly. Missing values are dropped before plotting.

# Feature 0 -> top-left panel, axes[0, 0].
i = 0
feature = features[i]
axes[0, 0].boxplot(data[feature].dropna())
axes[0, 0].set_title(f'Boxplot of {feature}')
axes[0, 0].set_ylabel(feature)

# Feature 1 -> top-right panel, axes[0, 1].
i = 1
feature = features[i]
axes[0, 1].boxplot(data[feature].dropna())
axes[0, 1].set_title(f'Boxplot of {feature}')
axes[0, 1].set_ylabel(feature)

# Feature 2 -> bottom-left panel, axes[1, 0].
i = 2
feature = features[i]
axes[1, 0].boxplot(data[feature].dropna())
axes[1, 0].set_title(f'Boxplot of {feature}')
axes[1, 0].set_ylabel(feature)

# Feature 3 -> bottom-right panel, axes[1, 1].
i = 3
feature = features[i]
axes[1, 1].boxplot(data[feature].dropna())
axes[1, 1].set_title(f'Boxplot of {feature}')
axes[1, 1].set_ylabel(feature)

# Adjust the spacing between subplots so titles and labels do not overlap.
plt.tight_layout()

# Show the figure. Without this call the figure stays open when run as a
# script and is only displayed together with whatever figure is shown next.
plt.show()
# Features to plot.
features = ['Annual Income', 'Years in current job', 'Tax Liens', 'Number of Open Accounts']

# Render figures at 300 DPI for sharper output.
plt.rcParams['figure.dpi'] = 300

# Create a 2x2 grid of subplots; `axes` is indexed as axes[row, col].
fig, axes = plt.subplots(2, 2, figsize=(12, 8))

# enumerate() yields (index, item) pairs, so the position and the feature name
# arrive together and no separate features[i] lookup is needed.
for i, feature in enumerate(features):
    # Floor division gives the row index: 0//2=0, 1//2=0, 2//2=1, 3//2=1.
    row = i // 2
    # The remainder gives the column index: 0%2=0, 1%2=1, 2%2=0, 3%2=1.
    col = i % 2

    # Draw the boxplot for this feature; missing values are dropped first.
    ax = axes[row, col]
    ax.boxplot(data[feature].dropna())
    ax.set_title(f'Boxplot of {feature}')
    ax.set_ylabel(feature)

# Adjust the spacing between subplots so titles and labels do not overlap.
plt.tight_layout()

# Show the figure.
plt.show()