def calcute_corr(data, aim):
    """Greedily split features into kept ones and highly correlated ones.

    Columns are visited in ``data.columns`` order.  A column that has not
    already been marked for removal is kept, and every other column whose
    correlation with it (as computed by ``data.corrwith``) is strictly
    greater than ``aim`` is marked for removal.

    Only the signed correlation is compared against ``aim``; strongly
    negatively correlated columns are not treated as redundant.

    Args:
        data: DataFrame holding all features whose pairwise correlation
            should be examined.
        aim: Correlation threshold; columns correlated above this value
            with an already kept column are marked for removal.

    Returns:
        A ``(result, the_same_feature)`` tuple of lists:
        ``result`` holds the column names to keep, and
        ``the_same_feature`` holds the column names to drop, each listed
        once, in the order they were first detected.  The two lists are
        disjoint.
    """
    result = []
    the_same_feature = []
    # Set mirrors of the two lists for O(1) membership checks.
    kept = set()
    dropped = set()

    for col_name in data.columns:
        if col_name in dropped:
            continue

        result.append(col_name)
        kept.add(col_name)

        correlations = data.corrwith(data[col_name])
        for name in correlations[correlations > aim].index:
            # A column always correlates perfectly with itself, so the kept
            # column (and anything kept earlier) must not be reported as
            # removable; columns already marked are not reported twice.
            if name in kept or name in dropped:
                continue
            dropped.add(name)
            the_same_feature.append(name)

    return result, the_same_feature