# Feature de-duplication based on Series.equals
def FeatureEquals(df):
    """Build a column-by-column equality matrix for *df*.

    Cell ``(i, j)`` of the result is the outcome of comparing the i-th and
    j-th columns of ``df`` with ``Series.equals``, so the comparison follows
    that method's semantics.

    Args:
        df: The DataFrame whose columns are compared pairwise.

    Returns:
        A square DataFrame whose index and columns are both ``df.columns``
        and whose cells are booleans (True where the two columns are equal).
    """
    n_cols = df.shape[1]
    # Positional access keeps every item a Series even when column labels
    # repeat; label-based ``df.loc[:, label]`` would return a DataFrame then.
    series = [df.iloc[:, pos] for pos in range(n_cols)]

    matrix = [[False] * n_cols for _ in range(n_cols)]
    for i in range(n_cols):
        # The relation is symmetric, so compare each unordered pair once
        # (diagonal included) and mirror the outcome.
        for j in range(i, n_cols):
            same = bool(series[i].equals(series[j]))
            matrix[i][j] = same
            matrix[j][i] = same

    return pd.DataFrame(matrix, index=df.columns, columns=df.columns)
result_v2 = FeatureEquals(use_df)
print(result_v2)

# Collect the labels of duplicated columns from the equality matrix.
lenDet = result_v2.shape[0]
print(lenDet)
dupCol = []
for k in range(lenDet):
    # Only look above the diagonal: each pair (k, m) with k < m is checked
    # once, and the later column m is the one reported as the duplicate.
    for m in range(k + 1, lenDet):
        # Record the label once, even if it matches several earlier columns.
        if result_v2.iloc[k, m] and result_v2.columns[m] not in dupCol:
            dupCol.append(result_v2.columns[m])
print(dupCol)