import pandas as pd
import numpy as np
def topsis(data, weight=None):
    """Score and rank alternatives with the TOPSIS method.

    Each row of ``data`` is one alternative and each column one criterion.
    Every column is treated as a benefit criterion: the ideal solution is
    the column-wise maximum and the negative ideal the column-wise minimum.
    NOTE(review): cost-type criteria (lower is better) presumably need to be
    transformed by the caller before being passed in -- confirm.

    Args:
        data: Numeric table of criteria; assumed to be a pandas DataFrame,
            since the per-row result is ranked with ``Series.rank``.
        weight: One weight per column, applied to the squared differences
            inside the distance. ``None`` (the default) means equal weights.

    Returns:
        A ``(rank, similarity)`` tuple of per-row Series. ``similarity`` is
        the relative closeness to the ideal solution, and ``rank`` orders it
        descending, so rank 1 is the row with the highest similarity; ties
        share the lowest rank of their group.
    """
    # Vector normalisation: divide each column by its Euclidean norm.
    data = data / np.sqrt((data**2).sum())

    # The signature allows weight=None; without this guard the products
    # below would fail on ``* None``.
    if weight is None:
        weight = np.ones(data.shape[1])

    ideal_solution = data.max()
    negative_ideal_solution = data.min()

    # Weighted Euclidean distance of each row to the two reference points.
    distance_to_ideal_solution = np.sqrt(
        ((data - ideal_solution)**2 * weight).sum(axis=1))
    distance_to_negative_ideal_solution = np.sqrt(
        ((data - negative_ideal_solution)**2 * weight).sum(axis=1))

    # Relative closeness: the share of the total distance that separates
    # the row from the negative ideal.
    similarity = distance_to_negative_ideal_solution / (
        distance_to_ideal_solution + distance_to_negative_ideal_solution)
    rank = similarity.rank(method='min', ascending=False)
    return rank, similarity
# Load the source sheet and pick out the three indicator columns to score.
data = pd.read_excel('data1-3(睡醒次数+1).xlsx')
indicator_columns = ["整晚睡眠时间", "睡醒次数", "入睡方式"]
sleep_quality_data = data[indicator_columns]

# Weights: all three indicators count equally.
weight = np.array([1, 1, 1])

# Score every row, then attach the similarity score and its rank to the sheet.
rank, similarity = topsis(sleep_quality_data, weight)
data["相似度"], data["等级"] = similarity, rank

# Split the similarity scores into four quantile bins; the labels are assigned
# from the lowest-scoring bin to the highest-scoring one.
quality_labels = ["差", "中", "良", "优"]
data["睡眠质量"] = pd.qcut(data["相似度"], q=4, labels=quality_labels)

data.to_excel('data2(Topsis评分评级).xlsx')
唯一的缺点是:我需要各等级的人数大致服从正态分布,但 pd.qcut 是按分位数等频分箱的,所以结果是四个等级的人数差不多。因此后来我改为直接自己定义评分标准了。