import pandas as pd
import numpy as np
def compute_fisher_scores(frame, feature_cols, label_col):
    """Return the Fisher score of every feature column as a list of floats.

    For feature ``i`` the score is::

        sum_k n_k * (u_ik - u_i)**2  /  sum_k n_k * s_ik**2

    where ``k`` runs over the distinct values found in ``label_col``,
    ``n_k`` is the number of rows in class ``k``, ``u_ik`` and ``s_ik`` are
    the mean and the sample standard deviation (ddof=1) of feature ``i``
    inside class ``k``, and ``u_i`` is the mean of feature ``i`` over all
    rows.  A larger score means the class means are further apart relative
    to the spread inside each class.

    Args:
        frame: DataFrame holding both the feature columns and the labels.
        feature_cols: Iterable of column labels to score.
        label_col: Column label of the class labels.  Any set of distinct
            values works; they do not have to be 1..K.

    Returns:
        One score per entry of ``feature_cols``, in the same order.  A score
        is NaN when some class has a single row (its sample variance is
        undefined) and inf/NaN when the within-class term is zero.
    """
    feature_cols = list(feature_cols)
    by_class = frame.groupby(label_col)

    # Class sizes come from the groupby itself, so they are always aligned
    # with the per-class statistics below, whatever the label values are.
    class_sizes = by_class.size()
    class_means = by_class[feature_cols].mean()
    # var() defaults to ddof=1, i.e. it equals std() ** 2.
    class_vars = by_class[feature_cols].var()
    overall_means = frame[feature_cols].mean()

    # Rows are classes, columns are features; weight each class row by n_k.
    # skipna=False keeps an undefined class variance visible as NaN instead
    # of silently counting it as zero.
    between = ((class_means - overall_means) ** 2).mul(class_sizes, axis=0)
    within = class_vars.mul(class_sizes, axis=0)
    scores = between.sum(skipna=False) / within.sum(skipna=False)
    return scores.tolist()


if __name__ == "__main__":
    # Columns 0..51 are features, column 52 holds the class label.
    data = pd.read_csv('data/data1to21.csv', header=None)
    data[52] = data[52].astype(int)

    # Compute the Fisher score of every feature column.
    items = list(range(52))
    fisher_score = compute_fisher_scores(data, items, 52)

    # One score per line, no index and no header row.
    pd.DataFrame(fisher_score).to_csv('fisher_score.csv', index=False, header=False)
[Python] 使用 pandas 实现 Fisher score:衡量特征用于分类的重要性,可用于特征选择
最新推荐文章于 2024-03-12 01:05:34 发布