Attribute reduction based on rough sets (Python 3, with comments).

from collections import defaultdict
import numpy as np
from scipy.io import loadmat
from sklearn import preprocessing
from sklearn.svm import SVC
from sklearn import metrics
from sklearn.model_selection import train_test_split
import time
# Load the data
mat = loadmat("Data\\optimal_data.mat")
data = mat['data']
target = mat['target']
target = np.array(target).flatten()
time_start = time.perf_counter()  # record the start time
# Min-max normalization
min_max_scaler = preprocessing.MinMaxScaler()
x_minmax = min_max_scaler.fit_transform(data)
data1 = x_minmax
#print(data1)
# Tweak on the classic neighborhood rough set: derive a separate neighborhood
# radius from each attribute column instead of fixing one global radius.
neighbor_list = list()  # neighborhood radius of each attribute
for a in range(data1.shape[1]):
    # radius = std of the column / lambda; lambda is usually taken in [0.5, 1.5] (0.5 here),
    # which sidesteps the problem of choosing the radius by hand
    neighbor = np.std(data1[:, a]) / 0.5
    neighbor_list.append(neighbor)
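# Example: if a normalized column has standard deviation 0.2, its radius is
# 0.2 / 0.5 = 0.4, so two samples count as neighbors on that attribute when
# their values differ by at most 0.4.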
#print(neighbor_list)
# Compute the delta-neighborhood of every sample under each single attribute
delta_neighbor = list()  # one dict per attribute: sample index -> indices of its neighbors
for index in range(data1.shape[1]):
    delta_neighbor_dict = dict()
    for k in range(data1.shape[0]):
        delta_neighbor_list = list()
        for v in range(data1.shape[0]):
            # distance between samples on this single attribute (Euclidean, i.e. absolute difference here)
            dis = np.sqrt(np.sum(np.square(data1[k, index] - data1[v, index])))
            if dis <= neighbor_list[index]:
                delta_neighbor_list.append(v)
        delta_neighbor_dict[k] = delta_neighbor_list
    delta_neighbor.append(delta_neighbor_dict)
#print(delta_neighbor)
# Partition the universe by the decision attribute: U/IND(D)
decision_value_dict = defaultdict(list)
for i, label in enumerate(target):
    decision_value_dict[label].append(i)  # label -> indices of the samples carrying it
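# e.g. for target = [0, 1, 0, 1] this gives {0: [0, 2], 1: [1, 3]}:
# each decision class label maps to the indices of its samples.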
#print(decision_value_dict)
# Lower approximation of each decision class under every single attribute,
# and the dependency degree of D on that attribute
corr_list = list()
for index in range(data1.shape[1]):
    low_similar_set = defaultdict(list)
    for m in decision_value_dict.keys():
        #print(m)
        for k in delta_neighbor[index].keys():
            # k is in the lower approximation of class m if its whole neighborhood lies inside that class
            if set(delta_neighbor[index].get(k)).issubset(decision_value_dict.get(m)):
                low_similar_set[m].append(k)
    #print(low_similar_set)
    # positive region = union of the lower approximations of all decision classes
    h_sum = []
    for i in low_similar_set.values():
        h_sum += i
    POS = list(set(h_sum))
    #print(POS)
    # dependency degree of D on the current attribute
    corr = len(POS) / data1.shape[0]
    #print(corr)
    corr_list.append(corr)
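# Each corr_list[i] is gamma_{a_i}(D) = |POS_{a_i}(D)| / |U|: the fraction of
# samples whose neighborhood under attribute a_i falls entirely inside a single
# decision class.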
#print(corr_list)
# Dependency degree on the full attribute set A
A_neighbor = np.std(data1[:, :]) / 0.5  # one global radius from the std of all values
# print(A_neighbor)
A_delta_neighbor = list()  # one single-entry dict per sample: index -> indices of its neighbors
for k in range(data1.shape[0]):
    A_delta_neighbor_list = list()
    A_delta_neighbor_dict = dict()
    for v in range(data1.shape[0]):
        # Euclidean distance between samples over all attributes
        A_dis = np.sqrt(np.sum(np.square(data1[k] - data1[v])))
        if A_dis <= A_neighbor:
            A_delta_neighbor_list.append(v)
    A_delta_neighbor_dict[k] = A_delta_neighbor_list
    #print(A_delta_neighbor_dict)
    A_delta_neighbor.append(A_delta_neighbor_dict)
#print(A_delta_neighbor)
A_low_similar_set = defaultdict(list)
for element in A_delta_neighbor:
    for h, k in element.items():
        for m in decision_value_dict.keys():
            # h is in the lower approximation of class m if its whole neighborhood lies inside that class
            if set(k).issubset(decision_value_dict.get(m)):
                A_low_similar_set[m].append(h)
#print(A_low_similar_set)
# positive region and dependency degree for the full attribute set
A_h_sum = []
for l in A_low_similar_set.values():
    A_h_sum += l
A_POS = list(set(A_h_sum))
A_corr = len(A_POS) / data1.shape[0]
#print(A_corr)
reduct_attribute = [i for i, corr in enumerate(corr_list) if A_corr - corr != A_corr]
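# A_corr is the dependency degree on the full attribute set; "A_corr - corr != A_corr"
# reduces to corr != 0 (up to floating-point rounding), so the reduct keeps every
# attribute whose individual dependency degree is non-negligible.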
print(reduct_attribute)  # indices of the attributes kept in the reduct
reduct_sets = data1[:, reduct_attribute]
#print(data1[:, reduct_attribute])  # the reduced data set
time_end = time.perf_counter()  # record the end time
time_sum = time_end - time_start  # elapsed time of the reduction, in seconds
print(time_sum)  # print the run time
# Evaluate the reduced attribute set with an SVM classifier
X_train, X_test, y_train, y_test = train_test_split(reduct_sets, target, test_size=0.2, random_state=1)
clf = SVC()
clf.fit(X_train, y_train)
result = clf.predict(X_test)
# Precision, recall and F1-score
print(metrics.classification_report(y_test, result))
# Hamming loss lies in [0, 1]: 0 means the predictions match the true labels exactly,
# 1 means they are completely wrong
print("hamming_loss", metrics.hamming_loss(y_test, result))

I wrote this rough-set-based attribute reduction code a few months ago. I looked at a lot of code on this topic online, but none of it was very complete and none had comments, so I put together this revised version. I'm just a beginner, so corrections are welcome if you spot mistakes~ It runs fine on my machine.
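If you don't have an optimal_data.mat of your own, a minimal synthetic stand-in can be written with scipy.io.savemat so the script runs end to end. The key names 'data' and 'target' and the Data folder simply mirror what loadmat expects at the top of the script; the shapes and random values below are arbitrary placeholders.

import os
import numpy as np
from scipy.io import savemat

os.makedirs("Data", exist_ok=True)
rng = np.random.default_rng(0)
X = rng.random((100, 8))            # 100 samples, 8 condition attributes (arbitrary sizes)
y = rng.integers(0, 2, size=100)    # binary decision attribute
savemat("Data/optimal_data.mat", {"data": X, "target": y})

The script flattens target itself, so a 1-D label array is enough here.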
