【代码复现】BAMIC
1.k-Medoids
"""
-*- coding:utf-8 -*-
@FileName: KMedoids.py
@Author: zjy
@DateTime: 2022/9/28 18:22
@Description: k-Medoids clustering on a precomputed pairwise distance matrix.
@IDE:PyCharm
"""
from MILFrame.Clustering import Cluster
import numpy as np
class KMedoids(Cluster):
    """k-Medoids clustering driven by a precomputed pairwise distance matrix.

    Two index spaces are used throughout:

    * *global* indices address rows/columns of ``distance``;
    * *local* positions address entries of ``idx`` (``0 .. num_sample - 1``).

    ``centers``, ``blocks`` and ``labels`` are expressed in local positions;
    ``clustering`` converts the final medoids back to global indices.

    ``num_sample`` is read but never assigned in this class; it is expected to
    be provided by ``Cluster.__init__`` (not visible here -- presumably
    ``len(idx)``, or the matrix size when ``idx`` is None; confirm).
    """

    def __init__(self, distance, idx=None, k=10, max_iter=10):
        """Store the clustering configuration.

        Args:
            distance: square matrix supporting ``distance[rows][:, cols]``
                integer-array indexing (e.g. a 2-D ``numpy.ndarray``).
            idx: global indices of the samples to cluster; ``None`` selects
                ``0 .. num_sample - 1``.
            k: number of clusters / medoids.
            max_iter: upper bound on the number of refinement iterations.
        """
        super(KMedoids, self).__init__(distance, idx)
        self.distance = distance
        self.idx = idx
        self.k = k
        self.max_iter = max_iter
        self._initialized_()

    def _initialized_(self):
        """Default ``idx`` to every sample and make sure it is an ndarray."""
        if self.idx is None:
            self.idx = np.arange(self.num_sample)
        else:
            # Fancy indexing (``self.idx[positions]``) requires an ndarray.
            self.idx = np.asarray(self.idx)

    def compare_centers(self, last_centers, current_centers):
        """Return True when both arrays hold the same centers, order ignored.

        Only the first ``k`` sorted entries are compared, as before; arrays
        shorter than ``k`` no longer raise ``IndexError``.
        """
        last_sorted = np.sort(last_centers)[: self.k]
        current_sorted = np.sort(current_centers)[: self.k]
        return bool(np.array_equal(last_sorted, current_sorted))

    def clustering(self):
        """Run k-Medoids and return the medoids as global indices.

        Starts from ``k`` random samples, then alternates between assigning
        every sample to its nearest medoid and re-picking, inside each
        cluster, the member with the smallest summed distance to the other
        members. Stops when the medoid set no longer changes or after
        ``max_iter`` iterations.

        Side effects: sets ``self.centers`` (local positions), and -- once at
        least one iteration has run -- ``self.blocks`` (list of member
        positions per cluster) and ``self.labels`` (cluster number per
        position).

        Returns:
            numpy.ndarray: ``self.idx[self.centers]``.
        """
        shuffled = np.random.permutation(self.num_sample)
        # Seed "previous" centers with a different random draw so that the
        # first convergence check normally fails and the loop is entered.
        last_centers = shuffled[-self.k:]
        self.centers = shuffled[: self.k]

        loop = 0
        # Iterate while the centers are still CHANGING. (The condition used to
        # be inverted, so refinement never ran.)
        while loop < self.max_iter and not self.compare_centers(last_centers, self.centers):
            last_centers = np.copy(self.centers)

            # Rows: clustered samples by local position. Columns: current
            # medoids, translated to global indices before touching `distance`.
            dis_to_centers = self.distance[self.idx][:, self.idx[last_centers]]

            self.blocks = [[center] for center in self.centers]
            self.labels = np.zeros(self.num_sample)
            self.labels[self.centers] = list(range(self.k))

            center_positions = set(self.centers.tolist())
            for sample_idx in range(self.num_sample):
                if sample_idx in center_positions:
                    continue
                nearest = np.argmin(dis_to_centers[sample_idx])
                # `nearest` already IS the cluster number.
                self.labels[sample_idx] = nearest
                self.blocks[nearest].append(sample_idx)

            for idx_k in range(self.k):
                members = self.blocks[idx_k]
                global_members = self.idx[members]
                within = self.distance[global_members][:, global_members]
                # New medoid: member with the smallest total distance.
                best = np.argmin(np.sum(within, 0))
                self.centers[idx_k] = members[best]
            loop += 1

        return self.idx[self.centers]
if __name__ == "__main__":
    # Demo run: cluster a random symmetric 100x100 matrix into two groups and
    # print the selected medoid indices.
    upper = np.triu(np.random.randn(100, 100))
    symmetric = upper.T + upper
    model = KMedoids(symmetric, k=2, max_iter=100)
    print(model.clustering())
2.BAMIC
"""
-*- coding:utf-8 -*-
@FileName: BAMIC.py
@Author: zjy
@DateTime: 2022/9/28 18:18
@Description:
@IDE:PyCharm
"""
import numpy as np
from MILFrame.MIL import MIL
from MILFrame.B2B import B2B
from MILFrame.Utils import get_k_cv_idx
from KMedoids import KMedoids
class BAMIC(MIL):
    """Embed bags as vectors of distances to k-Medoids centers, fold by fold.

    ``data_name``, ``bag_space``, ``num_bag`` and ``bag_labels`` are read here
    but not assigned in this class; they are expected to come from ``MIL``
    (not visible here).
    """

    def __init__(self, data_path, k=10, center_ratio=0.99, b2b_type='ave'):
        """Store the configuration and load the distance matrix.

        Args:
            data_path: forwarded unchanged to ``MIL.__init__``.
            k: number of folds requested from ``get_k_cv_idx`` and iterated
                over by ``get_mapping``.
            center_ratio: fraction applied to ``min(100, fold train size)`` to
                obtain the number of medoids for that fold.
            b2b_type: forwarded to ``B2B`` (presumably selects the bag-to-bag
                distance variant -- confirm against ``B2B``).
        """
        super().__init__(data_path)
        self.k, self.center_ratio, self.b2b_type = k, center_ratio, b2b_type
        # Placeholders; filled by _initialized_ / get_mapping.
        self.distance = []
        self.train_idx = []
        self.test_idx = []
        self._initialized_()

    def _initialized_(self):
        """Fetch the distance matrix from ``B2B`` (save home ``Distance/``)."""
        bag_distance = B2B(
            self.data_name,
            self.bag_space,
            self.b2b_type,
            b2b_save_home="Distance/",
        )
        self.distance = bag_distance.get_dis()

    def get_mapping(self):
        """Yield one embedded train/test split per fold.

        For each fold, medoids are selected among the training indices with
        ``KMedoids`` and every bag is represented by the columns of
        ``self.distance`` belonging to those medoids.

        Yields:
            tuple: ``(train_vectors, train_labels, test_vectors, test_labels,
            None)``.
        """
        self.train_idx, self.test_idx = get_k_cv_idx(self.num_bag, self.k)
        for fold in range(self.k):
            train_rows = self.train_idx[fold]
            test_rows = self.test_idx[fold]
            # Medoid count: center_ratio of the training size, capped at 100.
            n_centers = int(self.center_ratio * min(100, len(train_rows)))
            medoids = KMedoids(self.distance, np.array(train_rows), n_centers).clustering()
            embedded = self.distance[:, medoids]
            yield (
                embedded[train_rows],
                self.bag_labels[train_rows],
                embedded[test_rows],
                self.bag_labels[test_rows],
                None,
            )
# Running this module directly is intentionally a no-op.
if __name__ == "__main__":
    pass
3.test
from BAMIC import BAMIC
from MILFrame.Classifier import Classifier
import os
def test(data_file_path, k=10, loops=10):
    """Run BAMIC + classifiers ``loops`` times and print averaged scores.

    Args:
        data_file_path: dataset path handed to ``BAMIC``.
        k: second positional argument of ``BAMIC`` (its ``k``).
        loops: number of repetitions the scores are averaged over.

    Prints one summary line per classifier followed by a separator line.
    NOTE(review): the source this was restored from had lost its indentation;
    the placement of the two trailing prints is reconstructed -- confirm.
    """
    classifier_type = ["knn", "svm", "j48"]
    performance_type = ["f1_score", "acc", "roc"]
    print("=================================================")

    # Running sums, one slot per entry of performance_type.
    totals = {name: [0, 0, 0] for name in classifier_type}
    metric_slots = range(len(performance_type))

    for _ in range(loops):
        bamic = BAMIC(data_file_path, k)
        classifier = Classifier(classifier_type, performance_type)
        scores = classifier.test(bamic.get_mapping())
        for name in classifier_type:
            for slot in metric_slots:
                totals[name][slot] += scores[name][slot]

    for name in classifier_type:
        pieces = []
        for slot, metric in enumerate(performance_type):
            totals[name][slot] /= loops
            pieces.append(str(metric + ": %.3lf" % (totals[name][slot]) + "\t"))
        result_str = name + " : " + "".join(pieces)
        print("loops= ", loops, "result= ", result_str)
    print("-**-----------------------------------------**--")
if __name__ == '__main__':
    dataset_file_path = "Benchmark/"
    entries = os.listdir(dataset_file_path)
    # Evaluate only the entries at positions 1 and 3 of the directory listing.
    for file_name in entries[1:5:2]:
        dataset = str(dataset_file_path + file_name)
        print("当前数据集为:", file_name)
        test(dataset, 10)
4.运行结果