算法
整体流程:通过两阶段实例选择技术快速选取代表实例,再采用自适应的映射方法将包转换成单示例
实例原型选择:
基于包内实例的优先级,选出具有包内部结构特征的实例原型:
优先级:
密度:
关联值:
实例原型:
代表实例选择:
实例选择技术从 $T$ 中选择一组峰值密度较大的实例原型作为代表实例,并构建代表实例池 $R$
指标:
与高密度点的最近距离:
代表实例:从 $T$ 中选取前 $n_r$ 个 $\lambda_i$ 值最大的实例原型,构成代表实例池 $R$
自适应包映射技术:
分量:
叠加:
代码
实例原型选择:
# Usage excerpt (same two statements as the body of SDI.__print_SDI):
# build a DIP selector from the bags and the per-bag ratio, then hand back
# the instance prototypes it collected.
DIP_demo = DIP.DIP(self.bags, self.ratio_ins)
return DIP_demo.discriminative_instance
实例原型类
class DIP:
    """Collect instance prototypes from every bag, ranked by a priority score.

    For each bag the priority of an instance is ``affinity * density``:

    * affinity - number of entries in the instance's row of ``get_cosine``
      that are less than or equal to the mean of the whole matrix;
    * density  - sum over the bag of ``exp(-(d / d_c) ** 2)`` where ``d`` is
      the pairwise distance from ``cdist`` and ``d_c`` is 0.4 times the
      largest pairwise distance in the bag.

    Args:
        bags: Indexable collection with a ``shape`` attribute; ``bags[i][0]``
            must be a 2-D array whose last column is dropped before any
            computation (presumably the instance label - confirm with caller).
        scale_num: Fraction of each bag to keep; ``ceil(scale_num * n)``
            prototypes are taken from a bag with ``n`` instances.

    Attributes:
        inner_bag_distance: List of the selected prototype vectors, in
            selection order, across all bags.
        discriminative_instance: ``inner_bag_distance`` as a NumPy array.
    """

    def __init__(self, bags, scale_num):
        self.bags = bags
        self.scale_num = scale_num
        self.inner_bag_distance = []
        self.discriminative_instance = self.__get_discriminative_ins()

    def __get_discriminative_ins(self):
        """Run the per-bag selection on every bag and return all prototypes."""
        for bag_i in range(self.bags.shape[0]):
            self.__get_instance_in_bag(self.bags[bag_i])
        return np.array(self.inner_bag_distance)

    @staticmethod
    def _lowest_score_indices(scores, count):
        """Return the indices of the ``count`` smallest scores, without repeats.

        Ties keep their original order (stable sort), which matches picking
        the first minimum repeatedly. At most ``len(scores)`` indices are
        returned, even when ``count`` is larger.
        """
        order = np.argsort(np.asarray(scores, dtype="float64"), kind="stable")
        return [int(index) for index in order[:max(count, 0)]]

    def __get_instance_in_bag(self, bag):
        """Append the prototypes of one bag to ``self.inner_bag_distance``."""
        # Step 1. Pairwise distances between the instances of the bag.
        instances = np.array(bag[0][:, :-1])
        num_instances = instances.shape[0]
        if num_instances == 0:
            # Nothing to select; also avoids calling max() on an empty matrix.
            return
        distance_ins_to_ins = cdist(instances, instances)

        # Step 2. Affinity: per row, how many entries are <= the global mean.
        # get_cosine is defined outside this block; it is assumed to return a
        # square (n, n) matrix - confirm against its definition.
        affinity_ins_to_ins = np.asarray(get_cosine(instances))
        affinity_ins_score = (affinity_ins_to_ins <= affinity_ins_to_ins.mean()).sum(axis=1)

        # Step 3. Density with a Gaussian kernel and a cutoff distance of 40 %
        # of the largest pairwise distance.
        dis_cut = 0.4 * distance_ins_to_ins.max()
        if dis_cut == 0:
            # All instances coincide: every instance gets the same density.
            density_ins_score = np.ones(num_instances, dtype="float64")
        else:
            density_ins_score = np.exp(-(distance_ins_to_ins / dis_cut) ** 2).sum(axis=1)

        # Step 4. Priority of every instance.
        lambda_ins_score = np.multiply(affinity_ins_score, density_ins_score)

        # Step 5. Take the prototypes.
        # The previous implementation marked a chosen instance with -1 while
        # selecting by min(); because every score is non-negative the marker
        # itself became the minimum and the same instance was appended on
        # every iteration. Ranking once yields distinct instances.
        # NOTE(review): selection keeps the original "smallest score first"
        # order; confirm that lowest (not highest) priority is intended.
        num_prototypes = math.ceil(self.scale_num * num_instances)
        for index in self._lowest_score_indices(lambda_ins_score, num_prototypes):
            self.inner_bag_distance.append(instances[index])
代表实例选择:
# Usage excerpt (same statement as the first step of DIE.__embedding):
# build an SDI selector from self.train_final_bag and read the pool of
# representative instances it selected.
discriminative_instance = SDI(self.train_final_bag, self.ra_ins, self.num_dis_ins).final_discriminative_instance
代表实例类
class SDI:
    """Choose representative instances among the instance prototypes.

    The prototypes come from ``DIP.DIP(bags, ratio_instance_to_bag)``. Each
    prototype is scored by ``density * distance to the nearest prototype of
    strictly higher density`` and the ``num_SDI`` best-scoring prototypes
    form the representative pool.

    Attributes:
        discriminative_instance: Prototypes returned by the DIP selector.
        final_discriminative_instance: Array of the selected representatives,
            in selection order.
    """

    def __init__(self, bags, ratio_instance_to_bag, num_SDI):
        self.bags = bags
        self.ratio_ins = ratio_instance_to_bag
        self.num_SDI = num_SDI
        self.discriminative_instance = self.__print_SDI()
        self.final_discriminative_instance = self.__select_discriminative_instance()

    def __print_SDI(self):
        """Return the instance prototypes computed by the DIP selector."""
        return DIP.DIP(self.bags, self.ratio_ins).discriminative_instance

    def __select_discriminative_instance(self):
        """Score every prototype and return the ``num_SDI`` highest-scoring ones."""
        # Step 1. Pairwise distances between prototypes.
        prototypes = self.discriminative_instance
        pairwise = cdist(prototypes, prototypes)
        total = len(pairwise)

        # Step 2. Cutoff distance (40 % of the largest distance) and density.
        cutoff = 0.4 * pairwise.max()
        density = np.zeros(total).astype("float64")
        for row, distances in enumerate(pairwise):
            if cutoff == 0:
                # All prototypes coincide: give each the same density.
                density[row] = 1
            else:
                density[row] = sum(np.exp(-(distances / cutoff) ** 2))

        # Step 3. Distance to the closest prototype with strictly higher
        # density; infinity when no such prototype exists.
        nearest_denser = []
        for row in range(total):
            candidates = sorted(
                pairwise[row][col] for col in range(total) if density[col] > density[row]
            )
            nearest_denser.append(candidates[0] if candidates else float('inf'))

        # Step 4. Score = distance * density; take the best num_SDI scores.
        scores = np.multiply(nearest_denser, density).tolist()
        chosen = []
        for _ in range(self.num_SDI):
            best = scores.index(max(scores))
            chosen.append(prototypes[best])
            # Mark as used so the next pass finds the following maximum.
            scores[best] = -1
        return np.array(chosen)
自适应包映射技术:
class DIE:
    """Map every bag to a single vector using a pool of representative instances.

    Each instance of a bag is matched to its nearest representative
    (according to ``dis_euclidean``) and the difference
    ``instance - representative`` is accumulated, either into one shared
    vector (``'add'``) or into the slot reserved for that representative
    (``'con'``). The accumulated vector is then signed-square-rooted and
    L2-normalised.

    Args:
        all_bag: Indexable bag collection with a ``shape`` attribute;
            ``all_bag[i][0]`` is a 2-D array whose last column is dropped and
            ``all_bag[i, -1]`` is compared with 1 to split bags.
        tr_index: Indices into ``all_bag`` used to build the representative pool.
        bags_status: ``'g'`` uses all bags in ``tr_index``, ``'p'`` only those
            whose last entry equals 1, ``'n'`` only the others.
        embed_status: ``'add'`` or ``'con'`` (see above).
        ratio_instance_to_bag: Ratio forwarded to ``SDI``.
        num_discriminative_instance: Size of the representative pool.

    Attributes:
        train_final_bag: Bags selected by ``bags_status``.
        embedding_vector: One embedded row per bag in ``all_bag``.
    """

    def __init__(self, all_bag, tr_index, bags_status, embed_status, ratio_instance_to_bag,
                 num_discriminative_instance):
        self.bags = all_bag
        self.bags_status = bags_status
        self.embed_status = embed_status
        self.tr_index = tr_index
        self.ra_ins = ratio_instance_to_bag
        self.num_dis_ins = num_discriminative_instance
        self.train_final_bag = self.__get_bags()
        self.embedding_vector = self.__embedding()

    @staticmethod
    def _normalize(vector):
        """Apply signed square root, then scale to unit L2 norm.

        A vector whose norm is zero is returned without the division; the
        previous code divided by zero here and produced a NaN embedding.
        """
        vector = np.sign(vector) * np.sqrt(np.abs(vector))
        norm = np.linalg.norm(vector)
        if norm > 0:
            vector = vector / norm
        return vector

    def __embedding(self):
        """Return the array of embedded bags (empty if ``embed_status`` is unknown)."""
        # Step 1. Representative-instance pool.
        representatives = SDI(self.train_final_bag, self.ra_ins, self.num_dis_ins).final_discriminative_instance

        # Step 2. Embed every bag.
        bag_to_vector = []
        for bag_i in range(self.bags.shape[0]):
            num_features = self.bags[bag_i][0].shape[1] - 1
            if self.embed_status == 'add':
                single_vector = np.zeros(num_features).astype("float64")
            elif self.embed_status == 'con':
                single_vector = np.zeros(self.num_dis_ins * num_features).astype("float64")
            else:
                # Unknown mode: report it and stop; the result stays empty.
                print('Your input model is not exist!\n')
                break

            for ins_i in self.bags[bag_i][0][:, :-1]:
                distances = [
                    dis_euclidean(ins_i, representatives[dis_ins_i])
                    for dis_ins_i in range(self.num_dis_ins)
                ]
                nearest = distances.index(min(distances))
                offset = ins_i - representatives[nearest]
                if self.embed_status == 'add':
                    single_vector += offset
                else:
                    # 'con': each representative owns a num_features-wide slot.
                    start = nearest * num_features
                    single_vector[start:start + num_features] += offset

            # Step 3. Normalise (safe for an all-zero vector).
            bag_to_vector.append(self._normalize(single_vector))
        return np.array(bag_to_vector)

    def __get_bags(self):
        """Return the bags from ``tr_index`` selected by ``bags_status``.

        Returns ``None`` when ``bags_status`` is not ``'g'``, ``'p'`` or ``'n'``.
        """
        if self.bags_status == 'g':
            return self.bags[self.tr_index]
        if self.bags_status == 'p':
            positive_bags_index = [idx for idx in self.tr_index if self.bags[idx, -1] == 1]
            return self.bags[positive_bags_index]
        if self.bags_status == 'n':
            negative_bags_index = [idx for idx in self.tr_index if not self.bags[idx, -1] == 1]
            return self.bags[negative_bags_index]
        return None