"""Relief feature weighting for labelled samples.

The original header tagged this script as ``python2.7``. ``distanceNorm`` and
``fit`` only need NumPy and the standard library; the demonstration at the
bottom additionally needs scikit-learn and runs only when the file is executed
as a script.
"""
from random import randrange

import numpy as np


def distanceNorm(Norm, D_value):
    """Return a norm of the difference array ``D_value``.

    Args:
        Norm: Name of the norm. ``'1'`` sums the absolute values, ``'2'``
            takes the square root of the sum of squares and ``'Infinity'``
            takes the largest absolute value.
        D_value: Numeric array (typically the difference of two samples).

    Returns:
        The requested norm as a scalar.

    Raises:
        NotImplementedError: If ``Norm`` is not one of the names above.
    """
    if Norm == '1':
        return np.sum(np.absolute(D_value))
    if Norm == '2':
        return np.sqrt(np.sum(np.power(D_value, 2)))
    if Norm == 'Infinity':
        return np.max(np.absolute(D_value))
    raise NotImplementedError('We will program this later......')


def _pairwise_distances(features):
    """Return the symmetric matrix of Euclidean distances between all rows."""
    n_samples = len(features)
    distance = np.zeros((n_samples, n_samples))
    for row in range(n_samples):
        for col in range(row + 1, n_samples):
            value = distanceNorm('2', features[row] - features[col])
            distance[row, col] = value
            distance[col, row] = value
    return distance


def _distances_from(features, index):
    """Return the Euclidean distance from row ``index`` to every row."""
    anchor = features[index]
    return np.array([distanceNorm('2', anchor - other) for other in features])


def _nearest_hit_and_miss(features, labels, index, distances):
    """Return the closest same-label and closest other-label rows.

    The sample at ``index`` is never a candidate. Either element of the
    returned pair is ``None`` when no such neighbour exists.
    """
    own_label = labels[index]
    near_hit = None
    near_miss = None
    # sorted() is stable, so equally distant samples keep their index order.
    for candidate in sorted(range(len(distances)), key=lambda j: distances[j]):
        if candidate == index:
            continue
        if labels[candidate] == own_label:
            if near_hit is None:
                near_hit = features[candidate]
        elif near_miss is None:
            near_miss = features[candidate]
        if near_hit is not None and near_miss is not None:
            break
    return near_hit, near_miss


def fit(features, labels, iter_ratio):
    """Estimate Relief feature weights.

    ``int(iter_ratio * n_samples)`` samples are drawn at random (with
    replacement, via ``random.randrange``). For each draw the squared
    per-feature difference to the nearest same-label sample is subtracted
    from the weights and the squared difference to the nearest
    different-label sample is added.

    Args:
        features: 2-D array-like of shape ``(n_samples, n_features)``.
        labels: Sequence of ``n_samples`` class labels, compared with ``==``.
        iter_ratio: Number of draws expressed as a multiple of ``n_samples``.

    Returns:
        Array of ``n_features`` weights, i.e. the accumulated updates divided
        by ``iter_ratio * n_samples``. The same array is also printed.

    Raises:
        ValueError: If a drawn sample has no sample with a different label,
            so no nearest miss exists.
    """
    features = np.asarray(features)
    n_samples, n_features = np.shape(features)
    weight = np.zeros(n_features)

    # Filling the whole matrix costs n*(n-1)/2 distance evaluations, the
    # on-demand path costs n per draw; they break even at iter_ratio == 0.5.
    # One flag drives both the fill and the lookup so they cannot disagree.
    precompute = iter_ratio >= 0.5
    distance = _pairwise_distances(features) if precompute else None

    for _ in range(int(iter_ratio * n_samples)):
        index_i = randrange(0, n_samples, 1)
        self_features = features[index_i]

        if precompute:
            row = distance[index_i]
        else:
            row = _distances_from(features, index_i)

        near_hit, near_miss = _nearest_hit_and_miss(
            features, labels, index_i, row)
        if near_miss is None:
            raise ValueError(
                'fit needs at least one sample whose label differs from '
                'the drawn sample')
        if near_hit is None:
            # Only member of its class: the hit term contributes nothing.
            near_hit = self_features

        weight = (weight
                  - np.power(self_features - near_hit, 2)
                  + np.power(self_features - near_miss, 2))

    result = weight / (iter_ratio * n_samples)
    print(result)
    return result


if __name__ == '__main__':
    # scikit-learn is needed only by this demonstration, so it is imported
    # here; distanceNorm and fit stay importable without it.
    from sklearn.preprocessing import normalize

    X = normalize(X=np.array([[1, 2, 3], [1, 3, 3], [1, 5, 4],
                              [1, 2, 8], [1, 1, 9], [1, 2, 10]]),
                  norm='l2', axis=0)
    Y = [1, 1, 1, 0, 0, 0]
    Y = np.array(Y)
    for i in range(0, 100):
        weight = fit(X, Y, 1)
    # NOTE(review): the source layout was lost; this summary is assumed to
    # follow the loop rather than run inside it - confirm.
    print(np.average(weight[0]), np.average(weight[1]), np.average(weight[2]))
(Python版本)Relief算法有效地对特征进行选择
最新推荐文章于 2020-12-10 12:29:50 发布