Python部分:
利用kNN(k=5)构建近邻图,得到所有的边(edges)以及各边的权重(costs)。
import numpy as np
from itertools import permutations,combinations
from sklearn.datasets import load_iris
from scipy.spatial.distance import pdist,squareform
import matlab
import matlab.engine
# Build a symmetric k-nearest-neighbour graph over the iris samples, then pass
# its edges and edge costs to the MATLAB function `sws` and print the result.
X, y = load_iris(return_X_y=True)
N = len(y)
neiNum = 5  # number of nearest neighbours kept per sample

# Dense N x N matrix of pairwise Euclidean distances.
dist_list = pdist(X, metric='euclidean')
dist_matrix = squareform(dist_list)

# nn_matrix[i] holds the indices of the neiNum samples closest to sample i.
nn_matrix = np.zeros((N, neiNum), dtype=int)
for i in range(N):
    ord_idx = np.argsort(dist_matrix[i])
    # Remove the sample itself *before* truncating: when the data contains
    # duplicated points, i is not guaranteed to appear among the first
    # neiNum + 1 entries of the ordering.
    nn_matrix[i] = ord_idx[ord_idx != i][:neiNum]

# Undirected edge (i, j), i < j, whenever either sample lists the other as a
# neighbour. Sets make the membership tests O(1).
neighbour_sets = [set(row.tolist()) for row in nn_matrix]
nn_list = []
nn_cost = []
for i in range(N - 1):
    for j in range(i + 1, N):
        if i in neighbour_sets[j] or j in neighbour_sets[i]:
            # Stored 1-based because the indices are consumed by MATLAB.
            nn_list.append([i + 1, j + 1])
            nn_cost.append(dist_matrix[i, j])
print(len(nn_list))
print(len(nn_cost))
print(len(nn_matrix))

nn_list = np.array(nn_list)
# Column vector (|E| x 1); reshape also copes with an empty edge list.
nn_cost = np.array(nn_cost).reshape(-1, 1)

e = matlab.double(nn_list.tolist())
cost = matlab.double(nn_cost.tolist())

# Start the engine only once the inputs are ready, and always shut it down so
# no MATLAB process is left behind if the call fails.
engine = matlab.engine.start_matlab()
try:
    hyperGraph = np.array(engine.sws(e, cost, N))
finally:
    engine.quit()

# Transpose the array returned by sws before printing it.
hyperGraph = hyperGraph.T
print(hyperGraph.shape)
print(hyperGraph)
接下来是被调用的MATLAB代码:Pulak Purkait编写的Swendsen-Wang采样(sws)。
function inds = sws(e, cost, N)
% SWS  Sample hyperedges from a graph, following the Swendsen-Wang cut of
% Barbu and Zhu.
%-------
% Input:
%-------
% e    (|E|x2) : Edges, as pairs of 1-based vertex indices.
% cost (|E|x1) : Edge costs.
% N            : Number of vertices.
%--------
% Output:
%--------
% inds (n x No_Edge) : Sampled hyperedges, one per column; each column holds
%                      n vertex indices drawn from one connected component.

% Sampling parameters (fixed here rather than passed in by the caller).
No_Edge = 200;  % number of hyperedges to sample
n = 5;          % number of vertices per hyperedge
lambda = 3;     % scale factor in the edge-probability kernel

N = double(N);  % convert in case the caller passes an integer class

% Every vertex gets the same label, so no edge is switched off by labels.
f = ones(N, 1);

csts = cost.^2;
mn_csts = mean(csts);
q = exp(-csts ./ (lambda^2 * mn_csts));  % Edge probabilities.

% Sample the connected component.
% Edges that remain 'on' due to same labels.
eon_det = f(e(:,1)) == f(e(:,2));

inds = zeros(n, No_Edge);
k = 1;
while (k <= No_Edge)
    % Edges that are turned 'on' stochastically.
    eon_sto = rand(length(q), 1) <= q;

    % An edge stays 'on' only if its end labels agree and the random draw
    % kept it.
    eon = logical(eon_det .* eon_sto);

    % Get current set of connected components.
    e_on = e(eon, :);
    Eon = sparse(e_on(:,1), e_on(:,2), ones(sum(eon), 1), N, N);
    Eon = Eon + Eon';  % Make it symmetric.
    [S, C] = graphconncomp(Eon);

    % Weight each component by the mean probability of its 'on' edges.
    q2 = q(eon);
    w = ones(1, S);
    for s = 1:S
        id = find(C == s);
        if numel(id) < n
            % Fewer than n vertices cannot yield a hyperedge: never pick it.
            w(s) = 0;
        else
            [X, Y] = meshgrid(id, id);
            [~, idb] = intersect(e_on, [X(:), Y(:)], 'rows');
            w(s) = sum(q2(idb)) / numel(idb);
        end
    end

    % No component is large enough this round; redraw the edges.
    if sum(w) < eps
        continue;
    end

    for t = 1:sum(w > 0)
        if k > No_Edge
            return;
        end

        % Pick a connected component R with probability proportional to w.
        R = randsample(S, 1, true, w);

        % Draw n distinct vertices from the chosen component.
        inds_k = find(C == R);
        rsmpl = randsample(inds_k, n);
        rsmpl = rsmpl(:);

        % Store the sample only if an identical column is not stored yet.
        % NOTE(review): the comparison is order-sensitive, so the same vertex
        % set drawn in a different order is accepted as a new hyperedge.
        if ~ismember(rsmpl', inds(:, 1:k-1)', 'rows')
            inds(:, k) = rsmpl;
            k = k + 1;
        end
    end
end
有时间了再用Python实现sws。