def mann_cost(A, xx, S_m, D_m, label, m, alph, reg, mu=1, s_type=1):
    """MANN margin-based metric-learning cost and gradient.

    Projects the N x D data through the linear map ``A`` (K x D) and, for each
    sample ``i``, applies a hinge penalty when the soft estimate of the
    distance to its similar set ``S_m[i]`` (plus a margin of 1) exceeds the
    soft-min distance to its dissimilar set ``D_m[i]``.  A same-label scatter
    term (weight ``mu``) and a Frobenius regularizer on ``A`` (weight ``reg``)
    are added.  Motivated by Goldberger et al. (2004) NCA-style objectives.

    Inputs:
        A       KxD    current linear transformation
        xx      NxD    input data
        S_m     list   S_m[i] = indices of samples similar to sample i
        D_m     list   D_m[i] = indices of samples dissimilar to sample i
        label   Nx1    class labels (column vector; compared elementwise)
        m       float  softmin temperature for the dissimilar-set distance
        alph    float  temperature for the similar-set distance; its sign
                       selects max- vs min-centring before exponentiation
        reg     float  weight of the Frobenius regularizer on A
        mu      float  weight of the same-label scatter term (default 1)
        s_type  int    1 -> mean-normalized log-sum-exp with the centring
                       offset added back; otherwise a plain log-sum-exp
                       (NOTE(review): this branch drops the ave_s/ave_d
                       offsets -- confirm that is intended)

    Outputs:
        yy  scalar cost
        gg  DxD gradient-related matrix (w.r.t. the metric M = A^T A; the
            chain rule through A itself was left commented out upstream)

    Note: this function should be passed to a MINIMIZER.
    """
    N, D = xx.shape
    assert A.shape[1] == D
    # Projected data; (A @ xx.T).T is N x K (one projected row per sample).
    zz = np.dot(A, xx.T).T
    # TODO Subsample part of data to compute loss on.
    yy = 0                   # accumulated hinge cost
    gg = np.zeros((D, D))    # accumulated hinge gradient term
    yy1 = 0                  # same-label scatter cost
    gg1 = np.zeros((D, D))   # same-label scatter gradient term
    s_yy1 = 1                # scatter normalizer (starts at 1: avoids /0)
    for i in range(N):
        S_m_i = S_m[i]
        D_m_i = D_m[i]
        # A sample with no similar or no dissimilar partners contributes
        # nothing (and its hinge would be undefined).
        if len(S_m_i) < 1 or len(D_m_i) < 1:
            continue
        dist_s1 = square_dist(zz[i:i + 1, :], zz[S_m_i, :])  # 1 x |S|
        dist_d1 = square_dist(zz[i:i + 1, :], zz[D_m_i, :])  # 1 x |D|
        # Hard-example mining was disabled in the original (dead
        # `if 1 < 0:` branch).  These masks are therefore always all-True;
        # they are kept because their shape drives the fancy indexing in
        # the gradient accumulation below.
        ind_d_hard = dist_d1 < np.max(dist_d1) + 1
        ind_s_hard = dist_s1 < np.max(dist_s1) + 1
        dist_s1 = dist_s1.reshape((dist_s1.shape[1],))
        dist_d1 = dist_d1.reshape((dist_d1.shape[1],))
        # Centre the distances before exponentiating so np.exp cannot
        # overflow (standard log-sum-exp shift).
        if alph < 0:
            ave_s = np.max(dist_s1)
        else:
            ave_s = np.min(dist_s1)
        dist_s = dist_s1 - ave_s
        ave_d = np.min(dist_d1)
        dist_d = dist_d1 - ave_d
        dist_s_exp = np.exp(-alph * dist_s) + 1e-8
        dist_d_exp = np.exp(-m * dist_d) + 1e-8
        # Defensive NaN scrub kept from the original: replace any NaN with
        # the largest finite entry so the softmax weights stay usable.
        dist_s_exp[np.isnan(dist_s_exp)] = np.max(dist_s_exp[~np.isnan(dist_s_exp)])
        dist_d_exp[np.isnan(dist_d_exp)] = np.max(dist_d_exp[~np.isnan(dist_d_exp)])
        if s_type == 1:
            # Soft distances via mean-normalized LSE; ave_* restores the
            # centring shift, and +1 on t_s is the hinge margin.
            t_s = -np.log(np.sum(dist_s_exp) / dist_s_exp.shape[0]) / alph + ave_s + 1
            t_d = -np.log(np.sum(dist_d_exp) / dist_d_exp.shape[0]) / m + ave_d
        else:
            t_s = -np.log(np.sum(dist_s_exp)) / alph + 1
            t_d = -np.log(np.sum(dist_d_exp)) / m
        # Hinge: penalize only when the similar-side soft distance (plus
        # margin) exceeds the dissimilar-side soft distance.
        t = t_s - t_d
        if t > 0:
            yy = yy + t
            # Softmax weights over each set give the LSE subgradient.
            t_s_rate = dist_s_exp / np.sum(dist_s_exp)
            t_d_rate = dist_d_exp / np.sum(dist_d_exp)
            xs = xx[np.array(S_m_i)[ind_s_hard[0, :]], :]
            xd = xx[np.array(D_m_i)[ind_d_hard[0, :]], :]
            s_temp = np.dot((t_s_rate[np.newaxis, :].T * xs).T, xs)
            d_temp = np.dot((t_d_rate[np.newaxis, :].T * xd).T, xd)
            gg = gg + s_temp - d_temp
            if np.isnan(gg.sum()):
                print('nan')
        # Same-label scatter accumulation (presumably inside the loop but
        # outside the hinge, so every valid sample contributes -- the
        # original paste had ambiguous indentation; TODO confirm).
        same = (label[i] == label)[:, 0]
        gg1 = gg1 + np.dot(xx[same, :].T, xx[same, :])
        yy1 = yy1 + np.sum(zz[same, :] ** 2)
        s_yy1 = s_yy1 + np.sum(label[i] == label)
    # Combine hinge term (normalized by projection dim K), scatter term,
    # and the Frobenius regularizer reg * ||A||_F^2.
    gg = gg / zz.shape[1] + mu * gg1 / s_yy1 + reg * np.eye(D)
    yy = yy / zz.shape[1] + mu * yy1 / s_yy1 + reg * np.dot(A.ravel(), A.ravel())
    return yy, gg