这是一个深度聚类的程序,用了 sklearn 的 PCA 进行数据降维;把数据降维后,在计算 loss 的时候出现了问题。
def cluster(self, data, verbose=False):
    """Run k-means on the given features and record cluster assignments.

    Args:
        data (np.array N * dim): features to cluster.
        verbose (bool): if True, print the elapsed clustering time.

    Returns:
        The final k-means loss reported by ``run_kmeans``.
    """
    start = time.time()

    # PCA-reduce, whiten and L2-normalize the raw features.
    features = preprocess_features(data)

    # Cluster the preprocessed features into self.k groups.
    assignments, loss = run_kmeans(features, self.k, verbose)

    # Bucket sample indices by the cluster each one was assigned to.
    self.images_lists = [[] for _ in range(self.k)]
    for idx in range(len(data)):
        self.images_lists[assignments[idx]].append(idx)

    if verbose:
        print('k-means time: {0:.0f} s'.format(time.time() - start))

    return loss
PCA 部分在这里:
def preprocess_features(npdata, pca=256):
    """Preprocess an array of features: PCA-reduce, whiten and L2-normalize.

    Args:
        npdata (np.array N * ndim): features to preprocess.
        pca (int): requested output dimensionality of the PCA projection.

    Returns:
        np.array of dim N * n_components: PCA-reduced, whitened and
        L2-normalized data, where n_components <= pca (clamped to the data).
    """
    nsamples, ndim = npdata.shape
    # float32 keeps memory low and matches what k-means backends expect.
    npdata = npdata.astype('float32')

    # sklearn's PCA raises ValueError if n_components exceeds
    # min(n_samples, n_features); clamp the requested dimension so small
    # batches / low-dimensional inputs do not crash.
    n_components = min(pca, nsamples, ndim)

    # Apply PCA-whitening with sklearn. Use a distinct name for the fitted
    # estimator so it does not shadow the integer `pca` parameter.
    # NOTE(review): whitening divides each component by its singular value;
    # near-zero-variance components can blow up and later yield NaN/inf
    # losses in k-means — if that occurs, reduce n_components. TODO confirm.
    pca_model = PCA(n_components=n_components, whiten=True)
    npdata = pca_model.fit_transform(npdata)

    # L2-normalize each row so samples lie on the unit sphere.
    normalizer = Normalizer(norm='l2')
    npdata = normalizer.fit_transform(npdata)
    return npdata
#def preprocess_features(npdata, pca=256):
# """Preprocess an array of features.
# Args:
# npdata (np.array N * ndim): features to preprocess
# pca (int): dim of output
# Returns:
# np.array of dim N * pca: data PCA-reduced, whitened and L2-normalized
# """
# _, ndim = npdata.shape
# npdata = npdata.astype('float32')
# Apply PCA-whitening with Faiss
# mat = faiss.PCAMatrix (ndim, pca, eigen_power=-0.5)
# mat.train(npdata)
# assert mat.is_trained
# npdata = mat.apply_py(npdata)
# L2 normalization
# row_sums = np.linalg.norm(npdata, axis=1)
# npdata = npdata / row_sums[:, np.newaxis]
# return npdata
这一部分有两种 PCA 方法:一种是 sklearn 的,另一种是 faiss 的,两个都出现了问题。
sklearn 出现的问题如下(见报错截图):
faiss 出现的问题如下(见报错截图):
有没有大神可以帮忙解决一下呀,谢谢啦