安装
conda install faiss-gpu=1.5.3 ## python=3.6 可以正常使用k-means的版本
conda install -c pytorch faiss-gpu
#安装gpu版本
#确保已经安装了CUDA,否则会自动安装cpu版本。
conda install faiss-gpu -c pytorch # 默认 For CUDA8.0
conda install faiss-gpu cuda90 -c pytorch # For CUDA9.0
conda install faiss-gpu cuda91 -c pytorch # For CUDA9.1
使用
Kmeans
# Import faiss (path appended for a local source build).
import sys
sys.path.append('/home/maliqi/faiss/python/')
import faiss

import numpy as np

# Synthetic data: n_data vectors of dimension d drawn from N(mu, sigma).
d = 512       # dimension
n_data = 2000
np.random.seed(0)
mu = 3
sigma = 0.1
# faiss only accepts float32 input -- note the dtype!
data = np.random.normal(mu, sigma, (n_data, d)).astype('float32')

# Clustering parameters.
ncentroids = 1024
niter = 20
verbose = True
d = data.shape[1]
# Fix: niter and verbose were defined above but never passed to Kmeans,
# so faiss silently used its defaults. Pass them through explicitly.
kmeans = faiss.Kmeans(d, ncentroids, niter=niter, verbose=verbose)
kmeans.train(data)

# Print the cluster centroids (array of shape ncentroids x d).
print(kmeans.centroids)
HNSW
-
检索 : Approximate Nearest Neighbor NSW + HNSW https://blog.csdn.net/ResumeProject/article/details/122182837
-
Faiss中HNSW代码讲解:https://www.bilibili.com/video/BV1q8411t7YU/?
-
https://github.com/facebookresearch/faiss/wiki/Faiss-indexes
向量编码Index
- 可用的编码方式(从压缩最弱到压缩最强):
IndexFlat:完全不编码,向量不经压缩直接存储;
IndexScalarQuantizer(QT_fp16):16 位浮点编码,向量被压缩为 16 位浮点数,可能带来少量精度损失;
IndexScalarQuantizer(QT_8bit / QT_6bit / QT_4bit):8/6/4 位整数编码,向量被量化为 256/64/16 级;
IndexPQ:PQ 编码,向量被切分为子向量,每个子向量被量化为少量比特(通常为 8 位),参见下面的示例;
IndexResidual:残差编码,向量先量化,再通过残差逐步细化;每个量化阶段可以调整码本大小。
Search
# NOTE(review): fragment of a method -- the enclosing `def`, and the
# initialization of doc_fas / out_info / title / topk, are outside this
# snippet (indentation was also lost in the paste).
for sent_feat_info in doc_feat[1:]:# iterate over sentences (doc_feat[0] presumably holds the title -- verify)
sent_id, sent, sec, text_emb = sent_feat_info
#text_emb = text_emb.cpu().numpy()
# faiss feature recall
emb_text = np.array(text_emb).astype(np.float32)
D, I = self.index.search(emb_text, topk)# search: D = distances, I = neighbor indices
res_imgid = [self.imgidx[x] for x in I.tolist()[0]]
res_sim = D.tolist()[0]
res_emb = [self.data[x] for x in I.tolist()[0]]
print (sent, sec, res_imgid)
doc_fas.append([title, sent, sec, res_imgid, res_sim, res_emb])# convert and store the results
out_info.append(doc_fas)
return out_info
参考与更多
faiss documentation python
https://github.com/facebookresearch/faiss/wiki/
https://github.com/CCCBora/faiss_chat
- FAISS Chat: 和本地数据库聊天!
- https://www.bilibili.com/video/BV11k4y1W7ZE/?
# faiss implementation of k-means
clus = faiss.Clustering(d, nmb_clusters)
Faiss building blocks: clustering, PCA, quantization
Runs kmeans on 1 GPU.
deepcluster = Kmeans(args.n_clusters, knn=1)
clustering_loss, _ = deepcluster.cluster(features, verbose=True)
return deepcluster.centroids, clustering_loss
class Kmeans:
    """Thin wrapper around `run_kmeans` that retains clustering results."""

    def __init__(self, k, knn=1):
        self.k = k              # number of clusters
        self.knn = knn          # neighbours returned per point by the index search
        self.centroids = None
        self.labels = None
        self.images_lists = []  # per-cluster lists of sample indices
        self.dists = None

    def cluster(self, data, verbose=False):
        """Performs k-means clustering.

        Args:
            data (np.array N * dim): data to cluster
        """
        t0 = time.time()
        # Cluster the data on the GPU via faiss.
        labels, loss, self.centroids, self.dists = run_kmeans(
            data, self.k, verbose, self.knn)
        self.labels = labels
        # Group sample indices by their assigned cluster.
        self.images_lists = [[] for _ in range(self.k)]
        for idx in range(len(data)):
            self.images_lists[labels[idx]].append(idx)
        if verbose:
            print('k-means time: {0:.0f} s'.format(time.time() - t0))
        return loss, self.dists
def run_kmeans(x, nmb_clusters, verbose=False, knn=1):
    """Runs kmeans on 1 GPU.

    Args:
        x: data, array of shape (n_data, dim)
        nmb_clusters (int): number of clusters
        verbose (bool): print the loss evolution when True
        knn (int): number of nearest centroids returned per sample

    Returns:
        tuple: (list of cluster ids per sample, final loss,
                centroids of shape (nmb_clusters, dim),
                distances of shape (n_data, knn))
    """
    # faiss only accepts contiguous float32 input; cast defensively so
    # callers may pass float64 (e.g. the np.random.randn output used in
    # the __main__ smoke test below).
    x = np.ascontiguousarray(x, dtype='float32')
    n_data, d = x.shape  # e.g. 560900, 1152

    # faiss implementation of k-means
    nmb_clusters = int(nmb_clusters)
    clus = faiss.Clustering(d, nmb_clusters)
    clus.niter = 30
    clus.max_points_per_centroid = 10000000

    # Build a flat L2 GPU index on device 0 for the assignment step.
    res = faiss.StandardGpuResources()
    flat_config = faiss.GpuIndexFlatConfig()
    flat_config.useFloat16 = False
    flat_config.device = 0
    index = faiss.GpuIndexFlatL2(res, d, flat_config)

    # perform the training
    clus.train(x, index)
    dists, labels = index.search(x, knn)
    # NOTE(review): `clus.obj` was removed in faiss >= 1.7 in favour of
    # clus.iteration_stats -- confirm against the installed version.
    losses = faiss.vector_to_array(clus.obj)
    centroids = faiss.vector_float_to_array(clus.centroids).reshape(nmb_clusters, d)
    if verbose:
        print('k-means loss evolution: {0}'.format(losses))
    return [int(n[0]) for n in labels], losses[-1], centroids, dists
import time
import faiss
import torch
import numpy as np
def compute_features(dataloader, model, args, use_predict_fn=False, concat_vid=False, keep_dim=False):
    """Run the model over a dataloader and collect feature vectors.

    Args:
        dataloader: iterable of batches; item 0 is the pose tensor and
            item 1 (when concat_vid) holds precomputed video features.
        model: torch module; run in eval mode with gradients disabled.
        args: namespace providing .verbose and .device.
        use_predict_fn: call model.predict(...) instead of model(...).
        concat_vid: append video features to each pose embedding.
        keep_dim: reshape the output to (N, -1, v) using the joint
            dimension of the final batch.

    Returns:
        np.ndarray of all batch features concatenated along axis 0.
    """
    cargs = args
    if cargs.verbose:
        print('Compute features')
    start = time.time()
    model.eval()
    features = []
    # Any label information in the dataloader is discarded.
    for batch_idx, data_arr in enumerate(dataloader):
        pose_data = data_arr[0]
        with torch.no_grad():
            data = pose_data.to(args.device)
            if use_predict_fn:
                pose_features = model.predict(data).data.to('cpu', non_blocking=True).numpy()
                pose_features = pose_features.reshape(-1, 1)
            else:
                pose_features = model(data)
                # Some models return (features, ...) tuples; keep item 0.
                if isinstance(pose_features, (list, tuple)):
                    pose_features = pose_features[0]
                pose_features = pose_features.data.to('cpu', non_blocking=True).numpy()
        if concat_vid:
            # Concatenate each clip's video features to its pose embedding.
            vid_features = data_arr[1]
            batch_features = np.concatenate([pose_features, vid_features], axis=1)
        else:
            batch_features = pose_features
        features.append(batch_features)
        # Measure elapsed time per batch.
        batch_time = time.time() - start
        start = time.time()
        if cargs.verbose and (batch_idx % 200) == 0:
            print('{0} / {1}\t'
                  'Time: {batch_time:.3f})'
                  .format(batch_idx, len(dataloader), batch_time=batch_time))
    features = np.concatenate(features)
    if keep_dim:
        # NOTE(review): relies on `data` from the final loop iteration and
        # assumes 4-D pose batches (n, c, t, v) -- confirm with callers.
        n, c, t, v = data.size()
        features = features.reshape(features.shape[0], -1, v)
    return features
class Kmeans:
    """Holds the configuration and outcome of a faiss GPU k-means run."""

    def __init__(self, k, knn=1):
        # k: number of clusters; knn: neighbours fetched per sample.
        self.k = k
        self.knn = knn
        self.centroids = None
        self.labels = None
        self.images_lists = []
        self.dists = None

    def cluster(self, data, verbose=False):
        """Performs k-means clustering.

        Args:
            data (np.array N * dim): data to cluster
        """
        start = time.time()
        # cluster the data
        result = run_kmeans(data, self.k, verbose, self.knn)
        labels, loss, self.centroids, self.dists = result
        self.labels = labels
        # Invert the label assignment into per-cluster index lists.
        self.images_lists = [[] for _ in range(self.k)]
        for sample_idx in range(len(data)):
            self.images_lists[labels[sample_idx]].append(sample_idx)
        if verbose:
            print('k-means time: {0:.0f} s'.format(time.time() - start))
        return loss, self.dists
def run_kmeans(x, nmb_clusters, verbose=False, knn=1):
    """Runs kmeans on 1 GPU.

    Args:
        x: data, array of shape (n_data, dim)
        nmb_clusters (int): number of clusters
        verbose (bool): print the loss evolution when True
        knn (int): number of nearest centroids returned per sample

    Returns:
        tuple: (list of cluster ids per sample, final loss,
                centroids of shape (nmb_clusters, dim),
                distances of shape (n_data, knn))
    """
    # Fix: faiss requires contiguous float32 data; the __main__ driver
    # passes np.random.randn output (float64), so cast here.
    x = np.ascontiguousarray(x, dtype='float32')
    n_data, d = x.shape

    # faiss implementation of k-means
    nmb_clusters = int(nmb_clusters)
    clus = faiss.Clustering(d, nmb_clusters)
    clus.niter = 30
    clus.max_points_per_centroid = 10000000

    # Flat L2 GPU index on device 0 used for centroid assignment.
    res = faiss.StandardGpuResources()
    flat_config = faiss.GpuIndexFlatConfig()
    flat_config.useFloat16 = False
    flat_config.device = 0
    index = faiss.GpuIndexFlatL2(res, d, flat_config)

    # perform the training
    clus.train(x, index)
    dists, labels = index.search(x, knn)
    # NOTE(review): clus.obj exists only in faiss < 1.7; newer releases
    # expose the losses via clus.iteration_stats -- confirm version.
    losses = faiss.vector_to_array(clus.obj)
    centroids = faiss.vector_float_to_array(clus.centroids).reshape(nmb_clusters, d)
    if verbose:
        print('k-means loss evolution: {0}'.format(losses))
    return [int(n[0]) for n in labels], losses[-1], centroids, dists
if __name__ == "__main__":
    # Smoke test: cluster 560900 random 1152-d vectors into 10 clusters.
    # Fix: faiss only accepts float32; np.random.randn yields float64,
    # so cast before handing the data to run_kmeans.
    data = np.random.randn(560900, 1152).astype('float32')
    labels, loss, centroids, dists = run_kmeans(data, 10, verbose=True, knn=1)
    print(123)
https://pypi.tuna.tsinghua.edu.cn/simple/faiss-gpu/
geng
video:Milvus 问答 #12:新版本、Postgres向量检索插件、比Faiss好用?
-
Classification is a Strong Baseline for Deep Metric Learning (BMVC '19)
深度度量学习旨在学习一种将图像像素映射到嵌入特征向量的函数,该特征向量对图像之间的相似性进行建模。度量学习的两个主要应用是基于内容的图像检索和人脸验证。对于检索任务,大多数当前最先进的(SOTA)方法是基于三元组的非参数化训练。然而,对于人脸验证任务,最近的SOTA方法采用了基于分类的参数化训练。在本文中,我们研究了基于分类的方法在图像检索数据集上的有效性。我们评估了几个标准检索数据集,如CAR-196,CUB-200-2011,斯坦福在线产品和In-Shop数据集,用于图像检索和聚类,并确定我们基于分类的方法在不同的特征维度和基本特征网络中具有竞争力。我们进一步深入分析了子采样类的性能效应,以实现可扩展的基于分类的训练,以及二值化的影响,从而为实际应用提供高效的存储和计算。 -
当数据很多、聚类中心也很多时,kmeans_pytorch 无法正常运行;可改用 fast_pytorch_kmeans:
# Alternative GPU k-means via the fast-pytorch-kmeans package.
# pip install fast-pytorch-kmeans
from fast_pytorch_kmeans import KMeans
import torch
import os
# Pin CUDA device enumeration to PCI bus order and use GPU 0 only.
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
kmeans = KMeans(n_clusters=8, mode='euclidean', verbose=1)
x = torch.randn(100000, 64, device='cuda').float()
labels = kmeans.fit_predict(x)
# NOTE(review): on this environment (python 3.6) fit_predict raised a
# dtype mismatch inside the library; original traceback kept for reference:
# Traceback (most recent call last):
# File "/---/ubuntu/---/---/gepc-master/fasttest.py", line 12, in <module>
# labels = kmeans.fit_predict(x)
# File "/home/ubuntu/anaconda3/envs/ngepc/lib/python3.6/site-packages/fast_pytorch_kmeans/kmeans.py", line 215, in fit_predict
# self.num_points_in_clusters[matched_clusters] += counts
# RuntimeError: expected device cuda:0 and dtype Float but got device cuda:0 and dtype Long