faiss IndexIVFPQ 源码详解 - add过程

最新推荐文章于 2024-01-05 13:36:34 发布

狂奔的工程师

最新推荐文章于 2024-01-05 13:36:34 发布

阅读量481

点赞数

分类专栏： # faiss

版权声明：本文为博主原创文章，遵循 CC 4.0 BY-SA 版权协议，转载请附上原文出处链接和本声明。

本文链接：https://blog.csdn.net/yong8502/article/details/117062784

版权

faiss 专栏收录该内容

12 篇文章 1 订阅

订阅专栏

223 /*xids:外部指定的每个向量的唯一标识*/

224 void IndexIVFPQ::add_core_o (idx_t n, const float * x, const idx_t *xids,

225 float *residuals_2, const idx_t *precomputed_idx)

226 {

227

228 idx_t bs = 32768;

229 if (n > bs) {

230 for (idx_t i0 = 0; i0 < n; i0 += bs) {

231 idx_t i1 = std::min(i0 + bs, n);

232 if (verbose) {

233 printf("IndexIVFPQ::add_core_o: adding %ld:%ld / %ld\n",

234 i0, i1, n);

235 }

236 add_core_o (i1 - i0, x + i0 * d,

237 xids ? xids + i0 : nullptr,

238 residuals_2 ? residuals_2 + i0 * d : nullptr,

239 precomputed_idx ? precomputed_idx + i0 : nullptr); ///递归

240 }

241 return;

242 }

243

244 InterruptCallback::check();

246 FAISS_THROW_IF_NOT (is_trained);

247 double t0 = getmillisecs ();

248 const idx_t * idx;

249 ScopeDeleter<idx_t> del_idx;

250

251 if (precomputed_idx) {

252 idx = precomputed_idx;

253 } else {

254 idx_t * idx0 = new idx_t [n];

255 del_idx.set (idx0);

256 quantizer->assign (n, x, idx0); ///得到idx(查询到的每个向量对应中心点的idx)

257 idx = idx0;

258 }

259

260 double t1 = getmillisecs ();

261 uint8_t * xcodes = new uint8_t [n * code_size];

262 ScopeDeleter<uint8_t> del_xcodes (xcodes);

263

264 const float *to_encode = nullptr;

265 ScopeDeleter<float> del_to_encode;

266

267 if (by_residual) {

268 to_encode = compute_residuals (quantizer, n, x, idx); ///计算残差:to_encode

269 del_to_encode.set (to_encode);

270 } else {

271 to_encode = x;

272 }

273 pq.compute_codes (to_encode, xcodes, n); ///计算编码,

残差向量在残差量化器中也对应中心点，因为是pq所以有多个子空间，把向量对应的每个子空间的id组合成残差量化器的编码(计算距离时使用)

274

275 double t2 = getmillisecs ();

276 // TODO: parallelize?

277 size_t n_ignore = 0;

278 for (size_t i = 0; i < n; i++) {

279 idx_t key = idx[i]; ///二级聚类中心点编码

280 if (key < 0) {

281 n_ignore ++;

282 if (residuals_2)

283 memset (residuals_2, 0, sizeof(*residuals_2) * d);

284 continue;

285 }

286 idx_t id = xids ? xids[i] : ntotal + i; ///向量的唯一标识，可以自动生成，也可以累加

287

288 uint8_t *code = xcodes + i * code_size; ///对应的残差编码

289 size_t offset = invlists->add_entry (key, id, code); ///存入倒排,把数据copy到key对应的内存块

290

291 if (residuals_2) {

292 float *res2 = residuals_2 + i * d;

293 const float *xi = to_encode + i * d;

294 pq.decode (code, res2);

295 for (int j = 0; j < d; j++)

296 res2[j] = xi[j] - res2[j];

297 }

298

299 if (maintain_direct_map)

300 direct_map.push_back (key << 32 | offset);

301 }

302

303

304 double t3 = getmillisecs ();

305 if(verbose) {

306 char comment[100] = {0};

307 if (n_ignore > 0)

308 snprintf (comment, 100, "(%ld vectors ignored)", n_ignore);

309 printf(" add_core times: %.3f %.3f %.3f %s\n",

310 t1 - t0, t2 - t1, t3 - t2, comment);

311 }

312 ntotal += n;

313 }

狂奔的工程师

关注

0
点赞
踩
1

收藏

觉得还不错? 一键收藏
0
评论
faiss IndexIVFPQ 源码详解 - add过程

223 /*xids:外部指定的每个向量的唯一标识*/224 void IndexIVFPQ::add_core_o (idx_t n, const float * x, const idx_t *xids,225 float *residuals_2, const idx_t *precomputed_idx)226 {227228 idx_t bs = 32768;229 if (n > ...
复制链接

扫一扫

专栏目录

评论

被折叠的条评论为什么被折叠?

到【灌水乐园】发言

查看更多评论

添加红包

成就一亿技术人!

hope_wisdom

发出的红包

实付元

使用余额支付

点击重新获取

扫码支付

钱包余额 0

抵扣说明：

1.余额是钱包充值的虚拟货币，按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载，可以购买VIP、付费专栏及课程。