223 /*xids:外部指定的每个向量的唯一标识*/
224 void IndexIVFPQ::add_core_o (idx_t n, const float * x, const idx_t *xids,
225 float *residuals_2, const idx_t *precomputed_idx)
226 {
227
228 idx_t bs = 32768;
229 if (n > bs) {
230 for (idx_t i0 = 0; i0 < n; i0 += bs) {
231 idx_t i1 = std::min(i0 + bs, n);
232 if (verbose) {
233 printf("IndexIVFPQ::add_core_o: adding %ld:%ld / %ld\n",
234 i0, i1, n);
235 }
236 add_core_o (i1 - i0, x + i0 * d,
237 xids ? xids + i0 : nullptr,
238 residuals_2 ? residuals_2 + i0 * d : nullptr,
239 precomputed_idx ? precomputed_idx + i0 : nullptr); ///递归
240 }
241 return;
242 }
243
244 InterruptCallback::check();
246 FAISS_THROW_IF_NOT (is_trained);
247 double t0 = getmillisecs ();
248 const idx_t * idx;
249 ScopeDeleter<idx_t> del_idx;
250
251 if (precomputed_idx) {
252 idx = precomputed_idx;
253 } else {
254 idx_t * idx0 = new idx_t [n];
255 del_idx.set (idx0);
256 quantizer->assign (n, x, idx0); ///得到idx(查询到的每个向量对应中心点的idx)
257 idx = idx0;
258 }
259
260 double t1 = getmillisecs ();
261 uint8_t * xcodes = new uint8_t [n * code_size];
262 ScopeDeleter<uint8_t> del_xcodes (xcodes);
263
264 const float *to_encode = nullptr;
265 ScopeDeleter<float> del_to_encode;
266
267 if (by_residual) {
268 to_encode = compute_residuals (quantizer, n, x, idx); ///计算残差:to_encode
269 del_to_encode.set (to_encode);
270 } else {
271 to_encode = x;
272 }
273 pq.compute_codes (to_encode, xcodes, n); ///计算编码,
残差向量在残差量化器中也对应中心点,因为是pq所以有多个子空间,把向量对应的每个子空间的id组合成残差量化器的编码(计算距离时使用)
274
275 double t2 = getmillisecs ();
276 // TODO: parallelize?
277 size_t n_ignore = 0;
278 for (size_t i = 0; i < n; i++) {
279 idx_t key = idx[i]; ///二级聚类中心点编码
280 if (key < 0) {
281 n_ignore ++;
282 if (residuals_2)
283 memset (residuals_2, 0, sizeof(*residuals_2) * d);
284 continue;
285 }
286 idx_t id = xids ? xids[i] : ntotal + i; ///向量的唯一标识,可以自动生成,也可以累加
287
288 uint8_t *code = xcodes + i * code_size; ///对应的残差编码
289 size_t offset = invlists->add_entry (key, id, code); ///存入倒排,把数据copy到key对应的内存块
290
291 if (residuals_2) {
292 float *res2 = residuals_2 + i * d;
293 const float *xi = to_encode + i * d;
294 pq.decode (code, res2);
295 for (int j = 0; j < d; j++)
296 res2[j] = xi[j] - res2[j];
297 }
298
299 if (maintain_direct_map)
300 direct_map.push_back (key << 32 | offset);
301 }
302
303
304 double t3 = getmillisecs ();
305 if(verbose) {
306 char comment[100] = {0};
307 if (n_ignore > 0)
308 snprintf (comment, 100, "(%ld vectors ignored)", n_ignore);
309 printf(" add_core times: %.3f %.3f %.3f %s\n",
310 t1 - t0, t2 - t1, t3 - t2, comment);
311 }
312 ntotal += n;
313 }