# 1. Model generation (模型生成类)
import numpy as np
import json
# ---------------------------------------------------------------------------
# Load the training vectors and their ids from a newline-delimited JSON dump.
#
# Each non-blank line is parsed as one JSON object. The vector is read from
# record['_source']['_aknn_vector'] and the id from the first 10 characters
# of record['_id'], which must therefore be a decimal number.
# NOTE(review): "appNo" presumably means application number - confirm.
#
# Produces:
#   feature_list_np   float32 array, expected shape (n_records, d)
#   feature_appNo_np  int64 array with one id per record
# ---------------------------------------------------------------------------
d = 512  # vector dimensionality
nb = 500000  # vector-set size; only referenced by the synthetic-data snippet below

# Synthetic-data alternative, kept for reference:
# np.random.seed(1234)  # fixed seed so results are reproducible
# xb = np.random.random((nb, d)).astype('float32')
# xb[:, 0] += np.arange(nb) / 1000. + 100

# Path of the training dump (one JSON document per line).
TRAIN_FILE = "/home/lwf/yang/train/50aa"

feature_list = []
feature_appNo = []
# `with` guarantees the handle is closed even if a line fails to parse.
with open(TRAIN_FILE, encoding="utf-8") as train_file:
    for line in train_file:
        # Tolerate blank lines (e.g. a trailing newline at the end of the dump).
        if not line.strip():
            continue
        source = json.loads(line)
        feature_list.append(source['_source']['_aknn_vector'])
        feature_appNo.append(source['_id'][0:10])

# Convert straight to float32 to avoid a temporary float64 copy of the data.
feature_list_np = np.asarray(feature_list, dtype='float32')
# Use an explicit 64-bit dtype: plain 'int' is only 32 bits on some platforms,
# which cannot hold every 10-digit id.
feature_appNo_np = np.asarray(feature_appNo).astype('int64')

# Fail early with a clear message instead of an opaque error further down.
if feature_list_np.ndim != 2 or feature_list_np.shape[1] != d:
    raise ValueError(
        "expected a non-empty (n, %d) feature matrix from %s, got shape %r"
        % (d, TRAIN_FILE, feature_list_np.shape)
    )
import faiss
# ---------------------------------------------------------------------------
# Build, train, fill and persist the FAISS index.
# ---------------------------------------------------------------------------
# Hyper-parameters: nlist and m are passed to IndexIVFPQ below; k is not used
# in this section.
nlist, m, k = 100, 8, 4

# Flat L2 index handed to IndexIVFPQ as its quantizer (the internal indexing
# method itself is unchanged).
quantizer = faiss.IndexFlatL2(d)

# IVF + product-quantisation index. The last argument is the number of bits
# per sub-code; with m = 8 sub-codes of 8 bits each, every vector is encoded
# in 8 bytes.
index = faiss.IndexIVFPQ(quantizer, d, nlist, m, 8)

# The index is trained on the same vectors that are added afterwards.
index.train(feature_list_np)

# Vectors are added through an IndexIDMap wrapper so that they are stored
# under the ids in feature_appNo_np. (A direct index.add_with_ids call was
# left disabled in the original.)
index2 = faiss.IndexIDMap(index)
index2.add_with_ids(feature_list_np, feature_appNo_np)

# Save the wrapped index to disk.
faiss.write_index(index2, "large.index")