目录
常规index
from elasticsearch import Elasticsearch
# Connect to the Elasticsearch node and create an index named 'test01'
# without any explicit mapping, then print the create-index response.
es = Elasticsearch('http://xx:9200')
res = es.indices.create(index='test01')
print(res)
mapping类型的index
import requests
from elasticsearch import Elasticsearch
# Create index 'test03' with an explicit mapping: a single 3-dimensional
# dense_vector field named 'vector', indexed, with dot_product similarity.
es = Elasticsearch('http://xx:9200')
vector_field = {
    'type': 'dense_vector',
    'dims': 3,
    'index': True,
    'similarity': 'dot_product',
}
mapping = {'properties': {'vector': vector_field}}
res = es.indices.create(index='test03', mappings=mapping)
print(res)
插入1条向量
import requests
from elasticsearch import Elasticsearch
# Index one document holding a 3-dimensional vector into 'test03' and print
# the indexing response.
es = Elasticsearch('http://xxx:9200')
doc = {'vector': [0.1, 0.1, 0.98995]}
res = es.index(index='test03', document=doc)
print(res)
{'_index': 'test03', '_id': 'h_v5TYkBNiG-_5JLvCS6', '_version': 1, 'result': 'created', '_shards': {'total': 2, 'successful': 1, 'failed': 0}, '_seq_no': 0, '_primary_term': 1}
计算相似度
import requests
from elasticsearch import Elasticsearch
# Run a kNN search on the 'vector' field of 'test03': ask for the single
# nearest neighbour (k=1) out of 100 candidates, then print the raw response.
es = Elasticsearch('http://xx:9200')
knn = dict(
    field='vector',
    query_vector=[0.1, 0.98995, 0.1],
    k=1,
    num_candidates=100,
)
res = es.search(index='test03', knn=knn)
print(res)
{'took': 1, 'timed_out': False, '_shards': {'total': 1, 'successful': 1, 'skipped': 0, 'failed': 0}, 'hits': {'total': {'value': 1, 'relation': 'eq'}, 'max_score': 0.60399497, 'hits': [{'_index': 'test03', '_id': 'h_v5TYkBNiG-_5JLvCS6', '_score': 0.60399497, '_source': {'vector': [0.1, 0.1, 0.98995]}}]}}
分值翻译
参考:https://www.elastic.co/guide/en/elasticsearch/reference/8.6/dense-vector.html
点积值 = 2 × (ES 返回的 _score) − 1(使用 dot_product 相似度时,ES 的 _score = (1 + 点积) / 2,因此可以由 _score 反推出点积)
- 查询向量:[0.1,0.98995,0.1]
- 已入库向量:[0.1,0.1,0.98995]
# Cross-check the score translation by hand: compute the inner product of the
# stored vector and the query vector, then compare it with 2 * score - 1.
a = [0.1, 0.1, 0.98995]  # the vector that was indexed
b = [0.1, 0.98995, 0.1]  # the vector used as the kNN query

# Accumulate term by term with an explicit loop.
IP = 0
for x, y in zip(a, b):
    IP = IP + x * y
print(IP)

# Score reported by Elasticsearch for this pair, mapped back to a dot product.
es_score = 0.60399497
print(2 * es_score - 1)
0.20799
0.20798994000000004
封装成类
class EsVector:
    """Thin wrapper around one Elasticsearch index that stores dense vectors.

    The index has a single ``vector`` field of type ``dense_vector`` using
    ``dot_product`` similarity. Constructing an instance always (re)creates
    the index: if an index with the same name already exists it is deleted
    first, so previously stored documents are dropped.

    NOTE(review): per the Elasticsearch dense_vector documentation,
    ``dot_product`` similarity expects unit-length vectors -- confirm that
    callers normalise vectors before calling insert()/search().
    """

    def __init__(self, host='http://xx:9200', index_name='test03', dims=512):
        """Connect to Elasticsearch and create a fresh vector index.

        Args:
            host: URL of the Elasticsearch node.
            index_name: Name of the index to (re)create.
            dims: Dimensionality of the vectors stored in the index.
        """
        self.es = Elasticsearch(host)
        self.indexName = index_name
        mapping = {
            'properties': {
                'vector': {
                    'type': 'dense_vector',
                    'dims': dims,
                    'index': True,
                    'similarity': 'dot_product',
                },
            },
        }

        # Drop a pre-existing index so every instance starts from an empty one.
        existed = bool(self.es.indices.exists(index=self.indexName))
        if existed:
            self.es.indices.delete(index=self.indexName)
        self.es.indices.create(index=self.indexName, mappings=mapping)

        if existed:
            print('delete index={}. create index={}'.format(self.indexName, self.indexName))
        else:
            print('create index={}'.format(self.indexName))

    @staticmethod
    def _score_to_dot_product(score):
        """Map an Elasticsearch ``dot_product`` score back to the raw dot product.

        The Elasticsearch dense_vector documentation gives the score as
        ``(1 + dot_product) / 2``, so the inverse is ``2 * score - 1``.
        """
        return 2 * score - 1

    def vectorNum(self):
        """Return the number of documents currently counted in the index."""
        return self.es.count(index=self.indexName)['count']

    def close(self):
        """Close the underlying Elasticsearch client."""
        self.es.close()

    def insert(self, vector):
        """Index one vector and return the id Elasticsearch assigned to it.

        Args:
            vector: Sequence whose first element is the vector to store;
                only ``vector[0]`` is indexed.

        Returns:
            The ``_id`` of the newly indexed document.
        """
        res = self.es.index(index=self.indexName, document={'vector': vector[0]})
        # Refresh after every insert -- presumably so the new document is
        # immediately visible to search() and vectorNum().
        self.es.indices.refresh(index=self.indexName)
        return res['_id']

    def search(self, vector):
        """Find the nearest stored vector and its dot product with the query.

        Args:
            vector: Sequence whose first element is the query vector;
                only ``vector[0]`` is used.

        Returns:
            A two-element list ``[doc_id, dot_product]`` for the top hit,
            where ``dot_product`` is recovered from the returned score.

        Raises:
            IndexError: If the search returns no hits.
        """
        knn = {
            'field': 'vector',
            'query_vector': vector[0],
            'k': 1,
            'num_candidates': 100,
        }
        res = self.es.search(index=self.indexName, knn=knn)
        hits = res['hits']['hits']
        if not hits:
            # Same exception type as indexing an empty list, but with a
            # message that says what actually went wrong.
            raise IndexError('no hits returned for index={}'.format(self.indexName))
        best = hits[0]
        return [best['_id'], self._score_to_dot_product(best['_score'])]