腾讯云向量数据库新体验

最新推荐文章于 2024-06-14 10:20:40 发布

学习中的程序媛~

最新推荐文章于 2024-06-14 10:20:40 发布

阅读量189

点赞数 3

文章标签：腾讯云 云计算

本文链接：https://blog.csdn.net/m0_62458145/article/details/139114875

版权

首先去腾讯云领取一个测试版

在控制台管理自己的实例

接下来就是在代码中对数据库进行一些增删改查操作。

创建数据库

import tcvectordb
from tcvectordb.model.enum import ReadConsistency

# Connection settings for the VectorDB instance, gathered in one place and
# passed to the client as keyword arguments.
connection_options = {
    'url': 'http://10.0.X.X',
    'username': 'root',
    'key': 'eC4bLRy2va******************************',
    'read_consistency': ReadConsistency.EVENTUAL_CONSISTENCY,
    'timeout': 30,
}
client = tcvectordb.VectorDBClient(**connection_options)

# Create the 'db-test' database and print the name carried by the returned object.
db = client.create_database(database_name='db-test')
print(db.database_name)

新建collection

import tcvectordb
from tcvectordb.model.enum import ReadConsistency
from tcvectordb.model.enum import FieldType, IndexType, MetricType, EmbeddingModel
from tcvectordb.model.index import Index, VectorIndex, FilterIndex, HNSWParams
from tcvectordb.model.collection import Embedding

# Connect to the VectorDB instance and select the 'db-test' database.
connection_options = {
    'url': 'http://10.0.X.X',
    'username': 'root',
    'key': 'eC4bLRy2va******************************',
    'read_consistency': ReadConsistency.EVENTUAL_CONSISTENCY,
    'timeout': 30,
}
client = tcvectordb.VectorDBClient(**connection_options)
db = client.database('db-test')

# Index configuration: a string primary key, one HNSW vector index using the
# cosine metric, and three string filter fields.
primary_key = FilterIndex(name='id', field_type=FieldType.String, index_type=IndexType.PRIMARY_KEY)
hnsw_params = HNSWParams(m=16, efconstruction=200)
vector_index = VectorIndex(
    name='vector',
    dimension=768,
    index_type=IndexType.HNSW,
    metric_type=MetricType.COSINE,
    params=hnsw_params,
)
filter_indexes = [
    FilterIndex(name=field_name, field_type=FieldType.String, index_type=IndexType.FILTER)
    for field_name in ('author', 'tags', 'bookName')
]
index = Index(primary_key, vector_index, *filter_indexes)

# Embedding configuration: BGE_BASE_ZH model with vector_field='vector' and field='text'.
ebd = Embedding(vector_field='vector', field='text', model=EmbeddingModel.BGE_BASE_ZH)

# Create the 'book-emb' collection from the settings above and dump its attributes.
collection_options = {
    'name': 'book-emb',
    'shard': 1,
    'replicas': 0,
    'description': 'this is a collection of test embedding',
    'embedding': ebd,
    'index': index,
}
coll = db.create_collection(**collection_options)
print(vars(coll))

查询指定的collection

import tcvectordb
from tcvectordb.model.enum import ReadConsistency

# Connection settings for the VectorDB instance, passed to the client as keyword arguments.
connection_options = {
    'url': 'http://10.0.X.X',
    'username': 'root',
    'key': 'eC4bLRy2va******************************',
    'read_consistency': ReadConsistency.EVENTUAL_CONSISTENCY,
    'timeout': 30,
}
client = tcvectordb.VectorDBClient(**connection_options)

# Select the Base-type database.
db = client.database('db-test')

# Look up the 'book-emb' collection in that database and dump its attributes.
res = db.describe_collection('book-emb')
print(vars(res))

插入数据

import tcvectordb
from tcvectordb.model.collection import Embedding, UpdateQuery
from tcvectordb.model.document import Document, Filter, SearchParams
from tcvectordb.model.enum import FieldType, IndexType, MetricType, EmbeddingModel
from tcvectordb.model.index import Index, VectorIndex, FilterIndex, HNSWParams, IVFFLATParams
from tcvectordb.model.enum import FieldType, IndexType, MetricType, ReadConsistency
# Connection settings for the VectorDB instance, passed to the client as keyword arguments.
connection_options = {
    'url': 'http://10.0.X.X',
    'username': 'root',
    'key': 'eC4bLRy2va******************************',
    'read_consistency': ReadConsistency.EVENTUAL_CONSISTENCY,
    'timeout': 30,
}
client = tcvectordb.VectorDBClient(**connection_options)

# Select the database and collection that the raw text is written to.
db = client.database('db-test')
coll = db.collection('book-emb')

# Rows to write, as (id, text, author, bookName, page).
book_rows = [
    ('0001', "话说天下大势，分久必合，合久必分。", '罗贯中', '三国演义', 21),
    ('0002', "混沌未分天地乱，茫茫渺渺无人间。", '吴承恩', '西游记', 22),
    ('0003', "甄士隐梦幻识通灵，贾雨村风尘怀闺秀。", '曹雪芹', '红楼梦', 23),
]
documents = [
    Document(id=doc_id, text=text, author=author, bookName=book_name, page=page)
    for doc_id, text, author, book_name, page in book_rows
]

# Write the data.
# build_index=True means the index is rebuilt while the data is being written.
res = coll.upsert(documents=documents, build_index=True)

基于原文本查询

import tcvectordb
from tcvectordb.model.enum import FieldType, IndexType, MetricType, EmbeddingModel, ReadConsistency
from tcvectordb.model.index import Index, VectorIndex, FilterIndex, HNSWParams
from tcvectordb.model.document import Document, Filter, SearchParams

# Connection settings for the VectorDB instance, passed to the client as keyword arguments.
connection_options = {
    'url': 'http://10.0.X.X',
    'username': 'root',
    'key': 'eC4bLRy2va******************************',
    'read_consistency': ReadConsistency.EVENTUAL_CONSISTENCY,
    'timeout': 30,
}
client = tcvectordb.VectorDBClient(**connection_options)

db = client.database('db-test')
coll = db.collection('book-emb')

# Search by text.
# embeddingItems: the text(s) to search for.
# filter: the filter condition.
# params: query parameters for the index type; HNSW needs ef (the traversal
#         range of the query), the IVF family needs nprobe (the number of
#         units to query).
# limit: return the Top K most similar results; if fewer than K documents were
#        inserted, the number actually inserted is returned.
# output_fields: the fields to include in the output.
search_options = {
    'embeddingItems': ['天下大势，分久必合，合久必分'],
    'filter': Filter(Filter.In("bookName", ["三国演义", "西游记"])),
    'params': SearchParams(ef=200),
    'limit': 3,
    'retrieve_vector': False,
    'output_fields': ['bookName', 'author'],
}
doc_lists = coll.searchByText(**search_options)

# Print the position of each result group, followed by every document in it.
for group_no, hits in enumerate(doc_lists.get("documents")):
    print(group_no)
    for hit in hits:
        print(hit)

我本来想用它的 Embedding 功能，可惜是有限制的。

学习中的程序媛~

关注

3
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
腾讯云向量数据库新体验

接下来就是在代码中去进行一些对于数据库的增删改查操作。但是我本来想用他的Embedding但是可惜有限制的。查询指定的collection。首先去腾讯云领取一个测试版。新建collection。在控制台管理自己的实例。
复制链接

扫一扫