腾讯云向量数据库新体验

首先去腾讯云领取一个测试版

在控制台管理自己的实例

 接下来就是在代码中去进行一些对于数据库的增删改查操作

创建数据库

import tcvectordb
from tcvectordb.model.enum import ReadConsistency

#create a database client object
client = tcvectordb.VectorDBClient(url='http://10.0.X.X', username='root', key='eC4bLRy2va******************************', read_consistency=ReadConsistency.EVENTUAL_CONSISTENCY, timeout=30)
# create a database
db = client.create_database(database_name='db-test')

print(db.database_name)

新建collection

import tcvectordb
from tcvectordb.model.enum import ReadConsistency
from tcvectordb.model.enum import FieldType, IndexType, MetricType, EmbeddingModel
from tcvectordb.model.index import Index, VectorIndex, FilterIndex, HNSWParams
from tcvectordb.model.collection import Embedding

#create a database client object
client = tcvectordb.VectorDBClient(url='http://10.0.X.X', username='root', key='eC4bLRy2va******************************', read_consistency=ReadConsistency.EVENTUAL_CONSISTENCY, timeout=30)
db = client.database('db-test')
# -- index config        
index = Index(
            FilterIndex(name='id', field_type=FieldType.String, index_type=IndexType.PRIMARY_KEY),
            VectorIndex(name='vector', dimension=768, index_type=IndexType.HNSW,
                        metric_type=MetricType.COSINE, params=HNSWParams(m=16, efconstruction=200)),
            FilterIndex(name='author', field_type=FieldType.String, index_type=IndexType.FILTER),
            FilterIndex(name='tags', field_type=FieldType.String, index_type=IndexType.FILTER),
            FilterIndex(name='bookName', field_type=FieldType.String, index_type=IndexType.FILTER)          
        )
        
# Embedding config
ebd = Embedding(vector_field='vector', field='text', model=EmbeddingModel.BGE_BASE_ZH)

# create a collection        
coll = db.create_collection(
            name='book-emb',
            shard=1,
            replicas=0,
            description='this is a collection of test embedding',
            embedding=ebd,
            index=index
        )
print(vars(coll))

查询指定的collection

import tcvectordb
from tcvectordb.model.enum import ReadConsistency

# create a database client object
client = tcvectordb.VectorDBClient(url='http://10.0.X.X', username='root', key='eC4bLRy2va******************************', read_consistency=ReadConsistency.EVENTUAL_CONSISTENCY, timeout=30)
# 指定 Base 类数据库
db = client.database('db-test')
# 查询 Base 类数据库下的集合
res = db.describe_collection('book-emb')
print(vars(res)) 

插入数据

import tcvectordb
from tcvectordb.model.collection import Embedding, UpdateQuery
from tcvectordb.model.document import Document, Filter, SearchParams
from tcvectordb.model.enum import FieldType, IndexType, MetricType, EmbeddingModel
from tcvectordb.model.index import Index, VectorIndex, FilterIndex, HNSWParams, IVFFLATParams
from tcvectordb.model.enum import FieldType, IndexType, MetricType, ReadConsistency
#create a database client object
client = tcvectordb.VectorDBClient(url='http://10.0.X.X', username='root', key='eC4bLRy2va******************************', read_consistency=ReadConsistency.EVENTUAL_CONSISTENCY, timeout=30)
# 指定写入原始文本的数据库与集合
db = client.database('db-test')
coll = db.collection('book-emb')

# 写入数据。
# 参数 build_index 为 True,指写入数据同时重新创建索引。
res = coll.upsert(
            documents=[
                Document(id='0001', text="话说天下大势,分久必合,合久必分。", author='罗贯中', bookName='三国演义', page=21),
                Document(id='0002', text="混沌未分天地乱,茫茫渺渺无人间。", author='吴承恩', bookName='西游记', page=22),
                Document(id='0003', text="甄士隐梦幻识通灵,贾雨村风尘怀闺秀。", author='曹雪芹', bookName='红楼梦', page=23)  
            ],
            build_index=True
        )

基于原文本查询

import tcvectordb
from tcvectordb.model.enum import FieldType, IndexType, MetricType, EmbeddingModel, ReadConsistency
from tcvectordb.model.index import Index, VectorIndex, FilterIndex, HNSWParams
from tcvectordb.model.document import Document, Filter, SearchParams

#create a database client object
client = tcvectordb.VectorDBClient(url='http://10.0.X.X', username='root', key='eC4bLRy2va******************************', read_consistency=ReadConsistency.EVENTUAL_CONSISTENCY, timeout=30)

db = client.database('db-test')
coll = db.collection('book-emb')

# search by text
# embeddingItems 指定了检索的文本
# filter 指定了过滤条件
# params 指定索引类型对应的查询参数,HNSW 类型需要设置 ef,指定查询的遍历范围;IVF 系列需要设置 nprobe,指定查询的单位数量

# limit 指定返回最相似的 Top K 条结果。如果插入的数据不足 K 条,则返回实际插入的 Document 数量。
# output_fields 指定输出字段 
doc_lists = coll.searchByText(
                 embeddingItems=['天下大势,分久必合,合久必分'],
                 filter=Filter(Filter.In("bookName",["三国演义", "西游记"])),
                 params=SearchParams(ef=200),
                 limit=3,
                 retrieve_vector=False,
                 output_fields=['bookName','author']
             )           
# printf             
for i, docs in enumerate(doc_lists.get("documents")):
                print(i)
                for doc in docs:
                        print(doc)
              

但是我本来想用他的Embedding但是可惜有限制的

  • 3
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值