欢迎来到Milvus的操作世界!我很高兴能带你深入了解Milvus的基本操作。这篇博客将详细介绍如何在Milvus中创建、删除和管理集合,插入、更新和删除向量,以及进行简单的查询和检索操作。我们将使用Java语言编写例子,并在每个操作中提供丰富的代码注释,帮助你更好地理解每一步。准备好了吗?让我们开始吧!
依赖引入
在开始之前,我们需要先引入必要的依赖。Milvus提供了Java客户端,我们需要在项目中添加相关依赖。
在你的 pom.xml 文件中添加以下依赖:
<!-- Milvus Java client SDK; the Java listings in this article are written against this pinned version. -->
<dependency>
<groupId>io.milvus</groupId>
<artifactId>milvus-sdk-java</artifactId>
<version>2.0.0</version>
</dependency>
连接到Milvus服务器
在进行操作之前,我们需要先连接到Milvus服务器。以下是连接代码示例:
import io.milvus.client.MilvusClient;
import io.milvus.client.MilvusServiceClient;
import io.milvus.param.ConnectParam;
public class MilvusExample {
public static void main(String[] args) {
// 创建Milvus客户端实例
MilvusClient client = new MilvusServiceClient(ConnectParam.newBuilder()
.withHost("localhost")
.withPort(19530)
.build());
System.out.println("Connected to Milvus server successfully!");
}
}
创建集合
Milvus中的集合相当于关系数据库中的表。创建集合需要定义集合的名称和字段。
import io.milvus.param.collection.CreateCollectionParam;
import io.milvus.param.collection.FieldType;
import io.milvus.param.collection.FieldType.DataType;
public class MilvusExample {
public static void main(String[] args) {
// 创建Milvus客户端实例
MilvusClient client = new MilvusServiceClient(ConnectParam.newBuilder()
.withHost("localhost")
.withPort(19530)
.build());
// 定义字段
FieldType idField = FieldType.newBuilder()
.withName("id")
.withDataType(DataType.Int64)
.withPrimaryKey(true)
.build();
FieldType vectorField = FieldType.newBuilder()
.withName("vector")
.withDataType(DataType.FloatVector)
.withDimension(128)
.build();
// 创建集合
CreateCollectionParam createCollectionParam = CreateCollectionParam.newBuilder()
.withCollectionName("example_collection")
.addFieldType(idField)
.addFieldType(vectorField)
.build();
client.createCollection(createCollectionParam);
System.out.println("集合创建成功!");
}
}
删除集合
删除集合非常简单,只需指定集合名称即可。
import io.milvus.param.collection.DropCollectionParam;
public class MilvusExample {
public static void main(String[] args) {
// 创建Milvus客户端实例
MilvusClient client = new MilvusServiceClient(ConnectParam.newBuilder()
.withHost("localhost")
.withPort(19530)
.build());
// 删除集合
DropCollectionParam dropCollectionParam = DropCollectionParam.newBuilder()
.withCollectionName("example_collection")
.build();
client.dropCollection(dropCollectionParam);
System.out.println("集合删除成功!");
}
}
插入向量
插入向量是向集合中添加数据的过程。我们需要准备向量数据并插入到指定集合中。
import io.milvus.param.dml.InsertParam;
import io.milvus.grpc.MutationResult;
import java.util.Arrays;
import java.util.List;
public class MilvusExample {
public static void main(String[] args) {
// 创建Milvus客户端实例
MilvusClient client = new MilvusServiceClient(ConnectParam.newBuilder()
.withHost("localhost")
.withPort(19530)
.build());
// 准备向量数据
List<Long> idList = Arrays.asList(1L, 2L, 3L);
List<List<Float>> vectorList = Arrays.asList(
Arrays.asList(0.1f, 0.2f, 0.3f, /* ... 128维 */ 0.0f),
Arrays.asList(0.4f, 0.5f, 0.6f, /* ... 128维 */ 0.0f),
Arrays.asList(0.7f, 0.8f, 0.9f, /* ... 128维 */ 0.0f)
);
// 创建插入参数
InsertParam insertParam = InsertParam.newBuilder()
.withCollectionName("example_collection")
.withFields(Arrays.asList(
InsertParam.Field.newBuilder()
.withName("id")
.withValues(idList)
.build(),
InsertParam.Field.newBuilder()
.withName("vector")
.withValues(vectorList)
.build()
))
.build();
// 插入向量
MutationResult insertResult = client.insert(insertParam);
System.out.println("插入向量成功!");
}
}
更新向量
更新向量是修改集合中已有数据的过程。本文使用的SDK版本没有提供原地更新的接口,因此下面的示例通过"先按ID删除旧向量,再插入相同ID的新向量"来实现更新。
import io.milvus.param.dml.DeleteParam;
public class MilvusExample {
public static void main(String[] args) {
// 创建Milvus客户端实例
MilvusClient client = new MilvusServiceClient(ConnectParam.newBuilder()
.withHost("localhost")
.withPort(19530)
.build());
// 准备更新数据
List<Long> idList = Arrays.asList(1L);
List<List<Float>> updatedVectorList = Arrays.asList(
Arrays.asList(0.1f, 0.2f, 0.3f, /* ... 128维 */ 1.0f)
);
// 删除旧向量
DeleteParam deleteParam = DeleteParam.newBuilder()
.withCollectionName("example_collection")
.withIds(idList)
.build();
client.delete(deleteParam);
// 插入新向量
InsertParam insertParam = InsertParam.newBuilder()
.withCollectionName("example_collection")
.withFields(Arrays.asList(
InsertParam.Field.newBuilder()
.withName("id")
.withValues(idList)
.build(),
InsertParam.Field.newBuilder()
.withName("vector")
.withValues(updatedVectorList)
.build()
))
.build();
MutationResult insertResult = client.insert(insertParam);
System.out.println("更新向量成功!");
}
}
删除向量
删除向量是从集合中移除指定向量的过程。我们可以通过ID来删除向量。
import io.milvus.param.dml.DeleteParam;
public class MilvusExample {
public static void main(String[] args) {
// 创建Milvus客户端实例
MilvusClient client = new MilvusServiceClient(ConnectParam.newBuilder()
.withHost("localhost")
.withPort(19530)
.build());
// 准备删除数据
List<Long> idList = Arrays.asList(1L, 2L);
// 删除向量
DeleteParam deleteParam = DeleteParam.newBuilder()
.withCollectionName("example_collection")
.withIds(idList)
.build();
client.delete(deleteParam);
System.out.println("删除向量成功!");
}
}
简单查询
简单查询是从集合中检索指定条件的数据。以下是查询操作的示例:
import io.milvus.param.dql.QueryParam;
import io.milvus.response.QueryResults;
public class MilvusExample {
public static void main(String[] args) {
// 创建Milvus客户端实例
MilvusClient client = new MilvusServiceClient(ConnectParam.newBuilder()
.withHost("localhost")
.withPort(19530)
.build());
// 准备查询条件
String query = "id in [1, 2, 3]";
// 创建查询参数
QueryParam queryParam = QueryParam.newBuilder()
.withCollectionName("example_collection")
.withExpr(query)
.build();
// 执行查询
QueryResults queryResults = client.query(queryParam);
System.out.println("Query results: " + queryResults.getResults());
}
}
简单检索
简单检索是从集合中检索与查询向量相似的向量。以下是检索操作的示例:
import io.milvus.param.dql.SearchParam;
import io.milvus.response.SearchResults;
import java.util.Arrays;
import java.util.List;
public class MilvusExample {
public static void main(String[] args) {
// 创建Milvus客户端实例
MilvusClient client = new MilvusServiceClient(ConnectParam.newBuilder()
.withHost("localhost")
.withPort(19530)
.build());
// 准备查询向量
List<List<Float>> queryVectors = Arrays.asList(
Arrays.asList(0.1f, 0.2f, 0.3f, /* ... 128维 */ 0.0f)
);
// 创建检索参数
SearchParam searchParam = SearchParam.newBuilder()
.withCollectionName("example_collection")
.withMetricType("L2")
.withTopK(5)
.withVectors(queryVectors)
.build();
// 执行检索
SearchResults searchResults = client.search(searchParam);
System.out.println("Search results: " + searchResults.getResults());
}
}
接下来,我们进一步深入,介绍如何将文本数据生成向量数组,并进行多条件的复杂检索。以下是详细步骤和示例代码。
文本生成向量数组
为了将文本数据转换为向量数组,我们可以使用预训练的语言模型(如BERT)。以下是一个简单的示例,使用BERT模型将文本转换为向量。
引入依赖
首先,我们需要在 pom.xml 文件中引入DJL(Deep Java Library)相关依赖和Apache Commons Lang库:
<!-- Apache Commons Lang utility library -->
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-lang3</artifactId>
<version>3.12.0</version>
</dependency>
<!-- DJL core API -->
<dependency>
<groupId>ai.djl</groupId>
<artifactId>api</artifactId>
<version>0.14.0</version>
</dependency>
<!-- DJL TensorFlow engine and model zoo -->
<dependency>
<groupId>ai.djl.tensorflow</groupId>
<artifactId>tensorflow-engine</artifactId>
<version>0.14.0</version>
</dependency>
<dependency>
<groupId>ai.djl.tensorflow</groupId>
<artifactId>tensorflow-model-zoo</artifactId>
<version>0.14.0</version>
</dependency>
<!-- DJL PyTorch model zoo; this artifact lives under the ai.djl.pytorch group, not ai.djl -->
<dependency>
<groupId>ai.djl.pytorch</groupId>
<artifactId>pytorch-model-zoo</artifactId>
<version>0.14.0</version>
</dependency>
<!-- DJL basic datasets -->
<dependency>
<groupId>ai.djl</groupId>
<artifactId>basicdataset</artifactId>
<version>0.14.0</version>
</dependency>
文本生成向量代码
以下是将文本数据生成向量数组的示例代码:
import ai.djl.Model;
import ai.djl.ModelException;
import ai.djl.ModelZoo;
import ai.djl.inference.Predictor;
import ai.djl.modality.Classifications;
import ai.djl.modality.nlp.DefaultVocabulary;
import ai.djl.modality.nlp.NlpUtils;
import ai.djl.modality.nlp.preprocess.SimpleTokenizer;
import ai.djl.modality.nlp.qa.QAInput;
import ai.djl.modality.nlp.qa.QAOutput;
import ai.djl.modality.nlp.qa.QAPreprocessor;
import ai.djl.modality.nlp.qa.QAUtils;
import ai.djl.translate.TranslateException;
import ai.djl.translate.Translator;
import ai.djl.translate.TranslatorContext;
import ai.djl.translate.TranslatorFactory;
import ai.djl.translate.TranslatorUtils;
import ai.djl.translate.TranslatorContext;
import ai.djl.translate.TranslatorContext;
import ai.djl.util.Utils;
import ai.djl.Model;
import ai.djl.ModelException;
import ai.djl.ModelZoo;
import ai.djl.ndarray.NDManager;
import ai.djl.ndarray.NDArray;
import ai.djl.ndarray.types.Shape;
import ai.djl.translate.TranslateException;
import ai.djl.translate.Translator;
import ai.djl.translate.TranslatorContext;
import ai.djl.translate.TranslatorFactory;
import ai.djl.util.Utils;
import ai.djl.basicdataset.nlp.SimpleVocabulary;
import ai.djl.basicdataset.nlp.DefaultVocabulary;
import ai.djl.modality.nlp.vocabulary.Vocabulary;
import ai.djl.modality.nlp.DefaultVocabulary;
import ai.djl.translate.Batchifier;
import ai.djl.translate.Translator;
import ai.djl.translate.TranslatorContext;
import ai.djl.translate.TranslateException;
import ai.djl.translate.TranslatorFactory;
import ai.djl.util.Utils;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
/**
 * Sketch of turning one sentence into a float embedding with DJL: load a
 * model, wrap it in a {@code Translator<String, float[]>} and run a single
 * prediction.
 *
 * <p>NOTE(review): several calls below could not be confirmed against the DJL
 * 0.14.0 API from this listing alone; each one is flagged inline. Treat the
 * class as an outline until those points are verified.
 */
public class TextToVector {
    /**
     * Entry point: embeds one hard-coded sentence and prints the result.
     *
     * @param args unused
     * @throws ModelException declared for model loading
     * @throws TranslateException declared for the prediction call
     * @throws IOException declared for model loading
     */
    public static void main(String[] args) throws ModelException, TranslateException, IOException {
        String modelName = "distilbert-base-uncased";
        String sentence = "This is an example sentence for BERT model.";
        // NOTE(review): ModelZoo.loadModel is documented as taking a Criteria object
        // rather than a bare model name - confirm and supply a model location.
        try (Model model = ModelZoo.loadModel(modelName)) {
            // Translator: sentence -> input NDList, output NDList -> float[].
            Translator<String, float[]> translator = new Translator<String, float[]>() {
                @Override
                public Batchifier getBatchifier() {
                    // No batchifier is supplied.
                    return null;
                }

                @Override
                public float[] processOutput(TranslatorContext ctx, ai.djl.ndarray.NDList list) throws Exception {
                    // Expects exactly one output array and flattens it to float[].
                    return list.singletonOrThrow().toFloatArray();
                }

                @Override
                public ai.djl.ndarray.NDList processInput(TranslatorContext ctx, String input) throws Exception {
                    // The vocabulary is built from the tokens of the input sentence itself.
                    // NOTE(review): a pretrained model presumably expects ids from its own
                    // vocabulary file, so these ids are unlikely to be meaningful - confirm.
                    // NOTE(review): verify that Builder.addAll accepts a single token list and
                    // that Vocabulary exposes getIndices(String); neither could be confirmed here.
                    Vocabulary vocab = DefaultVocabulary.builder()
                            .addAll(new SimpleTokenizer().tokenize(input))
                            .optUnknownToken("[UNK]")
                            .build();
                    ai.djl.ndarray.NDArray ndArray = ctx.getNDManager().create(vocab.getIndices(input));
                    return new ai.djl.ndarray.NDList(ndArray);
                }
            };
            try (Predictor<String, float[]> predictor = model.newPredictor(translator)) {
                float[] embeddings = predictor.predict(sentence);
                // java.util.Arrays is fully qualified because the listing's imports do not include it.
                System.out.println("Embeddings: " + java.util.Arrays.toString(embeddings));
            }
        }
    }
}
多条件的复杂检索案例
我们将使用Milvus进行多条件的复杂检索。假设我们有一个包含文本向量和其他元数据(如类别、日期等)的集合。我们将展示如何进行复杂的条件检索,包括向量相似度检索和基于元数据的过滤。
数据准备
首先,我们需要创建一个包含文本向量和元数据的集合,并插入一些数据。以下是创建和插入数据的示例:
import io.milvus.client.MilvusClient;
import io.milvus.client.MilvusServiceClient;
import io.milvus.grpc.DataType;
import io.milvus.grpc.MutationResult;
import io.milvus.param.ConnectParam;
import io.milvus.param.R;
import io.milvus.param.collection.CreateCollectionParam;
import io.milvus.param.collection.FieldType;
import io.milvus.param.dml.InsertParam;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

/**
 * Creates {@code text_collection} (id, 768-dimensional vector, category, date)
 * and inserts three sample entities.
 */
public class MilvusComplexSearchExample {
    private static final String COLLECTION_NAME = "text_collection";
    private static final int VECTOR_DIMENSION = 768; // assumes 768-dimensional BERT vectors

    /**
     * Entry point.
     *
     * @param args unused
     * @throws IllegalStateException if a request reports a failure
     */
    public static void main(String[] args) {
        MilvusClient client = new MilvusServiceClient(ConnectParam.newBuilder()
                .withHost("localhost")
                .withPort(19530)
                .build());
        try {
            // Define the fields. DataType is the enum from io.milvus.grpc.
            FieldType idField = FieldType.newBuilder()
                    .withName("id")
                    .withDataType(DataType.Int64)
                    .withPrimaryKey(true)
                    .build();
            FieldType vectorField = FieldType.newBuilder()
                    .withName("vector")
                    .withDataType(DataType.FloatVector)
                    .withDimension(VECTOR_DIMENSION)
                    .build();
            // NOTE(review): string scalar fields are believed to be rejected by Milvus 2.0
            // servers; later releases offer VarChar with a maximum length instead.
            // Confirm against the server version in use before relying on these two fields.
            FieldType categoryField = FieldType.newBuilder()
                    .withName("category")
                    .withDataType(DataType.String)
                    .build();
            FieldType dateField = FieldType.newBuilder()
                    .withName("date")
                    .withDataType(DataType.String)
                    .build();

            // Create the collection.
            CreateCollectionParam createCollectionParam = CreateCollectionParam.newBuilder()
                    .withCollectionName(COLLECTION_NAME)
                    .addFieldType(idField)
                    .addFieldType(vectorField)
                    .addFieldType(categoryField)
                    .addFieldType(dateField)
                    .build();
            requireSuccess(client.createCollection(createCollectionParam), "Create collection");
            System.out.println("Collection created successfully!");

            // Prepare the rows. Vectors are generated at full length so they
            // match the dimension declared for the "vector" field.
            List<Long> idList = Arrays.asList(1L, 2L, 3L);
            List<List<Float>> vectorList = Arrays.asList(
                    sampleVector(VECTOR_DIMENSION, 0.1f),
                    sampleVector(VECTOR_DIMENSION, 0.4f),
                    sampleVector(VECTOR_DIMENSION, 0.7f));
            List<String> categoryList = Arrays.asList("news", "sports", "entertainment");
            List<String> dateList = Arrays.asList("2023-01-01", "2023-01-02", "2023-01-03");

            // Column-based insert: one Field per schema field.
            List<InsertParam.Field> fields = Arrays.asList(
                    new InsertParam.Field("id", DataType.Int64, idList),
                    new InsertParam.Field("vector", DataType.FloatVector, vectorList),
                    new InsertParam.Field("category", DataType.String, categoryList),
                    new InsertParam.Field("date", DataType.String, dateList));
            InsertParam insertParam = InsertParam.newBuilder()
                    .withCollectionName(COLLECTION_NAME)
                    .withFields(fields)
                    .build();

            // Insert and verify the returned status before reporting success.
            R<MutationResult> insertResponse = client.insert(insertParam);
            requireSuccess(insertResponse, "Insert");
            System.out.println("Data inserted successfully!");
        } finally {
            client.close();
        }
    }

    /** Throws if the SDK reported a non-success status for {@code action}. */
    private static void requireSuccess(R<?> response, String action) {
        if (response.getStatus() != R.Status.Success.getCode()) {
            throw new IllegalStateException(action + " failed", response.getException());
        }
    }

    /**
     * Builds a deterministic sample vector.
     *
     * @param dimension number of components to generate
     * @param start value of the first component; later components grow by 0.001 each
     * @return a list holding {@code dimension} floats
     */
    private static List<Float> sampleVector(int dimension, float start) {
        List<Float> vector = new ArrayList<>(dimension);
        for (int i = 0; i < dimension; i++) {
            vector.add(start + i * 0.001f);
        }
        return vector;
    }
}
多条件检索
接下来,我们将展示如何进行多条件的复杂检索,包括向量相似度检索和基于元数据的过滤。
import io.milvus.param.dql.SearchParam;
import io.milvus.param.dql.QueryParam;
import io.milvus.response.SearchResults;
import io.milvus.response.QueryResults;
import java.util.Arrays;
import java.util.List;
public class MilvusComplexSearchExample {
public static void main(String[] args) {
MilvusClient client = new MilvusServiceClient(ConnectParam.newBuilder()
.withHost("localhost")
.withPort(19530)
.build());
// 准备查询向量
List<List<Float>> queryVectors = Arrays.asList(
Arrays.asList(0.1f, 0.2f, 0.3f, /* ... 768维 */ 0.0f)
);
// 创建检索参数
SearchParam searchParam = SearchParam.newBuilder()
.withCollectionName("text_collection")
.withMetricType("L2")
.withTopK(5)
.withVectors(queryVectors)
.build();
// 执行向量检索
SearchResults searchResults = client.search(searchParam);
System.out.println("Search results: " + searchResults.getResults());
// 基于元数据的过滤条件
String query = "category == 'news' && date >= '2023-01-01'";
// 创建查询参数
QueryParam queryParam = QueryParam.newBuilder()
.withCollectionName("text_collection")
.withExpr(query)
.build();
// 执行查询
QueryResults queryResults = client.query(queryParam);
System.out.println("Query results: " + queryResults.getResults());
}
}
总结
通过本指南,我们详细介绍了Milvus的基本操作,包括如何创建、删除和管理集合,插入、更新和删除向量,进行简单的查询和检索操作,以及如何将文本数据生成向量数组,并进行多条件的复杂检索。每个操作都提供了Java代码示例和详细的代码注释,帮助你更好地理解每一步的实现。希望这篇文章能帮助你快速上手Milvus,在实际项目中充分发挥其强大的向量检索能力。如果你在操作过程中遇到任何问题,欢迎留言讨论,一起探索Milvus的无限可能!
如果你喜欢这篇文章,别忘了收藏文章、关注作者、订阅专栏,感激不尽。