// ===================== 以下是主要逻辑 =====================
// 主handler函数,根据key从redis中获取value ,如果不命中,则首先调用文本向量化接口向量化query,然后调用向量搜索接口搜索最相似的出现过的key,最后再次调用redis获取结果
// 可以把所有handler单独提取为文件,这里为了方便读者复制就和主逻辑放在一个文件中了
//
// 1. query 进来和 redis 中存的 key 匹配 (redisSearchHandler) ,若完全一致则直接返回 (handleCacheHit)
// 2. 否则请求 text_embedding 接口将 query 转换为 query_embedding (fetchAndProcessEmbeddings)
// 3. 用 query_embedding 和向量数据库中的向量做 ANN search,返回最接近的 key ,并用阈值过滤 (performQueryAndRespond)
// 4. 若返回结果为空或大于阈值,舍去,本轮 cache 未命中, 最后将 query_embedding 存入向量数据库 (uploadQueryEmbedding)
// 5. 若小于阈值,则再次调用 redis对 most similar key 做匹配。(redisSearchHandler)
// 6. 在 response 阶段请求 redis 新增key/LLM返回结果
// redisSearchHandler asynchronously looks up config.CacheKeyPrefix+key in
// redis and dispatches on the reply:
//
//   - a reply that carries no error and is not null is a cache hit and is
//     answered directly through handleCacheHit;
//   - anything else is a miss: when ifUseEmbedding is true the embedding-based
//     lookup is started through handleCacheMiss, otherwise the request is
//     simply resumed.
//
// The returned error is the one reported by redisClient.Get for the call
// itself; the reply is handled entirely inside the callback.
func redisSearchHandler(key string, ctx wrapper.HttpContext, config PluginConfig, log wrapper.Log, stream bool, ifUseEmbedding bool) error {
	onReply := func(response resp.Value) {
		replyErr := response.Error()
		if replyErr == nil && !response.IsNull() {
			log.Warnf("cache hit, key:%s", key)
			handleCacheHit(key, response, stream, ctx, config, log)
			return
		}

		log.Warnf("cache miss, key:%s", key)
		if !ifUseEmbedding {
			// No semantic fallback requested for this lookup: resume the request.
			proxywasm.ResumeHttpRequest()
			return
		}
		// The key doubles as the query string that gets vectorized.
		handleCacheMiss(key, replyErr, response, ctx, config, log, key, stream)
	}
	return config.redisClient.Get(config.CacheKeyPrefix+key, onReply)
}
// handleCacheHit answers the request straight from the cached redis value.
//
// It clears CacheKeyContextKey in ctx and sends a 200 response whose body is
// response.String() rendered through config.ReturnStreamResponseTemplate
// (text/event-stream) when stream is true, or through
// config.ReturnResponseTemplate (application/json) otherwise.
func handleCacheHit(key string, response resp.Value, stream bool, ctx wrapper.HttpContext, config PluginConfig, log wrapper.Log) {
	log.Warnf("cache hit, key:%s", key)
	// NOTE(review): presumably cleared so the response phase does not write
	// this entry back to the cache — confirm against the response handler.
	ctx.SetContext(CacheKeyContextKey, nil)

	contentType := "application/json; charset=utf-8"
	bodyTemplate := config.ReturnResponseTemplate
	if stream {
		contentType = "text/event-stream; charset=utf-8"
		bodyTemplate = config.ReturnStreamResponseTemplate
	}

	body := fmt.Sprintf(bodyTemplate, response.String())
	proxywasm.SendHttpResponse(200, [][2]string{{"content-type", contentType}}, []byte(body), -1)
}
// handleCacheMiss handles an exact-match lookup that did not produce a value.
//
// It logs the redis error when err is non-nil, logs the miss when the reply is
// null (both may apply), and then starts the embedding flow by calling
// fetchAndProcessEmbeddings with queryString.
func handleCacheMiss(key string, err error, response resp.Value, ctx wrapper.HttpContext, config PluginConfig, log wrapper.Log, queryString string, stream bool) {
	// A failed GET is only reported; the semantic lookup is still attempted.
	if err != nil {
		log.Warnf("redis get key:%s failed, err:%v", key, err)
	}

	// A null reply means the key is simply absent from redis.
	if response.IsNull() {
		log.Warnf("cache miss, key:%s", key)
	}

	fetchAndProcessEmbeddings(key, ctx, config, log, queryString, stream)
}
// fetchAndProcessEmbeddings asks the text-embedding service to vectorize
// queryString and, on a 200 response, passes the response body to
// processFetchedEmbeddings.
//
// On any failure (the call cannot be dispatched, or the service answers with a
// non-200 status) QueryEmbeddingKey is cleared in ctx and the request is
// resumed, so the request is never left paused.
func fetchAndProcessEmbeddings(key string, ctx wrapper.HttpContext, config PluginConfig, log wrapper.Log, queryString string, stream bool) {
	embURL, embBody, embHeaders := ConstructTextEmbeddingParameters(&config, log, []string{queryString})
	err := config.DashVectorInfo.DashScopeClient.Post(
		embURL,
		embHeaders,
		embBody,
		func(statusCode int, responseHeaders http.Header, responseBody []byte) {
			if statusCode != 200 {
				log.Errorf("Failed to fetch embeddings, statusCode: %d, responseBody: %s", statusCode, string(responseBody))
				ctx.SetContext(QueryEmbeddingKey, nil)
				proxywasm.ResumeHttpRequest()
				return
			}
			// Only report success once the status code has been verified.
			log.Infof("Successfully fetched embeddings for key: %s", key)
			processFetchedEmbeddings(key, responseBody, ctx, config, log, stream)
		},
		10000)
	if err != nil {
		// The callback will never fire if the call was not dispatched, so the
		// request has to be resumed here or it would stay paused.
		log.Errorf("Failed to dispatch embedding request for key: %s, err: %v", key, err)
		ctx.SetContext(QueryEmbeddingKey, nil)
		proxywasm.ResumeHttpRequest()
	}
}
// processFetchedEmbeddings parses the text-embedding response, stores the
// first embedding in ctx under QueryEmbeddingKey and the key under
// CacheKeyContextKey, and then starts the vector search through
// performQueryAndRespond.
//
// If the body cannot be parsed or contains no embeddings, QueryEmbeddingKey is
// cleared and the request is resumed instead of indexing into an empty result.
func processFetchedEmbeddings(key string, responseBody []byte, ctx wrapper.HttpContext, config PluginConfig, log wrapper.Log, stream bool) {
	parsed, err := ParseTextEmbedding(responseBody)
	if err != nil {
		log.Errorf("Failed to parse text embedding response for key: %s, err: %v", key, err)
		ctx.SetContext(QueryEmbeddingKey, nil)
		proxywasm.ResumeHttpRequest()
		return
	}
	if len(parsed.Output.Embeddings) == 0 {
		log.Errorf("Text embedding response contains no embeddings for key: %s", key)
		ctx.SetContext(QueryEmbeddingKey, nil)
		proxywasm.ResumeHttpRequest()
		return
	}

	embedding := parsed.Output.Embeddings[0].Embedding
	ctx.SetContext(QueryEmbeddingKey, embedding)
	ctx.SetContext(CacheKeyContextKey, key)
	performQueryAndRespond(key, embedding, ctx, config, log, stream)
}
// performQueryAndRespond runs a vector search for text_embedding and decides
// how to continue based on the best hit:
//
//   - no hits, or a best hit whose score is not below the threshold: the query
//     embedding is stored via uploadQueryEmbedding, which resumes the request
//     itself on every path;
//   - best hit with a score below the threshold: CacheKeyContextKey is cleared
//     and redis is queried for that hit's stored "query" field via
//     redisSearchHandler (without a further embedding fallback).
//
// Whenever a step cannot be started or its response cannot be interpreted, the
// request is resumed so that it is never left paused.
func performQueryAndRespond(key string, text_embedding []float64, ctx wrapper.HttpContext, config PluginConfig, log wrapper.Log, stream bool) {
	// A hit is reused only when its score is strictly below this value.
	const similarityThreshold = 0.1
	// NOTE(review): ten times the timeout used by the other calls in this
	// file — confirm that this is intentional.
	const queryTimeoutMs = 100000

	vectorURL, vectorRequest, vectorHeaders, err := ConstructEmbeddingQueryParameters(config, text_embedding)
	if err != nil {
		log.Errorf("Failed to perform query, err: %v", err)
		proxywasm.ResumeHttpRequest()
		return
	}

	postErr := config.DashVectorInfo.DashVectorClient.Post(
		vectorURL,
		vectorHeaders,
		vectorRequest,
		func(statusCode int, responseHeaders http.Header, responseBody []byte) {
			log.Infof("statusCode:%d, responseBody:%s", statusCode, string(responseBody))
			queryResp, parseErr := ParseQueryResponse(responseBody)
			if parseErr != nil {
				log.Errorf("Failed to parse response: %v", parseErr)
				proxywasm.ResumeHttpRequest()
				return
			}
			if len(queryResp.Output) < 1 {
				log.Warnf("query response is empty")
				// uploadQueryEmbedding resumes the request on all of its paths.
				_ = uploadQueryEmbedding(ctx, config, log, key, text_embedding)
				return
			}

			// A record without a string "query" field cannot be looked up in
			// redis; treat it like a miss instead of panicking.
			mostSimilarKey, ok := queryResp.Output[0].Fields["query"].(string)
			if !ok {
				log.Errorf("most similar record has no string query field, key:%s", key)
				_ = uploadQueryEmbedding(ctx, config, log, key, text_embedding)
				return
			}
			log.Infof("most similar key:%s", mostSimilarKey)

			mostSimilarScore := queryResp.Output[0].Score
			if mostSimilarScore < similarityThreshold {
				ctx.SetContext(CacheKeyContextKey, nil)
				if redisErr := redisSearchHandler(mostSimilarKey, ctx, config, log, stream, false); redisErr != nil {
					// The redis callback will not run, so resume here.
					log.Errorf("Failed to look up most similar key:%s in redis, err: %v", mostSimilarKey, redisErr)
					proxywasm.ResumeHttpRequest()
				}
				return
			}

			log.Infof("the most similar key's score is too high, key:%s, score:%f", mostSimilarKey, mostSimilarScore)
			// No explicit resume here: uploadQueryEmbedding already resumes the
			// request, and resuming twice would continue it a second time.
			_ = uploadQueryEmbedding(ctx, config, log, key, text_embedding)
		},
		queryTimeoutMs)
	if postErr != nil {
		// The callback will never fire if the call was not dispatched.
		log.Errorf("Failed to dispatch vector query for key: %s, err: %v", key, postErr)
		proxywasm.ResumeHttpRequest()
	}
}
// uploadQueryEmbedding stores text_embedding together with key in the vector
// database after a cache miss.
//
// The request is resumed on every path: when the insert parameters cannot be
// built, when the POST cannot be dispatched, and in the response callback
// regardless of the status code. Failures are only logged; the function always
// returns nil.
func uploadQueryEmbedding(ctx wrapper.HttpContext, config PluginConfig, log wrapper.Log, key string, text_embedding []float64) error {
	insertURL, insertBody, buildErr := ConsturctEmbeddingInsertParameters(&config, log, text_embedding, key)
	if buildErr != nil {
		log.Errorf("Failed to construct embedding insert parameters: %v", buildErr)
		proxywasm.ResumeHttpRequest()
		return nil
	}

	headers := [][2]string{
		{"Content-Type", "application/json"},
		{"dashvector-auth-token", config.DashVectorInfo.DashVectorKey},
	}

	// The outcome of the upload only affects logging; the request continues
	// either way.
	onResponse := func(statusCode int, responseHeaders http.Header, responseBody []byte) {
		if statusCode == 200 {
			log.Infof("Successfully uploaded query embedding for key: %s", key)
		} else {
			log.Errorf("Failed to upload query embedding: %s", responseBody)
		}
		proxywasm.ResumeHttpRequest()
	}

	if postErr := config.DashVectorInfo.DashVectorClient.Post(insertURL, headers, insertBody, onResponse, 10000); postErr != nil {
		log.Errorf("Failed to upload query embedding: %v", postErr)
		proxywasm.ResumeHttpRequest()
	}
	return nil
}
// ===================== 以上是主要逻辑 =====================
- 1.
- 2.
- 3.
- 4.
- 5.
- 6.
- 7.
- 8.
- 9.
- 10.
- 11.
- 12.
- 13.
- 14.
- 15.
- 16.
- 17.
- 18.
- 19.
- 20.
- 21.
- 22.
- 23.
- 24.
- 25.
- 26.
- 27.
- 28.
- 29.
- 30.
- 31.
- 32.
- 33.
- 34.
- 35.
- 36.
- 37.
- 38.
- 39.
- 40.
- 41.
- 42.
- 43.
- 44.
- 45.
- 46.
- 47.
- 48.
- 49.
- 50.
- 51.
- 52.
- 53.
- 54.
- 55.
- 56.
- 57.
- 58.
- 59.
- 60.
- 61.
- 62.
- 63.
- 64.
- 65.
- 66.
- 67.
- 68.
- 69.
- 70.
- 71.
- 72.
- 73.
- 74.
- 75.
- 76.
- 77.
- 78.
- 79.
- 80.
- 81.
- 82.
- 83.
- 84.
- 85.
- 86.
- 87.
- 88.
- 89.
- 90.
- 91.
- 92.
- 93.
- 94.
- 95.
- 96.
- 97.
- 98.
- 99.
- 100.
- 101.
- 102.
- 103.
- 104.
- 105.
- 106.
- 107.
- 108.
- 109.
- 110.
- 111.
- 112.
- 113.
- 114.
- 115.
- 116.
- 117.
- 118.
- 119.
- 120.
- 121.
- 122.
- 123.
- 124.
- 125.
- 126.
- 127.
- 128.
- 129.
- 130.
- 131.
- 132.
- 133.
- 134.
- 135.
- 136.
- 137.
- 138.
- 139.
- 140.
- 141.
- 142.
- 143.
- 144.
- 145.
- 146.
- 147.
- 148.
- 149.
- 150.
- 151.
- 152.
- 153.
- 154.
- 155.
- 156.
- 157.