上一篇:Elasticsearch Java REST Client 删除、修改
下一篇:Elasticsearch Java REST Client 批量操作(Bulk API)
Term Vectors API
官网地址:戳
词条向量
词条向量(Term Vectors)用于返回文档指定字段中各个词条(term)的信息与统计数据,例如词频、文档频率、位置和偏移量等
同步执行
# 查询方式一
/**
 * Synchronously requests the term vectors of one stored document.
 *
 * <p>The request targets index {@code edu-app-user}, type {@code _doc}, and
 * is restricted to the {@code name} field.
 *
 * @param id id of the document whose term vectors are requested
 * @return the response returned by the client's {@code termvectors} call
 * @throws IOException if the client call fails with an I/O error
 */
@RequestMapping("test")
public TermVectorsResponse test(String id) throws IOException {
    final TermVectorsRequest termVectorsRequest =
            new TermVectorsRequest("edu-app-user", "_doc", id);
    // Only the "name" field is of interest here.
    termVectorsRequest.setFields("name");

    final TermVectorsResponse termVectors =
            restHighLevelClient.termvectors(termVectorsRequest, RequestOptions.DEFAULT);
    return termVectors;
}
# 方式二
/**
 * Synchronously requests term vectors for a document supplied inline
 * rather than looked up by id.
 *
 * <p>The inline document is the JSON object {@code {"name": "川"}} and the
 * request targets index {@code edu-app-user}, type {@code _doc}.
 *
 * @return the response returned by the client's {@code termvectors} call
 * @throws IOException if building the JSON body or the client call fails
 */
@RequestMapping("test1")
public TermVectorsResponse test1() throws IOException {
    // Build the inline document step by step on a single builder instance.
    final XContentBuilder inlineDocument = XContentFactory.jsonBuilder();
    inlineDocument.startObject();
    inlineDocument.field("name", "川");
    inlineDocument.endObject();

    final TermVectorsRequest termVectorsRequest =
            new TermVectorsRequest("edu-app-user", "_doc", inlineDocument);
    final TermVectorsResponse termVectors =
            restHighLevelClient.termvectors(termVectorsRequest, RequestOptions.DEFAULT);
    return termVectors;
}
异步执行
/**
 * Asynchronously requests term vectors for a document supplied inline.
 *
 * <p>Nothing is returned to the caller; the outcome is handed to a new
 * {@code TermVectorsESListen} instance passed to {@code termvectorsAsync}.
 *
 * <p>NOTE(review): the inline document is {@code {"id": "28"}}, i.e. a
 * document body with a field called {@code id}. If the intent was to look up
 * the stored document whose id is 28, the id-based constructor should be
 * used instead. Confirm the intent.
 *
 * @throws IOException if building the JSON body fails
 */
@RequestMapping("test2")
public void test2() throws IOException {
    final XContentBuilder inlineDocument = XContentFactory.jsonBuilder();
    inlineDocument.startObject();
    inlineDocument.field("id", "28");
    inlineDocument.endObject();

    final TermVectorsRequest termVectorsRequest =
            new TermVectorsRequest("edu-app-user", "_doc", inlineDocument);
    final TermVectorsESListen resultListener = new TermVectorsESListen();
    restHighLevelClient.termvectorsAsync(
            termVectorsRequest, RequestOptions.DEFAULT, resultListener);
}
# 监听类
package com.wdz.es.config.es;
import com.alibaba.fastjson.JSONObject;
import org.elasticsearch.action.ActionListener;
import org.elasticsearch.client.core.TermVectorsResponse;
/**
 * Listener for term-vectors requests that reports the outcome on standard
 * output: the response serialized as JSON on success, or the exception
 * message on failure.
 */
public class TermVectorsESListen implements ActionListener<TermVectorsResponse> {

    /**
     * Prints the response, serialized to JSON, after a success prefix.
     *
     * @param response the term-vectors response delivered to this listener
     */
    @Override
    public void onResponse(TermVectorsResponse response) {
        final String responseJson = JSONObject.toJSONString(response);
        System.out.println("成功结果:" + responseJson);
    }

    /**
     * Prints the exception message after a failure prefix.
     *
     * @param e the exception delivered to this listener
     */
    @Override
    public void onFailure(Exception e) {
        final String reason = e.getMessage();
        System.out.println("失败结果:" + reason);
    }
}
其他可选方法
# 设置fieldStatistics为false(默认为true)以省略文档计数、文档频率总和、总词频总和。
request.setFieldStatistics(false);
# 设置termStatistics为true(默认为false)以显示总词频和文档频率。
request.setTermStatistics(true);
# 设置positions为false(默认为true)以省略位置的输出
request.setPositions(false);
# 设置offsets为false(默认为true)以省略偏移量的输出
request.setOffsets(false);
# 设置payloads为false(默认为true)以省略有效负载的输出
request.setPayloads(false);
Map<String, Integer> filterSettings = new HashMap<>();
filterSettings.put("max_num_terms", 3);
filterSettings.put("min_term_freq", 1);
filterSettings.put("max_term_freq", 10);
filterSettings.put("min_doc_freq", 1);
filterSettings.put("max_doc_freq", 100);
filterSettings.put("min_word_length", 1);
filterSettings.put("max_word_length", 10);
# 设置filterSettings,根据 tf-idf 分数过滤返回的术语
request.setFilterSettings(filterSettings);
Map<String, String> perFieldAnalyzer = new HashMap<>();
perFieldAnalyzer.put("user", "keyword");
# 设置perFieldAnalyzer以指定与该字段具有的分析器不同的分析器
request.setPerFieldAnalyzer(perFieldAnalyzer);
# 设置realtime为false(默认为true)以接近实时检索词向量
request.setRealtime(false);
# 设置路由参数
request.setRouting("routing");
结果解析
// Walks every per-field term vector of a TermVectorsResponse held in `response`.
//
// Optional values are kept in boxed types (Integer/Long/Float) instead of being
// unboxed into primitives: docFreq, totalTermFreq and score are null unless
// term statistics / filter settings were requested, and token positions and
// offsets are null when their output was disabled on the request. Unboxing
// them directly would throw a NullPointerException.
List<TermVectorsResponse.TermVector> termVectors = response.getTermVectorsList();
// Guard defensively: the list may be missing, e.g. when no document was found.
if (termVectors != null) {
    for (TermVectorsResponse.TermVector tv : termVectors) {
        String fieldname = tv.getFieldName();

        // Field statistics are omitted when setFieldStatistics(false) was used.
        TermVectorsResponse.TermVector.FieldStatistics fieldStatistics =
                tv.getFieldStatistics();
        if (fieldStatistics != null) {
            int docCount = fieldStatistics.getDocCount();
            long sumTotalTermFreq = fieldStatistics.getSumTotalTermFreq();
            long sumDocFreq = fieldStatistics.getSumDocFreq();
        }

        List<TermVectorsResponse.TermVector.Term> terms = tv.getTerms();
        if (terms == null) {
            continue;
        }
        for (TermVectorsResponse.TermVector.Term term : terms) {
            String termStr = term.getTerm();
            int termFreq = term.getTermFreq();
            // Nullable: only present when term statistics were requested.
            Integer docFreq = term.getDocFreq();
            Long totalTermFreq = term.getTotalTermFreq();
            // Nullable: only present when filter settings were applied.
            Float score = term.getScore();

            List<TermVectorsResponse.TermVector.Token> tokens = term.getTokens();
            if (tokens == null) {
                continue;
            }
            for (TermVectorsResponse.TermVector.Token token : tokens) {
                // Nullable: absent when positions/offsets/payloads are disabled.
                Integer position = token.getPosition();
                Integer startOffset = token.getStartOffset();
                Integer endOffset = token.getEndOffset();
                String payload = token.getPayload();
            }
        }
    }
}
{ # 查询的索引
"index":"edu-app-user",
# 文档类型名称
"type":"_doc",
# 文档的id
"id":"28",
"docVersion":7,
# 无数据的时候为false
"found":true,
"tookInMillis":10,
"termVectorsList":[
{
# 当前查询的指定参数:name
"fieldName":"name",
"fieldStatistics":{
# 当前字段的字段统计信息 - 文档频率总和
"sumDocFreq":9,
# 当前字段的字段统计信息 - 文档计数
"docCount":2,
# 当前字段的字段统计信息 - 总词频的总和
"sumTotalTermFreq":10
},
# 当前字段的术语
"terms":[
{ # 术语的名称
"term":"川",
# 术语的术语频率
"termFreq":1,
# 术语的文档频率
"docFreq":null,
# 术语的总术语频率
"totalTermFreq":null,
"score":null,
# 术语的令牌(token)列表
"tokens":[
{ # 令牌的起始偏移量
"startOffset":3,
# 令牌的结束偏移量
"endOffset":4,
# 令牌的位置
"position":3,
# 令牌的有效载荷
"payload":null
}
]
},
{
"term":"州",
"termFreq":1,
"docFreq":null,
"totalTermFreq":null,
"score":null,
"tokens":[
{
"startOffset":1,
"endOffset":2,
"position":1,
"payload":null
}
]
}
]
}
]
}
上一篇:Elasticsearch Java REST Client 删除、修改
下一篇:Elasticsearch Java REST Client 批量操作(Bulk API)