搜索管理
环境准备
创建映射
{
"properties": {
"description": {
"type": "text",
"analyzer": "ik_max_word",
"search_analyzer": "ik_smart"
},
"name": {
"type": "text",
"analyzer": "ik_max_word",
"search_analyzer": "ik_smart"
},
"pic": {
"type": "text",
"index": false
},
"price": {
"type":"float"
},
"studymodel": {
"type": "keyword"
},
"timestamp": {
"type": "date",
"format": "yyyy-MM-dd HH:mm:ss||yyyy-MM-dd||epoch_millis"
}
}
}
插入原始数据
POSTMan 操作即可.
# 创建映射:
post:http://localhost:9200/video/doc/_mapping
{
"properties": {
"description": {
"type": "text",
"analyzer": "ik_max_word",
"search_analyzer": "ik_smart"
},
"name": {
"type": "text",
"analyzer": "ik_max_word",
"search_analyzer": "ik_smart"
},
"pic":{
"type":"text",
"index":false
},
"price": {
"type": "float"
},
"studymodel": {
"type": "keyword"
},
"timestamp": {
"type": "date",
"format": "yyyy-MM-dd HH:mm:ss||yyyy-MM-dd||epoch_millis"
}
}
}
# 初始化文档:
http://localhost:9200/video/doc/1
{
"name": "Bootstrap开发",
"description": "Bootstrap是由Twitter推出的一个前台页面开发框架,是一个非常流行的开发框架,",
"studymodel": "201002",
"price":38.6,
"timestamp":"2018-04-25 19:11:35",
"pic":"group1/M00/00/00/wKhlQFs6RCeAY0pHAAJx5ZjNDEM428.jpg"
}
http://localhost:9200/video/doc/2
{
"name": "java编程基础",
"description": "java语言是世界第一编程语言,在软件开发领域使用人数最多。",
"studymodel": "201001",
"price":68.6,
"timestamp":"2018-03-25 19:11:35",
"pic":"group1/M00/00/00/wKhlQFs6RCeAY0pHAAJx5ZjNDEM428.jpg"
}
http://localhost:9200/video/doc/3
{
"name": "spring开发基础",
"description": "spring 在java领域非常流行,java程序员都在用。",
"studymodel": "201001",
"price":88.6,
"timestamp":"2018-02-24 19:11:35",
"pic":"group1/M00/00/00/wKhlQFs6RCeAY0pHAAJx5ZjNDEM428.jpg"
}
简单测试
DSL搜索
DSL 搜索是 ES 提出的基于 JSON 的搜索方式, 在搜索时传入特定的 JSON 格式的数据来完成不同的搜索需求.
DSL 比 URL 方式功能更加强大, 建议在项目中使用 DSL 方式来完成搜索.
查询所有文档
url 方式
查询所有索引库中文档:
post http://localhost:9200/_search
查询指定索引库指定类型的文档
post http://localhost:9200/video/doc/_search
{
"query": {
"match_all": {}
},
#指定结果中包含哪些字段
"_source" : ["name","studymodel"]
}
Java Client 方式
/**
 * Searches index "video" (type "doc") without setting an explicit query and
 * prints the raw {@code _source} plus the "name" and "description" fields of
 * every returned hit.
 *
 * @throws IOException if the call through the high-level client fails
 */
@Test
public void testQueryAll() throws IOException {
    // Source filtering: include only "name" and "description", exclude nothing.
    SearchSourceBuilder sourceBuilder = new SearchSourceBuilder();
    sourceBuilder.fetchSource(new String[]{"name", "description"}, new String[]{});

    SearchRequest searchRequest = new SearchRequest("video");
    searchRequest.types("doc");
    searchRequest.source(sourceBuilder);

    // Execute the search and walk the hits by index.
    SearchResponse searchResponse = highLevelClient.search(searchRequest);
    SearchHit[] results = searchResponse.getHits().getHits();
    for (int i = 0; i < results.length; i++) {
        SearchHit current = results[i];

        // Hit metadata; read here only to show which accessors are available.
        String indexName = current.getIndex();
        String typeName = current.getType();
        String docId = current.getId();
        float relevance = current.getScore();

        // Raw _source as a JSON string.
        String rawSource = current.getSourceAsString();
        System.out.println(rawSource);

        // The same _source as a map, for per-field access.
        Map<String, Object> fields = current.getSourceAsMap();
        String courseName = (String) fields.get("name");
        String courseDescription = (String) fields.get("description");
        System.out.println(courseName);
        System.out.println(courseDescription);
    }
}
分页查询
url方式
ES 支持分页查询, 传入两个参数 : from 和 size
from: 起始文档的下标, 从 0 开始
size: 查询的文档数量
POST : http://localhost:9200/video/doc/_search
{
"from": 0,
"size": 1,
"query": {
"match_all": {}
},
"_source": ["name","description"]
}
Java Client 方式
/**
 * Paged search on index "video" (type "doc"): starts at offset 0, asks for at
 * most 2 documents, and prints the raw {@code _source} plus "name" and
 * "description" of each hit.
 *
 * @throws IOException if the call through the high-level client fails
 */
@Test
public void testPageQuery() throws IOException {
    SearchSourceBuilder sourceBuilder = new SearchSourceBuilder();
    // Paging window: "from" is the zero-based start offset, "size" the page length.
    sourceBuilder.from(0).size(2);
    // Source filtering: include only "name" and "description", exclude nothing.
    sourceBuilder.fetchSource(new String[]{"name", "description"}, new String[]{});

    SearchRequest searchRequest = new SearchRequest("video");
    searchRequest.types("doc");
    searchRequest.source(sourceBuilder);

    SearchResponse searchResponse = highLevelClient.search(searchRequest);
    for (SearchHit current : searchResponse.getHits().getHits()) {
        // Hit metadata; read here only to show which accessors are available.
        String indexName = current.getIndex();
        String typeName = current.getType();
        String docId = current.getId();
        float relevance = current.getScore();

        // Raw _source as a JSON string.
        System.out.println(current.getSourceAsString());

        // Individual fields taken from the _source map.
        Map<String, Object> fields = current.getSourceAsMap();
        String courseName = (String) fields.get("name");
        String courseDescription = (String) fields.get("description");
        System.out.println(courseName);
        System.out.println(courseDescription);
    }
}
Term Query
Term Query 为精确查询, 在搜索时会整体匹配关键字, 不再将关键字分词.
url 方式
post http://localhost:9200/video/doc/_search
{
"query": {
"term" :{
"name": "spring"
}
},
"_source": ["name","description"]
}
Java Client
/**
 * Runs a term query for the value "spring" on field "name" against index
 * "video" (type "doc") and prints the raw {@code _source} plus "name" and
 * "description" of each hit.
 *
 * @throws IOException if the call through the high-level client fails
 */
@Test
public void testTermQuery() throws IOException {
    // Term query: the keyword is passed as-is to be matched against "name".
    SearchSourceBuilder sourceBuilder = new SearchSourceBuilder();
    sourceBuilder.query(QueryBuilders.termQuery("name", "spring"));

    SearchRequest searchRequest = new SearchRequest("video");
    searchRequest.types("doc");
    searchRequest.source(sourceBuilder);

    SearchResponse searchResponse = highLevelClient.search(searchRequest);
    SearchHit[] results = searchResponse.getHits().getHits();
    for (int i = 0; i < results.length; i++) {
        SearchHit current = results[i];

        // Hit metadata; read here only to show which accessors are available.
        String indexName = current.getIndex();
        String typeName = current.getType();
        String docId = current.getId();
        float relevance = current.getScore();

        // Raw _source as a JSON string.
        String rawSource = current.getSourceAsString();
        System.out.println(rawSource);

        // Individual fields taken from the _source map.
        Map<String, Object> fields = current.getSourceAsMap();
        String courseName = (String) fields.get("name");
        String courseDescription = (String) fields.get("description");
        System.out.println(courseName);
        System.out.println(courseDescription);
    }
}
根据 id 精确匹配
ES 提供根据多个 id 值匹配的方法 :
url 方式
post : http://127.0.0.1:9200/video/doc/_search
{
"query": {
"ids" :{
"type": "doc",
"values": ["1","2"]
}
},
"_source": ["name","description"]
}
Java Client 方式
/**
 * Looks up several documents by id ("1" and "2") in index "video" (type "doc")
 * and prints the raw {@code _source} plus "name" and "description" of each hit.
 *
 * @throws IOException if the call through the high-level client fails
 */
@Test
public void testIds() throws IOException {
    // The ids to fetch.
    List<String> wantedIds = Arrays.asList("1", "2");

    // Note: this must be termsQuery (plural) to match a list of values,
    // not termQuery.
    SearchSourceBuilder sourceBuilder = new SearchSourceBuilder();
    sourceBuilder.query(QueryBuilders.termsQuery("_id", wantedIds));

    SearchRequest searchRequest = new SearchRequest("video");
    searchRequest.types("doc");
    searchRequest.source(sourceBuilder);

    SearchResponse searchResponse = highLevelClient.search(searchRequest);
    for (SearchHit current : searchResponse.getHits().getHits()) {
        // Hit metadata; read here only to show which accessors are available.
        String indexName = current.getIndex();
        String typeName = current.getType();
        String docId = current.getId();
        float relevance = current.getScore();

        // Raw _source as a JSON string.
        System.out.println(current.getSourceAsString());

        // Individual fields taken from the _source map.
        Map<String, Object> fields = current.getSourceAsMap();
        String courseName = (String) fields.get("name");
        String courseDescription = (String) fields.get("description");
        System.out.println(courseName);
        System.out.println(courseDescription);
    }
}
match query
match query 全文检索, 它的搜索方式是先将搜索字符串分词, 再使用各个词条从索引中搜索.
match query 与 term query 区别是 match query 在搜索前先将搜索关键字分词, 再拿各个词语去索引中搜索.
url方式
post http://localhost:9200/video/doc/_search
{
"query": {
"match": {
"description": {
"query": "spring开发",
"operator": "or"
}
}
}
}
执行过程 :
- 将 spring开发 分为 : spring , 开发 两个词
- 再使用 spring 和 开发 两个词去匹配索引中的搜索
- 由于设置了 operator 为 or , 因此只要有一个词匹配成功就可以返回该文档
Java Client 方式
/**
 * Runs a match query for "java开发" on field "description" with the OR
 * operator against index "video" (type "doc"), then prints the raw
 * {@code _source} plus "name" and "description" of each hit.
 *
 * @throws IOException if the call through the high-level client fails
 */
@Test
public void testMatchQuery() throws IOException {
    // Match query on "description"; the operator is set to OR explicitly.
    SearchSourceBuilder sourceBuilder = new SearchSourceBuilder();
    sourceBuilder.query(
            QueryBuilders.matchQuery("description", "java开发").operator(Operator.OR));

    SearchRequest searchRequest = new SearchRequest("video");
    searchRequest.types("doc");
    searchRequest.source(sourceBuilder);

    SearchResponse searchResponse = highLevelClient.search(searchRequest);
    SearchHit[] results = searchResponse.getHits().getHits();
    for (int i = 0; i < results.length; i++) {
        SearchHit current = results[i];

        // Hit metadata; read here only to show which accessors are available.
        String indexName = current.getIndex();
        String typeName = current.getType();
        String docId = current.getId();
        float relevance = current.getScore();

        // Raw _source as a JSON string.
        String rawSource = current.getSourceAsString();
        System.out.println(rawSource);

        // Individual fields taken from the _source map.
        Map<String, Object> fields = current.getSourceAsMap();
        String courseName = (String) fields.get("name");
        String courseDescription = (String) fields.get("description");
        System.out.println(courseName);
        System.out.println(courseDescription);
    }
}
minimum_should_match:
上面使用 operator = or 表示只要有一个词匹配就能得分, 如果要实现三个词有两个匹配呢?
使用 minimum_should_match 可以指定文档匹配词的占比:
比如 :
{
"query": {
"match": {
"description": {
"query": "spring开发框架",
"minimum_should_match": "80%"
}
}
}
}
Spring开发框架会被分为 : Spring 开发 框架 三个词,
设置 minimum_should_match : 80% 表示三个词在文档的匹配占比为 80%, 即 3*0.8=2.4 , 向下取整得 2, 表示至少有两个词在文档中就会匹配成功.
/**
 * Runs a match query for "java开发" on field "description" with
 * {@code minimum_should_match} set to "80%" against index "video"
 * (type "doc"), then prints the raw {@code _source} plus "name" and
 * "description" of each hit.
 *
 * <p>Two defects are corrected here. The query is built with
 * {@code matchQuery(field, text)}: the earlier {@code multiMatchQuery} call
 * takes the search text first, so "description" was being used as the text
 * and "java开发" as a field name. The source builder is also attached to the
 * request; without that the query was never sent.
 *
 * @throws IOException if the call through the high-level client fails
 */
@Test
public void test_minimum_should_match() throws IOException {
    SearchRequest request = new SearchRequest("video");
    request.types("doc");

    SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder();
    // Single-field match query: first argument is the field, second the text.
    searchSourceBuilder.query(
            QueryBuilders.matchQuery("description", "java开发").minimumShouldMatch("80%"));
    // Attach the search source; otherwise the request carries no query at all.
    request.source(searchSourceBuilder);

    SearchResponse response = highLevelClient.search(request);
    for (SearchHit hit : response.getHits().getHits()) {
        // Raw _source as a JSON string.
        System.out.println(hit.getSourceAsString());

        // Individual fields taken from the _source map.
        Map<String, Object> sourceAsMap = hit.getSourceAsMap();
        String name = (String) sourceAsMap.get("name");
        String description = (String) sourceAsMap.get("description");
        System.out.println(name);
        System.out.println(description);
    }
}
multi query
上面学习的 termQuery 和 matchQuery 一次只能匹配一个 Field, 学习 multiQuery 一次可以匹配多个字段.
url 方式
发送:post http://localhost:9200/video/doc/_search
拿关键字 “spring css”去匹配name 和description字段
{
"query": {
"multi_match" : {
"query" : "spring css",
"minimum_should_match": "50%",
"fields": [ "name", "description" ]
}
}
}
匹配多个字段时可以提升字段的 boost(权重) 来提高得分.
{
"query": {
"multi_match" : {
"query" : "spring框架",
"minimum_should_match": "50%",
#name权重高于description 排在前面
"fields": [ "name^10", "description" ]
}
}
}
Java Client 方式
/**
 * Runs a multi_match query for "java开发" over fields "description" and
 * "name" (the latter boosted by 10) with {@code minimum_should_match} set to
 * "80%" against index "video" (type "doc"), then prints the raw
 * {@code _source} plus "name" and "description" of each hit.
 *
 * <p>Three defects are corrected here. The method used to be named
 * {@code test_minimum_should_match}, clashing with the identically named
 * match-query test, and is renamed so both can live in one class.
 * {@code multiMatchQuery} takes the search text first and the field names
 * after it; the arguments were swapped. The source builder is now attached
 * to the request; without that the query was never sent.
 *
 * @throws IOException if the call through the high-level client fails
 */
@Test
public void testMultiMatchQuery() throws IOException {
    SearchRequest request = new SearchRequest("video");
    request.types("doc");

    // multiMatchQuery(text, fields...): search text first, then the fields.
    MultiMatchQueryBuilder builder = QueryBuilders
            .multiMatchQuery("java开发", "description")
            .minimumShouldMatch("80%");
    // Add "name" as a second field with a boost of 10 so its matches score higher.
    builder.field("name", 10);

    SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder();
    searchSourceBuilder.query(builder);
    // Attach the search source; otherwise the request carries no query at all.
    request.source(searchSourceBuilder);

    SearchResponse response = highLevelClient.search(request);
    for (SearchHit hit : response.getHits().getHits()) {
        // Raw _source as a JSON string.
        System.out.println(hit.getSourceAsString());

        // Individual fields taken from the _source map.
        Map<String, Object> sourceAsMap = hit.getSourceAsMap();
        String name = (String) sourceAsMap.get("name");
        String description = (String) sourceAsMap.get("description");
        System.out.println(name);
        System.out.println(description);
    }
}
布尔查询
三个参数 :
must : 文档必须匹配 must 所包括的查询条件, 相当于 and
should: 文档应该匹配 should 所包括的查询条件其中的一个或者多个, 相当于 or
must_not: 文档不能匹配, 相当于 not
url 方式
POST http://localhost:9200/video/doc/_search
{
"_source": ["name","studymodel","description"],
"from": 0,
"size": 1,
"query": {
"bool": {
"must": [
{
"multi_match": {
"query": "Spring框架",
"minimum_should_match": "50%",
"fields": [
"name^10",
"description"
]
}
},
{
"term": {
"studymodel": "201001"
}
}
]
}
}
}
Java Client 方式
/**
 * Combines two mandatory ("must") clauses in a bool query against index
 * "video" (type "doc") and prints the "description" of each hit: a
 * multi_match query for "程序员 java" over "description" and "name" (boost
 * 10, minimum_should_match "80%"), and a term query on {@code _id} = "3".
 *
 * @throws IOException if the call through the high-level client fails
 */
@Test
public void testBoolQuery() throws IOException {
    // Clause 1: multi-field full-text query, with "name" boosted by 10.
    MultiMatchQueryBuilder textClause = QueryBuilders
            .multiMatchQuery("程序员 java", "description")
            .minimumShouldMatch("80%");
    textClause.field("name", 10);

    // Clause 2: exact match on the document id.
    TermQueryBuilder idClause = QueryBuilders.termQuery("_id", "3");

    // Both clauses go under "must", so a document has to satisfy both.
    BoolQueryBuilder combined = QueryBuilders.boolQuery();
    combined.must(textClause);
    combined.must(idClause);

    // Wrap the bool query in a search source and attach it to the request.
    SearchSourceBuilder sourceBuilder = new SearchSourceBuilder();
    sourceBuilder.query(combined);
    SearchRequest searchRequest = new SearchRequest("video").types("doc");
    searchRequest.source(sourceBuilder);

    SearchResponse searchResponse = highLevelClient.search(searchRequest);
    SearchHit[] results = searchResponse.getHits().getHits();
    for (int i = 0; i < results.length; i++) {
        SearchHit current = results[i];

        // Hit metadata; read here only to show which accessors are available.
        String indexName = current.getIndex();
        String typeName = current.getType();
        String docId = current.getId();
        float relevance = current.getScore();
        String rawSource = current.getSourceAsString();

        // Only the description is printed for this example.
        Map<String, Object> fields = current.getSourceAsMap();
        String courseDescription = (String) fields.get("description");
        System.out.println(courseDescription);
    }
}
过滤器
过滤是针对搜索的结果进行过滤, 过滤器主要判断的是文档是否匹配, 不去计算和判断文档的匹配度的得分, 所以过滤器性能要比查询高, 且方便缓存, 推荐尽量使用过滤器去实现查询或者过滤器和查询共同使用.
过滤器在布尔查询中使用, 下面是在搜索结果的基础上进行过滤
range 和 term 一次只能对一个 field 设置范围过滤.
{
"_source" :[
"name",
"studymodel",
"description",
"price"
],
"query": {
"bool": {
"must": [
{
"multi_match": {
"query": "spring框架",
"minimum_should_match": "50%",
"fields": [
"name^10",
"description"
]
}
}
],
"filter": [
{
"term": {
"studymodel": "201001"
}
},
{
"range": {
"price": {
"gte": 60,
"lte": 100
}
}
}
]
}
}
}
[外链图片转存失败(img-oVLUptnh-1565746946889)(C:\Users\15099\AppData\Roaming\Typora\typora-user-images\1564903715958.png)]
// Bool query that combines a scored full-text clause with non-scoring filters.
/**
 * Searches index "video" (type "doc") with a bool query and prints "name",
 * "studymodel" and "description" of each hit. The "must" clause is a
 * multi_match for "spring框架" over "name" (boost 10) and "description" with
 * minimum_should_match "50%". The "filter" clauses are a term on
 * "studymodel" = "201001" and a range on "price" between 60 and 100 inclusive.
 *
 * <p>Two defects are corrected here. The bool query is now set as the query
 * of the search source; previously only the multi_match query was sent and
 * both filters were ignored. The target index is "video", as in the other
 * examples, instead of the stray "xc_course".
 *
 * @throws IOException if the call through the high-level client fails
 */
@Test
public void testFilter() throws IOException {
    SearchRequest searchRequest = new SearchRequest("video");
    searchRequest.types("doc");

    SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder();
    // Source filtering: include the four listed fields, exclude nothing.
    searchSourceBuilder.fetchSource(new String[]{"name", "studymodel", "price", "description"},
            new String[]{});

    // Full-text clause over "name" and "description".
    MultiMatchQueryBuilder multiMatchQueryBuilder =
            QueryBuilders.multiMatchQuery("spring框架", "name", "description");
    // Required share of query terms that have to match.
    multiMatchQueryBuilder.minimumShouldMatch("50%");
    // Raise the boost of "name" so matches there score higher.
    multiMatchQueryBuilder.field("name", 10);

    // Bool query: scored "must" clause plus non-scoring filters.
    BoolQueryBuilder boolQueryBuilder = QueryBuilders.boolQuery();
    boolQueryBuilder.must(multiMatchQueryBuilder);
    boolQueryBuilder.filter(QueryBuilders.termQuery("studymodel", "201001"));
    boolQueryBuilder.filter(QueryBuilders.rangeQuery("price").gte(60).lte(100));

    // The bool query (not the bare multi_match) must be what the request sends.
    searchSourceBuilder.query(boolQueryBuilder);
    searchRequest.source(searchSourceBuilder);

    SearchResponse searchResponse = highLevelClient.search(searchRequest);
    for (SearchHit hit : searchResponse.getHits().getHits()) {
        Map<String, Object> sourceAsMap = hit.getSourceAsMap();
        String name = (String) sourceAsMap.get("name");
        String studymodel = (String) sourceAsMap.get("studymodel");
        String description = (String) sourceAsMap.get("description");
        System.out.println(name);
        System.out.println(studymodel);
        System.out.println(description);
    }
}
排序
可以在字段上添加一个或者多个排序, 支持在 keyword, date, float 等类型上添加, text 类型的字段上不允许添加排序.
url 方式
POST http://localhost:9200/video/doc/_search
{
"_source": ["name","studymodel","description","price"],
"query": {
"bool": {
"filter": [
{
"range": {
"price": {
"gte": 0,
"lte": 100
}
}
}
]
}
},
"sort": [
{
"studymodel": "desc"
},
{
"price": "asc"
}
]
}
Java Client 方式
/**
 * Searches index "video" (type "doc") for documents whose "price" lies
 * between 0 and 100 inclusive, sorted by "studymodel" descending and then
 * "price" ascending, and prints "name", "studymodel" and "description" of
 * each hit.
 *
 * <p>The bool query holding the range filter is now set as the query of the
 * search source. Previously it was built but never attached, so the price
 * filter had no effect on the request.
 *
 * @throws IOException if the call through the high-level client fails
 */
@Test
public void testSort() throws IOException {
    SearchRequest searchRequest = new SearchRequest("video");
    searchRequest.types("doc");

    SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder();
    // Source filtering: include the four listed fields, exclude nothing.
    searchSourceBuilder.fetchSource(new String[]{"name", "studymodel", "price", "description"},
            new String[]{});

    // Bool query with a single non-scoring range filter on "price".
    BoolQueryBuilder boolQueryBuilder = QueryBuilders.boolQuery();
    boolQueryBuilder.filter(QueryBuilders.rangeQuery("price").gte(0).lte(100));
    // Without this call the filter above would never reach the server.
    searchSourceBuilder.query(boolQueryBuilder);

    // Sort keys are applied in the order they are added.
    searchSourceBuilder.sort(new FieldSortBuilder("studymodel").order(SortOrder.DESC));
    searchSourceBuilder.sort(new FieldSortBuilder("price").order(SortOrder.ASC));

    searchRequest.source(searchSourceBuilder);

    SearchResponse searchResponse = highLevelClient.search(searchRequest);
    for (SearchHit hit : searchResponse.getHits().getHits()) {
        Map<String, Object> sourceAsMap = hit.getSourceAsMap();
        String name = (String) sourceAsMap.get("name");
        String studymodel = (String) sourceAsMap.get("studymodel");
        String description = (String) sourceAsMap.get("description");
        System.out.println(name);
        System.out.println(studymodel);
        System.out.println(description);
    }
}
高亮显示
高亮显示可以将搜索结果中的一个或多个字进行突出显示, 以便向用户展示匹配关键字的位置.
在搜索语句中添加 highlight 即可实现, 如下 :
url 方式
post: http://127.0.0.1:9200/video/doc/_search
{
"_source": [
"name",
"studymodel",
"description",
"price"
],
"query": {
"bool": {
"must": [
{
"multi_match": {
"query": "开发框架",
"minimum_should_match": "50%",
"fields": [
"name^10",
"description"
],
"type": "best_fields"
}
}
],
"filter": [
{
"range": {
"price": {
"gte": 0,
"lte": 100
}
}
}
]
}
},
"sort": [
{
"price": "asc"
}
],
"highlight": {
"pre_tags": [
"<tag1>"
],
"post_tags": [
"</tag1>"
],
"fields": {
"name": { },
"description": { }
}
}
}
Java Client 方式
/**
 * Searches index "video" (type "doc") for "开发" in "name" and
 * "description", restricted to "price" between 0 and 100 inclusive, sorted
 * by "studymodel" descending then "price" ascending, with highlighting on
 * both searched fields. For each hit it prints the name and description,
 * using the highlighted text when the server returned one.
 *
 * <p>Changes over the earlier version: the bool query (multi_match plus
 * price filter) is now set as the query of the search source, where before
 * the filter was built but never sent; the duplicated fragment-joining code
 * is moved into {@link #joinFragments}; and {@code StringBuilder} replaces
 * {@code StringBuffer}, since no synchronization is needed for a local.
 *
 * @throws IOException if the call through the high-level client fails
 */
@Test
public void testHighlight() throws IOException {
    SearchRequest searchRequest = new SearchRequest("video");
    searchRequest.types("doc");

    SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder();
    // Source filtering: include the four listed fields, exclude nothing.
    searchSourceBuilder.fetchSource(
            new String[]{"name", "studymodel", "price", "description"}, new String[]{});

    // Full-text clause over "name" and "description".
    MultiMatchQueryBuilder multiMatchQueryBuilder =
            QueryBuilders.multiMatchQuery("开发", "name", "description");

    // Bool query: scored "must" clause plus a non-scoring price filter.
    BoolQueryBuilder boolQueryBuilder = QueryBuilders.boolQuery();
    boolQueryBuilder.must(multiMatchQueryBuilder);
    boolQueryBuilder.filter(QueryBuilders.rangeQuery("price").gte(0).lte(100));
    // The bool query (not the bare multi_match) must be what the request sends.
    searchSourceBuilder.query(boolQueryBuilder);

    // Sort keys are applied in the order they are added.
    searchSourceBuilder.sort(new FieldSortBuilder("studymodel").order(SortOrder.DESC));
    searchSourceBuilder.sort(new FieldSortBuilder("price").order(SortOrder.ASC));

    // Highlighting: wrap matched terms in a red <font> tag.
    HighlightBuilder highlightBuilder = new HighlightBuilder();
    highlightBuilder.preTags("<font color='red'>");
    highlightBuilder.postTags("</font>");
    highlightBuilder.fields().add(new HighlightBuilder.Field("name"));
    highlightBuilder.fields().add(new HighlightBuilder.Field("description"));
    searchSourceBuilder.highlighter(highlightBuilder);

    searchRequest.source(searchSourceBuilder);

    SearchResponse searchResponse = highLevelClient.search(searchRequest);
    for (SearchHit hit : searchResponse.getHits().getHits()) {
        Map<String, Object> sourceAsMap = hit.getSourceAsMap();
        String name = (String) sourceAsMap.get("name");
        String description = (String) sourceAsMap.get("description");

        // Prefer the highlighted text over the plain _source value when present.
        Map<String, HighlightField> highlightFields = hit.getHighlightFields();
        if (highlightFields != null) {
            description = joinFragments(highlightFields.get("description"), description);
            name = joinFragments(highlightFields.get("name"), name);
        }
        System.out.println(name);
        System.out.println(description);
    }
}

/**
 * Concatenates all fragments of a highlight field into one string.
 *
 * @param field    the highlight field, or {@code null} if the field was not highlighted
 * @param fallback value to return when there is nothing to join
 * @return the joined fragments, or {@code fallback} when {@code field} or its
 *         fragment array is {@code null}
 */
private static String joinFragments(HighlightField field, String fallback) {
    if (field == null) {
        return fallback;
    }
    Text[] fragments = field.getFragments();
    // Defensive: treat a missing fragment array like a missing field.
    if (fragments == null) {
        return fallback;
    }
    StringBuilder joined = new StringBuilder();
    for (Text fragment : fragments) {
        joined.append(fragment.string());
    }
    return joined.toString();
}
ields.get("name");
HighlightField descriptionField = highlightFields.get("description");
if (descriptionField != null) {
Text[] fragments = descriptionField.getFragments();
StringBuffer stringBuffer = new StringBuffer();
for (Text str : fragments) {
stringBuffer.append(str.string());
}
description = stringBuffer.toString();
}
if (nameField != null) {
Text[] fragments = nameField.getFragments();
StringBuffer stringBuffer = new StringBuffer();
for (Text str : fragments) {
stringBuffer.append(str.string());
}
name = stringBuffer.toString();
}
}
System.out.println(name);
System.out.println(description);
}
}