ElasticSearch Advanced
1. Storing special types in Elasticsearch: objects
When complex (object-valued) data is stored in Elasticsearch, ES flattens it internally.
PUT my-es/_doc/1
{
"age": 18,
"user": [
{
"one": "洛",
"two": "一一"
},
{
"first" : "忆",
"last" : "尘"
}
]
}
- age: an ordinary numeric value
- user: an array of inner objects, each containing two properties:
  - one: an ordinary string
  - two: an ordinary string
Because Lucene does not support object values, ES flattens the data into the following form:
{
"age": 18,
"user.one": ["洛","忆"]
"user.two": ["一一","尘"]
}
At this point the document's mapping looks roughly like this:
PUT my-es
{
"mappings": {
"properties": {
"age": {"type": "integer" },
"user": {
"properties": {
"one": { "type": "text" },
"two": { "type": "text" }
}
}
}
}
}
When we search for 忆 together with 一一, values that come from two different objects, the document is still returned, which is clearly wrong.
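For example, a cross-object query like the following (a minimal sketch against the my-es index above) still matches document 1, because flattening has discarded the pairing between one and two:
GET my-es/_search
{
"query": {
"bool": {
"must": [
{ "match": { "user.one": "忆" } },
{ "match": { "user.two": "一一" } }
]
}
}
}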
1.1 Solving the object-array problem in ES
1. Define a field of type nested:
PUT my-es1
{
"mappings": {
"properties": {
"user": {
"type": "nested",
"properties": {
"one":{"type":"keyword"},
"two":{"type":"keyword"}
}
}
}
}
}
Then index the data again:
PUT my-es1/_doc/1
{
"age" : "18",
"user" : [
{
"one": "洛",
"two": "一一"
},
{
"one" : "忆",
"two" : "尘"
}
]
}
When searching, you must use a nested query and specify the path of the nested object:
GET my-es1/_search
{
"query": {
"nested": {
"path": "user",
"query": {
"bool": {
"must": [
{
"match": {
"user.one": "忆"
}
},
{
"match": {
"user.two": "尘"
}
}
]
}
}
}
}
}
Now, querying the cross-object combination 忆 + 一一 no longer returns the document.
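To verify, run that combination as a nested query (a minimal sketch against my-es1); it returns no hits, because a nested query evaluates each inner object on its own:
GET my-es1/_search
{
"query": {
"nested": {
"path": "user",
"query": {
"bool": {
"must": [
{ "match": { "user.one": "忆" } },
{ "match": { "user.two": "一一" } }
]
}
}
}
}
}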
2. Autocomplete and suggestions
Elasticsearch's suggestion feature (Suggester) offers three different modes; the most commonly used is the Completion mode, which provides autocomplete and context-based suggestions.
2.1 Preparing the index
First, define an index and map the field used for autocomplete as the completion type.
PUT my-es2
{
"mappings": {
"properties": {
"name":{
"type": "completion"
}
}
}
}
- name: the field to autocomplete on; its type is completion
You can index a little data of your own; a small sample is shown below.
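For instance (the values below are just sample data, chosen so that the prefix query in the next step has something to match):
PUT my-es2/_doc/1
{
"name": "elasticsearch"
}
PUT my-es2/_doc/2
{
"name": "elastic stack"
}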
2.2 Querying
POST my-es2/_search
{
"suggest": {
"es2-name": {
"prefix": "el",
"completion": {
"field": "name",
"size": 10
}
}
}
}
Parameter notes:
- suggest: indicates that what follows is a suggest-type query
- es2-name: a custom name for this suggestion request
- prefix: the prefix to complete; in this example we look for content starting with el
- completion: marks this as a completion-type suggester; the other types are Term and Phrase
- field: the field to query
- size: the maximum number of suggestions to return
3. Pinyin search
3.1 Installing the pinyin analyzer plugin
Download the release that matches your ES version from the link below and install it under ES's plugins directory:
https://github.com/medcl/elasticsearch-analysis-pinyin/releases
3.2 Testing the pinyin analyzer
Restart ES and Kibana before testing.
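After the restart you can optionally confirm that the plugin was loaded; an analysis-pinyin entry should appear in the output:
GET _cat/plugins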
POST _analyze
{
"text": ["洛一一"],
"analyzer": "pinyin"
}
A result like the following means the plugin is installed correctly:
{
"tokens" : [
{
"token" : "luo",
"start_offset" : 0,
"end_offset" : 0,
"type" : "word",
"position" : 0
},
{
"token" : "lyy",
"start_offset" : 0,
"end_offset" : 0,
"type" : "word",
"position" : 0
},
{
"token" : "yi",
"start_offset" : 0,
"end_offset" : 0,
"type" : "word",
"position" : 1
},
{
"token" : "yi",
"start_offset" : 0,
"end_offset" : 0,
"type" : "word",
"position" : 2
}
]
}
3.3 Combining analyzers
Text analysis is driven by an analyzer. It is often loosely called a "tokenizer", but it is really an analyzer, and it generally consists of two parts:
- Tokenizer: splits the text into terms
- filter: token filters that further process the terms, e.g. pinyin conversion or synonym expansion
The analysis plugins you have installed can be combined as the tokenizer or as filters to build a custom analysis chain.
Example:
PUT /student
{
"settings": {
"analysis": {
"analyzer": {
"zuhe_pinyin": {
"tokenizer": "ik_smart",
"filter": [
"gl"
]
}
},
"filter": {
"gl": {
"type": "pinyin",
"keep_full_pinyin": false,
"keep_joined_full_pinyin": true,
"keep_original": true,
"limit_first_letter_length": 16,
"remove_duplicated_term": true
}
}
}
},
"mappings": {
"properties": {
"id": {
"type": "keyword"
},
"name": {
"type": "completion",
"analyzer": "zuhe_pinyin",
"search_analyzer": "ik_smart"
},
"title":{
"type": "text",
"analyzer": "zuhe_pinyin",
"search_analyzer": "ik_smart"
},
"price":{
"type": "long"
}
}
}
}
Notes:
- "tokenizer": "ik_smart": the IK analyzer performs the initial tokenization
- "filter": ["gl"]: the resulting terms are then passed through the custom gl filter
- "filter": { "gl": { ... } }: this block defines the gl filter itself (a pinyin token filter)
3.4 Testing
POST /student/_analyze
{
"text": "你好,世界",
"analyzer": "zuhe_pinyin"
}
Result:
{
"tokens" : [
{
"token" : "你好",
"start_offset" : 0,
"end_offset" : 2,
"type" : "CN_WORD",
"position" : 0
},
{
"token" : "nihao",
"start_offset" : 0,
"end_offset" : 2,
"type" : "CN_WORD",
"position" : 0
},
{
"token" : "nh",
"start_offset" : 0,
"end_offset" : 2,
"type" : "CN_WORD",
"position" : 0
},
{
"token" : "世界",
"start_offset" : 3,
"end_offset" : 5,
"type" : "CN_WORD",
"position" : 1
},
{
"token" : "shijie",
"start_offset" : 3,
"end_offset" : 5,
"type" : "CN_WORD",
"position" : 1
},
{
"token" : "sj",
"start_offset" : 3,
"end_offset" : 5,
"type" : "CN_WORD",
"position" : 1
}
]
}
3.5 Testing pinyin completion
Insert test data:
PUT /student/_bulk
{ "index" : {"_id":1 } }
{ "id": 1, "name": "小明","title":"篮球队"}
{ "index" : {"_id":2 } }
{"id": 2,"name": "小红","title":"足球队"}
{ "index" : {"_id":3 } }
{"id": 3,"name": "啦啦啦","title":"啦啦队"}
{ "index" : {"_id":4 } }
{"id": 4,"name": "jack","title":"篮球队"}
Autocomplete test:
POST /student/_search
{
"suggest": {
"sugg_name": {
"prefix": "x",
"completion": {
"field": "name"
}
}
}
}
Result:
{
"took" : 269,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 0,
"relation" : "eq"
},
"max_score" : null,
"hits" : [ ]
},
"suggest" : {
"sugg_name" : [
{
"text" : "x",
"offset" : 0,
"length" : 1,
"options" : [
{
"text" : "小明",
"_index" : "student",
"_type" : "_doc",
"_id" : "1",
"_score" : 1.0,
"_source" : {
"id" : 1,
"name" : "小明",
"title" : "篮球队"
}
},
{
"text" : "小红",
"_index" : "student",
"_type" : "_doc",
"_id" : "2",
"_score" : 1.0,
"_source" : {
"id" : 2,
"name" : "小红",
"title" : "足球队"
}
}
]
}
]
}
}
4. RestAPI
The official site provides clients for a variety of languages:
https://www.elastic.co/guide/en/elasticsearch/client/index.html
4.1 Usage
Create a new Maven project and import the dependencies:
<dependencies>
<!-- JUnit unit testing -->
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>4.12</version>
</dependency>
<!-- Elasticsearch client -->
<dependency>
<groupId>org.elasticsearch.client</groupId>
<artifactId>elasticsearch-rest-high-level-client</artifactId>
<version>7.4.2</version>
</dependency>
<!-- Lombok -->
<dependency>
<groupId>org.projectlombok</groupId>
<artifactId>lombok</artifactId>
<version>1.18.8</version>
</dependency>
<!-- JSON utility -->
<dependency>
<groupId>com.alibaba</groupId>
<artifactId>fastjson</artifactId>
<version>1.2.49</version>
</dependency>
<!-- Apache Commons utilities -->
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-lang3</artifactId>
<version>3.8.1</version>
</dependency>
<!-- Logging -->
<dependency>
<groupId>org.apache.logging.log4j</groupId>
<artifactId>log4j-core</artifactId>
<version>2.11.2</version>
</dependency>
<dependency>
<groupId>commons-beanutils</groupId>
<artifactId>commons-beanutils</artifactId>
<version>1.9.3</version>
</dependency>
</dependencies>
<build>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
<version>3.2</version>
<configuration>
<source>1.8</source>
<target>1.8</target>
<encoding>UTF-8</encoding>
</configuration>
</plugin>
</plugins>
</build>
Then create a new file named log4j2.xml under resources:
<?xml version="1.0" encoding="UTF-8"?>
<Configuration status="WARN">
<Appenders>
<Console name="Console" target="SYSTEM_OUT">
<PatternLayout pattern="%d{HH:mm:ss.SSS} [%t] %-5level %logger{36} - %msg%n"/>
</Console>
</Appenders>
<Loggers>
<Root level="error">
<AppenderRef ref="Console"/>
</Root>
</Loggers>
</Configuration>
4.2 Creating the ES client
The official docs describe how to initialize the connection to ES:
https://www.elastic.co/guide/en/elasticsearch/client/java-rest/current/java-rest-high-getting-started-initialization.html
For convenient testing, you can write it in a test class like this:
public class ElasticDemo {
private RestHighLevelClient client;
/**
* Open the connection
*/
@Before
public void init() throws IOException {
client = new RestHighLevelClient(
RestClient.builder(
new HttpHost("es的ip", es端口, "http")
)
);
}
/**
* Close the client connection
*/
@After
public void close() throws IOException {
client.close();
}
}
4.3 Creating the index
Make sure this index does not already exist in ES before running, otherwise the call fails; if it does exist, you can delete it first in Kibana, as shown below.
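A quick way to remove a leftover lipstick index from a previous run (this deletes the index and all of its data, so only use it on test data):
DELETE lipstick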
@Test
public void testCreateIndex() throws IOException {
// 1. Create a CreateIndexRequest and specify the index name
CreateIndexRequest request = new CreateIndexRequest("lipstick");
// 2. Prepare the JSON request body for the index, including settings and mappings
request.source("{\n" +
" \"settings\": {\n" +
" \"analysis\": {\n" +
" \"analyzer\": {\n" +
" \"zuhe_pinyin\": {\n" +
" \"tokenizer\": \"ik_smart\",\n" +
" \"filter\": [\n" +
" \"gl\"\n" +
" ]\n" +
" }\n" +
" },\n" +
" \"filter\": {\n" +
" \"gl\": {\n" +
" \"type\": \"pinyin\",\n" +
" \"keep_full_pinyin\": false,\n" +
" \"keep_joined_full_pinyin\": true,\n" +
" \"keep_original\": true,\n" +
" \"limit_first_letter_length\": 16,\n" +
" \"remove_duplicated_term\": true\n" +
" }\n" +
" }\n" +
" }\n" +
" },\n" +
" \"mappings\": {\n" +
" \"properties\": {\n" +
" \"id\": {\n" +
" \"type\": \"keyword\"\n" +
" },\n" +
" \"name\": {\n" +
" \"type\": \"completion\",\n" +
" \"analyzer\": \"zuhe_pinyin\",\n" +
" \"search_analyzer\": \"ik_smart\"\n" +
" },\n" +
" \"title\":{\n" +
" \"type\": \"text\",\n" +
" \"analyzer\": \"zuhe_pinyin\",\n" +
" \"search_analyzer\": \"ik_smart\"\n" +
" },\n" +
" \"price\":{\n" +
" \"type\": \"long\"\n" +
" }\n" +
" }\n" +
" }\n" +
"}", XContentType.JSON);
// 3. Send the request and get the response
CreateIndexResponse response = client.indices().create(request, RequestOptions.DEFAULT);
System.out.println("response = " + response.isAcknowledged());
}
Output:
response = true
4.5 Creating the entity class
@AllArgsConstructor
@NoArgsConstructor
@Data
public class Lipstick {
private Long id;
private String name;
private String title;
private Long price;
}
4.6 Importing data
@Test
public void testBulkDocument() throws IOException {
// 1. Prepare the document data
List<Lipstick> list = new ArrayList<>();
list.add(new Lipstick(1L, "迪奥口红", "迪奥Dior 999 超火色号 口红", 350L));
list.add(new Lipstick(2L, "Armani口红", "阿玛尼Armani 405 超火色号 口红", 340L));
list.add(new Lipstick(3L, "Mac粉底液", "粉底液柔雾无暇粉底 超火", 390L));
list.add(new Lipstick(4L, "Armani口红", "阿玛尼Armani 400 超火 口红", 410L));
// 2. Create the BulkRequest
BulkRequest bulkRequest = new BulkRequest();
// 3. Create an IndexRequest for each document and add it to the BulkRequest
for (Lipstick lipstick : list) {
bulkRequest.add(new IndexRequest("lipstick")
.id(lipstick.getId().toString())
.source(JSON.toJSONString(lipstick), XContentType.JSON)
);
}
// 4. Send the request
BulkResponse bulkResponse = client.bulk(bulkRequest, RequestOptions.DEFAULT);
System.out.println("status: " + bulkResponse.status());
}
4.7 Basic search
@Test
public void testBasicSearchWithSortAndPage() throws Exception {
// Create the SearchSourceBuilder
SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder();
// _source filtering: include everything, exclude the name field
searchSourceBuilder.fetchSource(new String[0], new String[]{"name"});
// 1.1. Add query conditions with QueryBuilders; here we use a bool query:
// title must contain 口红 and the price must be no more than 400
BoolQueryBuilder boolQueryBuilder = QueryBuilders.boolQuery();
// must clause: title matches 口红
boolQueryBuilder.must(QueryBuilders.matchQuery("title", "口红"));
// filter clause: price <= 400
boolQueryBuilder.filter(QueryBuilders.rangeQuery("price").lte(400));
// Put the query into the SearchSourceBuilder
searchSourceBuilder.query(boolQueryBuilder);
// Add sorting, paging and other options
searchSourceBuilder.sort("price", SortOrder.ASC);
// Paging
int page = 1;
int size = 3;
int from = (page - 1) * size;
searchSourceBuilder.size(size);
searchSourceBuilder.from(from);
// 1.4. Highlighting
searchSourceBuilder.highlighter(new HighlightBuilder().field("title"));
// Create the SearchRequest and specify the index name
SearchRequest searchRequest = new SearchRequest("lipstick");
// Attach the SearchSourceBuilder to the SearchRequest
searchRequest.source(searchSourceBuilder);
// Send the request and get the response
SearchResponse response = client.search(searchRequest, RequestOptions.DEFAULT);
// Parse the response
SearchHits searchHits = response.getHits();
// Total number of hits
long value = searchHits.getTotalHits().value;
System.out.println("total hits = " + value);
// The documents are inside hits
SearchHit[] hits = searchHits.getHits();
// Iterate over the hits
for (SearchHit hit : hits) {
// Get the `_source`, which is a JSON string
String json = hit.getSourceAsString();
// Deserialize `_source` into a Lipstick object
Lipstick lipstick = JSON.parseObject(json, Lipstick.class);
// Get the highlight results
Map<String, HighlightField> highlightFields = hit.getHighlightFields();
// Iterate over the highlighted fields
for (HighlightField highlightField : highlightFields.values()) {
// Field name
String fieldName = highlightField.getName();
// Field value: join the highlighted fragments
String fieldValue = StringUtils.join(highlightField.getFragments());
// Copy the highlighted value back into the Lipstick object
BeanUtils.setProperty(lipstick, fieldName, fieldValue);
}
System.out.println("lipstick = " + lipstick);
}
}
4.8 Suggest queries
Here we take a Completion Suggest query as an example:
@Test
public void testSuggest() throws IOException {
// Create the SearchSourceBuilder
SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder();
// 1.1. Prepare the suggest; four things must be specified:
// 1) the name of the suggestion: name_suggest
// 2) the suggestion type: SuggestBuilders.completionSuggestion
// 3) the field to complete on: completionSuggestion("name")
// 4) the prefix to complete: .prefix("s")
SuggestBuilder suggestBuilder = new SuggestBuilder();
// Add the suggestion; name_suggest is a custom name,
// completionSuggestion declares the field to query, and size is the number of suggestions to return
suggestBuilder.addSuggestion("name_suggest",
SuggestBuilders
.completionSuggestion("name")
.prefix("s").size(3));
// Add the suggest to the searchSourceBuilder
searchSourceBuilder.suggest(suggestBuilder);
// Build the SearchRequest and attach the searchSourceBuilder
SearchRequest searchRequest = new SearchRequest("lipstick");
searchRequest.source(searchSourceBuilder);
// Send the request
SearchResponse response = client.search(searchRequest, RequestOptions.DEFAULT);
// Parse the response
Suggest suggest = response.getSuggest();
// Get the suggestion by its name
Suggest.Suggestion<? extends Suggest.Suggestion.Entry<? extends Suggest.Suggestion.Entry.Option>> name_suggest =
suggest.getSuggestion("name_suggest");
// Iterate over the entries
name_suggest.forEach(suggestion -> {
List<? extends Suggest.Suggestion.Entry.Option> options = suggestion.getOptions();
// Iterate over the options
options.forEach(option -> {
Text text = option.getText();
System.out.println("text = " + text);
});
});
}