目录
2.2 什么是Spring Data ElasticSearch
2.3 Spring集成Spring Data ElasticSearch
1、ElasticSearch编程操作
1.1 依赖坐标
<dependency>
<groupId>org.elasticsearch.client</groupId>
<artifactId>elasticsearch-rest-high-level-client</artifactId>
<version>7.6.2</version>
</dependency>
1.2 创建客户端
@Before
public void before(){
// create the REST high-level client (the `client` field is declared elsewhere in the test class);
// connects to a single ES node over plain HTTP
client = new RestHighLevelClient(
RestClient.builder(new HttpHost("192.168.223.128", 9200, "http")));
}
1.3 创建索引
// 1. Create the index (all index-related operations should use the classes from
//    the org.elasticsearch.client.indices package).
CreateIndexRequest request = new CreateIndexRequest("blog1");
request.settings(Settings.builder()
.put("index.number_of_shards", 1)   // number of primary shards
.put("index.number_of_replicas", 1) // number of replicas
);
// Build the mapping as nested maps:
// { "properties": { <field>: { "type": ..., "analyzer": ... } } }
// FIX: use the diamond operator instead of raw HashMap types.
Map<String, Object> properties = new HashMap<>();
Map<String, Object> id = new HashMap<>();
id.put("type", "long");
properties.put("id", id);
Map<String, Object> title = new HashMap<>();
title.put("type", "text");
title.put("analyzer", "ik_max_word"); // requires the IK analyzer plugin on the ES node
properties.put("title", title);
Map<String, Object> content = new HashMap<>();
content.put("type", "text");
content.put("analyzer", "ik_max_word");
properties.put("content", content);
Map<String, Object> salary = new HashMap<>();
salary.put("type", "long");
properties.put("salary", salary);
Map<String, Object> mapping = new HashMap<>();
mapping.put("properties", properties);
request.mapping(mapping);
CreateIndexResponse createIndexResponse = client.indices().create(request, RequestOptions.DEFAULT);
System.out.println(createIndexResponse.index());
1.4 查询索引
//2. Check whether the index exists
GetIndexRequest request = new GetIndexRequest("blog1");
boolean exists = client.indices().exists(request, RequestOptions.DEFAULT);
System.out.println(exists);
1.5 创建文档
//增加文档(如果类注释推荐使用org.elasticsearch.client.Requests来创建类,最好用他推荐的方式)
// Index documents (prefer the factory methods on org.elasticsearch.client.Requests
// when the class javadoc recommends them)
IndexRequest request = Requests.indexRequest("blog1");
Random random = new Random();
for (int i = 0; i < 30; i++) {
request.id(i + "");// explicit unique document id
request.source(
"id", i,
"title", i + "ElasticSearch是一个基于Lucene的搜索服务器",
"content", i + "它提供了一个分布式多用户能力的全文搜索引擎,基于RESTful web接口。" +
"Elasticsearch是用Java 开发的,并作为Apache许可条款下的开放源码发布," +
"是当前流行的企业级搜索引擎。设计用于云计算中," +
"能够达到实时 搜索,稳定,可靠,快速,安装使用方便。",
"salary",random.nextInt(100)
);// source() also accepts JSON strings, Maps and XContentBuilder
IndexResponse indexResponse = client.index(request, RequestOptions.DEFAULT);
System.out.println(indexResponse);
}
1.6 修改文档
// Partially update a document
UpdateRequest request = new UpdateRequest("blog1", "1");// index name and document id
request.doc("title", "(修改)ElasticSearch是一个基于Lucene的搜索服务器",
"content", "(修改)它提供了一个分布式多用户能力的全文搜索引擎,基于RESTful web接口。Elasticsearch是用Java 开发的,并作为Apache许可条款下的开放源码发布,是当前流行的企业级搜索引擎。设计用于云计算中,能够达到实时 搜索,稳定,可靠,快速,安装使用方便。");// doc() also accepts JSON strings, Maps and XContentBuilder
UpdateResponse response = client.update(request, RequestOptions.DEFAULT);
System.out.println(response);
1.7 Bulk批量操作
// Bulk request: mixes delete / update / index actions in one round trip
BulkRequest request = new BulkRequest();
request.add(new DeleteRequest("blog1", "1"));// delete
request.add(new UpdateRequest("blog1", "2")// update
.doc(XContentType.JSON,"title", "哈哈哈"));
request.add(new IndexRequest("blog1").id("3")// insert
.source(XContentType.JSON,"title", "呵呵呵"));
BulkResponse response = client.bulk(request, RequestOptions.DEFAULT);
System.out.println(response.hasFailures());// failure flag: false means every action succeeded
1.8 根据ID查询文档
// Fetch a single document by id
GetRequest request = Requests.getRequest("blog1").id("2");
GetResponse response = client.get(request, RequestOptions.DEFAULT);
System.out.println(response.getSourceAsString());// raw _source as a JSON string
1.9 高级查询
1.9.1 精确查询
@Test
public void test71() throws IOException {
    // Exact-match (term-level) query examples against the "blog1" index.
    SearchRequest searchRequest = new SearchRequest("blog1");
    SearchSourceBuilder sourceBuilder = new SearchSourceBuilder();
    // term query: exact match on one value, the search text is NOT analyzed
    sourceBuilder.query(QueryBuilders.termQuery("title","服务器"));
    // terms query: exact match on any of several values, not analyzed
    /*sourceBuilder.query(QueryBuilders.termsQuery("title","服务器1","服务器2"));*/
    // match query: the search text IS analyzed before matching
    /*sourceBuilder.query(QueryBuilders.matchQuery("title","服务2器1").analyzer("ik_smart"));*/
    // multi_match query: analyzed match across several fields (union)
    /*sourceBuilder.query(QueryBuilders.multiMatchQuery("全文哈哈", "title", "content"));*/
    searchRequest.source(sourceBuilder);
    SearchResponse searchResponse = client.search(searchRequest, RequestOptions.DEFAULT);
    SearchHits searchHits = searchResponse.getHits();
    System.out.println("共收到" + searchHits.getTotalHits() + "条结果");
    for (SearchHit hit : searchHits) {
        System.out.println(hit.getSourceAsString());
    }
}
1.9.2 模糊查询
@Test
public void test72() throws IOException {
    // Fuzzy-style query examples against the "blog1" index.
    SearchRequest searchRequest = new SearchRequest("blog1");
    SearchSourceBuilder sourceBuilder = new SearchSourceBuilder();
    // query_string: the text is analyzed, then matched against the given field
    sourceBuilder.query(QueryBuilders.queryStringQuery("服务器").field("title").analyzer("ik_smart"));
    // wildcard: '*' matches any string; the pattern is NOT analyzed (similar to SQL LIKE)
    //sourceBuilder.query(QueryBuilders.wildcardQuery("title", "哈哈*"));
    searchRequest.source(sourceBuilder);
    SearchResponse searchResponse = client.search(searchRequest, RequestOptions.DEFAULT);
    SearchHits searchHits = searchResponse.getHits();
    System.out.println("共收到" + searchHits.getTotalHits() + "条结果");
    for (SearchHit hit : searchHits) {
        System.out.println(hit.getSourceAsString());
    }
}
1.9.3 范围查询
@Test
public void test73() throws IOException {
    // Range query examples on the numeric "id" field.
    SearchRequest searchRequest = new SearchRequest("blog1");
    SearchSourceBuilder sourceBuilder = new SearchSourceBuilder();
    // interval forms, with optional open/closed bounds:
    /*sourceBuilder.query(QueryBuilders.rangeQuery("id").from(1).to(3));*/
    /*sourceBuilder.query(QueryBuilders.rangeQuery("id").from(1).to(3).includeUpper(false).includeLower(true));*/
    // gte / lt form: selects 2 <= id < 5
    sourceBuilder.query(QueryBuilders.rangeQuery("id").gte(2).lt(5));
    searchRequest.source(sourceBuilder);
    SearchResponse searchResponse = client.search(searchRequest, RequestOptions.DEFAULT);
    SearchHits searchHits = searchResponse.getHits();
    System.out.println("共收到" + searchHits.getTotalHits() + "条结果");
    for (SearchHit hit : searchHits) {
        System.out.println(hit.getSourceAsString());
    }
}
1.9.4 组合查询
SearchRequest request = new SearchRequest("blog1");
SearchSourceBuilder builder = new SearchSourceBuilder();
// bool query combinators: must --> AND, must_not --> NOT, should --> OR
// NOTE(review): both must clauses are ANDed, so gt(10) AND gt(15) is effectively gt(15);
// contradictory clauses simply produce an empty result
builder.query(QueryBuilders.boolQuery().must(
QueryBuilders.rangeQuery("id").gt(10)
).must(
QueryBuilders.rangeQuery("id").gt(15)
));
request.source(builder);
SearchResponse response = client.search(request, RequestOptions.DEFAULT);
SearchHits hits = response.getHits();
System.out.println("共收到" + hits.getTotalHits() + "条结果");
for (SearchHit hit : hits) {
System.out.println(hit.getSourceAsString());
}
1.9.5 分页查询
SearchRequest request = new SearchRequest("blog1");
SearchSourceBuilder builder = new SearchSourceBuilder();
builder.query(QueryBuilders.matchAllQuery());// match_all: no condition; ES returns 10 hits per page by default
builder.from(0).size(5);// pagination: from = offset of the first hit, size = page size
request.source(builder);
SearchResponse response = client.search(request, RequestOptions.DEFAULT);
Iterator<SearchHit> hits = response.getHits().iterator();
while (hits.hasNext()) {
System.out.println(hits.next().getSourceAsString());// _source is JSON; map it to an object if needed
}
client.close();
1.9.6 查询结果高亮显示
@Test
public void test8() throws IOException {
    // Highlight matched terms in the "title" field of search results.
    SearchRequest searchRequest = new SearchRequest("blog1");
    SearchSourceBuilder sourceBuilder = new SearchSourceBuilder();
    /*sourceBuilder.query(QueryBuilders.termQuery("title","服务器"));*/
    // wrap each matched fragment in a red <font> tag
    HighlightBuilder highlighter = new HighlightBuilder()
            .preTags("<font style='color:red'>")
            .postTags("</font>")
            .field("title");
    sourceBuilder.highlighter(highlighter);
    searchRequest.source(sourceBuilder);
    SearchResponse searchResponse = client.search(searchRequest, RequestOptions.DEFAULT);
    SearchHits searchHits = searchResponse.getHits();
    System.out.println("共收到" + searchHits.getTotalHits() + "条结果");
    for (SearchHit hit : searchHits) {
        System.out.println(hit.getHighlightFields());
    }
}
2、Spring Data ElasticSearch
2.1 什么是Spring Data
Spring Data是一个用于简化数据库访问,并支持云服务的开源框架。其主要目标是使得对数据的访问变得方便快 捷,并支持map-reduce框架和云计算数据服务。 Spring Data可以极大的简化JPA的写法,可以在几乎不用写实现 的情况下,实现对数据的访问和操作。除了CRUD外,还包括如分页、排序等一些常用的功能。
2.2 什么是Spring Data ElasticSearch
Spring Data ElasticSearch 基于 Spring Data API 简化 Elasticsearch 操作,将原始操作 Elasticsearch 的客户端 API 进行了封装。Spring Data 为 Elasticsearch 项目提供集成的搜索引擎支持:以 POJO 为中心的模型与 Elasticsearch 文档交互,并能轻松编写存储库(Repository)风格的数据访问层。
2.3 Spring集成Spring Data ElasticSearch
2.3.1 依赖坐标
<dependencies>
<dependency>
<groupId>org.elasticsearch.client</groupId>
<artifactId>elasticsearch-rest-high-level-client</artifactId>
<version>7.6.2</version>
</dependency>
<dependency>
<groupId>org.springframework.data</groupId>
<artifactId>spring-data-elasticsearch</artifactId>
<version>4.0.1.RELEASE</version>
</dependency>
<dependency>
<groupId>org.springframework</groupId>
<artifactId>spring-test</artifactId>
<version>5.2.7.RELEASE</version>
</dependency>
</dependencies>
2.3.2 es实体对象类
package com.ydt.elasticsearch.domain;
import org.springframework.data.annotation.Id;
import org.springframework.data.elasticsearch.annotations.Document;
import org.springframework.data.elasticsearch.annotations.Field;
import org.springframework.data.elasticsearch.annotations.FieldType;
//@Document 文档对象 (索引信息、文档类型 )
/*Spring Data通过注解来声明字段的映射属性,有下面的三个注解:
- `@Document` 作用在类,标记实体类为文档对象,一般有四个属性
- indexName:对应索引库名称
- type:对应在索引库中的类型 在ElasticSearch7.x中取消了type的概念
- shards:分片数量,默认5
- replicas:副本数量,默认1
- `@Id` 作用在成员变量,标记一个字段作为id主键
- `@Field` 作用在成员变量,标记为文档的字段,并指定字段映射属性:
- type:字段类型,取值是枚举:FieldType
- index:是否设置分词 analyzer,布尔类型,默认是true
- store:是否存储,布尔类型,默认是false
- analyzer:分词器名称:ik_max_word
- createIndex 不创建默认是standard标准分词器索引库,否则会出现异常
*/
/**
 * Document entity mapped to the Elasticsearch index "blog2".
 *
 * FIX(review): Spring Data Elasticsearch 4.x removed the {@code type}
 * attribute from {@code @Document} (ES 7 dropped mapping types), so the
 * original {@code type = "article"} does not compile against the declared
 * 4.0.1.RELEASE dependency and was removed.
 *
 * createIndex = false: the index and mapping are created explicitly
 * (see the createIndex/putMapping test); otherwise the index would be
 * auto-created with the standard analyzer instead of IK.
 */
@Document(indexName = "blog2", createIndex = false)
public class Article {

    // @Id marks the document's unique identifier.
    @Id
    // index = false: not indexed for search; store = false: read back from _source.
    @Field(store = false, index = false, type = FieldType.Integer)
    private Integer id;

    // Full-text field analyzed with IK at both index and search time.
    @Field(index = true, analyzer = "ik_max_word", searchAnalyzer = "ik_max_word", type = FieldType.Text)
    private String title;

    @Field(index = true, analyzer = "ik_max_word", searchAnalyzer = "ik_max_word", type = FieldType.Text)
    private String content;

    @Field(store = false, index = false, type = FieldType.Integer)
    private Integer salary;

    public Integer getId() {
        return id;
    }

    public void setId(Integer id) {
        this.id = id;
    }

    public String getTitle() {
        return title;
    }

    public void setTitle(String title) {
        this.title = title;
    }

    public String getContent() {
        return content;
    }

    public void setContent(String content) {
        this.content = content;
    }

    public Integer getSalary() {
        return salary;
    }

    public void setSalary(Integer salary) {
        this.salary = salary;
    }

    @Override
    public String toString() {
        return "Article{" +
                "id=" + id +
                ", title='" + title + '\'' +
                ", content='" + content + '\'' +
                ", salary=" + salary +
                '}';
    }
}
2.3.3 es 数据操作接口(dao)
package com.ydt.elasticsearch.dao;
import com.ydt.elasticsearch.domain.Article;
import org.springframework.data.elasticsearch.repository.ElasticsearchRepository;
import org.springframework.stereotype.Repository;
@Repository
public interface ArticleRepository extends ElasticsearchRepository<Article, Integer> {
// Inherits CRUD, paging and search operations for Article documents (id type: Integer)
}
2.3.4 spring配置文件
<?xml version="1.0" encoding="UTF-8"?>
<beans xmlns="http://www.springframework.org/schema/beans"
xmlns:context="http://www.springframework.org/schema/context"
xmlns:elasticsearch="http://www.springframework.org/schema/data/elasticsearch"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="
http://www.springframework.org/schema/beans
http://www.springframework.org/schema/beans/spring-beans.xsd
http://www.springframework.org/schema/context
http://www.springframework.org/schema/context/spring-context.xsd
http://www.springframework.org/schema/data/elasticsearch
http://www.springframework.org/schema/data/elasticsearch/spring-elasticsearch.xsd
">
<!--es 数据操作接口层扫描目录-->
<elasticsearch:repositories base-package="com.ydt.elasticsearch.dao"></elasticsearch:repositories>
<context:component-scan base-package="com.ydt.elasticsearch"/>
<!--该处端口需要使用9300-->
<!--<elasticsearch:transport-client id="client" cluster-nodes="192.168.223.129:9300"/>-->
<elasticsearch:rest-client id="client" hosts="http://192.168.223.129:9200"/>
<!--es7之后推荐使用ElasticsearchRestTemplate模板-->
<bean id="elasticsearchTemplate" class="org.springframework.data.elasticsearch.core.ElasticsearchRestTemplate">
<constructor-arg name="client" ref="client"></constructor-arg>
</bean>
</beans>
2.3.5 增删改查测试
package elasticsearch;
import java.util.Random;

import com.ydt.elasticsearch.dao.ArticleRepository;
import com.ydt.elasticsearch.domain.Article;
import org.elasticsearch.client.transport.TransportClient;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.data.domain.Page;
import org.springframework.data.domain.PageRequest;
import org.springframework.data.domain.Pageable;
import org.springframework.data.elasticsearch.core.ElasticsearchRestTemplate;
import org.springframework.data.elasticsearch.core.ElasticsearchTemplate;
import org.springframework.test.context.ContextConfiguration;
import org.springframework.test.context.junit4.SpringJUnit4ClassRunner;
/**
 * CRUD tests for the Spring Data Elasticsearch setup (applicationContext.xml).
 */
@RunWith(SpringJUnit4ClassRunner.class)
@ContextConfiguration(locations="classpath:applicationContext.xml")
public class SpringbootElasticsearchApplicationTests2 {

    @Autowired
    private ElasticsearchRestTemplate elasticsearchTemplate;

    @Autowired
    private ArticleRepository articleRepository;

    /** Create the index and push the mapping derived from Article's @Field annotations. */
    @Test
    public void test1(){
        elasticsearchTemplate.createIndex(Article.class);
        elasticsearchTemplate.putMapping(Article.class);
        /* 4.x replacement:
        IndexOperations ops = elasticsearchTemplate.indexOps(Article.class);
        ops.create();
        ops.putMapping(ops.createMapping(Article.class));*/
    }

    /** Delete the whole index. */
    @Test
    public void test2(){
        elasticsearchTemplate.deleteIndex("blog2");
        /*elasticsearchTemplate.indexOps(Article.class).delete();*/
    }

    /** Index 49 sample documents with random salaries. */
    @Test
    public void test3(){
        Random random = new Random();
        for (int i = 1; i < 50; i++) {
            Article article = new Article();
            article.setId(i);
            article.setTitle(i + "hello");
            article.setContent(i + "hello es world");
            // FIX: setSalary takes an Integer; the original passed Long.valueOf(...),
            // which does not compile. nextInt already autoboxes to Integer.
            article.setSalary(random.nextInt(100));
            articleRepository.save(article);
        }
    }

    /** Page query: first page, 3 documents per page. */
    @Test
    public void test4(){
        Pageable pageable = PageRequest.of(0, 3);
        Page<Article> page = articleRepository.findAll(pageable);
        for (Article article : page.getContent()) {
            System.out.println(article);
        }
    }
}
2.3.6 常用查询命名规则
@Repository
public interface ArticleRepository extends ElasticsearchRepository<Article, Integer> {
// Derived query method: the findBy<Field> naming convention generates a query on "title"
public List<Article> findByTitle(String condition);
}
/** Exercises the derived findByTitle query with the literal search text "5hello". */
@Test
public void test5(){
List<Article> articleList = articleRepository.findByTitle("5hello");
for (Article article : articleList) {
System.out.println(article);
}
}
3、聚合分析
3.1 ES聚合分析是什么
ElasticSearch虽然最强大的功能是全文索引,但是同时它作为一个数据库,也提供了像MySQL那样的聚合功能,比如:获取一组数据的 最大值(Max)、最小值(Min)。同样我们能够对这组数据进行 分组(Group)。
官方对聚合有四个关键字:Metric(指标)、Bucket(桶)、Matrix(矩阵)、Pipeline(管道)。
Metric(指标): 指标分析类型,如计算最大值、最小值、平均值等等 (对桶内的文档进行聚合分析的操作)
Bucket(桶): 分桶类型,类似SQL中的GROUP BY语法 (满足特定条件的文档的集合)
Pipeline(管道): 管道分析类型,基于上一级的聚合分析结果进行再分析
Matrix(矩阵): 矩阵分析类型(聚合是一种面向数值型的聚合,用于计算一组文档字段中的统计信息)
3.2 指标(metric)和 桶(bucket)
虽然Elasticsearch有四种聚合方式,但在一般实际开发中,用到的比较多的就是Metric和Bucket。
(1) 桶(bucket)
a、简单来说桶就是满足特定条件的文档的集合。
b、当聚合开始被执行,每个文档里面的值通过计算来决定符合哪个桶的条件,如果匹配到,文档将放入相应的桶并接着开始聚合操作。
c、桶也可以被嵌套在其他桶里面。
(2)指标(metric)
a、桶能让我们划分文档到有意义的集合,但是最终我们需要的是对这些桶内的文档进行一些指标的计算。分桶是一种达到目的的手段:它提供了一种给文档分组的方法,让我们可以计算感兴趣的指标。
b、大多数指标是简单的数学运算(如:最小值、平均值、最大值、汇总),这些是通过文档的值来计算的。
3.3 添加测试数据
因为我们只是做聚合分析测试,字段可以不需要进行分词(以下都是kibana上操作)
#创建员工索引库
PUT /employees
{
"mappings": {
"properties": {
"age": {
"type": "integer"
},
"gender": {
"type": "keyword"
},
"job": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 50
}
}
},
"name": {
"type": "keyword"
},
"salary": {
"type": "integer"
}
}
}
}
#批量添加10条数据,每条数据包含:姓名、年龄、工作、性别、薪资
PUT /employees/_bulk
{"index":{"_id":"1"}}
{"name":"Emma","age":32,"job":"Product Manager","gender":"female","salary":35000}
{"index":{"_id":"2"}}
{"name":"Underwood","age":41,"job":"Dev Manager","gender":"male","salary":50000}
{"index":{"_id":"3"}}
{"name":"Tran","age":25,"job":"Web Designer","gender":"male","salary":18000}
{"index":{"_id":"4"}}
{"name":"Rivera","age":26,"job":"Web Designer","gender":"female","salary":22000}
{"index":{"_id":"5"}}
{"name":"Rose","age":25,"job":"QA","gender":"female","salary":18000}
{"index":{"_id":"6"}}
{"name":"Lucy","age":31,"job":"QA","gender":"female","salary":25000}
{"index":{"_id":"7"}}
{"name":"Byrd","age":27,"job":"QA","gender":"male","salary":20000}
{"index":{"_id":"8"}}
{"name":"Foster","age":27,"job":"Java Programmer","gender":"male","salary":20000}
{"index":{"_id":"9"}}
{"name":"Gregory","age":32,"job":"Java Programmer","gender":"male","salary":22000}
{"index":{"_id":"10"}}
{"name":"Bryant","age":20,"job":"Java Programmer","gender":"male","salary":9000}
3.4 最小、最大和平均
3.4.1 原生API
POST employees/_search
{
"size": 0,
"aggs": {
"max_salary": {
"max": {
"field": "salary"
}
},
"min_salary": {
"min": {
"field": "salary"
}
},
"avg_salary": {
"avg": {
"field": "salary"
}
}
}
}
3.4.2 高级API
// Metric aggregations: max, min and avg over the "salary" field of "employees"
SearchRequest request = new SearchRequest("employees");
SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder();
MaxAggregationBuilder maxAggregationBuilder = AggregationBuilders.max("max_salary").field("salary");
MinAggregationBuilder minAggregationBuilder = AggregationBuilders.min("min_salary").field("salary");
AvgAggregationBuilder avgAggregationBuilder = AggregationBuilders.avg("avg_salary").field("salary");
searchSourceBuilder.aggregation(maxAggregationBuilder);
searchSourceBuilder.aggregation(minAggregationBuilder);
searchSourceBuilder.aggregation(avgAggregationBuilder);
searchSourceBuilder.size(0);// aggregations only: do not return any hits
request.source(searchSourceBuilder);
SearchResponse searchResponse = client.search(request, RequestOptions.DEFAULT);
Aggregations aggregations = searchResponse.getAggregations();
Map<String, Aggregation> asMap = aggregations.getAsMap();
// results come back keyed by aggregation name as Parsed* implementations
ParsedMax parsedMax = (ParsedMax) asMap.get("max_salary");
System.out.println(parsedMax.getName()+":"+parsedMax.getValue());
ParsedMin parsedMin = (ParsedMin) asMap.get("min_salary");
System.out.println(parsedMin.getName()+":"+parsedMin.getValue());
ParsedAvg parsedAvg = (ParsedAvg) asMap.get("avg_salary");
System.out.println(parsedAvg.getName()+":"+parsedAvg.getValue());
3.5 聚合多值
3.5.1 原生API
POST employees/_search
{
"size": 0,
"aggs": {
"stats_salary": {
"stats": {
"field": "salary"
}
}
}
}
3.5.2 高级API
// Multi-value metric aggregation: "stats" returns count/min/max/avg/sum in one call.
// FIX(review): query the "employees" index — the native DSL in section 3.5.1 targets
// employees; the original "blog2" looks like a copy-paste slip. TODO confirm.
SearchRequest request = new SearchRequest("employees");
SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder();
StatsAggregationBuilder statsAggregationBuilder = AggregationBuilders.stats("stats_salary").field("salary");
searchSourceBuilder.aggregation(statsAggregationBuilder);
searchSourceBuilder.size(0);// aggregations only, no hits (matches the "size": 0 in the DSL)
request.source(searchSourceBuilder);
SearchResponse searchResponse = client.search(request, RequestOptions.DEFAULT);
Aggregations aggregations = searchResponse.getAggregations();
ParsedStats stats = (ParsedStats) aggregations.getAsMap().get("stats_salary");
System.out.println(stats.getCount());
3.6 类型统计
3.6.1 原生API
POST employees/_search
{
"size": 0,
"aggs": {
"cardinate": {
"cardinality": {
"field": "job.keyword"
}
}
}
}
3.6.2 高级API
SearchRequest request = new SearchRequest("employees");
SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder();
// cardinality 聚合:统计 job.keyword 的去重数量(类似 SQL 的 COUNT(DISTINCT job))
CardinalityAggregationBuilder cardinalityAggregationBuilder = AggregationBuilders.cardinality("cardinate").field("job.keyword");
searchSourceBuilder.aggregation(cardinalityAggregationBuilder);
searchSourceBuilder.size(0);
request.source(searchSourceBuilder);
SearchResponse searchResponse = client.search(request, RequestOptions.DEFAULT);
ParsedCardinality cardinality = (ParsedCardinality) searchResponse.getAggregations().getAsMap().get("cardinate");
System.out.println(cardinality.getValue());
3.7 分组统计
比如我们统计每个工作类型薪资最高的数据
3.7.1 原生API
POST /employees/_search
{
"size": 0,
"aggs": {
"Job_gender_stats": {
"terms": {
"field": "job.keyword"
},
"aggs": {
"gender_stats": {
"terms": {
"field": "gender"
},
"aggs": {
"salary_stats": {
"max": {
"field": "salary"
}
}
}
}
}
}
}
}
3.7.2 高级API
/**
 * Grouped statistics: max salary per job type and gender.
 *
 * FIX(review): section 3.7 promises "max salary per job type", and the native DSL
 * in 3.7.1 nests gender and max-salary sub-aggregations — the original code only
 * did a flat terms aggregation on job. The sub-aggregations are added here to match.
 */
@Test
public void test9() throws IOException {
    SearchRequest request = new SearchRequest("employees");
    SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder();
    // job terms -> gender terms -> max(salary), mirroring the DSL in 3.7.1
    TermsAggregationBuilder genderAgg = AggregationBuilders.terms("gender_stats").field("gender");
    genderAgg.subAggregation(AggregationBuilders.max("salary_stats").field("salary"));
    TermsAggregationBuilder jobAgg = AggregationBuilders.terms("job").field("job.keyword");
    jobAgg.subAggregation(genderAgg);
    searchSourceBuilder.aggregation(jobAgg);
    searchSourceBuilder.size(0);// aggregations only, no hits
    request.source(searchSourceBuilder);
    SearchResponse searchResponse = client.search(request, RequestOptions.DEFAULT);
    Aggregations aggregations = searchResponse.getAggregations();
    Map<String, Aggregation> asMap = aggregations.getAsMap();
    Terms jobs = (Terms) asMap.get("job");
    for (Terms.Bucket jobBucket : jobs.getBuckets()) {
        System.out.println(jobBucket.getKeyAsString() + ":" + jobBucket.getDocCount());
        Terms genders = (Terms) jobBucket.getAggregations().getAsMap().get("gender_stats");
        for (Terms.Bucket genderBucket : genders.getBuckets()) {
            ParsedMax maxSalary = (ParsedMax) genderBucket.getAggregations().getAsMap().get("salary_stats");
            System.out.println("  " + genderBucket.getKeyAsString() + " max salary: " + maxSalary.getValue());
        }
    }
}