面向文档的搜索分析引擎
- 应用系统的数据结构都是面向对象的,复杂的
- 对象数据存储到数据库中,只能拆解开来,变为扁平的多张表,每次查询的时候还得还原回对象格式
- ES是面向文档的,文档中存储的数据结构与面向对象的数据结构是一样的,基于这种文档数据结构,es可以提供复杂的索引,全文检索,分析聚合等功能
- es的document用json数据格式来表达
//java实体类
/**
 * Employee entity used to illustrate how an object graph maps onto a single
 * Elasticsearch JSON document: each camelCase field corresponds to the
 * snake_case key of the JSON example that follows in these notes, and the
 * nested {@code info} object becomes a nested JSON object.
 */
public class Employee {
    private String email;
    private String firstName;
    private String lastName;
    // Nested object; kept inline in the document instead of being split into a separate table.
    private EmployeeInfo info;
    private Date joinDate;

    // Accessors are required by the usage example, which populates the entity through setters.
    public String getEmail() { return email; }
    public void setEmail(String email) { this.email = email; }

    public String getFirstName() { return firstName; }
    public void setFirstName(String firstName) { this.firstName = firstName; }

    public String getLastName() { return lastName; }
    public void setLastName(String lastName) { this.lastName = lastName; }

    public EmployeeInfo getInfo() { return info; }
    public void setInfo(EmployeeInfo info) { this.info = info; }

    public Date getJoinDate() { return joinDate; }
    public void setJoinDate(Date joinDate) { this.joinDate = joinDate; }
}
/**
 * Nested detail object held by an employee entity; serialized as the nested
 * "info" JSON object (bio, age, interests) in the document example.
 *
 * Declared package-private: {@code private} is not a legal modifier for a
 * top-level class, so the original declaration would not compile.
 */
class EmployeeInfo {
    private String bio;
    private Integer age;
    private String[] interests;

    // Accessors are required by the usage example, which populates the object through setters.
    public String getBio() { return bio; }
    public void setBio(String bio) { this.bio = bio; }

    public Integer getAge() { return age; }
    public void setAge(Integer age) { this.age = age; }

    public String[] getInterests() { return interests; }
    public void setInterests(String[] interests) { this.interests = interests; }
}
// Build the nested info object first.
EmployeeInfo info = new EmployeeInfo();
info.setBio("curious and modest");
info.setAge(30);
info.setInterests(new String[]{"bike", "climb"});

// Populate the employee and attach the nested object to it.
Employee employee = new Employee();
employee.setEmail("zhangsan@sina.com");
employee.setFirstName("san");
employee.setLastName("zhang");
employee.setInfo(info);
employee.setJoinDate(new Date());
//ES中保存的json格式
{
"email":"zhangsan@sina.com",
"first_name":"san",
"last_name":"zhang",
"info":{
"bio":"curious and modest",
"age":30,
"interests":["bike","climb"]
},
"join_date":"2020-01-01"
}
快速检查集群的健康状况
GET _cat/health?v
epoch timestamp cluster status node.total node.data shards pri relo init unassign pending_tasks max_task_wait_time active_shards_percent
1610811548 23:39:08 elasticsearch yellow 1 1 1 1 0 0 1 0 - 50.0%
集群的健康状态:
green:每个索引的primary shard和replica shard 都是active状态的
yellow:每个索引的primary shard都是active状态,但是部分replica shard不是active状态,处于不可用的状态
red:不是所有索引的primary shard都是active状态,部分索引有数据丢失
快速查看集群中有哪些索引
GET _cat/indices?v
health status index uuid pri rep docs.count docs.deleted store.size pri.store.size
yellow open .kibana VDC2Bp2LTtCrDMip3b_Csw 1 1 1 0 3.1kb 3.1kb
创建索引:
PUT test_index?pretty
//test_index : 新建的索引名称
//现在查看到的索引信息就有两条
health status index uuid pri rep docs.count docs.deleted store.size pri.store.size
yellow open test_index vrJfug0JQHOW9RXl1kpmVw 5 1 0 0 810b 810b
yellow open .kibana VDC2Bp2LTtCrDMip3b_Csw 1 1 1 0 3.1kb 3.1kb
删除索引:
DELETE test_index?pretty
ES的CRUD
(1) 新增商品:新增文档,建立索引
es会自动建立index和type,不需要提前创建,而且es默认会对document每个field都建立倒排索引,让其可以被搜索
PUT /index/type/id
请求: PUT ecommerce/product/1
{
"name":"gao lu jie",
"desc":"gao lu jie ya gao",
"price":15,
"procucter":"gaolujie producter",
"tags":["meibai","fangzhu"]
}
返回:
{
"_index": "ecommerce", ==>索引名称
"_type": "product", ==>类型名称
"_id": "1", ==>ID
"_version": 1, ==>版本号
"result": "created", ==>新增
"_shards": {
"total": 2, ==>分片数量
"successful": 1, ==>成功写入一个分片
"failed": 0
},
"created": true ==>创建成功
}
(2)查询商品:检索文档
GET /index/type/id
GET /ecommerce/product/1
// 返回结果:
{
"_index": "ecommerce",
"_type": "product",
"_id": "1",
"_version": 1,
"found": true,
"_source": {
"name": "gao lu jie",
"desc": "gao lu jie ya gao",
"price": 15,
"procucter": "gaolujie producter",
"tags": [
"meibai",
"fangzhu"
]
}
}
(3)修改商品:替换文档
替换文档必须带上所有的field,才能修改信息,否则没有带上的field会在替换后丢失
PUT /ecommerce/product/1
{
"name" : "jiaqiangban gaolujie yagao",
"desc" : "gaoxiao meibai",
"price" : 30,
"producer" : "gaolujie producer",
"tags": [ "meibai", "fangzhu" ]
}
(4)修改商品:更新文档
POST /ecommerce/product/1/_update
{
"doc": {
"name": "jiaqiangban gaolujie yagao"
}
}
{
"_index": "ecommerce",
"_type": "product",
"_id": "1",
"_version": 8,
"result": "updated",
"_shards": {
"total": 2,
"successful": 1,
"failed": 0
}
}
(5)删除商品:删除文档
DELETE /ecommerce/product/1
//返回结果:
{
"found": true,
"_index": "ecommerce",
"_type": "product",
"_id": "1",
"_version": 9,
"result": "deleted",
"_shards": {
"total": 2,
"successful": 1,
"failed": 0
}
}
query String search
//1.查询所有商品
GET ecommerce/product/_search
took:耗时
timed_out:是否超时
_shards:数据拆成了5个分片,所以对于搜索请求,会打到所有的primary shard(或某个replica shard)
hits.total:查询结果数
hits.max_score:document对于一个search的相关度匹配分数,越相关就越匹配,分数也越高
hits.hits:包含匹配搜索的document的详细数据
{
"took": 0,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"failed": 0
},
"hits": {
"total": 3,
"max_score": 1,
"hits": [
{
"_index": "ecommerce",
"_type": "product",
"_id": "2",
"_score": 1,
"_source": {
"name": "jiajieshi yagao",
"desc": "youxiao fangzhu",
"price": 25,
"producer": "jiajieshi producer",
"tags": [
"fangzhu"
]
}
},
{
"_index": "ecommerce",
"_type": "product",
"_id": "1",
"_score": 1,
"_source": {
"name": "gaolujie",
"desc": "gaolujie yagao",
"price": 15,
"procucter": "gaolujie producter",
"tags": [
"meibai",
"fangzhu"
]
}
},
{
"_index": "ecommerce",
"_type": "product",
"_id": "3",
"_score": 1,
"_source": {
"name": "zhonghua yagao",
"desc": "caoben zhiwu",
"price": 40,
"producer": "zhonghua producer",
"tags": [
"qingxin"
]
}
}
]
}
}
//搜索商品名称中包含yagao的商品并按售价降序排序
GET ecommerce/product/_search?q=name:yagao&sort=price:desc
query DSL
//查询所有商品
GET ecommerce/product/_search
{
"query": {
"match_all": {}
}
}
//查询商品名称包含yagao的商品并按价格排序
GET /ecommerce/product/_search
{
"query": {
"match": {
"name": "yagao"
}
},
"sort": [
{
"price": {
"order": "desc"
}
}
]
}
//对商品进行分页
GET ecommerce/product/_search
{
"size": 1,
"from": 0,
"query": {
"match_all": {}
}
}
size:每页显示多少数据
from:从第几条数据开始查(偏移量,不是页码),从0开始
//按照查询出的商品只显示名称跟价格字段
GET ecommerce/product/_search
{
"query": {
"match_all": {}
}
, "_source": ["name","price"]
}
//显示结果
{
"took": 2,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"failed": 0
},
"hits": {
"total": 3,
"max_score": 1,
"hits": [
{
"_index": "ecommerce",
"_type": "product",
"_id": "2",
"_score": 1,
"_source": {
"price": 25,
"name": "jiajieshi yagao"
}
},
{
"_index": "ecommerce",
"_type": "product",
"_id": "1",
"_score": 1,
"_source": {
"price": 15,
"name": "gaolujie"
}
},
{
"_index": "ecommerce",
"_type": "product",
"_id": "3",
"_score": 1,
"_source": {
"price": 40,
"name": "zhonghua yagao"
}
}
]
}
}
query filter
搜索商品名称包含yagao,并且售价大于25的商品
GET ecommerce/product/_search
{
"query": {
"bool": { //由一个或多个子句组成(组合查询)
"must": { //返回的结果集必须满足must子句的条件
"match":{
"name":"yagao"
}
},
"filter": { //过滤,筛选价格大于25的商品
"range": {
"price": {
"gt": 25
}
}
}
}
}
}
full-text search(全文检索)
//查询producer字段中包含yagao或producer的商品
GET /ecommerce/product/_search
{
"query": {
"match": {
"producer": "yagao producer"
}
}
}
搜索串 yagao producer 会被拆分为 yagao 和 producer,然后分别去倒排索引中匹配,只要匹配上任意一个词的文档都会作为结果返回
phrase search(短语搜索)
跟全文检索相反,全文检索会将输入的搜索串拆分,去倒排索引里面匹配,只要能匹配上任意单词都可以作为结果返回
phrase search:要求输入的搜索串必须在指定的字段文本中完全包含且一模一样才能作为结果返回
GET ecommerce/product/_search
{
"query": {
"match_phrase": {
"producer": "yagao producer"
}
}
}
highlight search(高亮搜索结果)
GET ecommerce/product/_search
{
"query": {
"match": {
"producer": "producer"
}
},
"highlight": {
"fields": {
"producer": {}
}
}
}
聚合分析
1.计算每个tag下的商品数量
GET ecommerce/product/_search
{
"aggs": {
"gtoup_by_tags": {
"terms": {
"field": "tags"
}
}
}
}
//返回结果
{
"error": {
"root_cause": [
{
"type": "illegal_argument_exception",
"reason": "Fielddata is disabled on text fields by default. Set fielddata=true on [tags] in order to load fielddata in memory by uninverting the inverted index. Note that this can however use significant memory. Alternatively use a keyword field instead."
}
],
"type": "search_phase_execution_exception",
"reason": "all shards failed",
"phase": "query",
"grouped": true,
"failed_shards": [
{
"shard": 0,
"index": "ecommerce",
"node": "y8dcx4NSQ9C1fSNjpFHSyg",
"reason": {
"type": "illegal_argument_exception",
"reason": "Fielddata is disabled on text fields by default. Set fielddata=true on [tags] in order to load fielddata in memory by uninverting the inverted index. Note that this can however use significant memory. Alternatively use a keyword field instead."
}
}
]
},
"status": 400
}
//需要将文本field的fielddata属性设置为true
PUT /ecommerce/_mapping/product?update_all_types
{
"properties": {
"tags": {
"type": "text",
"fielddata": true
}
}
}
//最终返回结果
{
"took": 5,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"failed": 0
},
"hits": {
"total": 3,
"max_score": 0,
"hits": []
},
"aggregations": {
"gtoup_by_tags": { //分组名称
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [ //分组结果
{
"key": "fangzhu",
"doc_count": 2
},
{
"key": "meibai",
"doc_count": 1
},
{
"key": "qingxin",
"doc_count": 1
}
]
}
}
}
对名称中包含yagao的商品,计算每个tag下的商品数量
GET ecommerce/product/_search
{
"query": {
"match": {
"name": "yagao"
}
},
"size": 0,
"aggs": {
"group_by_tags": {
"terms": {
"field": "tags"
}
}
}
}
//结果集
{
"took": 1,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"failed": 0
},
"hits": {
"total": 2,
"max_score": 0,
"hits": []
},
"aggregations": {
"group_by_tags": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "fangzhu",
"doc_count": 1
},
{
"key": "qingxin",
"doc_count": 1
}
]
}
}
}
先分组,再算每组的平均值,计算每个tag下的商品的平均价格(嵌套聚合)
GET ecommerce/product/_search
{
"size": 0,
"aggs": {
"group_by_tags": {
"terms": {
"field": "tags"
},
"aggs": {
"avg_price": {
"avg": {
"field": "price"
}
}
}
}
}
}
//结果集
{
"took": 3,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"failed": 0
},
"hits": {
"total": 3,
"max_score": 0,
"hits": []
},
"aggregations": {
"group_by_tags": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "fangzhu",
"doc_count": 2,
"avg_price": {
"value": 20
}
},
{
"key": "meibai",
"doc_count": 1,
"avg_price": {
"value": 15
}
},
{
"key": "qingxin",
"doc_count": 1,
"avg_price": {
"value": 40
}
}
]
}
}
}
计算每个tag下的商品的平均价格,并且按照平均价格降序排序
GET ecommerce/product/_search
{
"size": 0,
"aggs": {
"group_by_tags": {
"terms": {
"field": "tags",
"order": {
"avg_price":"desc"
}
},
"aggs": {
"avg_price": {
"avg": {
"field": "price"
}
}
}
}
}
}
//结果集
{
"took": 2,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"failed": 0
},
"hits": {
"total": 3,
"max_score": 0,
"hits": []
},
"aggregations": {
"group_by_tags": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "qingxin",
"doc_count": 1,
"avg_price": {
"value": 40
}
},
{
"key": "fangzhu",
"doc_count": 2,
"avg_price": {
"value": 20
}
},
{
"key": "meibai",
"doc_count": 1,
"avg_price": {
"value": 15
}
}
]
}
}
}
按照指定的价格范围区间进行分组,然后在每组内再按照tag进行分组,最后再计算每组的平均价格
GET ecommerce/product/_search
{
"size": 0,
"aggs": {
"group_by_price": {
"range": {
"field": "price",
"ranges": [
{
"from": 0,
"to": 20
},{
"from": 20,
"to": 40
},{
"from": 40,
"to": 60
}
]
},
"aggs": {
"group_by_tags": {
"terms": {
"field": "tags"
},
"aggs": {
"avg_price": {
"avg": {
"field": "price"
}
}
}
}
}
}
}
}
//结果集
{
"took": 1,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"failed": 0
},
"hits": {
"total": 3,
"max_score": 0,
"hits": []
},
"aggregations": {
"group_by_price": {
"buckets": [
{
"key": "0.0-20.0",
"from": 0,
"to": 20,
"doc_count": 1,
"group_by_tags": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "fangzhu",
"doc_count": 1,
"avg_price": {
"value": 15
}
},
{
"key": "meibai",
"doc_count": 1,
"avg_price": {
"value": 15
}
}
]
}
},
{
"key": "20.0-40.0",
"from": 20,
"to": 40,
"doc_count": 1,
"group_by_tags": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "fangzhu",
"doc_count": 1,
"avg_price": {
"value": 25
}
}
]
}
},
{
"key": "40.0-60.0",
"from": 40,
"to": 60,
"doc_count": 1,
"group_by_tags": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "qingxin",
"doc_count": 1,
"avg_price": {
"value": 40
}
}
]
}
}
]
}
}
}