es版本 5.1.1
基本增删改操作
查询删除
# Delete by query on type user_event of index user_analysys.
# WARNING: the query is match_all, so every document of that type is deleted.
POST /user_analysys/user_event/_delete_by_query
{
  "query": { "match_all": {} }
}
# Index three sample employee documents under explicit ids 1, 2 and 3.
PUT /megacorp/employee/1
{
  "first_name": "John",
  "last_name": "Smith",
  "age": 25,
  "about": "I love to go rock climbing",
  "interests": ["sports", "music"]
}
PUT /megacorp/employee/2
{
  "first_name": "Jane",
  "last_name": "Smith",
  "age": 32,
  "about": "I like to collect rock albums",
  "interests": ["music"]
}
PUT /megacorp/employee/3
{
  "first_name": "Douglas",
  "last_name": "Fir",
  "age": 35,
  "about": "I like to build cabinets",
  "interests": ["forestry"]
}
# Fetch document 1 by id.
GET /megacorp/employee/1
# Search the employee type without a request body.
GET /megacorp/employee/_search
# Fetch only the _source of document 1.
GET /megacorp/employee/1/_source
# URI search: the query is passed in the q parameter (first_name:Jane).
GET /megacorp/employee/_search?q=first_name:Jane
# Partial update: only the fields listed under "doc" are changed on document 1.
POST /megacorp/employee/1/_update
{
  "doc": { "about": "I love to go rock climbing xxx" }
}
# Update-or-insert: "doc" is applied when document 4 exists;
# otherwise the "upsert" content is indexed as the new document.
POST /megacorp/employee/4/_update
{
  "doc": { "about": "I love to go rock climbing xxx" },
  "upsert": {
    "first_name": "xiang",
    "last_name": "kevin",
    "age": 25,
    "about": "I love to go rock climbing zzz",
    "interests": ["sports", "music"]
  }
}
# Full-text match on first_name.
POST /megacorp/employee/_search
{
  "query": {
    "match": { "first_name": "John" }
  }
}
# Full-text match on about for the text "rock climbing".
POST /megacorp/employee/_search
{
  "query": {
    "match": { "about": "rock climbing" }
  }
}
# match_phrase: match the words as an exact phrase; matching fragments of
# the about field are returned highlighted.
POST /megacorp/employee/_search
{
  "query": {
    "match_phrase": { "about": "rock climbing" }
  },
  "highlight": {
    "fields": { "about": {} }
  }
}
# bool query: last_name must match Smith, and age is restricted to 10..30
# by a range clause in filter context.
POST /megacorp/employee/_search
{
  "query": {
    "bool": {
      "must": {
        "match": { "last_name": "Smith" }
      },
      "filter": {
        "range": {
          "age": { "gte": 10, "lte": 30 }
        }
      }
    }
  }
}
# Reference: https://www.elastic.co/guide/en/elasticsearch/reference/current/fielddata.html
# Inspect the current employee mapping.
GET /megacorp/employee/_mapping
# Enable fielddata on the text field interests; the terms aggregation
# below is run on that field.
PUT /megacorp/_mapping/employee
{
  "properties": {
    "interests": { "type": "text", "fielddata": true }
  }
}
# Aggregations can be nested to build multi-level summaries: for employees
# whose last_name matches Smith, bucket by interest and compute the average
# age inside each bucket.
POST /megacorp/employee/_search
{
  "query": {
    "match": { "last_name": "Smith" }
  },
  "aggs": {
    "all_interests": {
      "terms": { "field": "interests" },
      "aggs": {
        "avg_age": {
          "avg": { "field": "age" }
        }
      }
    }
  }
}
分词和映射
# List the employee documents and show the employee mapping.
GET /megacorp/employee/_search
GET /megacorp/employee/_mapping
# Analyzer tests.
# The analyze parameters are sent in the JSON body rather than as URL
# parameters: request parameters for _analyze are deprecated since 5.1,
# and the body form avoids unencoded spaces in the request line.
# Run the standard analyzer on a sample text.
GET /_analyze
{
  "analyzer": "standard",
  "text": "Text to analyze"
}
# Analyze a text with the analyzer configured for the about field of megacorp.
GET /megacorp/_analyze
{
  "field": "about",
  "text": "Black-cats"
}
# Show the mapping of index gb, then delete the index to start from scratch.
GET /gb/_mapping
DELETE /gb
# Create the index with an explicit mapping.
# Elasticsearch 5.x replaces the deprecated "string" type: analyzed strings
# become "text", not_analyzed strings become "keyword".
PUT /gb
{
  "mappings": {
    "type_tweet": {
      "properties": {
        "tweet":   { "type": "text", "analyzer": "english" },
        "date":    { "type": "date" },
        "name":    { "type": "text" },
        "user_id": { "type": "long" }
      }
    }
  }
}
# Adding a new field to an existing mapping is allowed.
# "keyword" is the 5.x equivalent of a not_analyzed string.
PUT /gb/_mapping/type_tweet
{
  "properties": {
    "tag": { "type": "keyword" }
  }
}
# Changing the type of an existing field is not allowed; this request is
# expected to be rejected.
PUT /gb/_mapping/type_tweet
{
  "properties": {
    "tag": { "type": "long" }
  }
}
结构化查询和结构化过滤
# Structured queries vs. structured filters.
# Rule of thumb: use query clauses for full-text search or anything else
# that needs relevance scoring, and filter clauses for everything else.
# A filter asks whether a document's field contains a given value; no
# relevance analysis or scoring is performed.
# A query asks how well a document's field matches a given value.
# A query can be validated, with an explanation of how it is executed, via:
# GET /megacorp/employee/_validate/query?explain
# General structure (run with ?explain to get a score explanation per hit):
GET /megacorp/employee/_search?explain
{
  "query": {
    "bool": {
      "must": [
        { "match": { "about": "rock" } },
        { "match": { "last_name": "Smith" } }
      ],
      "filter": {
        "range": {
          "age": { "gte": 10, "lte": 25 }
        }
      }
    }
  }
}
# Fetch document 1, then list the employee documents.
GET /megacorp/employee/1
GET /megacorp/employee/_search
# Full-text match on the about field.
GET /megacorp/employee/_search
{
  "query": {
    "match": { "about": "rock climbing" }
  }
}
# multi_match: a match query run against several fields at once.
GET /megacorp/employee/_search
{
  "query": {
    "multi_match": {
      "query": "Smith",
      "fields": ["first_name", "last_name"]
    }
  }
}
# match_phrase: the field must contain all the words, together as a phrase.
GET /megacorp/employee/_search
{
  "query": {
    "match_phrase": { "about": "rock climbing" }
  }
}
# Show how the about field's analyzer tokenizes a sample sentence.
# Parameters are sent in the JSON body: URL parameters for _analyze are
# deprecated since 5.1, and the text contains spaces.
GET /megacorp/_analyze
{
  "field": "about",
  "text": "I love to go rock climbing xxx"
}
# term means exact matching: the search value is not analyzed, so the index
# must contain the whole value as a single term. The about field was
# tokenized at index time, so no term "rock climbing" exists and this
# query returns no hits.
GET /megacorp/employee/_search
{
  "query": {
    "term": { "about": "rock climbing" }
  }
}
# bool query: combines must, should and must_not clauses.
# must:     the document has to match these clauses.
# should:   one or more optional clauses. When no must/filter clause is
#           present at least one should clause has to match; with a must
#           clause (as below) they are optional.
# must_not: the document must not match these clauses.
GET /megacorp/employee/_search
{
  "query": {
    "bool": {
      "must": [
        {
          "range": {
            "age": { "gte": 10, "lte": 25 }
          }
        }
      ],
      "should": [
        { "match": { "last_name": "Smith" } }
      ],
      "must_not": [
        { "match": { "first_name": "xiang" } }
      ]
    }
  }
}
结构化搜索
## Structured search
# Show the employee mapping.
GET /megacorp/employee/_mapping
# Using a bool clause inside a filter: the match on about runs in query
# context, the age range (>= 25) runs in filter context.
GET /megacorp/employee/_search
{
  "query": {
    "bool": {
      "must": [
        { "match": { "about": "rock climbing" } }
      ],
      "filter": {
        "bool": {
          "must": [
            { "range": { "age": { "gte": 25 } } }
          ]
        }
      }
    }
  }
}
# SQL equivalents of bool filters. These are illustrative fragments, not
# runnable console requests.
# String literals are single-quoted: in standard SQL, double quotes delimit
# identifiers, not strings.
SELECT product
FROM products
WHERE (price = 20 OR productID = 'XHDK-A-1293-#fJ3')
  AND (price != 30)
# Equivalent filter: should = OR, must_not = NOT.
"filter" : {
  "bool" : {
    "should" : [
      { "term" : {"price" : 20}},
      { "term" : {"productID" : "XHDK-A-1293-#fJ3"}}
    ],
    "must_not" : {
      "term" : {"price" : 30}
    }
  }
}
SELECT document
FROM products
WHERE productID = 'KDKE-B-9947-#kL5'
   OR ( productID = 'JODL-X-1937-#pV7'
        AND price = 30 )
# Equivalent filter: a nested bool with must expresses the inner AND.
"filter" : {
  "bool" : {
    "should" : [
      { "term" : {"productID" : "KDKE-B-9947-#kL5"}},
      { "bool" : {
        "must" : [
          { "term" : {"productID" : "JODL-X-1937-#pV7"}},
          { "term" : {"price" : 30}}
        ]
      }}
    ]
  }
}
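# Testing whether a document has a value for a field, e.g. WHERE tags IS NOT NULL
# exists query <==> IS NOT NULL
# IS NULL <==> an exists query wrapped in bool must_not (the old missing query/filter was removed in Elasticsearch 5.0, so it is not available in the 5.1.1 version this file targets)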
# Keep only documents that have a value for the age field; the exists
# clause runs in filter context.
GET /megacorp/employee/_search
{
  "query": {
    "bool": {
      "filter": {
        "exists": { "field": "age" }
      }
    }
  }
}
嵌套
### Nested objects
# Recreate my_index with a blogpost type whose comments field is mapped as
# "nested", so that each comment is indexed as its own hidden document.
# Analyzed strings use the 5.x "text" type instead of the deprecated "string".
DELETE /my_index
PUT /my_index
{
  "mappings": {
    "blogpost": {
      "properties": {
        "title": { "type": "text" },
        "body":  { "type": "text" },
        "tags":  { "type": "text" },
        "comments": {
          "type": "nested",
          "properties": {
            "name":    { "type": "text" },
            "comment": { "type": "text" },
            "age":     { "type": "short" },
            "stars":   { "type": "short" },
            "date":    { "type": "date" }
          }
        }
      }
    }
  }
}
# Check the resulting mapping and the (still empty) type.
GET /my_index/blogpost/_mapping
GET /my_index/blogpost/_search
# Index two sample blog posts, each carrying two comments.
PUT /my_index/blogpost/1
{
  "title": "Nest eggs",
  "body": "Making your money work...",
  "tags": ["cash", "shares"],
  "comments": [
    {
      "name": "John Smith",
      "comment": "Great article",
      "age": 28,
      "stars": 4,
      "date": "2014-09-01"
    },
    {
      "name": "Alice White",
      "comment": "More like this please",
      "age": 31,
      "stars": 5,
      "date": "2014-10-22"
    }
  ]
}
PUT /my_index/blogpost/2
{
  "title": "Investment secrets",
  "body": "What they don't tell you ...",
  "tags": ["shares", "equities"],
  "comments": [
    {
      "name": "Mary Brown",
      "comment": "Lies, lies, lies",
      "age": 42,
      "stars": 1,
      "date": "2014-10-18"
    },
    {
      "name": "John Smith",
      "comment": "You're making it up!",
      "age": 28,
      "stars": 2,
      "date": "2014-10-16"
    }
  ]
}
# Posts whose title matches "eggs" and that have a single comment matching
# both name "john" and age 28. The nested query evaluates its clauses
# against each comment separately.
GET /my_index/blogpost/_search
{
  "query": {
    "bool": {
      "must": [
        { "match": { "title": "eggs" } },
        {
          "nested": {
            "path": "comments",
            "query": {
              "bool": {
                "must": [
                  { "match": { "comments.name": "john" } },
                  { "match": { "comments.age": 28 } }
                ]
              }
            }
          }
        }
      ]
    }
  }
}
# Fetch blog posts that received a comment in October 2014, sorted by the
# lowest stars value among each post's October comments.
# nested_path names the nested object the sort field belongs to; the sort
# documentation lists it as mandatory when sorting on a nested field.
# nested_filter repeats the date range so that only October comments
# contribute to the sort value.
GET /my_index/blogpost/_search
{
  "query": {
    "nested": {
      "path": "comments",
      "query": {
        "bool": {
          "filter": {
            "range": {
              "comments.date": {
                "gte": "2014-10-01",
                "lt": "2014-11-01"
              }
            }
          }
        }
      }
    }
  },
  "sort": {
    "comments.stars": {
      "order": "asc",
      "mode": "min",
      "nested_path": "comments",
      "nested_filter": {
        "range": {
          "comments.date": {
            "gte": "2014-10-01",
            "lt": "2014-11-01"
          }
        }
      }
    }
  }
}
# Aggregating on nested objects: step into the comments, bucket them by
# month of comments.date, and compute the average stars per month.
GET /my_index/blogpost/_search
{
  "aggs": {
    "comments": {
      "nested": { "path": "comments" },
      "aggs": {
        "by_month": {
          "date_histogram": {
            "field": "comments.date",
            "interval": "month",
            "format": "yyyy-MM"
          },
          "aggs": {
            "avg_stars": {
              "avg": { "field": "comments.stars" }
            }
          }
        }
      }
    }
  }
}
# Enable fielddata on the tags field so it can be used in the terms
# aggregation below. Uses the 5.x "text" type instead of the deprecated
# "string", and the /{index}/_mapping/{type} endpoint form used elsewhere
# in this file.
PUT /my_index/_mapping/blogpost
{
  "properties": {
    "tags": { "type": "text", "fielddata": true }
  }
}
# Bucket comments by commenter age (width 10), then use reverse_nested to
# step back to the parent blog posts and list their tags.
# With the two sample blog posts indexed in this file the result should show:
#  <1> four comments in total
#  <2> two comments whose author's age is between 20 and 30
#  <3> two blog posts associated with those comments
#  <4> the popular tags of those posts: shares, cash, equities
GET /my_index/blogpost/_search
{
  "aggs": {
    "comments": {
      "nested": { "path": "comments" },
      "aggs": {
        "age_group": {
          "histogram": {
            "field": "comments.age",
            "interval": 10
          },
          "aggs": {
            "blogposts": {
              "reverse_nested": {},
              "aggs": {
                "tags": {
                  "terms": { "field": "tags" }
                }
              }
            }
          }
        }
      }
    }
  }
}
agg聚合
# Elasticsearch aggregations
# Recreate the user_analysys_little index and define the user and
# user_event mappings.
# Exact-value strings use the 5.x "keyword" type (replacing the deprecated
# "string" + "index": "not_analyzed"). The "index": "not_analyzed" setting
# is dropped from long/date fields: in 5.x "index" is a boolean and these
# fields are indexed by default.
DELETE /user_analysys_little
PUT /user_analysys_little
PUT /user_analysys_little/_mapping/user
{
  "properties": {
    "userId":       { "type": "keyword" },
    "userName":     { "type": "keyword" },
    "provinceId":   { "type": "long" },
    "provinceName": { "type": "keyword" },
    "age":          { "type": "long" }
  }
}
PUT /user_analysys_little/_mapping/user_event
{
  "properties": {
    "userId":       { "type": "keyword" },
    "userName":     { "type": "keyword" },
    "provinceId":   { "type": "long" },
    "provinceName": { "type": "keyword" },
    "age":          { "type": "long" },
    "eventId":      { "type": "long" },
    "eventName":    { "type": "keyword" },
    "statDate":     { "type": "date" },
    "productName":  { "type": "keyword" }
  }
}
# List all indices, then look at the documents of both types.
GET _cat/indices
GET /user_analysys_little/user/_search
GET /user_analysys_little/user_event/_search
# Roughly: select userId from user_event where provinceName='青海' group by userId
# Leaving out the "query" part would aggregate over all documents instead.
# Buckets are ordered by document count, largest first.
GET /user_analysys_little/user_event/_search
{
  "size": 1,
  "query": {
    "term": {
      "provinceName": { "value": "青海" }
    }
  },
  "aggs": {
    "group_userid": {
      "terms": {
        "field": "userId",
        "order": { "_count": "desc" }
      }
    }
  }
}
# Global bucket: ignores the query and aggregates over all documents.
# Here the average age for the queried province (青海) is compared with the
# average age over the whole data set.
GET /user_analysys_little/user_event/_search
{
  "size": 1,
  "query": {
    "term": {
      "provinceName": { "value": "青海" }
    }
  },
  "aggs": {
    "avg_age_青海": {
      "avg": { "field": "age" }
    },
    "all": {
      "global": {},
      "aggs": {
        "avg_age_全国": {
          "avg": { "field": "age" }
        }
      }
    }
  }
}
# Group by province first, then compute the average / maximum / minimum age
# per province, and additionally break each province down by age value.
GET /user_analysys_little/user_event/_search
{
  "size": 1,
  "aggs": {
    "group_province": {
      "terms": { "field": "provinceName" },
      "aggs": {
        "avg_age": {
          "avg": { "field": "age" }
        },
        "max_age": {
          "max": { "field": "age" }
        },
        "min_age": {
          "min": { "field": "age" }
        },
        "group_age": {
          "terms": { "field": "age" }
        }
      }
    }
  }
}
# Histogram: bucket documents by age in intervals of 10 (e.g. [10~19],
# [20~29], ...) and compute max / min / avg age inside every bucket.
GET /user_analysys_little/user_event/_search
{
  "size": 1,
  "aggs": {
    "histogram_age": {
      "histogram": {
        "field": "age",
        "interval": 10
      },
      "aggs": {
        "max_age": {
          "max": { "field": "age" }
        },
        "min_age": {
          "min": { "field": "age" }
        },
        "avg_age": {
          "avg": { "field": "age" }
        }
      }
    }
  }
}
# Date histogram: a histogram specialised for dates, here with one bucket
# per day in the +08:00 time zone. extended_bounds sets the first and last
# bucket; with min_doc_count 0, days without documents are returned too.
GET /user_analysys_little/user_event/_search
{
  "size": 1,
  "aggs": {
    "date_histogram_statDate": {
      "date_histogram": {
        "field": "statDate",
        "interval": "day",
        "format": "yyyy-MM-dd",
        "time_zone": "+08:00",
        "min_doc_count": 0,
        "extended_bounds": {
          "min": "2016-11-28",
          "max": "2016-12-31"
        }
      }
    }
  }
}
# Filter bucket: applies an extra filter on top of the query scope, for the
# aggregation only.
# The search covers everyone in 青海, while the average is computed only
# over people in 青海 aged 10 to 50.
GET /user_analysys_little/user_event/_search
{
  "size": 1,
  "query": {
    "term": {
      "provinceName": { "value": "青海" }
    }
  },
  "aggs": {
    "avg_age_青海": {
      "filter": {
        "range": {
          "age": { "gte": 10, "lte": 50 }
        }
      },
      "aggs": {
        "avg_age": {
          "avg": { "field": "age" }
        }
      }
    }
  }
}
# post_filter: filters the search hits only, not the aggregation input.
# The hits are people in 青海 aged 10 to 50, while the average age is
# computed over everyone in 青海.
GET /user_analysys_little/user_event/_search
{
  "size": 1,
  "query": {
    "term": {
      "provinceName": { "value": "青海" }
    }
  },
  "post_filter": {
    "range": {
      "age": { "gte": 10, "lte": 50 }
    }
  },
  "aggs": {
    "avg_age": {
      "avg": { "field": "age" }
    }
  }
}
# Distinct count: number of unique users per day, i.e. DAU.
# Note: cardinality is an approximation algorithm and is not 100% exact.
# precision_threshold tunes the accuracy. It accepts values from 0 to
# 40,000; larger values are treated as 40,000. With the value 100 used
# here, results are very accurate as long as the field has fewer than
# about 100 distinct values. The algorithm gives no hard guarantee, but
# below the threshold the count is almost always exact; above it, memory
# is saved at the cost of accuracy and the result carries some error.
GET /user_analysys_little/user_event/_search
{
  "size": 1,
  "aggs": {
    "date_histogram_statDate": {
      "date_histogram": {
        "field": "statDate",
        "interval": "day",
        "format": "yyyy-MM-dd",
        "time_zone": "+08:00",
        "min_doc_count": 0,
        "extended_bounds": {
          "min": "2016-11-28",
          "max": "2016-12-31"
        }
      },
      "aggs": {
        "distinct_userId": {
          "cardinality": {
            "field": "userId",
            "precision_threshold": 100
          }
        }
      }
    }
  }
}