Elasticsearch的DSL操作命令大全（二）

小强签名设计

已于 2022-11-07 18:45:02 修改

阅读量2.4k

点赞数 1

分类专栏： Elasticsearch 文章标签： elasticsearch DSL 操作命令

于 2018-11-14 22:14:06 首次发布

本文链接：https://blog.csdn.net/m0_37739193/article/details/84075102

版权

Elasticsearch 专栏收录该内容

13 篇文章 1 订阅

订阅专栏

文章目录

奇葩需求：

1.
按论坛名称分组，统计 1-5 月每个月的论坛评论总量（对应字段为 cmtCnt）
按论坛名称分组，统计 1-5 月每个月的论坛发帖点赞量（对应字段为 adtCnt）
（下面的查询以 cmtCnt 为例；统计点赞量时，把 sum 聚合里的 field 换成 adtCnt 即可）

GET xiao-2018-4-1,xiao-2018-6-12,xiao-2018-3-1/Socials/_search
{
    "size" : 0,
    "query" : {
        "constant_score" : {
            "filter" : {
                "bool" : {
                    "must" : [
                        {
                            "term" : {
                               "sourceType" : "FORUM"
                            }
                        },
                        {
                            "range": {
                                "timeDay": {
                                    "gte": "2018-01-01",
                                    "lte": "2018-05-31"
                                }
                            }
                        }
                    ]
                }
            }
        }
    },
    "aggs" : {
        "all_interests" : {
            "terms" : {
                "size" : 100000,
                "field" : "website.keyword"
            },
            "aggs": {
                "month_num": {
                    "date_histogram": {
                        "field": "timeDay",
                        "interval": "month",
                        "format": "yyyy-MM"
                    },
                    "aggs": {
                        "single_sum": {
                            "sum" : { "field" : "cmtCnt" }
                        }
                    }
                }
            }
        }
    }
}

结果：

{
  "took": 9141,
  "timed_out": false,
  "_shards": {
    "total": 350,
    "successful": 350,
    "failed": 0
  },
  "hits": {
    "total": 735705,
    "max_score": 0,
    "hits": []
  },
  "aggregations": {
    "all_interests": {
      "doc_count_error_upper_bound": 0,
      "sum_other_doc_count": 0,
      "buckets": [
        {
          "key": "论坛",
          "doc_count": 661238,
          "month_num": {
            "buckets": [
              {
                "key_as_string": "2018-01",
                "key": 1514764800000,
                "doc_count": 3,
                "single_sum": {
                  "value": 0
                }
              },
              {
                "key_as_string": "2018-02",
                "key": 1517443200000,
                "doc_count": 0,
                "single_sum": {
                  "value": 0
                }
              },
              {
                "key_as_string": "2018-03",
                "key": 1519862400000,
                "doc_count": 1403,
                "single_sum": {
                  "value": 0
                }
              },
              {
                "key_as_string": "2018-04",
                "key": 1522540800000,
                "doc_count": 125895,
                "single_sum": {
                  "value": 0
                }
              },
              {
                "key_as_string": "2018-05",
                "key": 1525132800000,
                "doc_count": 533937,
                "single_sum": {
                  "value": 0
                }
              }
            ]
          }
        },
        {
          "key": "百度贴吧",
          "doc_count": 21275,
          "month_num": {
            "buckets": [
              {
                "key_as_string": "2018-02",
                "key": 1517443200000,
                "doc_count": 290,
                "single_sum": {
                  "value": 406
                }
              },
              {
                "key_as_string": "2018-03",
                "key": 1519862400000,
                "doc_count": 20472,
                "single_sum": {
                  "value": 19174
                }
              },
              {
                "key_as_string": "2018-04",
                "key": 1522540800000,
                "doc_count": 483,
                "single_sum": {
                  "value": 1740
                }
              },
              {
                "key_as_string": "2018-05",
                "key": 1525132800000,
                "doc_count": 30,
                "single_sum": {
                  "value": 45
                }
              }
            ]
          }
        },
        {
          "key": "股吧",
          "doc_count": 6395,
          "month_num": {
            "buckets": [
              {
                "key_as_string": "2018-02",
                "key": 1517443200000,
                "doc_count": 10,
                "single_sum": {
                  "value": 26
                }
              },
              {
                "key_as_string": "2018-03",
                "key": 1519862400000,
                "doc_count": 6383,
                "single_sum": {
                  "value": 24965
                }
              },
              {
                "key_as_string": "2018-04",
                "key": 1522540800000,
                "doc_count": 1,
                "single_sum": {
                  "value": 0
                }
              },
              {
                "key_as_string": "2018-05",
                "key": 1525132800000,
                "doc_count": 1,
                "single_sum": {
                  "value": 0
                }
              }
            ]
          }
        },
        {
          "key": "好大夫在线",
          "doc_count": 2933,
          "month_num": {
            "buckets": [
              {
                "key_as_string": "2018-02",
                "key": 1517443200000,
                "doc_count": 5,
                "single_sum": {
                  "value": 0
                }
              },
              {
                "key_as_string": "2018-03",
                "key": 1519862400000,
                "doc_count": 2927,
                "single_sum": {
                  "value": 0
                }
              },
              {
                "key_as_string": "2018-04",
                "key": 1522540800000,
                "doc_count": 1,
                "single_sum": {
                  "value": 0
                }
              }
            ]
          }
        }
 。。。。。太多省略

2.
按论坛名称分组，统计 1-5 月每个月的论坛正面情感总量（即排除 sentimentOrient 为 -1 和 0 的文档），对应字段为 sentimentOrient

GET xiao-2018-4-1,xiao-2018-6-12,xiao-2018-3-1/Socials/_search
{
    "size" : 0,
    "query" : {
        "constant_score" : {
            "filter" : {
                "bool" : {
                    "must" : [
                        {
                            "term" : {
                               "sourceType" : "FORUM"
                            }
                        },
                        {
                            "range": {
                                "timeDay": {
                                    "gte": "2018-01-01",
                                    "lte": "2018-05-31"
                                }
                            }
                        }
                    ],
                    "must_not" : [
                        { "term" : { "sentimentOrient" : -1} },
                        { "term" : { "sentimentOrient" : 0 } }
                    ]
                }
            }
        }
    },
    "aggs" : {
        "all_interests" : {
            "terms" : {
                "size" : 100000,
                "field" : "website.keyword"
            },
            "aggs": {
                "month_num": {
                    "date_histogram": {
                        "field": "timeDay",
                        "interval": "month",
                        "format": "yyyy-MM"
                    }
                }
            }
        }
    }
}

疑问：一开始想按官网 https://elasticsearch.cn/book/elasticsearch_definitive_guide_2.x/_extended_example.html 上的做法，用 extended_bounds 来限制时间范围，但怎么试都不生效。原因是 extended_bounds 只会把直方图的范围向外扩展（强制返回没有文档的空桶），并不会过滤掉范围之外的文档；所以最后只能转变思路，在查询（query）里用 range 过滤来限制时间范围。

3.
需要监测的关键词：零跑,零跑汽车,零跑S01
需要过滤关键词：零跑腿,专家门诊
社交和新闻根据url字段去重后6月2号到7月2号的总数值

GET xiao-2018-6-12,xiao-2018-6-19,xiao-2018-6-26,xiao-2018-6-5/Socials/_search
{
    "size" : 0,
    "query" : {
        "constant_score" : {
            "filter" : {
                "bool" : {
                    "must" : [
                        {
                            "range": {
                                "timeDay": {
                                    "gte": "2018-06-02",
                                    "lte": "2018-07-02"
                                }
                            }
                        },
                        {
                            "query_string":{
                                "default_field":"textZh",
                                "query":"零跑 OR 零跑汽车 OR 零跑S01 NOT 零跑腿 NOT 专家门诊"
                                或者
                                "query" : "( ( \"\"零跑\"\" ) OR ( \"\"零跑汽车\"\" ) OR ( \"\"零跑S01\"\" ) NOT ( \"\"零跑腿\"\" ) NOT ( \"\"专家门诊\"\" ) )"
                            }
                        }
                    ]
                }
            }
        }
    },
    "aggs" : {
        "distinct_colors" : {
            "cardinality" : {
                "field" : "url"
            }
        }
    }
}

注：上面这个查询结果不对，还是有问题
原因：这个索引mapping里textZh字段的设置如下

          "textZh": {
            "type": "text",
            "store": true,
            "fields": {
              "keyword": {
                "type": "keyword",
                "ignore_above": 256
              }
            },
            "analyzer": "ik_smart"
          }

导致你输入“零跑”会被分词为“零”和“跑”，所以搜索的结果并不是你想要的
解决：
社交（_type是“Socials”的为社交，_type是“News”的为新闻）：

GET xiao-2018-6-12,xiao-2018-6-19,xiao-2018-6-26,xiao-2018-6-5/Socials/_search
{
    "size" : 0,
    "query" : {
        "constant_score" : {
            "filter" : {
                "bool": {
                    "must": {
                        "range": {
                            "timeStr": {
                                "gte": "2018-06-02 00:00:00",
                                "lt": "2018-07-03 00:00:00"
                            }
                        }
                    },
                    "should": [
                        {
                            "match_phrase": {
                                "textZh" : {
                                    "query" : "零跑"
                                }
                            }
                        },
                        {
                            "match_phrase": {
                                "textZh" : {
                                    "query" : "零跑汽车"
                                }
                            }
                        },
                        {
                            "match_phrase": {
                                "textZh" : {
                                    "query" : "零跑S01"
                                }
                            }
                        }
                    ],
                    "minimum_should_match": 1,
                    "must_not": [
                        {
                            "match_phrase": {
                                "textZh" : {
                                    "query" : "零跑腿"
                                }
                            }
                        },
                        {
                            "match_phrase": {
                                "textZh" : {
                                    "query" : "专家门诊"
                                }
                            }
                        }
                    ]
                }
            }
        }
    },
    "aggs" : {
        "distinct_colors" : {
            "cardinality" : {
                "field" : "url"
            }
        }
    }
}

4. 10月25号到28号指定时间段的微博号总和

GET xiaoqiang-2018-10-29/Socials/_search
{
    "size": 0,
    "query" : {
        "constant_score" : {
            "filter" : {
                "bool" : {
                    "must" : [
                        {
                            "range": {
                                "timeHour": {
                                    "gte": "2018-10-25 21",
                                    "lte": "2018-10-28 21",
                                    "format": "yyyy-MM-dd HH"
                                }
                            }
                        },
                        {
                            "term" : {
                                "sourceType": "weibo"
                            }
                        }
                    ]
                }
            }
        }
    },
    "aggs" : {
        "all_interests" : {
           "terms" : {
              "script" : "String he=new SimpleDateFormat('HH').format(new Date(doc['timeHour'].value)); if(he.equals('01')){return he;}else{return null;}"
           },
            "aggs" : {
                "per_count" : {
                    "terms" : {
                       "script" : "doc['url'].value.substring(17,27)"
                    }
                }
            }
        }
    }
}

结果：

{
  "took": 4182,
  "timed_out": false,
  "_shards": {
    "total": 10,
    "successful": 10,
    "failed": 0
  },
  "hits": {
    "total": 2997411,
    "max_score": 0,
    "hits": []
  },
  "aggregations": {
    "all_interests": {
      "doc_count_error_upper_bound": 0,
      "sum_other_doc_count": 0,
      "buckets": [
        {
          "key": "01",
          "doc_count": 154272,
          "per_count": {
            "doc_count_error_upper_bound": 0,
            "sum_other_doc_count": 154116,
            "buckets": [
              {
                "key": "5670137181",
                "doc_count": 20
              },
              {
                "key": "5408336201",
                "doc_count": 16
              },
              {
                "key": "1084088733",
                "doc_count": 15
              },
              {
                "key": "1283869875",
                "doc_count": 15
              },
              {
                "key": "1322676944",
                "doc_count": 15
              },
              {
                "key": "1668841355",
                "doc_count": 15
              },
              {
                "key": "1681901143",
                "doc_count": 15
              },
              {
                "key": "1704588860",
                "doc_count": 15
              },
              {
                "key": "1819301715",
                "doc_count": 15
              },
              {
                "key": "1823510107",
                "doc_count": 15
              }
            ]
          }
        }
      ]
    }
  }
}

5.近n天每天各个时段的微博号

GET xiaoqiang-2018-10-29/Socials/_search
{
    "size": 0,
    "query" : {
        "constant_score" : {
            "filter" : {
                "bool" : {
                    "must" : [
                        {
                            "range": {
                                "timeHour": {
                                    "gte": "2018-10-26 21",
                                    "lte": "2018-10-28 21",
                                    "format": "yyyy-MM-dd HH"
                                }
                            }
                        },
                        {
                            "term" : {
                                "sourceType": "weibo"
                            }
                        }
                    ]
                }
            }
        }
    },
    "aggs" : {
        "all_interests" : {
           "terms" : {
              "script" : "new SimpleDateFormat('HH').format(new Date(doc['timeHour'].value))",
              "order" : { "_term" : "desc" }
           },
           "aggs": {
                "month_num": {
                    "date_histogram": {
                        "field": "timeDay",
                        "interval": "day",
                        "format": "yyyy-MM-dd"
                    },
                    "aggs" : {
                        "per_count" : {
                            "terms" : {
                                "size" : 1,
                                "script" : "doc['url'].value.substring(17,27)"
                            }
                        }
                    }
                }
            }
        }
    }
}

结果：

{
  "took": 15584,
  "timed_out": false,
  "_shards": {
    "total": 10,
    "successful": 10,
    "failed": 0
  },
  "hits": {
    "total": 2143757,
    "max_score": 0,
    "hits": []
  },
  "aggregations": {
    "all_interests": {
      "doc_count_error_upper_bound": 0,
      "sum_other_doc_count": 1356451,
      "buckets": [
        {
          "key": "23",
          "doc_count": 93821,
          "month_num": {
            "buckets": [
              {
                "key_as_string": "2018-10-27",
                "key": 1540598400000,
                "doc_count": 37782,
                "per_count": {
                  "doc_count_error_upper_bound": 0,
                  "sum_other_doc_count": 37767,
                  "buckets": [
                    {
                      "key": "1057449614",
                      "doc_count": 15
                    }
                  ]
                }
              },
              {
                "key_as_string": "2018-10-28",
                "key": 1540684800000,
                "doc_count": 56039,
                "per_count": {
                  "doc_count_error_upper_bound": 0,
                  "sum_other_doc_count": 56024,
                  "buckets": [
                    {
                      "key": "1218291087",
                      "doc_count": 15
                    }
                  ]
                }
              }
            ]
          }
        },
        {
          "key": "22",
          "doc_count": 92328,
          "month_num": {
            "buckets": [
              {
                "key_as_string": "2018-10-27",
                "key": 1540598400000,
                "doc_count": 37043,
                "per_count": {
                  "doc_count_error_upper_bound": 0,
                  "sum_other_doc_count": 37028,
                  "buckets": [
                    {
                      "key": "1306672440",
                      "doc_count": 15
                    }
                  ]
                }
              },
              {
                "key_as_string": "2018-10-28",
                "key": 1540684800000,
                "doc_count": 55285,
                "per_count": {
                  "doc_count_error_upper_bound": 0,
                  "sum_other_doc_count": 55270,
                  "buckets": [
                    {
                      "key": "1221062225",
                      "doc_count": 15
                    }
                  ]
                }
              }
            ]
          }
        },
        {
          "key": "21",
          "doc_count": 92859,
          "month_num": {
            "buckets": [
              {
                "key_as_string": "2018-10-27",
                "key": 1540598400000,
                "doc_count": 36797,
                "per_count": {
                  "doc_count_error_upper_bound": 0,
                  "sum_other_doc_count": 36782,
                  "buckets": [
                    {
                      "key": "3799113457",
                      "doc_count": 15
                    }
                  ]
                }
              },
              {
                "key_as_string": "2018-10-28",
                "key": 1540684800000,
                "doc_count": 56062,
                "per_count": {
                  "doc_count_error_upper_bound": 0,
                  "sum_other_doc_count": 56047,
                  "buckets": [
                    {
                      "key": "1360660712",
                      "doc_count": 15
                    }
                  ]
                }
              }
            ]
          }
        },
        {
          "key": "20",
          "doc_count": 95391,
          "month_num": {
            "buckets": [
              {
                "key_as_string": "2018-10-27",
                "key": 1540598400000,
                "doc_count": 38552,
                "per_count": {
                  "doc_count_error_upper_bound": 0,
                  "sum_other_doc_count": 38537,
                  "buckets": [
                    {
                      "key": "2722601793",
                      "doc_count": 15
                    }
                  ]
                }
              },
              {
                "key_as_string": "2018-10-28",
                "key": 1540684800000,
                "doc_count": 56839,
                "per_count": {
                  "doc_count_error_upper_bound": 0,
                  "sum_other_doc_count": 56824,
                  "buckets": [
                    {
                      "key": "1799231193",
                      "doc_count": 15
                    }
                  ]
                }
              }
            ]
          }
        },
        {
          "key": "19",
          "doc_count": 94108,
          "month_num": {
            "buckets": [
              {
                "key_as_string": "2018-10-27",
                "key": 1540598400000,
                "doc_count": 37687,
                "per_count": {
                  "doc_count_error_upper_bound": 0,
                  "sum_other_doc_count": 37672,
                  "buckets": [
                    {
                      "key": "1750745673",
                      "doc_count": 15
                    }
                  ]
                }
              },
              {
                "key_as_string": "2018-10-28",
                "key": 1540684800000,
                "doc_count": 56421,
                "per_count": {
                  "doc_count_error_upper_bound": 0,
                  "sum_other_doc_count": 56406,
                  "buckets": [
                    {
                      "key": "1814872401",
                      "doc_count": 15
                    }
                  ]
                }
              }
            ]
          }
        },
        {
          "key": "18",
          "doc_count": 95298,
          "month_num": {
            "buckets": [
              {
                "key_as_string": "2018-10-27",
                "key": 1540598400000,
                "doc_count": 37882,
                "per_count": {
                  "doc_count_error_upper_bound": 0,
                  "sum_other_doc_count": 37867,
                  "buckets": [
                    {
                      "key": "3933789681",
                      "doc_count": 15
                    }
                  ]
                }
              },
              {
                "key_as_string": "2018-10-28",
                "key": 1540684800000,
                "doc_count": 57416,
                "per_count": {
                  "doc_count_error_upper_bound": 0,
                  "sum_other_doc_count": 57401,
                  "buckets": [
                    {
                      "key": "1508661252",
                      "doc_count": 15
                    }
                  ]
                }
              }
            ]
          }
        },
        {
          "key": "17",
          "doc_count": 87719,
          "month_num": {
            "buckets": [
              {
                "key_as_string": "2018-10-27",
                "key": 1540598400000,
                "doc_count": 34590,
                "per_count": {
                  "doc_count_error_upper_bound": 0,
                  "sum_other_doc_count": 34575,
                  "buckets": [
                    {
                      "key": "2038738841",
                      "doc_count": 15
                    }
                  ]
                }
              },
              {
                "key_as_string": "2018-10-28",
                "key": 1540684800000,
                "doc_count": 53129,
                "per_count": {
                  "doc_count_error_upper_bound": 0,
                  "sum_other_doc_count": 53114,
                  "buckets": [
                    {
                      "key": "2237799475",
                      "doc_count": 15
                    }
                  ]
                }
              }
            ]
          }
        },
        {
          "key": "16",
          "doc_count": 64030,
          "month_num": {
            "buckets": [
              {
                "key_as_string": "2018-10-27",
                "key": 1540598400000,
                "doc_count": 27736,
                "per_count": {
                  "doc_count_error_upper_bound": 0,
                  "sum_other_doc_count": 27721,
                  "buckets": [
                    {
                      "key": "1911197885",
                      "doc_count": 15
                    }
                  ]
                }
              },
              {
                "key_as_string": "2018-10-28",
                "key": 1540684800000,
                "doc_count": 36294,
                "per_count": {
                  "doc_count_error_upper_bound": 0,
                  "sum_other_doc_count": 36279,
                  "buckets": [
                    {
                      "key": "1984810814",
                      "doc_count": 15
                    }
                  ]
                }
              }
            ]
          }
        },
        {
          "key": "15",
          "doc_count": 44697,
          "month_num": {
            "buckets": [
              {
                "key_as_string": "2018-10-27",
                "key": 1540598400000,
                "doc_count": 19721,
                "per_count": {
                  "doc_count_error_upper_bound": 0,
                  "sum_other_doc_count": 19706,
                  "buckets": [
                    {
                      "key": "1564695515",
                      "doc_count": 15
                    }
                  ]
                }
              },
              {
                "key_as_string": "2018-10-28",
                "key": 1540684800000,
                "doc_count": 24976,
                "per_count": {
                  "doc_count_error_upper_bound": 0,
                  "sum_other_doc_count": 24961,
                  "buckets": [
                    {
                      "key": "1772563447",
                      "doc_count": 15
                    }
                  ]
                }
              }
            ]
          }
        },
        {
          "key": "14",
          "doc_count": 27055,
          "month_num": {
            "buckets": [
              {
                "key_as_string": "2018-10-27",
                "key": 1540598400000,
                "doc_count": 11754,
                "per_count": {
                  "doc_count_error_upper_bound": 0,
                  "sum_other_doc_count": 11740,
                  "buckets": [
                    {
                      "key": "5054956136",
                      "doc_count": 14
                    }
                  ]
                }
              },
              {
                "key_as_string": "2018-10-28",
                "key": 1540684800000,
                "doc_count": 15301,
                "per_count": {
                  "doc_count_error_upper_bound": 0,
                  "sum_other_doc_count": 15286,
                  "buckets": [
                    {
                      "key": "1690639232",
                      "doc_count": 15
                    }
                  ]
                }
              }
            ]
          }
        }
      ]
    }
  }
}

获取多层聚合的数据api：

	/**
	 * Copies the Weibo ids found in a two-level terms aggregation into {@code result}.
	 *
	 * <p>Reads the top-level terms aggregation named "timeinterval" and, for each of its
	 * buckets, the nested terms aggregation named "weiboIds". Nested bucket keys made up
	 * solely of digits are collected into a list that is stored in {@code result} under
	 * the key returned by {@code message.getString("timeinterval")}, together with
	 * "flag" = "1" and a status text.
	 *
	 * @param response search response carrying the aggregations
	 * @param result   output object that receives the id list, "flag" and "status"
	 * @param message  request object; its "timeinterval" string is used as the result key
	 * @return {@code true} after the aggregation has been processed; {@code false} when
	 *         the response has no "timeinterval" aggregation (nothing is written then)
	 */
	private boolean handleresaggWeibo(SearchResponse response,JSONObject result,JSONObject message) {
		String timeinterval = message.getString("timeinterval");
		// Compiled once up front; the pattern is identical for every bucket.
		Pattern numericId = Pattern.compile("^\\d+$");
		// Fetch the aggregation results by name.
		Map<String, Aggregation> aggMap = response.getAggregations().asMap();
		StringTerms gradeTerms = (StringTerms) aggMap.get("timeinterval");
		if (gradeTerms == null) {
			// The expected aggregation is absent: report failure instead of throwing an NPE.
			return false;
		}
		for (Bucket gradeBucket : gradeTerms.getBuckets()) {
			List<String> jsons = new ArrayList<>();
			System.out.println(gradeBucket.getKey() + "时间段共有 " + gradeBucket.getDocCount() +"个文档。");
			StringTerms classTerms = (StringTerms) gradeBucket.getAggregations().asMap().get("weiboIds");
			for (Bucket classBucket : classTerms.getBuckets()) {
				String weiboid = classBucket.getKeyAsString();
				// Keep only keys that are purely numeric.
				if (numericId.matcher(weiboid).matches()) {
					jsons.add(weiboid);
				}
			}
			// NOTE(review): every outer bucket writes to the same key, so the list of the
			// last bucket wins — confirm that a single outer bucket is expected here.
			result.put(timeinterval, jsons);
			result.put("flag", "1");
			result.put("status", "返回列表成功");
		}
		return true;
	}

获取sum聚合的数据api：
如：

{
  "took": 77,
  "timed_out": false,
  "_shards": {
    "total": 5,
    "successful": 5,
    "failed": 0
  },
  "hits": {
    "total": 597200,
    "max_score": 0,
    "hits": []
  },
  "aggregations": {
    "per_count": {
      "doc_count_error_upper_bound": 886,
      "sum_other_doc_count": 112423,
      "buckets": [
        {
          "key": "本人",
          "doc_count": 233146,
          "sum_view": {
            "value": 105999
          }
        },
        {
          "key": "好文",
          "doc_count": 11008,
          "sum_view": {
            "value": 2860
          }
        }
      ]
    }
  }
}

api：

		// Turn every terms bucket into one JSON record (author, doc count, summed views)
		// and append its serialized form to the jsons list.
		while(gradeBucketIt.hasNext()) {
			JSONObject json = new JSONObject();
			Bucket gradeBucket = gradeBucketIt.next();
			String author = gradeBucket.getKeyAsString();
			long docCount = gradeBucket.getDocCount();
			// Only one sub-aggregation (the sum) is requested, so it is the first list entry.
			InternalSum view = (InternalSum) gradeBucket.getAggregations().asList().get(0);
			// long instead of int: a sum over many documents can exceed Integer.MAX_VALUE.
			long viewNum = (long) view.getValue();
			json.put("author", author);
			json.put("docCount", docCount);
			json.put("viewNum", viewNum);
			json.put("transfer", 0);
			// Append the record just built (the original appended the list's own toString()).
			jsons.add(json.toString());
		}

同时对两个字段做sum聚合：
查询语句：

    "aggs" : {
        "per_count" : {
           "terms" : {
              "size" : 2,
              "field" : "author"
           },
           "aggs" : {
                "sum_view" : {
                    "sum" : {
                       "field" : "view"
                    }
                },
                "sum_transfer" : {
                    "sum" : {
                       "field" : "transfer"
                    }
                }
            }
        }
    }

运行结果：

{
  "took": 91,
  "timed_out": false,
  "_shards": {
    "total": 5,
    "successful": 5,
    "failed": 0
  },
  "hits": {
    "total": 599127,
    "max_score": 0,
    "hits": []
  },
  "aggregations": {
    "per_count": {
      "doc_count_error_upper_bound": 1015,
      "sum_other_doc_count": 162154,
      "buckets": [
        {
          "key": "本人",
          "doc_count": 233890,
          "sum_transfer": {
            "value": 0
          },
          "sum_view": {
            "value": 106418
          }
        },
        {
          "key": "腾讯",
          "doc_count": 79699,
          "sum_transfer": {
            "value": 0
          },
          "sum_view": {
            "value": 1179
          }
        }
      ]
    }
  }
}

查询api：

// Java equivalent of the DSL above: a terms aggregation on "author" with two sum
// sub-aggregations. The bucket count is 2 so that it matches "size": 2 in the DSL
// and the two buckets shown in the sample result.
AggregationBuilder ggregationBuilder = AggregationBuilders.terms("per_count").field("author").size(2)
		.subAggregation(AggregationBuilders.sum("sum_view").field("view"))
		.subAggregation(AggregationBuilders.sum("sum_transfer").field("transfer"));

获取数据api：

		// Turn every terms bucket into one JSON record (author, doc count, summed views,
		// summed transfers) and append its serialized form to the jsons list.
		while(gradeBucketIt.hasNext()) {
			JSONObject json = new JSONObject();
			Bucket gradeBucket = gradeBucketIt.next();
			String author = gradeBucket.getKeyAsString();
			long docCount = gradeBucket.getDocCount();
			// Look the sums up by name; with two sub-aggregations list order must not be relied on.
			InternalSum view = (InternalSum) gradeBucket.getAggregations().asMap().get("sum_view");
			InternalSum transfer = (InternalSum) gradeBucket.getAggregations().asMap().get("sum_transfer");
			// long instead of int: a sum over many documents can exceed Integer.MAX_VALUE.
			long viewNum = (long) view.getValue();
			long transferNum = (long) transfer.getValue();
			json.put("author", author);
			json.put("docCount", docCount);
			json.put("viewNum", viewNum);
			json.put("transfer", transferNum);
			// Append the record just built (the original appended the list's own toString()).
			jsons.add(json.toString());
		}

es内置的分词器：

standard analyzer
simple analyzer
whitespace analyzer
language analyzer(特定的语言的分词器)

例句：Set the shape to semi-transparent by calling set_trans(5)
不同分词器的分词结果：

standard analyzer：set, the, shape, to, semi, transparent, by, calling, set_trans, 5（默认的是standard）
simple analyzer：set, the, shape, to, semi, transparent, by, calling, set, trans
whitespace analyzer：Set, the, shape, to, semi-transparent, by, calling, set_trans(5)
language analyzer（特定的语言的分词器，比如说，english，英语分词器）：set, shape, semi, transpar, call, set_tran, 5

分词器测试：

GET /_analyze
{
  "analyzer": "standard",
  "text":"I love you"
}

结果：

{
  "tokens": [
    {
      "token": "i",
      "start_offset": 0,
      "end_offset": 1,
      "type": "<ALPHANUM>",
      "position": 0
    },
    {
      "token": "love",
      "start_offset": 2,
      "end_offset": 6,
      "type": "<ALPHANUM>",
      "position": 1
    },
    {
      "token": "you",
      "start_offset": 7,
      "end_offset": 10,
      "type": "<ALPHANUM>",
      "position": 2
    }
  ]
}

修改mapping：

1.删除索引：

DELETE hui

/**
 * Deletes the index named "indexName", but only after confirming that it exists.
 * Both the existence check and the delete go through the admin indices client
 * and block until the cluster responds.
 */
@Test
public void deleteIndex(){
	final String index = "indexName";
	// Check for the index first; the delete is skipped when it is absent.
	boolean present = client.admin().indices()
			.exists(new IndicesExistsRequest(index))
			.actionGet()
			.isExists();
	if (!present) {
		return;
	}
	client.admin().indices().prepareDelete(index).execute().actionGet();
}

2.创建索引：

PUT hui

3.创建mapping：

POST hui/my_type/_mapping
{
    "my_type": {
        "properties": {
            "title": {
                "type": "keyword",
                "store": true 
            },
            "date": {
                "type": "date",
                "format": "yyyy-MM-dd"
            },
            "content": {
                "type": "text"
            }
        }
    }
}

注：第2和3步可合并为：

PUT hui
{
  "mappings": {
    "my_type": {
      "properties": {
        "title": {
          "type": "keyword",
          "store": true 
        },
        "date": {
          "type": "date",
          "format": "yyyy-MM-dd"
        },
        "content": {
          "type": "text"
        }
      }
    }
  }
}

参考：https://www.elastic.co/guide/en/elasticsearch/reference/current/indices-put-mapping.html

4.mapping增加字段
（Elasticsearch的mapping一旦创建，只能增加字段，而不能修改已有字段的类型）

POST hui/my_type/_mapping
{
    "my_type": {
        "properties": {
            "hui":{
                "type": "text",
                "store": true
            }
        }
    }
}

5.修改mapping字段：

POST hui/my_type/_mapping
{
    "my_type": {
        "properties": {
            "hui":{
                "type": "integer"
            }
        }
    }
}

报错：

{
  "error": {
    "root_cause": [
      {
        "type": "illegal_argument_exception",
        "reason": "mapper [hui] of different type, current_type [text], merged_type [integer]"
      }
    ],
    "type": "illegal_argument_exception",
    "reason": "mapper [hui] of different type, current_type [text], merged_type [integer]"
  },
  "status": 400
}

原因：
如果一个字段的类型被修改，那么该字段的所有数据都需要重新索引。Elasticsearch底层使用的是lucene库，字段类型修改以后，索引和搜索都会涉及分词方式等操作的变化，因此不允许修改已有字段的类型是符合lucene机制的。

字段的某些属性不可以改变，而有的可以改变。如store属性就不可以改变（下面的请求去掉了之前设置的 "store": true，相当于把store改回默认值false）：

POST hui/my_type/_mapping
{
    "my_type": {
        "properties": {
            "hui":{
                "type": "text"
            }
        }
    }
}

报错：

{
  "error": {
    "root_cause": [
      {
        "type": "remote_transport_exception",
        "reason": "[0B7eiG0][192.168.0.1:9300][indices:admin/mapping/put]"
      }
    ],
    "type": "illegal_argument_exception",
    "reason": "Mapper for [hui] conflicts with existing mapping in other types:\n[mapper [hui] has different [store] values]"
  },
  "status": 400
}

而fielddata属性就可以增加或者删除：

POST hui/my_type/_mapping
{
    "my_type": {
        "properties": {
            "hui":{
                "type": "text",
                "store": true,
                "fielddata": true
            }
        }
    }
}

重构索引：

1.重建索引hui插入数据并设置别名：

PUT hui
POST hui/News/_mapping
{
    "News": {
        "properties": {
            "hui":{
                "type": "text",
                "fields": {
                    "keyword": {
                        "type": "keyword",
                        "ignore_above": 256
                    }
                }
            }
        }
    }
}
POST hui/News/1
{
   "hui" : "hehe"
}
POST hui/_alias/xiao

2.创建新索引qiang并设置新的mapping（数据将在下一步通过reindex导入）：

PUT qiang
POST qiang/News/_mapping
{
    "News": {
        "properties": {
            "hui":{
                "type": "text",
                "fields": {
                    "keyword": {
                        "type": "keyword",
                        "ignore_above": 256
                    }
                },
                "store": true
            }
        }
    }
}

3.开始执行重构索引命令：

POST _reindex
{
  "source": {
    "index": "hui"
  },
  "dest": {
    "index": "qiang",
    "version_type": "internal"
  }
}

注：数据量大的时候会如下显示连接超时，这只是客户端请求超时，reindex任务仍会继续执行，不影响功能实现。我重新导入了十九万条数据，大约用了十多分钟。

{
  "statusCode": 504,
  "error": "Gateway Timeout",
  "message": "Client request timeout"
}

4.使用Task API查询进度：

GET _tasks?detailed=true&actions=*reindex

结果：

{
  "nodes": {
    "yFpET0TETpuWGCxxyodXmg": {
      "name": "yFpET0T",
      "transport_address": "192.168.0.100:9300",
      "host": "192.168.0.100",
      "ip": "192.168.0.100:9300",
      "roles": [
        "master",
        "data",
        "ingest"
      ],
      "attributes": {
        "ml.max_open_jobs": "10",
        "ml.enabled": "true"
      },
      "tasks": {
        "yFpET0TETpuWGCxxyodXmg:6319552": {
          "node": "yFpET0TETpuWGCxxyodXmg",
          "id": 6319552,
          "type": "transport",
          "action": "indices:data/write/reindex",
          "status": {
            "total": 194111,
            "updated": 0,
            "created": 50000,
            "deleted": 0,
            "batches": 51,
            "version_conflicts": 0,
            "noops": 0,
            "retries": {
              "bulk": 0,
              "search": 0
            },
            "throttled_millis": 0,
            "requests_per_second": -1,
            "throttled_until_millis": 0
          },
          "description": "reindex from [mei_toutiao] to [mei_toutiao_v2]",
          "start_time_in_millis": 1532338516013,
          "running_time_in_nanos": 176981696219,
          "cancellable": true
        }
      }
    }
  }
}

5.如果复制完成则显示：

{
  "nodes": {}
}

6.别名转换：

POST /_aliases
{
    "actions": [
        { "remove": {
            "alias": "xiao",
            "index": "hui"
        }},
        { "add": {
            "alias": "xiao",
            "index": "qiang"
        }}
    ]
}