Elasticsearch的DSL操作命令大全(二)

奇葩需求:

1.
按照论坛名称,对论坛评论总量,1-5月按月聚合 相对应字段为cmtCnt
按照论坛名称,对论坛发帖点赞量,1-5月按月聚合 相对应字段为adtCnt

GET xiao-2018-4-1,xiao-2018-6-12,xiao-2018-3-1/Socials/_search
{
    "size" : 0,
    "query" : {
        "constant_score" : {
            "filter" : {
                "bool" : {
                    "must" : [
                        {
                            "term" : {
                               "sourceType" : "FORUM"
                            }
                        },
                        {
                            "range": {
                                "timeDay": {
                                    "gte": "2018-01-01",
                                    "lte": "2018-05-31"
                                }
                            }
                        }
                    ]
                }
            }
        }
    },
    "aggs" : {
        "all_interests" : {
            "terms" : {
                "size" : 100000,
                "field" : "website.keyword"
            },
            "aggs": {
                "month_num": {
                    "date_histogram": {
                        "field": "timeDay",
                        "interval": "month",
                        "format": "yyyy-MM"
                    },
                    "aggs": {
                        "single_sum": {
                            "sum" : { "field" : "cmtCnt" }
                        }
                    }
                }
            }
        }
    }
}

结果:

{
  "took": 9141,
  "timed_out": false,
  "_shards": {
    "total": 350,
    "successful": 350,
    "failed": 0
  },
  "hits": {
    "total": 735705,
    "max_score": 0,
    "hits": []
  },
  "aggregations": {
    "all_interests": {
      "doc_count_error_upper_bound": 0,
      "sum_other_doc_count": 0,
      "buckets": [
        {
          "key": "论坛",
          "doc_count": 661238,
          "month_num": {
            "buckets": [
              {
                "key_as_string": "2018-01",
                "key": 1514764800000,
                "doc_count": 3,
                "single_sum": {
                  "value": 0
                }
              },
              {
                "key_as_string": "2018-02",
                "key": 1517443200000,
                "doc_count": 0,
                "single_sum": {
                  "value": 0
                }
              },
              {
                "key_as_string": "2018-03",
                "key": 1519862400000,
                "doc_count": 1403,
                "single_sum": {
                  "value": 0
                }
              },
              {
                "key_as_string": "2018-04",
                "key": 1522540800000,
                "doc_count": 125895,
                "single_sum": {
                  "value": 0
                }
              },
              {
                "key_as_string": "2018-05",
                "key": 1525132800000,
                "doc_count": 533937,
                "single_sum": {
                  "value": 0
                }
              }
            ]
          }
        },
        {
          "key": "百度贴吧",
          "doc_count": 21275,
          "month_num": {
            "buckets": [
              {
                "key_as_string": "2018-02",
                "key": 1517443200000,
                "doc_count": 290,
                "single_sum": {
                  "value": 406
                }
              },
              {
                "key_as_string": "2018-03",
                "key": 1519862400000,
                "doc_count": 20472,
                "single_sum": {
                  "value": 19174
                }
              },
              {
                "key_as_string": "2018-04",
                "key": 1522540800000,
                "doc_count": 483,
                "single_sum": {
                  "value": 1740
                }
              },
              {
                "key_as_string": "2018-05",
                "key": 1525132800000,
                "doc_count": 30,
                "single_sum": {
                  "value": 45
                }
              }
            ]
          }
        },
        {
          "key": "股吧",
          "doc_count": 6395,
          "month_num": {
            "buckets": [
              {
                "key_as_string": "2018-02",
                "key": 1517443200000,
                "doc_count": 10,
                "single_sum": {
                  "value": 26
                }
              },
              {
                "key_as_string": "2018-03",
                "key": 1519862400000,
                "doc_count": 6383,
                "single_sum": {
                  "value": 24965
                }
              },
              {
                "key_as_string": "2018-04",
                "key": 1522540800000,
                "doc_count": 1,
                "single_sum": {
                  "value": 0
                }
              },
              {
                "key_as_string": "2018-05",
                "key": 1525132800000,
                "doc_count": 1,
                "single_sum": {
                  "value": 0
                }
              }
            ]
          }
        },
        {
          "key": "好大夫在线",
          "doc_count": 2933,
          "month_num": {
            "buckets": [
              {
                "key_as_string": "2018-02",
                "key": 1517443200000,
                "doc_count": 5,
                "single_sum": {
                  "value": 0
                }
              },
              {
                "key_as_string": "2018-03",
                "key": 1519862400000,
                "doc_count": 2927,
                "single_sum": {
                  "value": 0
                }
              },
              {
                "key_as_string": "2018-04",
                "key": 1522540800000,
                "doc_count": 1,
                "single_sum": {
                  "value": 0
                }
              }
            ]
          }
        }
 。。。。。太多省略

2.
按照论坛名称,对论坛正面情感总量(正数,即排除 sentimentOrient 为 -1 和 0 的文档),1-5月按月聚合 相对应字段为sentimentOrient

GET xiao-2018-4-1,xiao-2018-6-12,xiao-2018-3-1/Socials/_search
{
    "size" : 0,
    "query" : {
        "constant_score" : {
            "filter" : {
                "bool" : {
                    "must" : [
                        {
                            "term" : {
                               "sourceType" : "FORUM"
                            }
                        },
                        {
                            "range": {
                                "timeDay": {
                                    "gte": "2018-01-01",
                                    "lte": "2018-05-31"
                                }
                            }
                        }
                    ],
                    "must_not" : [
                        { "term" : { "sentimentOrient" : -1} },
                        { "term" : { "sentimentOrient" : 0 } }
                    ]
                }
            }
        }
    },
    "aggs" : {
        "all_interests" : {
            "terms" : {
                "size" : 100000,
                "field" : "website.keyword"
            },
            "aggs": {
                "month_num": {
                    "date_histogram": {
                        "field": "timeDay",
                        "interval": "month",
                        "format": "yyyy-MM"
                    }
                }
            }
        }
    }
}

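疑问:本来一开始想按官网 https://elasticsearch.cn/book/elasticsearch_definitive_guide_2.x/_extended_example.html 上的做法,用 extended_bounds 来限制时间范围,结果死活不好使,最后只能转变思路,在查询的时候做手脚了。
原因:extended_bounds 只是强制 date_histogram 把桶的范围“扩展”到指定的 min/max(通常配合 min_doc_count: 0 用来补出没有数据的空桶),并不会过滤掉范围之外的文档;要限制参与聚合的时间范围,只能像上面这样在 query 里用 range 过滤。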

3.
需要监测的关键词:零跑,零跑汽车,零跑S01
需要过滤关键词:零跑腿,专家门诊
社交和新闻根据url字段去重后6月2号到7月2号的总数值

GET xiao-2018-6-12,xiao-2018-6-19,xiao-2018-6-26,xiao-2018-6-5/Socials/_search
{
    "size" : 0,
    "query" : {
        "constant_score" : {
            "filter" : {
                "bool" : {
                    "must" : [
                        {
                            "range": {
                                "timeDay": {
                                    "gte": "2018-06-02",
                                    "lte": "2018-07-02"
                                }
                            }
                        },
                        {
                            "query_string":{
                                "default_field":"textZh",
                                "query":"零跑 OR 零跑汽车 OR 零跑S01 NOT 零跑腿 NOT 专家门诊"
                                或者
                                "query" : "( ( \"\"零跑\"\" ) OR ( \"\"零跑汽车\"\" ) OR ( \"\"零跑S01\"\" ) NOT ( \"\"零跑腿\"\" ) NOT ( \"\"专家门诊\"\" ) )"
                            }
                        }
                    ]
                }
            }
        }
    },
    "aggs" : {
        "distinct_colors" : {
            "cardinality" : {
                "field" : "url"
            }
        }
    }
}

注:上面这个查询结果不对,还是有问题
原因:这个索引mapping里textZh字段的设置如下

          "textZh": {
            "type": "text",
            "store": true,
            "fields": {
              "keyword": {
                "type": "keyword",
                "ignore_above": 256
              }
            },
            "analyzer": "ik_smart"
          }

导致你输入“零跑”会被分词为“零”和“跑”,所以搜索的结果并不是你想要的
解决:
社交(_type是“Socials”的为社交,_type是“News”的为新闻):

GET xiao-2018-6-12,xiao-2018-6-19,xiao-2018-6-26,xiao-2018-6-5/Socials/_search
{
    "size" : 0,
    "query" : {
        "constant_score" : {
            "filter" : {
                "bool": {
                    "must": {
                        "range": {
                            "timeStr": {
                                "gte": "2018-06-02 00:00:00",
                                "lt": "2018-07-03 00:00:00"
                            }
                        }
                    },
                    "should": [
                        {
                            "match_phrase": {
                                "textZh" : {
                                    "query" : "零跑"
                                }
                            }
                        },
                        {
                            "match_phrase": {
                                "textZh" : {
                                    "query" : "零跑汽车"
                                }
                            }
                        },
                        {
                            "match_phrase": {
                                "textZh" : {
                                    "query" : "零跑S01"
                                }
                            }
                        }
                    ],
                    "minimum_should_match": 1,
                    "must_not": {
                        "bool": {
                            "should": [
                                {
                                    "match_phrase": {
                                        "textZh" : {
                                            "query" : "零跑腿"
                                        }
                                    }
                                },
                                {
                                    "match_phrase": {
                                        "textZh" : {
                                            "query" : "专家门诊"
                                        }
                                    }
                                }
                            ]
                        }
                    }
                }
            }
        }
    },
    "aggs" : {
        "distinct_colors" : {
            "cardinality" : {
                "field" : "url"
            }
        }
    }
}

4.
25号到28号指定时间段的微博号总和

GET xiaoqiang-2018-10-29/Socials/_search
{
    "size": 0,
    "query" : {
        "constant_score" : {
            "filter" : {
                "bool" : {
                    "must" : [
                        {
                            "range": {
                                "timeHour": {
                                    "gte": "2018-10-25 21",
                                    "lte": "2018-10-28 21",
                                    "format": "yyyy-MM-dd HH"
                                }
                            }
                        },
                        {
                            "term" : {
                                "sourceType": "weibo"
                            }
                        }
                    ]
                }
            }
        }
    },
    "aggs" : {
        "all_interests" : {
           "terms" : {
              "script" : "String he=new SimpleDateFormat('HH').format(new Date(doc['timeHour'].value)); if(he.equals('01')){return he;}else{return null;}"
           },
            "aggs" : {
                "per_count" : {
                    "terms" : {
                       "script" : "doc['url'].value.substring(17,27)"
                    }
                }
            }
        }
    }
}

结果:

{
  "took": 4182,
  "timed_out": false,
  "_shards": {
    "total": 10,
    "successful": 10,
    "failed": 0
  },
  "hits": {
    "total": 2997411,
    "max_score": 0,
    "hits": []
  },
  "aggregations": {
    "all_interests": {
      "doc_count_error_upper_bound": 0,
      "sum_other_doc_count": 0,
      "buckets": [
        {
          "key": "01",
          "doc_count": 154272,
          "per_count": {
            "doc_count_error_upper_bound": 0,
            "sum_other_doc_count": 154116,
            "buckets": [
              {
                "key": "5670137181",
                "doc_count": 20
              },
              {
                "key": "5408336201",
                "doc_count": 16
              },
              {
                "key": "1084088733",
                "doc_count": 15
              },
              {
                "key": "1283869875",
                "doc_count": 15
              },
              {
                "key": "1322676944",
                "doc_count": 15
              },
              {
                "key": "1668841355",
                "doc_count": 15
              },
              {
                "key": "1681901143",
                "doc_count": 15
              },
              {
                "key": "1704588860",
                "doc_count": 15
              },
              {
                "key": "1819301715",
                "doc_count": 15
              },
              {
                "key": "1823510107",
                "doc_count": 15
              }
            ]
          }
        }
      ]
    }
  }
}

5.近n天每天各个时段的微博号

GET xiaoqiang-2018-10-29/Socials/_search
{
    "size": 0,
    "query" : {
        "constant_score" : {
            "filter" : {
                "bool" : {
                    "must" : [
                        {
                            "range": {
                                "timeHour": {
                                    "gte": "2018-10-26 21",
                                    "lte": "2018-10-28 21",
                                    "format": "yyyy-MM-dd HH"
                                }
                            }
                        },
                        {
                            "term" : {
                                "sourceType": "weibo"
                            }
                        }
                    ]
                }
            }
        }
    },
    "aggs" : {
        "all_interests" : {
           "terms" : {
              "script" : "new SimpleDateFormat('HH').format(new Date(doc['timeHour'].value))",
              "order" : { "_term" : "desc" }
           },
           "aggs": {
                "month_num": {
                    "date_histogram": {
                        "field": "timeDay",
                        "interval": "day",
                        "format": "yyyy-MM-dd"
                    },
                    "aggs" : {
                        "per_count" : {
                            "terms" : {
                                "size" : 1,
                                "script" : "doc['url'].value.substring(17,27)"
                            }
                        }
                    }
                }
            }
        }
    }
}

结果:

{
  "took": 15584,
  "timed_out": false,
  "_shards": {
    "total": 10,
    "successful": 10,
    "failed": 0
  },
  "hits": {
    "total": 2143757,
    "max_score": 0,
    "hits": []
  },
  "aggregations": {
    "all_interests": {
      "doc_count_error_upper_bound": 0,
      "sum_other_doc_count": 1356451,
      "buckets": [
        {
          "key": "23",
          "doc_count": 93821,
          "month_num": {
            "buckets": [
              {
                "key_as_string": "2018-10-27",
                "key": 1540598400000,
                "doc_count": 37782,
                "per_count": {
                  "doc_count_error_upper_bound": 0,
                  "sum_other_doc_count": 37767,
                  "buckets": [
                    {
                      "key": "1057449614",
                      "doc_count": 15
                    }
                  ]
                }
              },
              {
                "key_as_string": "2018-10-28",
                "key": 1540684800000,
                "doc_count": 56039,
                "per_count": {
                  "doc_count_error_upper_bound": 0,
                  "sum_other_doc_count": 56024,
                  "buckets": [
                    {
                      "key": "1218291087",
                      "doc_count": 15
                    }
                  ]
                }
              }
            ]
          }
        },
        {
          "key": "22",
          "doc_count": 92328,
          "month_num": {
            "buckets": [
              {
                "key_as_string": "2018-10-27",
                "key": 1540598400000,
                "doc_count": 37043,
                "per_count": {
                  "doc_count_error_upper_bound": 0,
                  "sum_other_doc_count": 37028,
                  "buckets": [
                    {
                      "key": "1306672440",
                      "doc_count": 15
                    }
                  ]
                }
              },
              {
                "key_as_string": "2018-10-28",
                "key": 1540684800000,
                "doc_count": 55285,
                "per_count": {
                  "doc_count_error_upper_bound": 0,
                  "sum_other_doc_count": 55270,
                  "buckets": [
                    {
                      "key": "1221062225",
                      "doc_count": 15
                    }
                  ]
                }
              }
            ]
          }
        },
        {
          "key": "21",
          "doc_count": 92859,
          "month_num": {
            "buckets": [
              {
                "key_as_string": "2018-10-27",
                "key": 1540598400000,
                "doc_count": 36797,
                "per_count": {
                  "doc_count_error_upper_bound": 0,
                  "sum_other_doc_count": 36782,
                  "buckets": [
                    {
                      "key": "3799113457",
                      "doc_count": 15
                    }
                  ]
                }
              },
              {
                "key_as_string": "2018-10-28",
                "key": 1540684800000,
                "doc_count": 56062,
                "per_count": {
                  "doc_count_error_upper_bound": 0,
                  "sum_other_doc_count": 56047,
                  "buckets": [
                    {
                      "key": "1360660712",
                      "doc_count": 15
                    }
                  ]
                }
              }
            ]
          }
        },
        {
          "key": "20",
          "doc_count": 95391,
          "month_num": {
            "buckets": [
              {
                "key_as_string": "2018-10-27",
                "key": 1540598400000,
                "doc_count": 38552,
                "per_count": {
                  "doc_count_error_upper_bound": 0,
                  "sum_other_doc_count": 38537,
                  "buckets": [
                    {
                      "key": "2722601793",
                      "doc_count": 15
                    }
                  ]
                }
              },
              {
                "key_as_string": "2018-10-28",
                "key": 1540684800000,
                "doc_count": 56839,
                "per_count": {
                  "doc_count_error_upper_bound": 0,
                  "sum_other_doc_count": 56824,
                  "buckets": [
                    {
                      "key": "1799231193",
                      "doc_count": 15
                    }
                  ]
                }
              }
            ]
          }
        },
        {
          "key": "19",
          "doc_count": 94108,
          "month_num": {
            "buckets": [
              {
                "key_as_string": "2018-10-27",
                "key": 1540598400000,
                "doc_count": 37687,
                "per_count": {
                  "doc_count_error_upper_bound": 0,
                  "sum_other_doc_count": 37672,
                  "buckets": [
                    {
                      "key": "1750745673",
                      "doc_count": 15
                    }
                  ]
                }
              },
              {
                "key_as_string": "2018-10-28",
                "key": 1540684800000,
                "doc_count": 56421,
                "per_count": {
                  "doc_count_error_upper_bound": 0,
                  "sum_other_doc_count": 56406,
                  "buckets": [
                    {
                      "key": "1814872401",
                      "doc_count": 15
                    }
                  ]
                }
              }
            ]
          }
        },
        {
          "key": "18",
          "doc_count": 95298,
          "month_num": {
            "buckets": [
              {
                "key_as_string": "2018-10-27",
                "key": 1540598400000,
                "doc_count": 37882,
                "per_count": {
                  "doc_count_error_upper_bound": 0,
                  "sum_other_doc_count": 37867,
                  "buckets": [
                    {
                      "key": "3933789681",
                      "doc_count": 15
                    }
                  ]
                }
              },
              {
                "key_as_string": "2018-10-28",
                "key": 1540684800000,
                "doc_count": 57416,
                "per_count": {
                  "doc_count_error_upper_bound": 0,
                  "sum_other_doc_count": 57401,
                  "buckets": [
                    {
                      "key": "1508661252",
                      "doc_count": 15
                    }
                  ]
                }
              }
            ]
          }
        },
        {
          "key": "17",
          "doc_count": 87719,
          "month_num": {
            "buckets": [
              {
                "key_as_string": "2018-10-27",
                "key": 1540598400000,
                "doc_count": 34590,
                "per_count": {
                  "doc_count_error_upper_bound": 0,
                  "sum_other_doc_count": 34575,
                  "buckets": [
                    {
                      "key": "2038738841",
                      "doc_count": 15
                    }
                  ]
                }
              },
              {
                "key_as_string": "2018-10-28",
                "key": 1540684800000,
                "doc_count": 53129,
                "per_count": {
                  "doc_count_error_upper_bound": 0,
                  "sum_other_doc_count": 53114,
                  "buckets": [
                    {
                      "key": "2237799475",
                      "doc_count": 15
                    }
                  ]
                }
              }
            ]
          }
        },
        {
          "key": "16",
          "doc_count": 64030,
          "month_num": {
            "buckets": [
              {
                "key_as_string": "2018-10-27",
                "key": 1540598400000,
                "doc_count": 27736,
                "per_count": {
                  "doc_count_error_upper_bound": 0,
                  "sum_other_doc_count": 27721,
                  "buckets": [
                    {
                      "key": "1911197885",
                      "doc_count": 15
                    }
                  ]
                }
              },
              {
                "key_as_string": "2018-10-28",
                "key": 1540684800000,
                "doc_count": 36294,
                "per_count": {
                  "doc_count_error_upper_bound": 0,
                  "sum_other_doc_count": 36279,
                  "buckets": [
                    {
                      "key": "1984810814",
                      "doc_count": 15
                    }
                  ]
                }
              }
            ]
          }
        },
        {
          "key": "15",
          "doc_count": 44697,
          "month_num": {
            "buckets": [
              {
                "key_as_string": "2018-10-27",
                "key": 1540598400000,
                "doc_count": 19721,
                "per_count": {
                  "doc_count_error_upper_bound": 0,
                  "sum_other_doc_count": 19706,
                  "buckets": [
                    {
                      "key": "1564695515",
                      "doc_count": 15
                    }
                  ]
                }
              },
              {
                "key_as_string": "2018-10-28",
                "key": 1540684800000,
                "doc_count": 24976,
                "per_count": {
                  "doc_count_error_upper_bound": 0,
                  "sum_other_doc_count": 24961,
                  "buckets": [
                    {
                      "key": "1772563447",
                      "doc_count": 15
                    }
                  ]
                }
              }
            ]
          }
        },
        {
          "key": "14",
          "doc_count": 27055,
          "month_num": {
            "buckets": [
              {
                "key_as_string": "2018-10-27",
                "key": 1540598400000,
                "doc_count": 11754,
                "per_count": {
                  "doc_count_error_upper_bound": 0,
                  "sum_other_doc_count": 11740,
                  "buckets": [
                    {
                      "key": "5054956136",
                      "doc_count": 14
                    }
                  ]
                }
              },
              {
                "key_as_string": "2018-10-28",
                "key": 1540684800000,
                "doc_count": 15301,
                "per_count": {
                  "doc_count_error_upper_bound": 0,
                  "sum_other_doc_count": 15286,
                  "buckets": [
                    {
                      "key": "1690639232",
                      "doc_count": 15
                    }
                  ]
                }
              }
            ]
          }
        }
      ]
    }
  }
}

获取多层聚合的数据api:

	/**
	 * Collects the numeric Weibo ids found in a two-level terms aggregation response.
	 *
	 * <p>Reads the top-level terms aggregation named "timeinterval" and, inside each of its
	 * buckets, the nested terms aggregation named "weiboIds". Every nested bucket key that
	 * consists only of digits is collected; the list is stored in {@code result} under the
	 * key returned by {@code message.getString("timeinterval")}, together with the
	 * "flag" and "status" entries.
	 *
	 * @param response search response carrying the aggregations
	 * @param result   output object that receives the id list, "flag" and "status"
	 * @param message  request parameters; only "timeinterval" is read here
	 * @return {@code true} once the aggregation has been processed, {@code false} when the
	 *         response carries no "timeinterval" aggregation (nothing is written in that case)
	 */
	private boolean handleresaggWeibo(SearchResponse response,JSONObject result,JSONObject message) {
		String timeinterval = message.getString("timeinterval");

		// Guard against a response without the expected aggregation instead of failing
		// with a NullPointerException further down.
		if (response.getAggregations() == null) {
			return false;
		}
		StringTerms gradeTerms = (StringTerms) response.getAggregations().asMap().get("timeinterval");
		if (gradeTerms == null) {
			return false;
		}

		// Compiled once: the pattern is invariant across all buckets.
		Pattern digitsOnly = Pattern.compile("^\\d+$");

		for (Bucket gradeBucket : gradeTerms.getBuckets()) {
			System.out.println(gradeBucket.getKey() + "时间段共有 " + gradeBucket.getDocCount() +"个文档。");

			List<String> jsons = new ArrayList<>();
			StringTerms classTerms = (StringTerms) gradeBucket.getAggregations().asMap().get("weiboIds");
			if (classTerms != null) {
				for (Bucket classBucket : classTerms.getBuckets()) {
					String weiboid = classBucket.getKeyAsString();
					// Keep purely numeric keys only; anything else is not treated as a Weibo id.
					if (digitsOnly.matcher(weiboid).matches()) {
						jsons.add(weiboid);
					}
				}
			}

			// NOTE(review): every outer bucket writes to the same key, so with more than one
			// "timeinterval" bucket only the last bucket's ids remain — confirm this is intended.
			result.put(timeinterval, jsons);
			result.put("flag", "1");
			result.put("status", "返回列表成功");
		}
		return true;
	}

获取sum聚合的数据api:
如:

{
  "took": 77,
  "timed_out": false,
  "_shards": {
    "total": 5,
    "successful": 5,
    "failed": 0
  },
  "hits": {
    "total": 597200,
    "max_score": 0,
    "hits": []
  },
  "aggregations": {
    "per_count": {
      "doc_count_error_upper_bound": 886,
      "sum_other_doc_count": 112423,
      "buckets": [
        {
          "key": "本人",
          "doc_count": 233146,
          "sum_view": {
            "value": 105999
          }
        },
        {
          "key": "好文",
          "doc_count": 11008,
          "sum_view": {
            "value": 2860
          }
        }
      ]
    }
  }
}

api:

		// Turn every terms bucket into one JSON record (author, document count, summed views)
		// and append the record's JSON text to the result list.
		while(gradeBucketIt.hasNext()) {
			Bucket gradeBucket = gradeBucketIt.next();

			// Look the metric up by name so that adding further sub-aggregations cannot
			// silently change which value is read.
			InternalSum view = (InternalSum) gradeBucket.getAggregations().asMap().get("sum_view");

			JSONObject json = new JSONObject();
			json.put("author", gradeBucket.getKeyAsString());
			json.put("docCount", gradeBucket.getDocCount());
			// getValue() is a double; narrowing to long avoids clamping large sums at Integer.MAX_VALUE.
			json.put("viewNum", (long) view.getValue());
			json.put("transfer", 0);

			// Append this record, not the string form of the list itself.
			jsons.add(json.toString());
		}

同时对两个字段做sum聚合:
查询语句:

    "aggs" : {
        "per_count" : {
           "terms" : {
              "size" : 2,
              "field" : "author"
           },
           "aggs" : {
                "sum_view" : {
                    "sum" : {
                       "field" : "view"
                    }
                },
                "sum_transfer" : {
                    "sum" : {
                       "field" : "transfer"
                    }
                }
            }
        }
    }

运行结果:

{
  "took": 91,
  "timed_out": false,
  "_shards": {
    "total": 5,
    "successful": 5,
    "failed": 0
  },
  "hits": {
    "total": 599127,
    "max_score": 0,
    "hits": []
  },
  "aggregations": {
    "per_count": {
      "doc_count_error_upper_bound": 1015,
      "sum_other_doc_count": 162154,
      "buckets": [
        {
          "key": "本人",
          "doc_count": 233890,
          "sum_transfer": {
            "value": 0
          },
          "sum_view": {
            "value": 106418
          }
        },
        {
          "key": "腾讯",
          "doc_count": 79699,
          "sum_transfer": {
            "value": 0
          },
          "sum_view": {
            "value": 1179
          }
        }
      ]
    }
  }
}

查询api:

// Terms aggregation "per_count" on the "author" field, limited to 5 buckets, with two sum
// sub-aggregations per bucket: "sum_view" over "view" and "sum_transfer" over "transfer".
// NOTE(review): the DSL example for this aggregation uses "size": 2 while this builder uses 5.
AggregationBuilder ggregationBuilder = AggregationBuilders.terms("per_count").field("author").size(5)
		.subAggregation(AggregationBuilders.sum("sum_view").field("view"))
		.subAggregation(AggregationBuilders.sum("sum_transfer").field("transfer"));

获取数据api:

		// Turn every terms bucket into one JSON record (author, document count, summed views,
		// summed transfers) and append the record's JSON text to the result list.
		while(gradeBucketIt.hasNext()) {
			Bucket gradeBucket = gradeBucketIt.next();

			// Both metrics are fetched by the names given to the sum sub-aggregations.
			InternalSum view = (InternalSum) gradeBucket.getAggregations().asMap().get("sum_view");
			InternalSum transfer = (InternalSum) gradeBucket.getAggregations().asMap().get("sum_transfer");

			JSONObject json = new JSONObject();
			json.put("author", gradeBucket.getKeyAsString());
			json.put("docCount", gradeBucket.getDocCount());
			// getValue() is a double; narrowing to long avoids clamping large sums at Integer.MAX_VALUE.
			json.put("viewNum", (long) view.getValue());
			json.put("transfer", (long) transfer.getValue());

			// Append this record, not the string form of the list itself.
			jsons.add(json.toString());
		}
es内置的分词器:
  • standard analyzer
  • simple analyzer
  • whitespace analyzer
  • language analyzer(特定的语言的分词器)

例句:Set the shape to semi-transparent by calling set_trans(5)
不同分词器的分词结果:

  • standard analyzer:set, the, shape, to, semi, transparent, by, calling, set_trans, 5(默认的是standard)
  • simple analyzer:set, the, shape, to, semi, transparent, by, calling, set, trans
  • whitespace analyzer:Set, the, shape, to, semi-transparent, by, calling, set_trans(5)
  • language analyzer(特定的语言的分词器,比如说,english,英语分词器):set, shape, semi, transpar, call, set_tran, 5

分词器测试:

GET /_analyze
{
  "analyzer": "standard",
  "text":"I love you"
}

结果:

{
  "tokens": [
    {
      "token": "i",
      "start_offset": 0,
      "end_offset": 1,
      "type": "<ALPHANUM>",
      "position": 0
    },
    {
      "token": "love",
      "start_offset": 2,
      "end_offset": 6,
      "type": "<ALPHANUM>",
      "position": 1
    },
    {
      "token": "you",
      "start_offset": 7,
      "end_offset": 10,
      "type": "<ALPHANUM>",
      "position": 2
    }
  ]
}
修改mapping:

1.删除索引:

DELETE hui
/**
 * Deletes the index named "indexName", but only if the cluster reports that it exists.
 */
@Test
public void deleteIndex(){
    // Check for the index first, so the delete request is only sent when there is something to delete.
    boolean indexPresent = client.admin().indices()
            .exists(new IndicesExistsRequest("indexName"))
            .actionGet()
            .isExists();
    if (!indexPresent) {
        return;
    }
    client.admin().indices().prepareDelete("indexName").execute().actionGet();
}

2.创建索引:

PUT hui

3.创建mapping:

POST hui/my_type/_mapping
{
    "my_type": {
        "properties": {
            "title": {
                "type": "keyword",
                "store": true 
            },
            "date": {
                "type": "date",
                "format": "yyyy-MM-dd"
            },
            "content": {
                "type": "text"
            }
        }
    }
}

注:第2和3步可合并为:

PUT hui
{
  "mappings": {
    "my_type": {
      "properties": {
        "title": {
          "type": "keyword",
          "store": true 
        },
        "date": {
          "type": "date",
          "format": "yyyy-MM-dd"
        },
        "content": {
          "type": "text"
        }
      }
    }
  }
}

参考:https://www.elastic.co/guide/en/elasticsearch/reference/current/indices-put-mapping.html

4.mapping增加字段
(Elasticsearch的mapping一旦创建,只能增加字段,而不能修改已有字段的类型)

POST hui/my_type/_mapping
{
    "my_type": {
        "properties": {
            "hui":{
                "type": "text",
                "store": true
            }
        }
    }
}

5.修改mapping字段:

POST hui/my_type/_mapping
{
    "my_type": {
        "properties": {
            "hui":{
                "type": "integer"
            }
        }
    }
}

报错:

{
  "error": {
    "root_cause": [
      {
        "type": "illegal_argument_exception",
        "reason": "mapper [hui] of different type, current_type [text], merged_type [integer]"
      }
    ],
    "type": "illegal_argument_exception",
    "reason": "mapper [hui] of different type, current_type [text], merged_type [integer]"
  },
  "status": 400
}

原因:
如果一个字段的类型被修改,那么该字段的所有数据都需要重新索引。Elasticsearch底层使用的是lucene库,字段类型修改以后,索引和搜索都要涉及分词方式等操作,因此不允许修改类型是符合lucene机制的。

字段的某些属性不可以改变,而有的可以改变。如store属性就不可以改变:

POST hui/my_type/_mapping
{
    "my_type": {
        "properties": {
            "hui":{
                "type": "text"
            }
        }
    }
}

报错:

{
  "error": {
    "root_cause": [
      {
        "type": "remote_transport_exception",
        "reason": "[0B7eiG0][192.168.0.1:9300][indices:admin/mapping/put]"
      }
    ],
    "type": "illegal_argument_exception",
    "reason": "Mapper for [hui] conflicts with existing mapping in other types:\n[mapper [hui] has different [store] values]"
  },
  "status": 400
}

而 fielddata 属性就可以增加或者删除(注意请求中要带上该字段原有的 store 设置,否则会出现上面的冲突报错):

POST hui/my_type/_mapping
{
    "my_type": {
        "properties": {
            "hui":{
                "type": "text",
                "store": true,
                "fielddata": true
            }
        }
    }
}
重构索引:

1.创建索引hui,插入数据并设置别名:

PUT hui
POST hui/News/_mapping
{
    "News": {
        "properties": {
            "hui":{
                "type": "text",
                "fields": {
                    "keyword": {
                        "type": "keyword",
                        "ignore_above": 256
                    }
                }
            }
        }
    }
}
POST hui/News/1
{
   "hui" : "hehe"
}
POST hui/_alias/xiao

2.创建索引qiang并插入数据:

PUT qiang
POST qiang/News/_mapping
{
    "News": {
        "properties": {
            "hui":{
                "type": "text",
                "fields": {
                    "keyword": {
                        "type": "keyword",
                        "ignore_above": 256
                    }
                },
                "store": true
            }
        }
    }
}

3.开始执行重构索引命令:

POST _reindex
{
  "source": {
    "index": "hui"
  },
  "dest": {
    "index": "qiang",
    "version_type": "internal"
  }
}

注:数据量大的时候,请求会如下显示连接超时,但重构索引的任务仍会继续执行,不影响功能实现(可用下面的Task API查看进度)。我重新导入了十九万条数据,大约用了十多分钟。

{
  "statusCode": 504,
  "error": "Gateway Timeout",
  "message": "Client request timeout"
}

4.使用Task API查询进度:

GET _tasks?detailed=true&actions=*reindex
{
  "nodes": {
    "yFpET0TETpuWGCxxyodXmg": {
      "name": "yFpET0T",
      "transport_address": "192.168.0.100:9300",
      "host": "192.168.0.100",
      "ip": "192.168.0.100:9300",
      "roles": [
        "master",
        "data",
        "ingest"
      ],
      "attributes": {
        "ml.max_open_jobs": "10",
        "ml.enabled": "true"
      },
      "tasks": {
        "yFpET0TETpuWGCxxyodXmg:6319552": {
          "node": "yFpET0TETpuWGCxxyodXmg",
          "id": 6319552,
          "type": "transport",
          "action": "indices:data/write/reindex",
          "status": {
            "total": 194111,
            "updated": 0,
            "created": 50000,
            "deleted": 0,
            "batches": 51,
            "version_conflicts": 0,
            "noops": 0,
            "retries": {
              "bulk": 0,
              "search": 0
            },
            "throttled_millis": 0,
            "requests_per_second": -1,
            "throttled_until_millis": 0
          },
          "description": "reindex from [mei_toutiao] to [mei_toutiao_v2]",
          "start_time_in_millis": 1532338516013,
          "running_time_in_nanos": 176981696219,
          "cancellable": true
        }
      }
    }
  }
}

5.如果复制完成则显示:

{
  "nodes": {}
}

6.别名转换:

POST /_aliases
{
    "actions": [
        { "remove": {
            "alias": "xiao",
            "index": "hui"
        }},
        { "add": {
            "alias": "xiao",
            "index": "qiang"
        }}
    ]
}
  • 1
    点赞
  • 5
    收藏
    觉得还不错? 一键收藏
  • 打赏
    打赏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

小强签名设计

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值