奇葩需求:
1.
按照论坛名称,对论坛评论总量,1-5月按月聚合,相对应字段为cmtCnt
按照论坛名称,对论坛发帖点赞量,1-5月按月聚合,相对应字段为adtCnt(下面的查询以cmtCnt为例,统计点赞量时把sum的field换成adtCnt即可)
GET xiao-2018-4-1,xiao-2018-6-12,xiao-2018-3-1/Socials/_search
{
"size" : 0,
"query" : {
"constant_score" : {
"filter" : {
"bool" : {
"must" : [
{
"term" : {
"sourceType" : "FORUM"
}
},
{
"range": {
"timeDay": {
"gte": "2018-01-01",
"lte": "2018-05-31"
}
}
}
]
}
}
}
},
"aggs" : {
"all_interests" : {
"terms" : {
"size" : 100000,
"field" : "website.keyword"
},
"aggs": {
"month_num": {
"date_histogram": {
"field": "timeDay",
"interval": "month",
"format": "yyyy-MM"
},
"aggs": {
"single_sum": {
"sum" : { "field" : "cmtCnt" }
}
}
}
}
}
}
}
结果:
{
"took": 9141,
"timed_out": false,
"_shards": {
"total": 350,
"successful": 350,
"failed": 0
},
"hits": {
"total": 735705,
"max_score": 0,
"hits": []
},
"aggregations": {
"all_interests": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "论坛",
"doc_count": 661238,
"month_num": {
"buckets": [
{
"key_as_string": "2018-01",
"key": 1514764800000,
"doc_count": 3,
"single_sum": {
"value": 0
}
},
{
"key_as_string": "2018-02",
"key": 1517443200000,
"doc_count": 0,
"single_sum": {
"value": 0
}
},
{
"key_as_string": "2018-03",
"key": 1519862400000,
"doc_count": 1403,
"single_sum": {
"value": 0
}
},
{
"key_as_string": "2018-04",
"key": 1522540800000,
"doc_count": 125895,
"single_sum": {
"value": 0
}
},
{
"key_as_string": "2018-05",
"key": 1525132800000,
"doc_count": 533937,
"single_sum": {
"value": 0
}
}
]
}
},
{
"key": "百度贴吧",
"doc_count": 21275,
"month_num": {
"buckets": [
{
"key_as_string": "2018-02",
"key": 1517443200000,
"doc_count": 290,
"single_sum": {
"value": 406
}
},
{
"key_as_string": "2018-03",
"key": 1519862400000,
"doc_count": 20472,
"single_sum": {
"value": 19174
}
},
{
"key_as_string": "2018-04",
"key": 1522540800000,
"doc_count": 483,
"single_sum": {
"value": 1740
}
},
{
"key_as_string": "2018-05",
"key": 1525132800000,
"doc_count": 30,
"single_sum": {
"value": 45
}
}
]
}
},
{
"key": "股吧",
"doc_count": 6395,
"month_num": {
"buckets": [
{
"key_as_string": "2018-02",
"key": 1517443200000,
"doc_count": 10,
"single_sum": {
"value": 26
}
},
{
"key_as_string": "2018-03",
"key": 1519862400000,
"doc_count": 6383,
"single_sum": {
"value": 24965
}
},
{
"key_as_string": "2018-04",
"key": 1522540800000,
"doc_count": 1,
"single_sum": {
"value": 0
}
},
{
"key_as_string": "2018-05",
"key": 1525132800000,
"doc_count": 1,
"single_sum": {
"value": 0
}
}
]
}
},
{
"key": "好大夫在线",
"doc_count": 2933,
"month_num": {
"buckets": [
{
"key_as_string": "2018-02",
"key": 1517443200000,
"doc_count": 5,
"single_sum": {
"value": 0
}
},
{
"key_as_string": "2018-03",
"key": 1519862400000,
"doc_count": 2927,
"single_sum": {
"value": 0
}
},
{
"key_as_string": "2018-04",
"key": 1522540800000,
"doc_count": 1,
"single_sum": {
"value": 0
}
}
]
}
}
……(其余结果太多,此处省略)
2.
按照论坛名称,对论坛正面情感总量,1-5月按月聚合,相对应字段为sentimentOrient(注意:下面的查询用must_not同时排除了sentimentOrient为-1和0的文档,并不是字面意义上的“非负数”)
GET xiao-2018-4-1,xiao-2018-6-12,xiao-2018-3-1/Socials/_search
{
"size" : 0,
"query" : {
"constant_score" : {
"filter" : {
"bool" : {
"must" : [
{
"term" : {
"sourceType" : "FORUM"
}
},
{
"range": {
"timeDay": {
"gte": "2018-01-01",
"lte": "2018-05-31"
}
}
}
],
"must_not" : [
{ "term" : { "sentimentOrient" : -1} },
{ "term" : { "sentimentOrient" : 0 } }
]
}
}
}
},
"aggs" : {
"all_interests" : {
"terms" : {
"size" : 100000,
"field" : "website.keyword"
},
"aggs": {
"month_num": {
"date_histogram": {
"field": "timeDay",
"interval": "month",
"format": "yyyy-MM"
}
}
}
}
}
}
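疑问:本来一开始想按官网 https://elasticsearch.cn/book/elasticsearch_definitive_guide_2.x/_extended_example.html 上的做法用 extended_bounds
来限制时间范围,但死活不好使,我也是奇了怪了,最后只能转变思路,在查询的时候用 range 过滤做手脚了。
(原因:extended_bounds 并不会过滤文档,它只是配合 min_doc_count: 0 强制返回指定范围内的空桶;要限制时间范围必须在 query 里用 range 过滤。)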
3.
需要监测的关键词:零跑,零跑汽车,零跑S01
需要过滤关键词:零跑腿,专家门诊
社交和新闻根据url字段去重后6月2号到7月2号的总数值
GET xiao-2018-6-12,xiao-2018-6-19,xiao-2018-6-26,xiao-2018-6-5/Socials/_search
{
"size" : 0,
"query" : {
"constant_score" : {
"filter" : {
"bool" : {
"must" : [
{
"range": {
"timeDay": {
"gte": "2018-06-02",
"lte": "2018-07-02"
}
}
},
{
"query_string":{
"default_field":"textZh",
"query":"零跑 OR 零跑汽车 OR 零跑S01 NOT 零跑腿 NOT 专家门诊"
或者
"query" : "( ( \"\"零跑\"\" ) OR ( \"\"零跑汽车\"\" ) OR ( \"\"零跑S01\"\" ) NOT ( \"\"零跑腿\"\" ) NOT ( \"\"专家门诊\"\" ) )"
}
}
]
}
}
}
},
"aggs" : {
"distinct_colors" : {
"cardinality" : {
"field" : "url"
}
}
}
}
注:上面这个查询结果不对,还是有问题
原因:这个索引mapping里textZh字段的设置如下
"textZh": {
"type": "text",
"store": true,
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
},
"analyzer": "ik_smart"
}
导致输入“零跑”时会被ik_smart分词为“零”和“跑”两个词项,而query_string对未加引号的词只要求匹配分出来的词项,并不要求它们相邻,所以搜索的结果并不是你想要的(需要改用短语匹配match_phrase)
解决:
社交(_type是“Socials”的为社交,_type是“News”的为新闻):
GET xiao-2018-6-12,xiao-2018-6-19,xiao-2018-6-26,xiao-2018-6-5/Socials/_search
{
  "size": 0,
  "query": {
    "constant_score": {
      "filter": {
        "bool": {
          "must": {
            "range": {
              "timeStr": {
                "gte": "2018-06-02 00:00:00",
                "lt": "2018-07-03 00:00:00"
              }
            }
          },
          "should": [
            {
              "match_phrase": {
                "textZh": {
                  "query": "零跑"
                }
              }
            },
            {
              "match_phrase": {
                "textZh": {
                  "query": "零跑汽车"
                }
              }
            },
            {
              "match_phrase": {
                "textZh": {
                  "query": "零跑S01"
                }
              }
            }
          ],
          "minimum_should_match": 1,
          "must_not": {
            "bool": {
              "should": [
                {
                  "match_phrase": {
                    "textZh": "零跑腿"
                  }
                },
                {
                  "match_phrase": {
                    "textZh": {
                      "query": "专家门诊"
                    }
                  }
                }
              ]
            }
          }
        }
      }
    }
  },
  "aggs": {
    "distinct_colors": {
      "cardinality": {
        "field": "url"
      }
    }
  }
}
4. 10月25号到28号指定时间段的微博号总和
GET xiaoqiang-2018-10-29/Socials/_search
{
"size": 0,
"query" : {
"constant_score" : {
"filter" : {
"bool" : {
"must" : [
{
"range": {
"timeHour": {
"gte": "2018-10-25 21",
"lte": "2018-10-28 21",
"format": "yyyy-MM-dd HH"
}
}
},
{
"term" : {
"sourceType": "weibo"
}
}
]
}
}
}
},
"aggs" : {
"all_interests" : {
"terms" : {
"script" : "String he=new SimpleDateFormat('HH').format(new Date(doc['timeHour'].value)); if(he.equals('01')){return he;}else{return null;}"
},
"aggs" : {
"per_count" : {
"terms" : {
"script" : "doc['url'].value.substring(17,27)"
}
}
}
}
}
}
结果:
{
"took": 4182,
"timed_out": false,
"_shards": {
"total": 10,
"successful": 10,
"failed": 0
},
"hits": {
"total": 2997411,
"max_score": 0,
"hits": []
},
"aggregations": {
"all_interests": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "01",
"doc_count": 154272,
"per_count": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 154116,
"buckets": [
{
"key": "5670137181",
"doc_count": 20
},
{
"key": "5408336201",
"doc_count": 16
},
{
"key": "1084088733",
"doc_count": 15
},
{
"key": "1283869875",
"doc_count": 15
},
{
"key": "1322676944",
"doc_count": 15
},
{
"key": "1668841355",
"doc_count": 15
},
{
"key": "1681901143",
"doc_count": 15
},
{
"key": "1704588860",
"doc_count": 15
},
{
"key": "1819301715",
"doc_count": 15
},
{
"key": "1823510107",
"doc_count": 15
}
]
}
}
]
}
}
}
5.近n天每天各个时段的微博号
GET xiaoqiang-2018-10-29/Socials/_search
{
"size": 0,
"query" : {
"constant_score" : {
"filter" : {
"bool" : {
"must" : [
{
"range": {
"timeHour": {
"gte": "2018-10-26 21",
"lte": "2018-10-28 21",
"format": "yyyy-MM-dd HH"
}
}
},
{
"term" : {
"sourceType": "weibo"
}
}
]
}
}
}
},
"aggs" : {
"all_interests" : {
"terms" : {
"script" : "new SimpleDateFormat('HH').format(new Date(doc['timeHour'].value))",
"order" : { "_term" : "desc" }
},
"aggs": {
"month_num": {
"date_histogram": {
"field": "timeDay",
"interval": "day",
"format": "yyyy-MM-dd"
},
"aggs" : {
"per_count" : {
"terms" : {
"size" : 1,
"script" : "doc['url'].value.substring(17,27)"
}
}
}
}
}
}
}
}
结果:
{
"took": 15584,
"timed_out": false,
"_shards": {
"total": 10,
"successful": 10,
"failed": 0
},
"hits": {
"total": 2143757,
"max_score": 0,
"hits": []
},
"aggregations": {
"all_interests": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 1356451,
"buckets": [
{
"key": "23",
"doc_count": 93821,
"month_num": {
"buckets": [
{
"key_as_string": "2018-10-27",
"key": 1540598400000,
"doc_count": 37782,
"per_count": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 37767,
"buckets": [
{
"key": "1057449614",
"doc_count": 15
}
]
}
},
{
"key_as_string": "2018-10-28",
"key": 1540684800000,
"doc_count": 56039,
"per_count": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 56024,
"buckets": [
{
"key": "1218291087",
"doc_count": 15
}
]
}
}
]
}
},
{
"key": "22",
"doc_count": 92328,
"month_num": {
"buckets": [
{
"key_as_string": "2018-10-27",
"key": 1540598400000,
"doc_count": 37043,
"per_count": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 37028,
"buckets": [
{
"key": "1306672440",
"doc_count": 15
}
]
}
},
{
"key_as_string": "2018-10-28",
"key": 1540684800000,
"doc_count": 55285,
"per_count": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 55270,
"buckets": [
{
"key": "1221062225",
"doc_count": 15
}
]
}
}
]
}
},
{
"key": "21",
"doc_count": 92859,
"month_num": {
"buckets": [
{
"key_as_string": "2018-10-27",
"key": 1540598400000,
"doc_count": 36797,
"per_count": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 36782,
"buckets": [
{
"key": "3799113457",
"doc_count": 15
}
]
}
},
{
"key_as_string": "2018-10-28",
"key": 1540684800000,
"doc_count": 56062,
"per_count": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 56047,
"buckets": [
{
"key": "1360660712",
"doc_count": 15
}
]
}
}
]
}
},
{
"key": "20",
"doc_count": 95391,
"month_num": {
"buckets": [
{
"key_as_string": "2018-10-27",
"key": 1540598400000,
"doc_count": 38552,
"per_count": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 38537,
"buckets": [
{
"key": "2722601793",
"doc_count": 15
}
]
}
},
{
"key_as_string": "2018-10-28",
"key": 1540684800000,
"doc_count": 56839,
"per_count": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 56824,
"buckets": [
{
"key": "1799231193",
"doc_count": 15
}
]
}
}
]
}
},
{
"key": "19",
"doc_count": 94108,
"month_num": {
"buckets": [
{
"key_as_string": "2018-10-27",
"key": 1540598400000,
"doc_count": 37687,
"per_count": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 37672,
"buckets": [
{
"key": "1750745673",
"doc_count": 15
}
]
}
},
{
"key_as_string": "2018-10-28",
"key": 1540684800000,
"doc_count": 56421,
"per_count": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 56406,
"buckets": [
{
"key": "1814872401",
"doc_count": 15
}
]
}
}
]
}
},
{
"key": "18",
"doc_count": 95298,
"month_num": {
"buckets": [
{
"key_as_string": "2018-10-27",
"key": 1540598400000,
"doc_count": 37882,
"per_count": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 37867,
"buckets": [
{
"key": "3933789681",
"doc_count": 15
}
]
}
},
{
"key_as_string": "2018-10-28",
"key": 1540684800000,
"doc_count": 57416,
"per_count": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 57401,
"buckets": [
{
"key": "1508661252",
"doc_count": 15
}
]
}
}
]
}
},
{
"key": "17",
"doc_count": 87719,
"month_num": {
"buckets": [
{
"key_as_string": "2018-10-27",
"key": 1540598400000,
"doc_count": 34590,
"per_count": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 34575,
"buckets": [
{
"key": "2038738841",
"doc_count": 15
}
]
}
},
{
"key_as_string": "2018-10-28",
"key": 1540684800000,
"doc_count": 53129,
"per_count": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 53114,
"buckets": [
{
"key": "2237799475",
"doc_count": 15
}
]
}
}
]
}
},
{
"key": "16",
"doc_count": 64030,
"month_num": {
"buckets": [
{
"key_as_string": "2018-10-27",
"key": 1540598400000,
"doc_count": 27736,
"per_count": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 27721,
"buckets": [
{
"key": "1911197885",
"doc_count": 15
}
]
}
},
{
"key_as_string": "2018-10-28",
"key": 1540684800000,
"doc_count": 36294,
"per_count": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 36279,
"buckets": [
{
"key": "1984810814",
"doc_count": 15
}
]
}
}
]
}
},
{
"key": "15",
"doc_count": 44697,
"month_num": {
"buckets": [
{
"key_as_string": "2018-10-27",
"key": 1540598400000,
"doc_count": 19721,
"per_count": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 19706,
"buckets": [
{
"key": "1564695515",
"doc_count": 15
}
]
}
},
{
"key_as_string": "2018-10-28",
"key": 1540684800000,
"doc_count": 24976,
"per_count": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 24961,
"buckets": [
{
"key": "1772563447",
"doc_count": 15
}
]
}
}
]
}
},
{
"key": "14",
"doc_count": 27055,
"month_num": {
"buckets": [
{
"key_as_string": "2018-10-27",
"key": 1540598400000,
"doc_count": 11754,
"per_count": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 11740,
"buckets": [
{
"key": "5054956136",
"doc_count": 14
}
]
}
},
{
"key_as_string": "2018-10-28",
"key": 1540684800000,
"doc_count": 15301,
"per_count": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 15286,
"buckets": [
{
"key": "1690639232",
"doc_count": 15
}
]
}
}
]
}
}
]
}
}
}
获取多层聚合的数据api:
/**
 * Reads a two-level terms aggregation from the search response and stores the
 * numeric inner bucket keys (the Weibo ids) into {@code result}.
 *
 * The outer aggregation is looked up by the literal name "timeinterval" and the
 * inner one by the literal name "weiboIds". Only inner keys made entirely of
 * digits are kept.
 *
 * @param response search response that carries the aggregations
 * @param result   output object; receives the id list under the key read from
 *                 {@code message}, plus the "flag" and "status" entries
 * @param message  request parameters; its "timeinterval" value is used as the
 *                 key under which the id list is stored
 * @return always {@code true}
 */
private boolean handleresaggWeibo(SearchResponse response,JSONObject result,JSONObject message) {
    String timeinterval = message.getString("timeinterval");
    // Compiled once up front instead of once per inner bucket.
    Pattern numericId = Pattern.compile("^\\d+$");

    // Fetch the outer (per time slot) terms aggregation.
    Map<String, Aggregation> aggMap = response.getAggregations().asMap();
    StringTerms gradeTerms = (StringTerms) aggMap.get("timeinterval");

    for (Bucket gradeBucket : gradeTerms.getBuckets()) {
        System.out.println(gradeBucket.getKey() + "时间段共有 " + gradeBucket.getDocCount() +"个文档。");

        // Collect the digit-only keys of the inner terms aggregation, in bucket order.
        List<String> jsons = new ArrayList<>();
        StringTerms classTerms = (StringTerms) gradeBucket.getAggregations().asMap().get("weiboIds");
        for (Bucket classBucket : classTerms.getBuckets()) {
            String weiboid = classBucket.getKeyAsString();
            if (numericId.matcher(weiboid).matches()) {
                jsons.add(weiboid);
            }
        }

        // NOTE(review): every outer bucket writes under the same key, so only the
        // last bucket's list survives - confirm that a single outer bucket is expected.
        result.put(timeinterval, jsons);
        result.put("flag", "1");
        result.put("status", "返回列表成功");
    }
    return true;
}
获取sum聚合的数据api:
如:
{
"took": 77,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"failed": 0
},
"hits": {
"total": 597200,
"max_score": 0,
"hits": []
},
"aggregations": {
"per_count": {
"doc_count_error_upper_bound": 886,
"sum_other_doc_count": 112423,
"buckets": [
{
"key": "本人",
"doc_count": 233146,
"sum_view": {
"value": 105999
}
},
{
"key": "好文",
"doc_count": 11008,
"sum_view": {
"value": 2860
}
}
]
}
}
}
api:
// Turns each terms bucket into a JSON record (author, doc count, summed views)
// and appends its string form to jsons.
while(gradeBucketIt.hasNext()) {
    JSONObject json = new JSONObject();
    Bucket gradeBucket = gradeBucketIt.next();
    String author = (String) gradeBucket.getKey();
    long DocCount = gradeBucket.getDocCount();
    // The bucket carries a single sub-aggregation here, so it is taken by position.
    InternalSum view = (InternalSum) gradeBucket.getAggregations().asList().get(0);
    int viewNum = (int) view.getValue();
    json.put("author", author);
    json.put("docCount", DocCount);
    json.put("viewNum", viewNum);
    json.put("transfer", 0);
    // Fix: append the record just built; the original appended jsons.toString(),
    // i.e. the list's own string, and discarded json.
    jsons.add(json.toString());
}
同时对两个字段做sum聚合:
查询语句:
"aggs" : {
"per_count" : {
"terms" : {
"size" : 2,
"field" : "author"
},
"aggs" : {
"sum_view" : {
"sum" : {
"field" : "view"
}
},
"sum_transfer" : {
"sum" : {
"field" : "transfer"
}
}
}
}
}
运行结果:
{
"took": 91,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"failed": 0
},
"hits": {
"total": 599127,
"max_score": 0,
"hits": []
},
"aggregations": {
"per_count": {
"doc_count_error_upper_bound": 1015,
"sum_other_doc_count": 162154,
"buckets": [
{
"key": "本人",
"doc_count": 233890,
"sum_transfer": {
"value": 0
},
"sum_view": {
"value": 106418
}
},
{
"key": "腾讯",
"doc_count": 79699,
"sum_transfer": {
"value": 0
},
"sum_view": {
"value": 1179
}
}
]
}
}
}
查询api:
// Terms aggregation on "author" with two sum sub-aggregations ("view" and "transfer").
// NOTE(review): size(5) here differs from the "size" : 2 used in the DSL example above.
AggregationBuilder ggregationBuilder = AggregationBuilders.terms("per_count").field("author").size(5)
.subAggregation(AggregationBuilders.sum("sum_view").field("view"))
.subAggregation(AggregationBuilders.sum("sum_transfer").field("transfer"));
获取数据api:
// Turns each terms bucket into a JSON record (author, doc count, summed views,
// summed transfers) and appends its string form to jsons.
while(gradeBucketIt.hasNext()) {
    JSONObject json = new JSONObject();
    Bucket gradeBucket = gradeBucketIt.next();
    String author = (String) gradeBucket.getKey();
    long DocCount = gradeBucket.getDocCount();
    // With two sub-aggregations, each one is looked up by its name.
    InternalSum view = (InternalSum) gradeBucket.getAggregations().asMap().get("sum_view");
    InternalSum transfer = (InternalSum) gradeBucket.getAggregations().asMap().get("sum_transfer");
    int viewNum = (int) view.getValue();
    int transferNum = (int) transfer.getValue();
    json.put("author", author);
    json.put("docCount", DocCount);
    json.put("viewNum", viewNum);
    json.put("transfer", transferNum);
    // Fix: append the record just built; the original appended jsons.toString(),
    // i.e. the list's own string, and discarded json.
    jsons.add(json.toString());
}
es内置的分词器:
- standard analyzer
- simple analyzer
- whitespace analyzer
- language analyzer(特定的语言的分词器)
例句:Set the shape to semi-transparent by calling set_trans(5)
不同分词器的分词结果:
- standard analyzer:set, the, shape, to, semi, transparent, by, calling, set_trans, 5(默认的是standard)
- simple analyzer:set, the, shape, to, semi, transparent, by, calling, set, trans
- whitespace analyzer:Set, the, shape, to, semi-transparent, by, calling, set_trans(5)
- language analyzer(特定的语言的分词器,比如说,english,英语分词器):set, shape, semi, transpar, call, set_tran, 5
分词器测试:
GET /_analyze
{
"analyzer": "standard",
"text":"I love you"
}
结果:
{
"tokens": [
{
"token": "i",
"start_offset": 0,
"end_offset": 1,
"type": "<ALPHANUM>",
"position": 0
},
{
"token": "love",
"start_offset": 2,
"end_offset": 6,
"type": "<ALPHANUM>",
"position": 1
},
{
"token": "you",
"start_offset": 7,
"end_offset": 10,
"type": "<ALPHANUM>",
"position": 2
}
]
}
修改mapping:
1.删除索引:
DELETE hui
/**
 * Deletes the index named "indexName", but only after confirming that it exists.
 */
@Test
public void deleteIndex(){
    // Ask the cluster whether the index is present before attempting the delete.
    IndicesExistsResponse existsResponse = client.admin().indices()
            .exists(new IndicesExistsRequest("indexName"))
            .actionGet();
    if (!existsResponse.isExists()) {
        return;
    }
    client.admin().indices().prepareDelete("indexName").execute().actionGet();
}
2.创建索引:
PUT hui
3.创建mapping:
POST hui/my_type/_mapping
{
"my_type": {
"properties": {
"title": {
"type": "keyword",
"store": true
},
"date": {
"type": "date",
"format": "yyyy-MM-dd"
},
"content": {
"type": "text"
}
}
}
}
注:第2和3步可合并为:
PUT hui
{
"mappings": {
"my_type": {
"properties": {
"title": {
"type": "keyword",
"store": true
},
"date": {
"type": "date",
"format": "yyyy-MM-dd"
},
"content": {
"type": "text"
}
}
}
}
}
参考:https://www.elastic.co/guide/en/elasticsearch/reference/current/indices-put-mapping.html
4.mapping增加字段
(Elasticsearch的mapping一旦创建,只能增加字段,而不能修改已有字段的类型)
POST hui/my_type/_mapping
{
"my_type": {
"properties": {
"hui":{
"type": "text",
"store": true
}
}
}
}
5.修改mapping字段:
POST hui/my_type/_mapping
{
"my_type": {
"properties": {
"hui":{
"type": "integer"
}
}
}
}
报错:
{
"error": {
"root_cause": [
{
"type": "illegal_argument_exception",
"reason": "mapper [hui] of different type, current_type [text], merged_type [integer]"
}
],
"type": "illegal_argument_exception",
"reason": "mapper [hui] of different type, current_type [text], merged_type [integer]"
},
"status": 400
}
原因:
如果一个字段的类型修改以后,那么该字段的所有数据都需要重新索引。Elasticsearch底层使用的是lucene库,字段类型修改以后索引和搜索要涉及分词方式等操作,不允许修改类型是符合lucene机制的
字段的某些属性不可以改变,而有的可以改变。如store属性就不可以改变:
POST hui/my_type/_mapping
{
"my_type": {
"properties": {
"hui":{
"type": "text"
}
}
}
}
报错:
{
"error": {
"root_cause": [
{
"type": "remote_transport_exception",
"reason": "[0B7eiG0][192.168.0.1:9300][indices:admin/mapping/put]"
}
],
"type": "illegal_argument_exception",
"reason": "Mapper for [hui] conflicts with existing mapping in other types:\n[mapper [hui] has different [store] values]"
},
"status": 400
}
而fielddata属性就可以在已有字段上增加或者删除(动态开启或关闭):
POST hui/my_type/_mapping
{
"my_type": {
"properties": {
"hui":{
"type": "text",
"store": true,
"fielddata": true
}
}
}
}
重构索引:
1.创建索引hui,插入数据并设置别名:
PUT hui
POST hui/News/_mapping
{
"News": {
"properties": {
"hui":{
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
}
}
}
}
POST hui/News/1
{
"hui" : "hehe"
}
POST hui/_alias/xiao
2.创建索引qiang并设置新的mapping(数据由下一步的reindex导入):
PUT qiang
POST qiang/News/_mapping
{
"News": {
"properties": {
"hui":{
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
},
"store": true
}
}
}
}
3.开始执行重构索引命令:
POST _reindex
{
"source": {
"index": "hui"
},
"dest": {
"index": "qiang",
"version_type": "internal"
}
}
注:数据量大的时候会如下显示连接超时,但是却不影响功能实现(reindex会在服务端继续执行)。我重新导入了十九万的数据大约用了十多分钟。也可以在请求上加 ?wait_for_completion=false,让reindex作为后台任务执行并直接返回任务id,避免客户端超时
{
"statusCode": 504,
"error": "Gateway Timeout",
"message": "Client request timeout"
}
4.使用Task API查询进度:
GET _tasks?detailed=true&actions=*reindex
{
"nodes": {
"yFpET0TETpuWGCxxyodXmg": {
"name": "yFpET0T",
"transport_address": "192.168.0.100:9300",
"host": "192.168.0.100",
"ip": "192.168.0.100:9300",
"roles": [
"master",
"data",
"ingest"
],
"attributes": {
"ml.max_open_jobs": "10",
"ml.enabled": "true"
},
"tasks": {
"yFpET0TETpuWGCxxyodXmg:6319552": {
"node": "yFpET0TETpuWGCxxyodXmg",
"id": 6319552,
"type": "transport",
"action": "indices:data/write/reindex",
"status": {
"total": 194111,
"updated": 0,
"created": 50000,
"deleted": 0,
"batches": 51,
"version_conflicts": 0,
"noops": 0,
"retries": {
"bulk": 0,
"search": 0
},
"throttled_millis": 0,
"requests_per_second": -1,
"throttled_until_millis": 0
},
"description": "reindex from [mei_toutiao] to [mei_toutiao_v2]",
"start_time_in_millis": 1532338516013,
"running_time_in_nanos": 176981696219,
"cancellable": true
}
}
}
}
}
5.如果复制完成则显示:
{
"nodes": {}
}
6.别名转换:
POST /_aliases
{
"actions": [
{ "remove": {
"alias": "xiao",
"index": "hui"
}},
{ "add": {
"alias": "xiao",
"index": "qiang"
}}
]
}