PHP + Elasticsearch + IK: using a terms aggregation to generate hot-word statistics after setting the content field's analyzer to IK

Mapping for type 201038447 in index msg2017-04:

{
  "msg2017-04": {
    "mappings": {
      "201038447": {
        "properties": {
          "@timestamp": {
            "type": "date"
          },
          "content": {
            "type": "text",
            "boost": 8,
            "analyzer": "ik_smart",
            "include_in_all": true
          },
          "createTime": {
            "type": "date"
          }
        }
      }
    }
  }
}
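In this mapping, content is a text field analyzed with ik_smart, with no sub-fields and no fielddata setting. In Elasticsearch 5.x, fielddata is disabled on text fields by default, so a terms aggregation cannot bucket on the ik_smart tokens of content as mapped above. A minimal sketch of enabling it on the existing index and type (keeping in mind that fielddata lives on the JVM heap and can be expensive for large indices):

// Sketch only: enable fielddata on the analyzed content field so that
// terms aggregations can bucket on its ik_smart tokens. The field's
// existing parameters are repeated alongside the new fielddata flag.
PUT /msg2017-04/_mapping/201038447
{
  "properties": {
    "content": {
      "type": "text",
      "analyzer": "ik_smart",
      "boost": 8,
      "include_in_all": true,
      "fielddata": true
    }
  }
}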

Index settings:

{
  "msg2017-04": {
    "settings": {
      "index": {
        "creation_date": "1492398234434",
        "number_of_shards": "5",
        "number_of_replicas": "1",
        "uuid": "yiGoDhL1T3WLexG79e5uQg",
        "version": {
          "created": "5020299"
        },
        "provided_name": "msg2017-04"
      }
    }
  }
}

Environment:

Linux

Elasticsearch 5.2.2

IK analyzer plugin installed

Aggregation result:

// request
GET /msg2017-04/_search?pretty
{
  "size": 1,
  "aggs": {
    "fenci": {
      "terms": {
        "field": "content.ik_smart"
      }
    }
  }
}

// response
{
  "took": 2,
  "timed_out": false,
  "_shards": {
    "total": 5,
    "successful": 5,
    "failed": 0
  },
  "hits": {
    "total": 105,
    "max_score": 1,
    "hits": [
      {
        "_index": "msg2017-04",
        "_type": "7510570179@chatroom",
        "_id": "5067959408840553063",
        "_score": 1,
        "_source": {
          "wxid": "wxid_1idf7gf5jgh822",
          "msgId": "69",
          "msgSvrId": "5067959408840553063",
          "type": 0,
          "isSend": "1",
          "status": "2",
          "speakerId": "",
          "content": "rhh",
          "imei": "867464024215618",
          "room": "7510570179@chatroom",
          "roomName": "和湖光山色hzhzh",
          "roomOwner": "mikezhangsky",
          "roomMembers": "mikezhangsky;wxid_1idf7gf5jgh822;wxid_j56srpxywn5n22;wxid_90uy0wlz229e22;sun461629376",
          "roomSize": "5",
          "createTime": "2017-04-07T03:08:37",
          "@timestamp": "2017-04-17T03:14:15"
        }
      }
    ]
  },
  "aggregations": {
    "fenci": {
      "doc_count_error_upper_bound": 0,
      "sum_other_doc_count": 0,
      "buckets": []
    }
  }
}

I want to aggregate on the Chinese-tokenized content so that I can compute the hot words over a given time window in real time, similar to Weibo's trending search.
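Note that the aggregation above targets content.ik_smart, a sub-field that does not exist in the mapping, so Elasticsearch treats it as unmapped and returns empty buckets. Assuming fielddata has been enabled on content as sketched earlier, a hypothetical hot-word query over a time window (the dates and the size of 20 are placeholder values) might look like this:

// Sketch only: count the most frequent ik_smart tokens in content
// for documents whose createTime falls inside the chosen window.
GET /msg2017-04/_search?pretty
{
  "size": 0,
  "query": {
    "range": {
      "createTime": {
        "gte": "2017-04-01T00:00:00",
        "lte": "2017-04-17T00:00:00"
      }
    }
  },
  "aggs": {
    "hot_words": {
      "terms": {
        "field": "content",
        "size": 20
      }
    }
  }
}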
