elasticsearch查询去重

实现查询去重、分页

例如:实现依据qid去重,createTime排序

DSL:

[html]  view plain  copy
  1. GET  /nb_luban_answer/_search  
  2. {  
  3.   "query": {  
  4.     "match": {  
  5.       "status": 1  
  6.     }  
  7.   },  
  8.   "sort": [  
  9.     {  
  10.       "createTime": {  
  11.         "order": "desc"  
  12.       }  
  13.     }  
  14.   ],"aggs": {  
  15.     "qid": {  
  16.       "terms": {  
  17.         "field": "qid",  
  18.         "size": 10  
  19.       },"aggs": {  
  20.         "rated": {  
  21.           "top_hits": {  
  22.             "sort": [{  
  23.               "createTime": {"order": "desc"}  
  24.             }],   
  25.             "size": 1  
  26.           }  
  27.         }  
  28.       }  
  29.     }  
  30.   },   
  31.   "size": 0,  
  32.   "from": 0  
  33. }   

执行的结果:

[java]  view plain  copy
  1. {  
  2.   "_shards": {  
  3.     "total": 4,  
  4.     "failed": 0,  
  5.     "successful": 4  
  6.   },  
  7.   "hits": {  
  8.     "hits": [],  
  9.     "total": 4,  
  10.     "max_score": 0  
  11.   },  
  12.   "took": 4,  
  13.   "timed_out": false,  
  14.   "aggregations": {  
  15.     "qid": {  
  16.       "doc_count_error_upper_bound": 0,  
  17.       "sum_other_doc_count": 0,  
  18.       "buckets": [  
  19.         {  
  20.           "rated": {  
  21.             "hits": {  
  22.               "hits": [  
  23.                 {  
  24.                   "_index": "nb_luban_answer",  
  25.                   "_type": "luban_answer",  
  26.                   "_source": {  
  27.                     "img": "{\r\n  \"img1\" : \"11111'><SCRIPT>alert(11111)</SCRIPT>'\",\r\n  \"img2\" : \"2222'><SCRIPT>alert(11111)</SCRIPT>'\",\r\n  \"img3\" : \"33333'><SCRIPT>alert(11111)</SCRIPT>'\"\r\n}",  
  28.                     "pin": "motai869",  
  29.                     "createTime": "2017-03-09 00:00:00",  
  30.                     "id": "SN-0000001792-9-1",  
  31.                     "tableNameSuffix": "1",  
  32.                     "qid": "SN-0000001536-9-3",  
  33.                     "content": "888'><SCRIPT>alert(11111)</SCRIPT>'",  
  34.                     "status": 1  
  35.                   },  
  36.                   "_id": "SN-0000001792-9-1",  
  37.                   "sort": [  
  38.                     1489017600000  
  39.                   ],  
  40.                   "_score": null  
  41.                 }  
  42.               ],  
  43.               "total": 2,  
  44.               "max_score": null  
  45.             }  
  46.           },  
  47.           "doc_count": 2,  
  48.           "key": "SN-0000001536-9-3"  
  49.         },  
  50.         {  
  51.           "rated": {  
  52.             "hits": {  
  53.               "hits": [  
  54.                 {  
  55.                   "_index": "nb_luban_answer",  
  56.                   "_type": "luban_answer",  
  57.                   "_source": {  
  58.                     "img": "{\r\n  \"img1\" : \"jfs/t3184/361/7557351412/2685/17143f65/58b91982N4e71a5d8.jpg\",\r\n  \"img2\" : \"jfs/t3184/361/7557351412/2685/17143f65/58b91982N4e71a5d8.jpg\",\r\n  \"img3\" : \"jfs/t3184/361/7557351412/2685/17143f65/58b91982N4e71a5d8.jpg\"\r\n}",  
  59.                     "pin": "motai869",  
  60.                     "createTime": "2017-03-03 00:00:00",  
  61.                     "id": "SN-0000001280-9-3",  
  62.                     "tableNameSuffix": "3",  
  63.                     "qid": "SN-0000000256-0-3",  
  64.                     "content": "hahaha",  
  65.                     "status": 1  
  66.                   },  
  67.                   "_id": "SN-0000001280-9-3",  
  68.                   "sort": [  
  69.                     1488499200000  
  70.                   ],  
  71.                   "_score": null  
  72.                 }  
  73.               ],  
  74.               "total": 1,  
  75.               "max_score": null  
  76.             }  
  77.           },  
  78.           "doc_count": 1,  
  79.           "key": "SN-0000000256-0-3"  
  80.         },  
  81.         {  
  82.           "rated": {  
  83.             "hits": {  
  84.               "hits": [  
  85.                 {  
  86.                   "_index": "nb_luban_answer",  
  87.                   "_type": "luban_answer",  
  88.                   "_source": {  
  89.                     "pin": "motai869",  
  90.                     "createTime": "2017-03-02 00:00:00",  
  91.                     "id": "SN-0000208128-8-1",  
  92.                     "tableNameSuffix": "1",  
  93.                     "qid": "SN-0000207872-9-3",  
  94.                     "content": "hahaha",  
  95.                     "status": 1  
  96.                   },  
  97.                   "_id": "SN-0000208128-8-1",  
  98.                   "sort": [  
  99.                     1488412800000  
  100.                   ],  
  101.                   "_score": null  
  102.                 }  
  103.               ],  
  104.               "total": 1,  
  105.               "max_score": null  
  106.             }  
  107.           },  
  108.           "doc_count": 1,  
  109.           "key": "SN-0000207872-9-3"  
  110.         }  
  111.       ]  
  112.     }  
  113.   }  
  114. }  

JAVA API:

[html]  view plain  copy
  1. String indexName="nb_luban_answer";  
  2.         String typeName="luban_answer";  
  3.   
  4.         AggregationBuilder aggregation =  
  5.                 AggregationBuilders  
  6.                         .terms("agg").field("qid")  
  7.                         .subAggregation(  
  8.                                 AggregationBuilders.topHits("top").addSort("createTime",SortOrder.DESC).setSize(1)  
  9.                         );  
  10.   
  11.         SearchResponse sResponse = Tool.CLIENT.prepareSearch(indexName).setTypes(typeName)  
  12.                 .setQuery(QueryBuilders.matchQuery("status",1))  
  13.                 .addSort("createTime", SortOrder.DESC)  
  14.                 .addAggregation(aggregation)  
  15.                 .execute().actionGet();  
  16.         Terms agg = sResponse.getAggregations().get("agg");  
  17.         for (Terms.Bucket entry : agg.getBuckets()) {  
  18.             String key = String.valueOf(entry.getKey()) ;  
  19.             long docCount = entry.getDocCount();  
  20.             System.out.println("key:"+ key +" doc_count:"+ docCount);  
  21.   
  22.             TopHits topHits = entry.getAggregations().get("top");  
  23.             for (SearchHit hit : topHits.getHits()){  
  24.                 System.out.println(" -> id: "+ hit.getId()+" createTime: "+hit.getSource().get("createTime"));  
  25.             }  
  26.         }  


ES中所有数据:


参考:《Elasticsearch 5.x 字段折叠的使用》(点击打开链接)

评论 2
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值