[es-6.5.4] QueryCache/RequestCache

https://opensourceconnections.com/blog/2017/07/10/caching_in_elasticsearch/

零、数据准备

https://www.elastic.co/guide/en/kibana/7.1/tutorial-load-dataset.html

QueryCache 只会缓存 maxDoc 不小于 10000 的 segment(并且该 segment 的 maxDoc 占整个索引的比例不低于 3%,见下文 MinSegmentSizePredicate),因此测试用的索引需要写入 10000 条以上的 document

一、QueryCache

https://www.elastic.co/guide/en/elasticsearch/reference/6.5/query-cache.html

  1. The query cache only caches queries which are being used in a filter context. 只有在FilterContext里面的Query,才有可能被缓存
    1. // account.json
      
      // 位于 filter context,理论上可以被缓存;但实际不会,因为 TermQuery 永远不会被缓存(见下文 shouldNeverCache)
      curl -XGET "http://localhost:9200/bank/_search" -H 'Content-Type: application/json' -d'
      {
        "query": {
          "bool": {
            "filter": {
              "term": {
                "firstname.keyword": "Amber"
              }
            }
          }
        }
      }'
      
      
      // 不会被缓存:该 term 查询处于 query context,而不是 filter context
      curl -XGET "http://localhost:9200/bank/_search" -H 'Content-Type: application/json' -d'
      {
        "query": {
          "term": {
            "firstname.keyword": {
              "value": "Amber"
            }
          }
        }
      }'
      
      
      // 会被缓存:filter context 中的 RangeQuery(同一查询需要重复出现达到 minFrequencyToCache 的次数后才会真正写入缓存,见下文)
      curl -XGET "http://localhost:9200/bank2/_search" -H 'Content-Type: application/json' -d'
      {
        "query": {
          "bool": {
            "filter": {
              "range": {
                "age": {
                  "gte": 20,
                  "lte": 30
                }
              }
            }
          }
        }
      }'

       

  2. org.apache.lucene.search.LRUQueryCache.CachingWrapperWeight.bulkScorer(LeafReaderContext context)
    1.  segment 的 maxDoc 需要不小于 10000,并且占整个索引 maxDoc 的比例不低于 3%(见下面的 MinSegmentSizePredicate)
      1. public BulkScorer bulkScorer(LeafReaderContext context) throws IOException {
              // Returns a BulkScorer for the given segment: built from a cached DocIdSet
              // when one is found or can be created now, otherwise the uncached scorer of
              // the wrapped weight `in`. Returns null in the two "nothing to iterate"
              // cases near the end of the method.
              //
              // Report the query to the caching policy only the first time `used`
              // flips from false to true; later calls skip this.
              if (used.compareAndSet(false, true)) {
                policy.onUse(getQuery());
              }
        
              if (in.isCacheable(context) == false) {
                // this segment is not suitable for caching
                return in.bulkScorer(context);
              }
        
              // Short-circuit: Check whether this segment is eligible for caching
              // before we take a lock because of #get
              // NOTE(review): shouldCache(context) is defined outside this excerpt;
              // presumably it applies the segment-size checks quoted further down - confirm.
              if (shouldCache(context) == false) {
                return in.bulkScorer(context);
              }
        
              final IndexReader.CacheHelper cacheHelper = context.reader().getCoreCacheHelper();
              if (cacheHelper == null) {
                // this reader has no cacheHelper
                return in.bulkScorer(context);
              }
        
              // If the lock is already busy, prefer using the uncached version than waiting
              if (lock.tryLock() == false) {
                return in.bulkScorer(context);
              }
        
              // Look up an existing entry while holding the lock; the finally block
              // releases the lock whether or not the lookup throws.
              DocIdSet docIdSet;
              try {
                docIdSet = get(in.getQuery(), context, cacheHelper);
              } finally {
                lock.unlock();
              }
        
              // Cache miss: build and store an entry only if the policy approves this
              // query, otherwise fall back to the uncached scorer. The lock taken above
              // has already been released at this point.
              if (docIdSet == null) {
                if (policy.shouldCache(in.getQuery())) {
                  docIdSet = cache(context);
                  putIfAbsent(in.getQuery(), context, docIdSet, cacheHelper);
                } else {
                  return in.bulkScorer(context);
                }
              }
        
              assert docIdSet != null;
              // No scorer is returned (null) for the shared EMPTY set or when the set
              // yields no iterator.
              if (docIdSet == DocIdSet.EMPTY) {
                return null;
              }
              final DocIdSetIterator disi = docIdSet.iterator();
              if (disi == null) {
                return null;
              }
        
              // Wrap the cached iterator in a constant-score scorer with score 0f.
              return new DefaultBulkScorer(new ConstantScoreScorer(this, 0f, disi));
            }

         

        /**
         * Tells whether a worst-case (dense bit set) cache entry for a reader with
         * {@code maxDoc} documents is small enough compared to the cache's RAM budget.
         *
         * @param maxDoc document count used to size the worst-case entry (one bit per document)
         * @return true when five such entries would take less than {@code maxRamBytesUsed}
         */
        private boolean cacheEntryHasReasonableWorstCaseSize(int maxDoc) {
              // The worst-case (dense) is a bit set which needs one bit per document
              final long worstCaseRamUsage = maxDoc / 8;
              final long totalRamAvailable = maxRamBytesUsed;
              // Imagine the worst-case that a cache entry is large than the size of
              // the cache: not only will this entry be trashed immediately but it
              // will also evict all current entries from the cache. For this reason
              // we only cache on an IndexReader if we have available room for
              // 5 different filters on this reader to avoid excessive trashing
              return worstCaseRamUsage * 5 < totalRamAvailable;
            }
        /**
         * Delegates to the three-argument constructor, passing
         * {@code new MinSegmentSizePredicate(10000, .03f)} as the third argument
         * (minSize = 10000, minSizeRatio = 0.03).
         *
         * @param maxSize forwarded unchanged to the three-argument constructor
         * @param maxRamBytesUsed forwarded unchanged to the three-argument constructor
         */
        public LRUQueryCache(int maxSize, long maxRamBytesUsed) {
            this(maxSize, maxRamBytesUsed, new MinSegmentSizePredicate(10000, .03f));
          }
        
          // pkg-private for testing
          /**
           * Size-based predicate over leaf reader contexts (segments). A leaf passes
           * only when its maxDoc is at least {@code minSize} and its share of the
           * top-level reader's maxDoc is at least {@code minSizeRatio}.
           */
          static class MinSegmentSizePredicate implements Predicate<LeafReaderContext> {
            // Minimum maxDoc a leaf must have to pass.
            private final int minSize;
            // Minimum ratio of the leaf's maxDoc to the top-level reader's maxDoc.
            private final float minSizeRatio;
        
            MinSegmentSizePredicate(int minSize, float minSizeRatio) {
              this.minSize = minSize;
              this.minSizeRatio = minSizeRatio;
            }
        
            @Override
            public boolean test(LeafReaderContext context) {
              final int maxDoc = context.reader().maxDoc();
              // Absolute size check first: leaves below minSize are rejected outright.
              if (maxDoc < minSize) {
                return false;
              }
              // Relative size check: compare this leaf against the whole (top-level) reader.
              final IndexReaderContext topLevelContext = ReaderUtil.getTopLevelContext(context);
              // The float cast keeps the division from being an integer division.
              final float sizeRatio = (float) context.reader().maxDoc() / topLevelContext.reader().maxDoc();
              return sizeRatio >= minSizeRatio;
            }
          }

         

  3. org.apache.lucene.search.UsageTrackingQueryCachingPolicy.shouldNeverCache(Query query) Lucene-V7.5.0
    1. 不是所有的Query,都能被缓存,如TermQuery
      1. private static boolean shouldNeverCache(Query query) {
            // Returns true for the query types listed below, which are excluded from
            // caching regardless of usage, and false for everything else.
            if (query instanceof TermQuery) {
              // We do not bother caching term queries since they are already plenty fast.
              return true;
            }
        
            if (query instanceof MatchAllDocsQuery) {
              // MatchAllDocsQuery has an iterator that is faster than what a bit set could do.
              return true;
            }
        
            // For the below queries, it's cheap to notice they cannot match any docs so
            // we do not bother caching them.
            if (query instanceof MatchNoDocsQuery) {
              return true;
            }
        
            // Only a boolean query with no clauses is excluded here; a non-empty one
            // falls through to the checks below.
            if (query instanceof BooleanQuery) {
              BooleanQuery bq = (BooleanQuery) query;
              if (bq.clauses().isEmpty()) {
                return true;
              }
            }
        
            // Same rule for a disjunction-max query: excluded only when it has no disjuncts.
            if (query instanceof DisjunctionMaxQuery) {
              DisjunctionMaxQuery dmq = (DisjunctionMaxQuery) query;
              if (dmq.getDisjuncts().isEmpty()) {
                return true;
              }
            }
        
            return false;
          }

         

  4. org.apache.lucene.search.UsageTrackingQueryCachingPolicy.shouldCache(Query query)
    1. /**
         * For a given filter, return how many times it should appear in the history
         * before being cached. The default implementation returns 2 for filters that
         * need to evaluate against the entire index to build a {@link DocIdSetIterator},
         * like {@link MultiTermQuery}, point-based queries or {@link TermInSetQuery},
         * 4 for non-costly {@code BooleanQuery} and {@code DisjunctionMaxQuery}
         * instances, and 5 for other filters.
         *
         * @param query the filter whose caching threshold is requested
         * @return 2, 4 or 5 as described above
         */
        protected int minFrequencyToCache(Query query) {
          // isCostly is defined outside this excerpt; the Javadoc above describes
          // which queries count as costly.
          if (isCostly(query)) {
            return 2;
          } else {
            // default: cache after the filter has been seen 5 times
            int minFrequency = 5;
            if (query instanceof BooleanQuery
                || query instanceof DisjunctionMaxQuery) {
              // Say you keep reusing a boolean query that looks like "A OR B" and
              // never use the A and B queries out of that context. 5 times after it
              // has been used, we would cache both A, B and A OR B, which is
              // wasteful. So instead we cache compound queries a bit earlier so that
              // we would only cache "A OR B" in that case.
              minFrequency--;
            }
            return minFrequency;
          }
        }

       

  5. QueryCache状态
    1. http://localhost:9200/_nodes/stats/indices/query_cache
  6.  

二、RequestCache

转载于:https://my.oschina.net/u/204498/blog/3062929

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值