[es-6.5.4] QueryCache/RequestCache

最新推荐文章于 2023-10-09 15:59:36 发布

chunji6513

最新推荐文章于 2023-10-09 15:59:36 发布

阅读量239

点赞数

原文链接：https://my.oschina.net/u/204498/blog/3062929

版权

https://opensourceconnections.com/blog/2017/07/10/caching_in_elasticsearch/

零、数据准备

https://www.elastic.co/guide/en/kibana/7.1/tutorial-load-dataset.html

QueryCache 只会缓存 maxDoc 不小于 10000 的 segment，因此需要准备 10000 条以上的 document

一、QueryCache

https://www.elastic.co/guide/en/elasticsearch/reference/6.5/query-cache.html

The query cache only caches queries which are being used in a filter context. 只有在FilterContext里面的Query，才有可能被缓存

// account.json

// 可能会缓存(实际不会，TermQuery是不会被缓存的)
curl -XGET "http://localhost:9200/bank/_search" -H 'Content-Type: application/json' -d'
{
  "query": {
    "bool": {
      "filter": {
        "term": {
          "firstname.keyword": "Amber"
        }
      }
    }
  }
}'


// 不会被缓存
curl -XGET "http://localhost:9200/bank/_search" -H 'Content-Type: application/json' -d'
{
  "query": {
    "term": {
      "firstname.keyword": {
        "value": "Amber"
      }
    }
  }
}'


// 会被缓存，RangeQuery
curl -XGET "http://localhost:9200/bank2/_search" -H 'Content-Type: application/json' -d'
{
  "query": {
    "bool": {
      "filter": {
        "range": {
          "age": {
            "gte": 20,
            "lte": 30
          }
        }
      }
    }
  }
}'

org.apache.lucene.search.LRUQueryCache.CachingWrapperWeight.bulkScorer(LeafReaderContext context)

Segment 的 maxDoc 需要不小于 10000，并且占整个索引 maxDoc 的比例不低于 3%，才有可能被缓存（见下面的 MinSegmentSizePredicate）

/**
 * Returns a bulk scorer for one segment, serving it from the query cache when
 * a cached doc id set is available and falling back to the wrapped weight
 * {@code in} otherwise.
 *
 * <p>The uncached scorer of {@code in} is returned when: the wrapped weight is
 * not cacheable on this segment, {@code shouldCache(context)} rejects the
 * segment, the reader exposes no core cache helper, the cache lock is
 * currently held by someone else, or there is no cached entry and the policy
 * declines to cache the query.
 *
 * @param context the segment (leaf reader) to score
 * @return a constant-score bulk scorer over the cached doc ids, the wrapped
 *         weight's own bulk scorer, or {@code null} when the cached set is
 *         {@code DocIdSet.EMPTY} or yields a {@code null} iterator
 * @throws IOException declared by the signature; presumably raised by the
 *         wrapped weight or while building the cache entry (confirm in callers)
 */
public BulkScorer bulkScorer(LeafReaderContext context) throws IOException {
      // Report the query to the caching policy only once per weight: the
      // compare-and-set succeeds for the first caller only.
      if (used.compareAndSet(false, true)) {
        policy.onUse(getQuery());
      }

      if (in.isCacheable(context) == false) {
        // this segment is not suitable for caching
        return in.bulkScorer(context);
      }

      // Short-circuit: Check whether this segment is eligible for caching
      // before we take a lock because of #get
      if (shouldCache(context) == false) {
        return in.bulkScorer(context);
      }

      final IndexReader.CacheHelper cacheHelper = context.reader().getCoreCacheHelper();
      if (cacheHelper == null) {
        // this reader has no cacheHelper
        return in.bulkScorer(context);
      }

      // If the lock is already busy, prefer using the uncached version than waiting
      if (lock.tryLock() == false) {
        return in.bulkScorer(context);
      }

      // Only the lookup runs under the lock; the finally block releases it
      // even if the lookup throws.
      DocIdSet docIdSet;
      try {
        docIdSet = get(in.getQuery(), context, cacheHelper);
      } finally {
        lock.unlock();
      }

      // Nothing came back from the lookup: let the policy decide whether to
      // build an entry now. Note that the lock taken above has already been
      // released here. NOTE(review): cache(...) presumably computes the doc id
      // set for this segment and putIfAbsent(...) stores it - confirm in the
      // enclosing class.
      if (docIdSet == null) {
        if (policy.shouldCache(in.getQuery())) {
          docIdSet = cache(context);
          putIfAbsent(in.getQuery(), context, docIdSet, cacheHelper);
        } else {
          return in.bulkScorer(context);
        }
      }

      assert docIdSet != null;
      // The shared EMPTY instance means there is nothing to score on this segment.
      if (docIdSet == DocIdSet.EMPTY) {
        return null;
      }
      final DocIdSetIterator disi = docIdSet.iterator();
      if (disi == null) {
        return null;
      }

      // Wrap the doc id iterator in a scorer with a constant score of 0.
      return new DefaultBulkScorer(new ConstantScoreScorer(this, 0f, disi));
    }

/**
 * Tells whether a cache entry for a reader with {@code maxDoc} documents would
 * still be reasonably small compared to the cache's RAM budget.
 *
 * <p>An entry that is larger than the whole cache would be evicted right away
 * and would also push out every other entry. To avoid that kind of thrashing,
 * the check requires room for 5 worst-case entries on this reader.
 *
 * @param maxDoc number of documents of the reader under consideration
 * @return {@code true} if 5 worst-case entries are strictly smaller than
 *         {@code maxRamBytesUsed}
 */
private boolean cacheEntryHasReasonableWorstCaseSize(int maxDoc) {
      // Worst case (dense) is a bit set: one bit per document, so maxDoc / 8 bytes.
      final long denseEntryBytes = maxDoc / 8;
      return 5 * denseEntryBytes < maxRamBytesUsed;
    }

/**
 * Creates a cache with the default segment filter: delegates to the
 * three-argument constructor with a {@code MinSegmentSizePredicate} configured
 * with a minimum size of 10000 and a minimum size ratio of 0.03.
 *
 * @param maxSize         forwarded unchanged to the three-argument constructor
 * @param maxRamBytesUsed forwarded unchanged to the three-argument constructor
 */
public LRUQueryCache(int maxSize, long maxRamBytesUsed) {
    this(
        maxSize,
        maxRamBytesUsed,
        new MinSegmentSizePredicate(10000, 0.03f));
  }

  // pkg-private for testing
  /**
   * Predicate that accepts a segment only if it is big enough, both in absolute
   * terms ({@code maxDoc >= minSize}) and relative to the top-level reader
   * ({@code segment maxDoc / top-level maxDoc >= minSizeRatio}).
   */
  static class MinSegmentSizePredicate implements Predicate<LeafReaderContext> {
    private final int minSize;
    private final float minSizeRatio;

    /**
     * @param minSize      minimum {@code maxDoc} a segment must have
     * @param minSizeRatio minimum share of the top-level reader's {@code maxDoc}
     *                     the segment must hold
     */
    MinSegmentSizePredicate(int minSize, float minSizeRatio) {
      this.minSize = minSize;
      this.minSizeRatio = minSizeRatio;
    }

    /**
     * @param context the segment to check
     * @return {@code true} when the segment passes both the absolute and the
     *         relative size threshold
     */
    @Override
    public boolean test(LeafReaderContext context) {
      final int segmentDocs = context.reader().maxDoc();
      if (segmentDocs >= minSize) {
        // Large enough in absolute terms; now compare against the whole index.
        final IndexReaderContext root = ReaderUtil.getTopLevelContext(context);
        final float share = (float) context.reader().maxDoc() / root.reader().maxDoc();
        return share >= minSizeRatio;
      }
      return false;
    }
  }

org.apache.lucene.search.UsageTrackingQueryCachingPolicy.shouldNeverCache(Query query) Lucene-V7.5.0

不是所有的Query，都能被缓存，如TermQuery

/**
 * Tells whether the given query must never be cached, regardless of how often
 * it is used.
 *
 * @param query the query to inspect
 * @return {@code true} for term queries, match-all and match-no-docs queries,
 *         and for boolean or disjunction-max queries without any clause
 */
private static boolean shouldNeverCache(Query query) {
    // Term queries are already plenty fast, match-all has an iterator that is
    // faster than what a bit set could do, and match-no-docs trivially matches
    // nothing, so none of them is worth a cache entry.
    if (query instanceof TermQuery
        || query instanceof MatchAllDocsQuery
        || query instanceof MatchNoDocsQuery) {
      return true;
    }

    // Compound queries without clauses: it is cheap to notice they cannot
    // match any docs, so they are not cached either.
    final boolean emptyBoolean =
        query instanceof BooleanQuery && ((BooleanQuery) query).clauses().isEmpty();
    final boolean emptyDisMax =
        query instanceof DisjunctionMaxQuery
            && ((DisjunctionMaxQuery) query).getDisjuncts().isEmpty();
    return emptyBoolean || emptyDisMax;
  }

org.apache.lucene.search.UsageTrackingQueryCachingPolicy.shouldCache(Query query)

/**
   * Returns how many times a filter has to show up in the usage history
   * before it gets cached.
   *
   * <p>This implementation returns 2 when {@code isCostly(query)} is true,
   * 4 for {@link BooleanQuery} and {@link DisjunctionMaxQuery}, and 5 for
   * every other query.
   *
   * @param query the filter to look at
   * @return the minimum number of uses before the filter is cached
   */
  protected int minFrequencyToCache(Query query) {
    if (isCostly(query)) {
      return 2;
    }

    // Say a compound query "A OR B" keeps being reused while A and B are never
    // used on their own. With a single threshold, A, B and "A OR B" would all
    // get cached at the same time, which is wasteful. Caching compound queries
    // one use earlier means only "A OR B" ends up cached in that case.
    final boolean compound =
        query instanceof BooleanQuery || query instanceof DisjunctionMaxQuery;
    return compound ? 4 : 5;
  }

QueryCache状态
1. http://localhost:9200/_nodes/stats/indices/query_cache

二、RequestCache

转载于:https://my.oschina.net/u/204498/blog/3062929

chunji6513

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
[es-6.5.4] QueryCache/RequestCache

https://opensourceconnections.com/blog/2017/07/10/caching_in_elasticsearch/ 零、数据准备 https://www.elastic.co/guide/en/kibana/7.1/tutorial-load-datas...
复制链接

扫一扫