https://opensourceconnections.com/blog/2017/07/10/caching_in_elasticsearch/
零、数据准备
https://www.elastic.co/guide/en/kibana/7.1/tutorial-load-dataset.html
QueryCache只会缓存maxDoc不少于10000的Segment,所以测试用的Index需要写入10000个以上的document
一、QueryCache
https://www.elastic.co/guide/en/elasticsearch/reference/6.5/query-cache.html
- The query cache only caches queries which are being used in a filter context. 只有在FilterContext里面的Query,才有可能被缓存
-
# account.json
# Looks cacheable because it runs in a filter context, but in practice it is
# not cached: a TermQuery is never cached.
curl -XGET "http://localhost:9200/bank/_search" \
  -H 'Content-Type: application/json' \
  -d' { "query": { "bool": { "filter": { "term": { "firstname.keyword": "Amber" } } } } }'

# Not cached.
curl -XGET "http://localhost:9200/bank/_search" \
  -H 'Content-Type: application/json' \
  -d' { "query": { "term": { "firstname.keyword": { "value": "Amber" } } } }'

# Cached: RangeQuery.
curl -XGET "http://localhost:9200/bank2/_search" \
  -H 'Content-Type: application/json' \
  -d' { "query": { "bool": { "filter": { "range": { "age": { "gte": 20, "lte": 30 } } } } } }'
-
- org.apache.lucene.search.LRUQueryCache.CachingWrapperWeight.bulkScorer(LeafReaderContext context)
- 单个Segment的maxDoc需要不少于10000,并且不低于整个Index的maxDoc的3%(见下方的MinSegmentSizePredicate)
-
public BulkScorer bulkScorer(LeafReaderContext context) throws IOException { if (used.compareAndSet(false, true)) { policy.onUse(getQuery()); } if (in.isCacheable(context) == false) { // this segment is not suitable for caching return in.bulkScorer(context); } // Short-circuit: Check whether this segment is eligible for caching // before we take a lock because of #get if (shouldCache(context) == false) { return in.bulkScorer(context); } final IndexReader.CacheHelper cacheHelper = context.reader().getCoreCacheHelper(); if (cacheHelper == null) { // this reader has no cacheHelper return in.bulkScorer(context); } // If the lock is already busy, prefer using the uncached version than waiting if (lock.tryLock() == false) { return in.bulkScorer(context); } DocIdSet docIdSet; try { docIdSet = get(in.getQuery(), context, cacheHelper); } finally { lock.unlock(); } if (docIdSet == null) { if (policy.shouldCache(in.getQuery())) { docIdSet = cache(context); putIfAbsent(in.getQuery(), context, docIdSet, cacheHelper); } else { return in.bulkScorer(context); } } assert docIdSet != null; if (docIdSet == DocIdSet.EMPTY) { return null; } final DocIdSetIterator disi = docIdSet.iterator(); if (disi == null) { return null; } return new DefaultBulkScorer(new ConstantScoreScorer(this, 0f, disi)); }
private boolean cacheEntryHasReasonableWorstCaseSize(int maxDoc) { // The worst-case (dense) is a bit set which needs one bit per document final long worstCaseRamUsage = maxDoc / 8; final long totalRamAvailable = maxRamBytesUsed; // Imagine the worst-case that a cache entry is large than the size of // the cache: not only will this entry be trashed immediately but it // will also evict all current entries from the cache. For this reason // we only cache on an IndexReader if we have available room for // 5 different filters on this reader to avoid excessive trashing return worstCaseRamUsage * 5 < totalRamAvailable; }
/**
 * Creates a cache that, by default, only considers segments accepted by a
 * {@link MinSegmentSizePredicate} with a minimum size of 10000 documents and a
 * minimum ratio of 3% of the top-level reader.
 *
 * @param maxSize         forwarded unchanged to the three-argument constructor
 * @param maxRamBytesUsed forwarded unchanged to the three-argument constructor
 */
public LRUQueryCache(int maxSize, long maxRamBytesUsed) {
  this(maxSize, maxRamBytesUsed, new MinSegmentSizePredicate(10000, 0.03f));
}

/**
 * Accepts a segment only if it is large enough both in absolute terms
 * ({@code maxDoc >= minSize}) and relative to the top-level reader
 * ({@code maxDoc / topLevelMaxDoc >= minSizeRatio}).
 */
// pkg-private for testing
static class MinSegmentSizePredicate implements Predicate<LeafReaderContext> {

  /** Minimum {@code maxDoc} a segment must have. */
  private final int minSize;

  /** Minimum share of the top-level reader's {@code maxDoc} a segment must hold. */
  private final float minSizeRatio;

  MinSegmentSizePredicate(int minSize, float minSizeRatio) {
    this.minSize = minSize;
    this.minSizeRatio = minSizeRatio;
  }

  /**
   * @param context the segment to evaluate
   * @return {@code true} if the segment satisfies both the absolute and the
   *         relative size requirement
   */
  @Override
  public boolean test(LeafReaderContext context) {
    final int segmentDocs = context.reader().maxDoc();
    if (segmentDocs >= minSize) {
      // Big enough in absolute terms; now compare against the whole index.
      final IndexReaderContext root = ReaderUtil.getTopLevelContext(context);
      final float share = (float) context.reader().maxDoc() / root.reader().maxDoc();
      return share >= minSizeRatio;
    }
    return false;
  }
}
-
- 单个Segment的maxDoc需要不少于10000,并且不低于整个Index的maxDoc的3%(见上方的MinSegmentSizePredicate)
- org.apache.lucene.search.UsageTrackingQueryCachingPolicy.shouldNeverCache(Query query) Lucene-V7.5.0
- 不是所有的Query,都能被缓存,如TermQuery
-
private static boolean shouldNeverCache(Query query) { if (query instanceof TermQuery) { // We do not bother caching term queries since they are already plenty fast. return true; } if (query instanceof MatchAllDocsQuery) { // MatchAllDocsQuery has an iterator that is faster than what a bit set could do. return true; } // For the below queries, it's cheap to notice they cannot match any docs so // we do not bother caching them. if (query instanceof MatchNoDocsQuery) { return true; } if (query instanceof BooleanQuery) { BooleanQuery bq = (BooleanQuery) query; if (bq.clauses().isEmpty()) { return true; } } if (query instanceof DisjunctionMaxQuery) { DisjunctionMaxQuery dmq = (DisjunctionMaxQuery) query; if (dmq.getDisjuncts().isEmpty()) { return true; } } return false; }
-
- 不是所有的Query,都能被缓存,如TermQuery
- org.apache.lucene.search.UsageTrackingQueryCachingPolicy.shouldCache(Query query)
-
/** * For a given filter, return how many times it should appear in the history * before being cached. The default implementation returns 2 for filters that * need to evaluate against the entire index to build a {@link DocIdSetIterator}, * like {@link MultiTermQuery}, point-based queries or {@link TermInSetQuery}, * and 5 for other filters. */ protected int minFrequencyToCache(Query query) { if (isCostly(query)) { return 2; } else { // default: cache after the filter has been seen 5 times int minFrequency = 5; if (query instanceof BooleanQuery || query instanceof DisjunctionMaxQuery) { // Say you keep reusing a boolean query that looks like "A OR B" and // never use the A and B queries out of that context. 5 times after it // has been used, we would cache both A, B and A OR B, which is // wasteful. So instead we cache compound queries a bit earlier so that // we would only cache "A OR B" in that case. minFrequency--; } return minFrequency; } }
-
- QueryCache状态
二、RequestCache