solr中的cache的实现原理

最新推荐文章于 2020-03-09 14:41:19 发布

iteye_14612

最新推荐文章于 2020-03-09 14:41:19 发布

阅读量286

点赞数

分类专栏： solr 文章标签： solr cache 缓存

本文链接：https://blog.csdn.net/iteye_14612/article/details/82680579

版权

solr 专栏收录该内容

21 篇文章 0 订阅

订阅专栏

先说一下，我使用的版本是5.5.3

搭建过solr的人肯定对solrconfig.xml不陌生，在<query></query>中有多个cache，比如filterCache、queryResultCache、documentCache。这篇博客就是介绍这三个cache的含义、配置以及它们的使用。

我们直接看代码，这三个cache的使用都在SolrIndexSearcher中，它有下面的属性：

private final boolean cachingEnabled;//这个indexSearcher是否使用缓存

private final SolrCache<Query,DocSet> filterCache;对应于filterCache

private final SolrCache<QueryResultKey,DocList> queryResultCache;//对应于queryResultCache

private final SolrCache<Integer,Document> documentCache;//对应于documentCache

private final SolrCache<String,UnInvertedField> fieldValueCache;//这个稍后再说

在SolrIndexSearcher的构造方法中可以发现对上面的几个cache的赋值：

 // Excerpt from the SolrIndexSearcher constructor: instantiates every cache that has a
 // configuration object and collects the created caches in clist / cacheMap.
 if (cachingEnabled) {// the article notes caching is enabled by default, so this branch is normally taken
      ArrayList<SolrCache> clist = new ArrayList<>();
      fieldValueCache = solrConfig.fieldValueCacheConfig==null ? null : solrConfig.fieldValueCacheConfig.newInstance();// create fieldValueCache only when a config for it exists
      if (fieldValueCache!=null) clist.add(fieldValueCache);
      filterCache= solrConfig.filterCacheConfig==null ? null : solrConfig.filterCacheConfig.newInstance();// create filterCache only when a config for it exists
      if (filterCache!=null) clist.add(filterCache);
      queryResultCache = solrConfig.queryResultCacheConfig==null ? null : solrConfig.queryResultCacheConfig.newInstance();// create queryResultCache only when a config for it exists
      if (queryResultCache!=null) clist.add(queryResultCache);
      documentCache = solrConfig.documentCacheConfig==null ? null : solrConfig.documentCacheConfig.newInstance();// create documentCache only when a config for it exists
      if (documentCache!=null) clist.add(documentCache);

      if (solrConfig.userCacheConfigs == null) {// additional user-defined caches can be configured too (the article author has not tried them)
        cacheMap = noGenericCaches;
      } else {
        cacheMap = new HashMap<>(solrConfig.userCacheConfigs.length);
        for (CacheConfig userCacheConfig : solrConfig.userCacheConfigs) {
          SolrCache cache = null;
          if (userCacheConfig != null) cache = userCacheConfig.newInstance();
          if (cache != null) {
            // user caches are looked up by name, and are also tracked in clist like the built-in ones
            cacheMap.put(cache.name(), cache);
            clist.add(cache);
          }
        }
      }

      // snapshot of every cache created above, built-in and user-defined
      cacheList = clist.toArray(new SolrCache[clist.size()]);
    }

通过上面的代码可以发现，如果在solrconfig.xml中配置了对应的cache，就会在SolrIndexSearcher中创建对应的cache。

solrconfig.xml中cache的实现原理：

我们以<filterCache class="solr.FastLRUCache" size="512" initialSize="512" autowarmCount="0"/>这个为例：创建这个cache的实现类是FastLRUCache，它的实现原理就是封装了ConcurrentHashMap，最多可以存放512个缓存的key，初始大小为512个，autowarmCount这个稍后再说。在solr中默认有两个cache实现，一个是刚才说的FastLRUCache，还有一个是LRUCache，它的实现原理是LinkedHashMap+同步，很明显这个的性能要比前一个差一些，所以可以将LRUCache都换为FastLRUCache。不过这两个cache都是基于lru算法的，貌似也不适合我们的需求，最好是lfu的，所以可以通过改变这些配置，使用一个基于lfu算法的cache，当然这个不是这篇博客的内容。我们先看一下这个FastLRUCache的实现：

1、初始化cache的方法：

/**
 * Initializes this cache from the attributes of its solrconfig.xml element.
 *
 * <p>Recognized keys in {@code args} (all values are strings): {@code size},
 * {@code minSize}, {@code acceptableSize}, {@code initialSize},
 * {@code cleanupThread} and {@code showItems}.
 *
 * @param args        the configuration attributes, e.g. size=512
 * @param persistence the statistics list returned by an earlier {@code init} call,
 *                    or {@code null} when no such list exists yet
 * @param regenerator handed to {@code super.init} unchanged
 * @return the statistics list, including the stats of the cache created here
 */
public Object init(Map args, Object persistence, CacheRegenerator regenerator) {
  // Per the article, super.init records the cache name (available via name()) and the
  // configured autowarm count, which may be a percentage or an absolute number.
  super.init(args, regenerator);

  // Upper bound on the number of cached entries; defaults to 1024.
  String str = (String) args.get("size");
  int limit = str == null ? 1024 : Integer.parseInt(str);

  // Lower bound; defaults to 90% of the upper bound.
  int minLimit;
  str = (String) args.get("minSize");
  if (str == null) {
    minLimit = (int) (limit * 0.9);
  } else {
    minLimit = Integer.parseInt(str);
  }
  if (minLimit == 0) {
    minLimit = 1;
  }
  // ConcurrentLRUCache rejects lowerWaterMark >= upperWaterMark, so keep limit strictly larger.
  if (limit <= minLimit) {
    limit = minLimit + 1;
  }

  // Entry count regarded as acceptable after a cleanup; defaults to 95% of the upper bound.
  int acceptableLimit;
  str = (String) args.get("acceptableSize");
  if (str == null) {
    acceptableLimit = (int) (limit * 0.95);
  } else {
    acceptableLimit = Integer.parseInt(str);
  }
  // acceptable limit should be somewhere between minLimit and limit
  acceptableLimit = Math.max(minLimit, acceptableLimit);

  // Initial capacity of the backing ConcurrentHashMap; defaults to the upper bound.
  str = (String) args.get("initialSize");
  final int initialSize = str == null ? limit : Integer.parseInt(str);

  // cleanupThread is passed below as the runCleanupThread constructor argument: true starts a
  // dedicated eviction thread, false (the default; parseBoolean(null) is false) leaves eviction
  // to the thread that performs the put.
  str = (String) args.get("cleanupThread");
  boolean newThread = Boolean.parseBoolean(str);

  // Number of cached items listed by getStatistics(): 0 = none, -1 = all.
  str = (String) args.get("showItems");
  showItems = str == null ? 0 : Integer.parseInt(str);

  // Human-readable summary of the effective settings.
  description = generateDescription(limit, initialSize, minLimit, acceptableLimit, newThread);

  // The real cache; runNewThreadForCleanup is fixed to false and no eviction listener is used.
  cache = new ConcurrentLRUCache<>(limit, minLimit, acceptableLimit, initialSize, newThread, false, null);
  cache.setAlive(false);

  // The excerpt originally stopped here without returning a value. The remainder keeps the
  // statistics list that getStatistics() iterates over and hands it back to the caller.
  statsList = (java.util.List<ConcurrentLRUCache.Stats>) persistence;
  if (statsList == null) {
    // First cache of this kind: a CopyOnWriteArrayList suits rare writes and frequent iteration.
    statsList = new java.util.concurrent.CopyOnWriteArrayList<>();
    // Presumably the first entry accumulates the stats of caches that were already closed.
    statsList.add(new ConcurrentLRUCache.Stats());
  }
  statsList.add(cache.getStats());
  return statsList;
}

在FastLRUCache中还有put、get、clear这些显而易见的方法（对生成的ConcurrentLRUCache对象操作），另外还有一个warm方法比较重要，我专门在一篇博客中写它的作用。

接下来我们进入到ConcurrentLRUCache类中，看看他的实现。

 /**
  * Creates a cache backed by a ConcurrentHashMap.
  *
  * @param upperWaterMark         maximum number of entries; must be at least 1
  * @param lowerWaterMark         lower bound used during eviction; must be below upperWaterMark
  * @param acceptableWatermark    entry count regarded as acceptable after a cleanup
  * @param initialSize            initial capacity of the backing map
  * @param runCleanupThread       when true, a dedicated cleanup thread is created and started here
  * @param runNewThreadForCleanup when true, cleanup is run on a newly created thread
  * @param evictionListener       eviction callback; FastLRUCache passes null
  * @throws IllegalArgumentException if the water marks violate the constraints above
  */
 public ConcurrentLRUCache(int upperWaterMark, final int lowerWaterMark, int acceptableWatermark,
                            int initialSize, boolean runCleanupThread, boolean runNewThreadForCleanup,
                            EvictionListener<K,V> evictionListener) {
    // Validate the bounds before touching any state.
    if (upperWaterMark < 1) {
      throw new IllegalArgumentException("upperWaterMark must be > 0");
    }
    if (lowerWaterMark >= upperWaterMark) {
      throw new IllegalArgumentException("lowerWaterMark must be  < upperWaterMark");
    }

    // Size limits that drive eviction.
    this.upperWaterMark = upperWaterMark;
    this.lowerWaterMark = lowerWaterMark;
    this.acceptableWaterMark = acceptableWatermark;

    // Cleanup strategy and optional eviction callback.
    this.newThreadForCleanup = runNewThreadForCleanup;
    this.evictionListener = evictionListener;

    // The actual storage; its initial capacity comes from the configuration.
    this.map = new ConcurrentHashMap<>(initialSize);

    // Started last so the thread only ever sees a fully initialized cache.
    if (runCleanupThread) {
      this.cleanupThread = new CleanupThread(this);
      this.cleanupThread.start();
    }
  }

再看一下他的添加方法：

 /**
  * Stores a value under the given key and triggers eviction when the cache has grown
  * past upperWaterMark.
  *
  * @param key the cache key
  * @param val the value to cache; a null value is ignored
  * @return the value previously stored under the key, or null if there was none
  *         (also null when val is null)
  */
 @Override
  public V put(K key, V val) {// put method
    if (val == null) return null;
    CacheEntry<K,V> e = new CacheEntry<>(key, val, stats.accessCounter.incrementAndGet());// wrap key and value together with a fresh access-counter value
    CacheEntry<K,V> oldCacheEntry = map.put(key, e);// fetch the entry previously stored under this key, if any
    int currentSize;
    if (oldCacheEntry == null) {// the key is new, so the entry count grows by one
      currentSize = stats.size.incrementAndGet();// stats tracks cache usage (hits, misses, lookups, ...); per the article, size is an AtomicInteger
    } else {
      currentSize = stats.size.get();
    }
    if (islive) {// Per the article: islive becomes true only once the owning SolrIndexSearcher has been registered as the
     // searcher that actually serves requests; puts made earlier (e.g. from a listener on a new searcher) count as non-live.
      stats.putCounter.incrementAndGet();// counted in the regular put statistics
    } else {
      stats.nonLivePutCounter.incrementAndGet();
    }
    // When the size exceeds the upper limit and no cleanup is in progress, evict using one of three strategies.
    if (currentSize > upperWaterMark && !isCleaning) {
      if (newThreadForCleanup) {// run markAndSweep (the eviction) on a throwaway thread; the article rates this option poorly
        new Thread() {
          @Override
          public void run() {
            markAndSweep();
          }
        }.start();
      } else if (cleanupThread != null){// wake the long-lived cleanup thread; the article's preferred option
        cleanupThread.wakeThread();
      } else {
        markAndSweep();// evict on the calling thread, which makes this put wait for the cleanup
      }
    }
    return oldCacheEntry == null ? null : oldCacheEntry.value;
  }

看到这里就明白了solr自带的缓存的实现原理了。（markAndSweep方法我没有完全看懂，不过不影响我们的理解）

在缓存中还有一个重要的方法是获得这个缓存的使用情况： public NamedList getStatistics() 方法，返回一个类似于map的结构，我们看看FastLRUCache的代码：

  /**
   * Reports usage statistics of this cache as an ordered name/value list.
   *
   * <p>The list holds the counters of the current cache instance, then the warm-up time and
   * the totals summed over every entry of statsList, and finally, when showItems is
   * non-zero, one "item_" entry per recently accessed cache item.
   *
   * @return the statistics; an empty list when the cache has not been created yet
   */
  public NamedList getStatistics() {
    NamedList<Serializable> result = new SimpleOrderedMap<>();
    if (cache == null) {
      return result;
    }

    // Counters of the current cache instance.
    ConcurrentLRUCache.Stats current = cache.getStats();
    long lookups = current.getCumulativeLookups();   // all lookups, hits plus misses
    long hits = current.getCumulativeHits();
    long inserts = current.getCumulativePuts();
    long evictions = current.getCumulativeEvictions();
    long size = current.getCurrentSize();

    // Totals across all Stats objects kept in statsList.
    long totalLookups = 0;
    long totalHits = 0;
    long totalInserts = 0;
    long totalEvictions = 0;

    // NOTE: It is safe to iterate on a CopyOnWriteArrayList
    for (ConcurrentLRUCache.Stats each : statsList) {
      totalLookups += each.getCumulativeLookups();
      totalHits += each.getCumulativeHits();
      totalInserts += each.getCumulativePuts();
      totalEvictions += each.getCumulativeEvictions();
    }

    result.add("lookups", lookups);
    result.add("hits", hits);
    result.add("hitratio", calcHitRatio(lookups, hits));
    result.add("inserts", inserts);
    result.add("evictions", evictions);
    result.add("size", size);

    result.add("warmupTime", warmupTime);
    result.add("cumulative_lookups", totalLookups);
    result.add("cumulative_hits", totalHits);
    result.add("cumulative_hitratio", calcHitRatio(totalLookups, totalHits));
    result.add("cumulative_inserts", totalInserts);
    result.add("cumulative_evictions", totalEvictions);

    // showItems == 0 means no individual items are listed.
    if (showItems == 0) {
      return result;
    }

    // -1 asks for every item; any other value caps the number of most recently accessed items.
    int maxItems = (showItems == -1) ? Integer.MAX_VALUE : showItems;
    Map items = cache.getLatestAccessedItems(maxItems);
    for (Object raw : items.entrySet()) {
      Map.Entry entry = (Map.Entry) raw;
      Object itemKey = entry.getKey();
      Object itemValue = entry.getValue();
      result.add("item_" + itemKey, itemValue.toString());
    }

    return result;
  }

关于这个统计的获取在后面关于solr的SolrEventListener（监听SolrIndexSearcher的变化的监听器）中会有介绍。