Solr.IndexWriter源码分析.2

最新推荐文章于 2024-08-30 04:45:00 发布
Wild__Child
最新推荐文章于 2024-08-30 04:45:00 发布
阅读量313
点赞数
文章标签： solr
本文链接：https://blog.csdn.net/Wild__Child/article/details/121843636
版权
2021SC@SDUSC
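  /**
   * Returns a read-only reader covering all committed as well as
   * un-committed changes to the index.
   * This provides "near real-time" searching, in that
   * changes made during an IndexWriter session can be
   * quickly made available for searching without closing
   * the writer nor calling {@link #commit}.
   *
   * <p>Note that this is functionally equivalent to calling
   * {@code flush} and then opening a new reader. But the turnaround time of this
   * method should be faster since it avoids the potentially
   * costly {@link #commit}.</p>
   *
   * <p>You must close the {@link IndexReader} returned by
   * this method once you are done using it.</p>
   *
   * <p>It is <i>near</i> real-time because there is no hard
   * guarantee on how quickly you can get a new reader after
   * making changes with IndexWriter. You will have to
   * experiment in your situation to determine if it is
   * fast enough. As this is a new and experimental
   * feature, please report back on your findings so we can
   * learn, improve and iterate.</p>
   *
   * <p>The resulting reader supports {@link
   * DirectoryReader#openIfChanged}, but that call will simply forward
   * back to this method (though this may change in the
   * future).</p>
   *
   * <p>The very first time this method is called, this
   * writer instance will make every effort to pool the
   * readers that it opens for doing merges, applying
   * deletes, etc. This means additional resources (RAM,
   * file descriptors, CPU time) will be consumed.</p>
   *
   * <p>For lower latency on reopening a reader, you should
   * call {@link IndexWriterConfig#setMergedSegmentWarmer} to
   * pre-warm a newly merged segment before it is committed
   * to the index. This is important for minimizing
   * index-to-search delay after a large merge.</p>
   *
   * <p>If an addIndexes* call is running in another thread,
   * then this reader will only search those segments from
   * the foreign index that have been successfully copied
   * over, so far.</p>
   *
   * <p><b>NOTE</b>: Once the writer is closed, any
   * outstanding readers may continue to be used. However,
   * if you attempt to reopen any of those readers, you will
   * hit an {@link AlreadyClosedException}.</p>
   *
   * @lucene.experimental
   *
   * @return IndexReader that covers the entire index plus all
   * changes made so far by this IndexWriter instance
   *
   * @throws IOException if there is a low-level I/O error
   */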
  DirectoryReader getReader(boolean applyAllDeletes, boolean writeAllDeletes) throws IOException {
    ensureOpen();

    // writeAllDeletes is only accepted together with applyAllDeletes
    if (writeAllDeletes && applyAllDeletes == false) {
      throw new IllegalArgumentException("applyAllDeletes must be true when writeAllDeletes=true");
    }

    // start time, only used for the "getReader took ... msec" info message below
    final long tStart = System.currentTimeMillis();

    if (infoStream.isEnabled("IW")) {
      infoStream.message("IW", "flush at getReader");
    }

    // Do this up front before flushing so that the readers
    // obtained during this flush are pooled, the first time
    // this method is called:
    readerPool.enableReaderPooling();
    StandardDirectoryReader r = null;
    doBeforeFlush();
    boolean anyChanges;
    final long maxFullFlushMergeWaitMillis = config.getMaxFullFlushMergeWaitMillis();
    /*
     * For releasing an NRT reader we must ensure that
     * DW doesn't add any segments or deletes until we are
     * done with creating the NRT DirectoryReader.
     * We release the two stage full flush after we are done opening the
     * directory reader!
     */
    MergePolicy.MergeSpecification onGetReaderMerges = null;
    final AtomicBoolean stopCollectingMergedReaders = new AtomicBoolean(false);
    final Map<String, SegmentReader> mergedReaders = new HashMap<>();
    final Map<String, SegmentReader> openedReadOnlyClones = new HashMap<>();
    // This function is used to control which SRs are opened in order to keep track of them
    // and to reuse them in the case we wait for merges in this getReader call.
    IOUtils.IOFunction<SegmentCommitInfo, SegmentReader> readerFactory = sci -> {
      final ReadersAndUpdates rld = getPooledInstance(sci, true);
      try {
        assert Thread.holdsLock(IndexWriter.this);
        SegmentReader segmentReader = rld.getReadOnlyClone(IOContext.READ);
        if (maxFullFlushMergeWaitMillis > 0) { // only track this if we actually do fullFlush merges
          openedReadOnlyClones.put(sci.info.name, segmentReader);
        }
        return segmentReader;
      } finally {
        release(rld);
      }
    };
    Closeable onGetReaderMergeResources = null;
    SegmentInfos openingSegmentInfos = null;
    // set to true only after everything below completed; drives the cleanup in the outer finally
    boolean success2 = false;
    try {
      /*
       * This is the essential part of the getReader method. We need to take care of the following things:
       *  - flush all currently in-memory DWPTs to disk
       *  - apply all deletes & updates to new and to the existing DWPTs
       *  - prevent flushes and applying deletes of concurrently indexing DWPTs to be applied
       *  - open a SDR on the updated SIS
       *
       * In order to prevent concurrent flushes we call DocumentsWriter#flushAllThreads that swaps out the deleteQueue
       * (this enforces a happens-before relationship between this and the subsequent full flush) and informs the
       * FlushControl (#markForFullFlush()) that it should prevent any new DWPTs from flushing until we are
       * done (DocumentsWriter#finishFullFlush(boolean)). All this is guarded by the fullFlushLock to prevent multiple
       * full flushes from happening concurrently. Once the DocWriter has initiated a full flush we can sequentially flush
       * and apply deletes & updates to the written segments without worrying about concurrently indexing DWPTs. The important
       * aspect is that it all happens between DocumentsWriter#flushAllThread() and DocumentsWriter#finishFullFlush(boolean)
       * since once the flush is marked as done deletes start to be applied to the segments on disk without guarantees that
       * the corresponding added documents (in the update case) are flushed and visible when opening a SDR.
       */
      boolean success = false;
      synchronized (fullFlushLock) {
        try {
          // TODO: should we somehow make the seqNo available in the returned NRT reader?
          // a negative return value from flushAllThreads() is treated as "there were changes"
          anyChanges = docWriter.flushAllThreads() < 0;
          if (anyChanges == false) {
            // prevent double increment since docWriter#doFlush increments the flushcount
            // if we flushed anything.
            flushCount.incrementAndGet();
          }
          publishFlushedSegments(true);
          processEvents(false);

          if (applyAllDeletes) {
            applyAllDeletesAndUpdates();
          }
          synchronized(this) {

          
            // NOTE: we cannot carry doc values updates in memory yet, so we always must write them through to disk
            // and re-open each SegmentReader:

            // TODO: we could instead just clone SIS and pull/incref readers in sync'd block, and then do this w/o IW's lock?
            // Must do this sync'd on IW to prevent a merge from completing at the last second and failing to write its DV updates:
            writeReaderPool(writeAllDeletes);

            // Prevent segmentInfos from changing while opening the
            // reader; in theory we could instead do similar retry logic,
            // just like we do when loading segments_N
            r = StandardDirectoryReader.open(this, readerFactory, segmentInfos, applyAllDeletes, writeAllDeletes);
            if (infoStream.isEnabled("IW")) {
              infoStream.message("IW", "return reader version=" + r.getVersion() + " reader=" + r);
            }
            if (maxFullFlushMergeWaitMillis > 0) {
             
              // We take the SIS from the reader which has already pruned away fully deleted readers.
              // This makes pulling the readers below after the merge simpler since we can be sure that
              // they are not closed. Every segment has a corresponding SR in the SDR we opened if we use
              // this SIS.
              // We need to do this rather complicated management of SRs and infos since we can't wait for merges
              // while we hold the fullFlushLock, since the merge might hit a tragic event and that must not be reported
              // while holding that lock. Merging outside of the lock, i.e. after calling docWriter.finishFullFlush(boolean),
              // would yield wrong results because deletes might sneak in during the merge.
              openingSegmentInfos = r.getSegmentInfos().clone();
              onGetReaderMerges = preparePointInTimeMerge(openingSegmentInfos, stopCollectingMergedReaders::get, MergeTrigger.GET_READER,
                  sci -> {
                    assert stopCollectingMergedReaders.get() == false : "illegal state  merge reader must be not pulled since we already stopped waiting for merges";
                    SegmentReader apply = readerFactory.apply(sci);
                    mergedReaders.put(sci.info.name, apply);
                    // We need to incRef the files of the opened SR, otherwise it's possible that another merge
                    // removes the segment before we pass it on to the SDR.
                    deleter.incRef(sci.files());
                  });
              onGetReaderMergeResources = () -> {
               
                // This needs to be closed once after we are done. In the case of an exception it releases
                // all resources, closes the merged readers and decrements the file references.
                // This only happens for readers that haven't been removed from mergedReaders and released elsewhere.
                synchronized (this) {
                  stopCollectingMergedReaders.set(true);
                  IOUtils.close(mergedReaders.values().stream().map(sr -> (Closeable) () -> {
                    try {
                      deleter.decRef(sr.getSegmentInfo().files());
                    } finally {
                      sr.close();
                    }
                  }).collect(Collectors.toList()));
                }
              };
            }
          }
          success = true;
        } finally {
          // Done: finish the full flush!
          assert Thread.holdsLock(fullFlushLock);
          docWriter.finishFullFlush(success);
          if (success) {
            processEvents(false);
            doAfterFlush();
          } else {
            if (infoStream.isEnabled("IW")) {
              infoStream.message("IW", "hit exception during NRT reader");
            }
          }
        }
      }
      if (onGetReaderMerges != null) { // only relevant if we do merge on getReader
        StandardDirectoryReader mergedReader = finishGetReaderMerge(stopCollectingMergedReaders, mergedReaders,
            openedReadOnlyClones, openingSegmentInfos, applyAllDeletes,
            writeAllDeletes, onGetReaderMerges, maxFullFlushMergeWaitMillis);
        if (mergedReader != null) {
          // A reader over the merged segments was opened: close the pre-merge reader and hand out the
          // merged one instead. r is swapped in the finally so that, if close() throws, the cleanup
          // in the outer finally closes mergedReader.
          try {
            r.close();
          } finally {
            r = mergedReader;
          }
        }
      }

      // also merge if a merge was requested through the maybeMerge flag; reading the flag resets it
      anyChanges |= maybeMerge.getAndSet(false);
      if (anyChanges) {
        maybeMerge(config.getMergePolicy(), MergeTrigger.FULL_FLUSH, UNBOUNDED_MAX_MERGE_SEGMENTS);
      }
      if (infoStream.isEnabled("IW")) {
        infoStream.message("IW", "getReader took " + (System.currentTimeMillis() - tStart) + " msec");
      }
      success2 = true;
    } catch (VirtualMachineError tragedy) {
      tragicEvent(tragedy, "getReader");
      throw tragedy;
    } finally {
      if (!success2) {
        // failure path: close the reader and the merge resources without masking the original exception
        try {
          IOUtils.closeWhileHandlingException(r, onGetReaderMergeResources);
        } finally {
          maybeCloseOnTragicEvent();
        }
      } else {
        // success path: only the merge resources are closed; r is returned to the caller
        IOUtils.close(onGetReaderMergeResources);
      }
    }
    return r;
  }

  /**
   * Finishes the merge-on-getReader step: triggers the merge scheduler with
   * {@code MergeTrigger.GET_READER}, waits up to {@code maxCommitMergeWaitMillis} for the
   * point-in-time merges and then, while holding the IndexWriter lock, tries to reopen an NRT
   * reader that uses the merged segment readers.
   *
   * @param stopCollectingMergedReaders flag set to {@code true} once waiting for merges is over
   * @param mergedReaders readers of merged segments keyed by segment name; closed and cleared here
   * @param openedReadOnlyClones readers opened for the initial NRT reader, keyed by segment name
   * @param openingSegmentInfos the segment infos to reopen on; must not be {@code null}
   * @param applyAllDeletes forwarded to the reopen call
   * @param writeAllDeletes forwarded to the reopen call
   * @param pointInTimeMerges the merges to wait for
   * @param maxCommitMergeWaitMillis maximum time to wait for the merges, in milliseconds
   * @return whatever {@code maybeReopenMergedNRTReader} returned, which may be {@code null}
   * @throws IOException if merging, reopening or closing the remaining readers fails
   */
  private StandardDirectoryReader finishGetReaderMerge(
      AtomicBoolean stopCollectingMergedReaders,
      Map<String, SegmentReader> mergedReaders,
      Map<String, SegmentReader> openedReadOnlyClones,
      SegmentInfos openingSegmentInfos,
      boolean applyAllDeletes,
      boolean writeAllDeletes,
      MergePolicy.MergeSpecification pointInTimeMerges,
      long maxCommitMergeWaitMillis) throws IOException {
    assert openingSegmentInfos != null;
    mergeScheduler.merge(mergeSource, MergeTrigger.GET_READER);
    pointInTimeMerges.await(maxCommitMergeWaitMillis, TimeUnit.MILLISECONDS);

    final StandardDirectoryReader reopened;
    synchronized (this) {
      // from here on no further merged readers may be collected
      stopCollectingMergedReaders.set(true);
      reopened = maybeReopenMergedNRTReader(
          mergedReaders, openedReadOnlyClones, openingSegmentInfos, applyAllDeletes, writeAllDeletes);
      // readers that were not taken out of the map by the reopen are still ours to close
      IOUtils.close(mergedReaders.values());
      mergedReaders.clear();
    }
    return reopened;
  }

  /**
   * Opens a new {@code StandardDirectoryReader} on {@code openingSegmentInfos} that uses the
   * readers of merged segments where available and otherwise reuses the readers opened earlier.
   * Must be called while holding the IndexWriter lock.
   *
   * @param mergedReaders readers of merged segments keyed by segment name; entries that are used
   *        are removed from this map
   * @param openedReadOnlyClones previously opened readers keyed by segment name; entries that are
   *        reused are removed from this map and get an extra reference
   * @param openingSegmentInfos the segment infos to open the reader on
   * @param applyAllDeletes forwarded to {@code StandardDirectoryReader.open}
   * @param writeAllDeletes forwarded to {@code StandardDirectoryReader.open}
   * @return the newly opened reader, or {@code null} if {@code mergedReaders} is empty
   * @throws IOException if opening the reader or releasing the file references fails
   */
  private StandardDirectoryReader maybeReopenMergedNRTReader(
      Map<String, SegmentReader> mergedReaders,
      Map<String, SegmentReader> openedReadOnlyClones,
      SegmentInfos openingSegmentInfos,
      boolean applyAllDeletes,
      boolean writeAllDeletes) throws IOException {
    assert Thread.holdsLock(this);
    if (mergedReaders.isEmpty()) {
      // nothing was merged, so there is nothing to reopen
      return null;
    }

    final Collection<String> mergedFiles = new ArrayList<>();
    final IOUtils.IOFunction<SegmentCommitInfo, SegmentReader> readerSource = sci -> {
      // Once a reader is removed from the map and returned, StandardDirectoryReader#open
      // is responsible for closing it. Only readers left behind in the mergedReaders map
      // still have to be closed by the caller.
      final SegmentReader merged = mergedReaders.remove(sci.info.name);
      if (merged != null) {
        mergedFiles.addAll(merged.getSegmentInfo().files());
        return merged;
      }
      final SegmentReader reused = openedReadOnlyClones.remove(sci.info.name);
      assert reused != null;
      // Every reader reused from the previous reader needs an explicit incRef since
      // there is no implicit incRef in the SDR#open call.
      reused.incRef();
      return reused;
    };

    try {
      return StandardDirectoryReader.open(
          this, readerSource, openingSegmentInfos, applyAllDeletes, writeAllDeletes);
    } finally {
      // the SDR#open call has taken its own references on the files by now, so ours can go
      deleter.decRef(mergedFiles);
    }
  }

  /**
   * Returns the RAM usage reported by the documents writer.
   * Fails via {@code ensureOpen()} if this writer is closed or closing.
   */
  @Override
  public final long ramBytesUsed() {
    ensureOpen();
    final long docWriterBytes = docWriter.ramBytesUsed();
    return docWriterBytes;
  }

  /**
   * Returns the number of bytes currently being flushed, as reported by the documents writer.
   * Fails via {@code ensureOpen()} if this writer is closed or closing.
   */
  public final long getFlushingBytes() {
    ensureOpen();
    final long flushingBytes = docWriter.getFlushingBytes();
    return flushingBytes;
  }

  /**
   * Writes pending doc-values updates of pooled readers to the directory while the reader pool
   * uses more than half of the configured RAM buffer.
   *
   * <p>Does nothing if {@code writeDocValuesLock} cannot be acquired without waiting, or if the
   * RAM buffer size equals {@code IndexWriterConfig.DISABLE_AUTO_FLUSH}.
   *
   * @throws IOException if writing the field updates fails
   */
  final void writeSomeDocValuesUpdates() throws IOException {
    if (writeDocValuesLock.tryLock() == false) {
      return;
    }
    try {
      final double ramBufferSizeMB = config.getRAMBufferSizeMB();
      if (ramBufferSizeMB == IndexWriterConfig.DISABLE_AUTO_FLUSH) {
        return;
      }
      final long startNS = System.nanoTime();

      // If the reader pool is > 50% of our IW buffer, then write the updates:
      final double halfBufferBytes = 0.5 * ramBufferSizeMB * 1024 * 1024;
      long poolBytes = readerPool.ramBytesUsed();
      if ((poolBytes > halfBufferBytes) == false) {
        return;
      }
      if (infoStream.isEnabled("BD")) {
        infoStream.message("BD", String.format(Locale.ROOT, "now write some pending DV updates: %.2f MB used vs IWC Buffer %.2f MB",
            poolBytes/1024./1024., ramBufferSizeMB));
      }

      // Sort by largest ramBytesUsed:
      final List<ReadersAndUpdates> byRam = readerPool.getReadersByRam();
      int segmentsWritten = 0;
      for (ReadersAndUpdates candidate : byRam) {
        if (poolBytes <= halfBufferBytes) {
          break;
        }

        // Measure before and after: not all RAM held by this RAU is used by DV updates, and
        // not all of those bytes can be written out here.
        final long before = candidate.ramBytesUsed.get();
        if (before == 0) {
          continue; // nothing to do here - lets not acquire the lock
        }

        // Take the IW lock only around each individual write since writing is time consuming;
        // this gives other threads a chance to run in between our writes.
        synchronized (this) {
          // The segment of a reader returned by readerPool#getReadersByRam may have been dropped
          // before we get to it here; such a reader has to be skipped. This is also a best effort
          // to free RAM: another thread may be writing this rld concurrently and win, and then
          // this rld is dropped if reader pooling is off.
          if (readerPool.get(candidate.info, false) == null) {
            continue;
          }
          final boolean wroteUpdates = candidate.writeFieldUpdates(
              directory, globalFieldNumberMap, bufferedUpdatesStream.getCompletedDelGen(), infoStream);
          if (wroteUpdates) {
            checkpointNoSIS();
          }
        }
        final long after = candidate.ramBytesUsed.get();
        poolBytes -= before - after;
        segmentsWritten++;
      }

      if (infoStream.isEnabled("BD")) {
        infoStream.message("BD", String.format(Locale.ROOT, "done write some DV updates for %d segments: now %.2f MB used vs IWC Buffer %.2f MB; took %.2f sec",
            segmentsWritten, readerPool.ramBytesUsed()/1024./1024., ramBufferSizeMB, ((System.nanoTime() - startNS)/1000000000.)));
      }
    } finally {
      writeDocValuesLock.unlock();
    }
  }

  /**
   * Returns the number of deleted documents for the given segment.
   * If a pooled reader instance exists for the segment its delete count is returned,
   * otherwise the delete count recorded in the segment info is returned.
   *
   * @param info the segment to look up
   * @return the number of deleted documents
   */
  @Override
  public int numDeletedDocs(SegmentCommitInfo info) {
    ensureOpen(false);
    validate(info);
    final ReadersAndUpdates pooled = getPooledInstance(info, false);
    if (pooled == null) {
      // no pooled reader: fall back to the count stored on the segment info
      final int delCount = info.getDelCount(softDeletesEnabled);
      assert delCount <= info.info.maxDoc(): "delCount: " + delCount + " maxDoc: " + info.info.maxDoc();
      return delCount;
    }
    // get the full count from the pooled instance since SCI might change concurrently
    return pooled.getDelCount();
  }

  /**
   * Used internally to throw an {@link AlreadyClosedException} if this
   * IndexWriter has been closed or, optionally, is in the process of closing.
   *
   * @param failIfClosing
   *          if {@code true}, also fail when this {@code IndexWriter} is in the process of
   *          closing ({@code closing=true}) but has not yet finished closing
   *          ({@code closed=false})
   * @throws AlreadyClosedException
   *           if this IndexWriter is closed, or is closing and {@code failIfClosing} is set
   */
  protected final void ensureOpen(boolean failIfClosing) throws AlreadyClosedException {
    final boolean unusable = closed || (failIfClosing && closing);
    if (unusable) {
      // tragedy.get() is passed along as the second constructor argument
      throw new AlreadyClosedException("this IndexWriter is closed", tragedy.get());
    }
  }

  /**
   * Used internally to throw an {@link AlreadyClosedException} if this
   * IndexWriter has been closed ({@code closed=true}) or is in the process of
   * closing ({@code closing=true}).
   * <p>
   * Calls {@link #ensureOpen(boolean) ensureOpen(true)}.
   *
   * @throws AlreadyClosedException if this IndexWriter is closed or closing
   */
  protected final void ensureOpen() throws AlreadyClosedException {
    final boolean failIfClosing = true;
    ensureOpen(failIfClosing);
  }