Lucene跳表

org.apache.lucene.codecs.MultiLevelSkipListReader skipTo

  
  /** Skips entries to the first beyond the current whose document number is
   *  greater than or equal to <i>target</i>. Returns the current doc count. 
   */
  public int skipTo(int target) throws IOException {

    // walk up the levels until highest level is found that has a skip
    // for this target
    int level = 0;
    // 往上走找到第一个大于target的level。 找不到的话也会结束这个while
    while (level < numberOfSkipLevels - 1 && target > skipDoc[level + 1]) {
      level++;
    }    

    //当level大于0时执行下面的while
    while (level >= 0) {
        // target如果大于level层的哨兵
      if (target > skipDoc[level]) {
        // 加载下一个SkipDatum
        if (!loadNextSkip(level)) {
        // 不成功则继续在此level往后走
          continue;
        }
      } else {
        // no more skips on this level, go down one level
        // 如果没有更多的SkipDatum则向下走
        if (level > 0 && lastChildPointer > skipStream[level - 1].getFilePointer()) {
          // 往下走
          seekChild(level - 1);
        } 
        level--;
      }
    }
    
    return numSkipped[0] - skipInterval[0] - 1;
  }

skipDoc:哨兵数组

skipStream: 

lastChildPointer:这一层的childPoint

  private boolean loadNextSkip(int level) throws IOException {
    // we have to skip, the target document is greater than the current
    // skip list entry    
    // 设置    
    setLastSkipData(level);
      
    numSkipped[level] += skipInterval[level];

    // numSkipped may overflow a signed int, so compare as unsigned.
    if (Integer.compareUnsigned(numSkipped[level], docCount) > 0) {
      // this skip list is exhausted
      skipDoc[level] = Integer.MAX_VALUE;
      if (numberOfSkipLevels > level) numberOfSkipLevels = level; 
      return false;
    }

    // read next skip entry
    skipDoc[level] += readSkipData(level, skipStream[level]);
    
    if (level != 0) {
      // read the child pointer if we are not on the leaf level
      // childPointer[level] = 下层的起始位置+下层的偏移量
      // skipPointer[level - 1] : 下一层的其实地址
      // skipStream[level].readVLong(): 下一层的偏移量  
      childPointer[level] = skipStream[level].readVLong() + skipPointer[level - 1];
    }
    
    return true;

  }

(此图引用自参考文档) 

  
  /** Copies the values of the last read skip entry on this level */
  protected void setLastSkipData(int level) {
    lastDoc = skipDoc[level];
    lastChildPointer = childPointer[level];
  }

初始化操作 

org.apache.lucene.backward_codecs.lucene40.blocktree.SegmentTermsEnum postings

 org.apache.lucene.backward_codecs.lucene84.Lucene84PostingsReader postings

 org.apache.lucene.backward_codecs.lucene84.Lucene84PostingsReader reset

    @Override
    public int advance(int target) throws IOException {
      if (target > nextSkipDoc) {
        if (skipper == null) {
          // Lazy init: first time this enum has ever been used for skipping
          skipper =
              new Lucene84SkipReader(
                  docIn.clone(), MAX_SKIP_LEVELS, true, indexHasOffsets, indexHasPayloads);
        }

        if (!skipped) {
          assert skipOffset != -1;
          // This is the first time this enum has skipped
          // since reset() was called; load the skip data:
          // 初始化, 传入跳表的位置信息  
          skipper.init(
              docTermStartFP + skipOffset, docTermStartFP, posTermStartFP, payTermStartFP, docFreq);
          skipped = true;
        }

        final int newDocUpto = skipper.skipTo(target) + 1;

        if (newDocUpto > blockUpto - BLOCK_SIZE + docBufferUpto) {
          // Skipper moved
          assert newDocUpto % BLOCK_SIZE == 0 : "got " + newDocUpto;
          blockUpto = newDocUpto;

          // Force to read next block
          docBufferUpto = BLOCK_SIZE;
          accum = skipper.getDoc();
          docIn.seek(skipper.getDocPointer());
          posPendingFP = skipper.getPosPointer();
          payPendingFP = skipper.getPayPointer();
          posPendingCount = skipper.getPosBufferUpto();
          lastStartOffset = 0; // new document
          payloadByteUpto = skipper.getPayloadByteUpto();
        }
        nextSkipDoc = skipper.getNextSkipDoc();
      }
      if (docBufferUpto == BLOCK_SIZE) {
        refillDocs();
      }

      // Now scan:
      long doc;
      while (true) {
        doc = docBuffer[docBufferUpto];
        freq = (int) freqBuffer[docBufferUpto];
        posPendingCount += freq;
        docBufferUpto++;

        if (doc >= target) {
          break;
        }
      }

      position = 0;
      lastStartOffset = 0;
      return this.doc = (int) doc;
    }

加载跳表 

  /** Initializes the reader, for reuse on a new term. */
  public void init(long skipPointer, int df) throws IOException {
    this.skipPointer[0] = skipPointer;
    this.docCount = df;
    assert skipPointer >= 0 && skipPointer <= skipStream[0].length() 
    : "invalid skip pointer: " + skipPointer + ", length=" + skipStream[0].length();
    Arrays.fill(skipDoc, 0);
    Arrays.fill(numSkipped, 0);
    Arrays.fill(childPointer, 0);
    
    for (int i = 1; i < numberOfSkipLevels; i++) {
      skipStream[i] = null;
    }
    loadSkipLevels();
  }





/** Loads the skip levels  */
  private void loadSkipLevels() throws IOException {
    if (docCount <= skipInterval[0]) {
      numberOfSkipLevels = 1;
    } else {
      numberOfSkipLevels = 1+MathUtil.log(docCount/skipInterval[0], skipMultiplier);
    }

    if (numberOfSkipLevels > maxNumberOfSkipLevels) {
      numberOfSkipLevels = maxNumberOfSkipLevels;
    }

    skipStream[0].seek(skipPointer[0]);
    
    int toBuffer = numberOfLevelsToBuffer;
    
    for (int i = numberOfSkipLevels - 1; i > 0; i--) {
      // the length of the current level
      long length = skipStream[0].readVLong();
      
      // the start pointer of the current level
      skipPointer[i] = skipStream[0].getFilePointer();
      if (toBuffer > 0) {
        // buffer this level
        skipStream[i] = new SkipBuffer(skipStream[0], (int) length);
        toBuffer--;
      } else {
        // clone this stream, it is already at the start of the current level
        skipStream[i] = skipStream[0].clone();
        if (inputIsBuffered && length < BufferedIndexInput.BUFFER_SIZE) {
          ((BufferedIndexInput) skipStream[i]).setBufferSize(Math.max(BufferedIndexInput.MIN_BUFFER_SIZE, (int) length));
        }
        
        // move base stream beyond the current level
        // length:本层的长度
        // skipStream[0] 本的层的地址
        // 层与层之间时连续的所以可以这样计算
        skipStream[0].seek(skipStream[0].getFilePointer() + length);
      }
    }
   
    // use base stream for the lowest level
    skipPointer[0] = skipStream[0].getFilePointer();
  }

参考

索引文件的生成(四)-htmlhttps://www.amazingkoala.com.cn/Lucene/Index/2020/0106/124.html

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值