org.apache.lucene.codecs.MultiLevelSkipListReader skipTo
/**
* Skips entries to the first beyond the current whose document number is
* greater than or equal to <i>target</i>. Returns the current doc count.
*
* <p>The method first climbs to the highest level whose current skip doc is still
* smaller than <i>target</i>, then repeatedly loads skip entries on that level and
* descends one level at a time until it has dropped below level 0.
*
* @param target the document number to skip towards
* @return {@code numSkipped[0] - skipInterval[0] - 1}, computed from the level-0 counters
* @throws IOException if reading from one of the skip streams fails
*/
public int skipTo(int target) throws IOException {
// walk up the levels until highest level is found that has a skip
// for this target
int level = 0;
// Climb while a higher level exists and its current skip doc is still smaller than
// target. The loop also ends when the top level is reached.
while (level < numberOfSkipLevels - 1 && target > skipDoc[level + 1]) {
level++;
}
// Runs while level >= 0, i.e. level 0 is processed as well.
while (level >= 0) {
// target lies beyond the current skip doc of this level
if (target > skipDoc[level]) {
// load the next skip entry on this level
if (!loadNextSkip(level)) {
// The level is exhausted. loadNextSkip has set skipDoc[level] to
// Integer.MAX_VALUE, so the next iteration takes the else branch and descends.
continue;
}
} else {
// no more skips on this level, go down one level
// Only reposition the lower level when the last child pointer lies ahead of
// that level's current file pointer.
if (level > 0 && lastChildPointer > skipStream[level - 1].getFilePointer()) {
// presumably seeks the lower level's stream to lastChildPointer - confirm in seekChild
seekChild(level - 1);
}
level--;
}
}
return numSkipped[0] - skipInterval[0] - 1;
}
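skipDoc:哨兵数组,skipDoc[level] 表示第 level 层当前跳表项对应的文档号
skipStream:每一层跳表数据对应的输入流,skipStream[level] 用于读取第 level 层,skipStream[0] 为最底层
lastChildPointer:最近一次读取的跳表项的 childPointer(由 setLastSkipData 从 childPointer[level] 复制而来)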
/**
 * Moves the given level forward to its next skip entry.
 *
 * <p>The entry being left is first recorded through {@link #setLastSkipData(int)}. If
 * advancing would step past {@code docCount}, the level is marked as exhausted instead.
 *
 * @param level the skip level to advance
 * @return {@code true} if a new entry was read, {@code false} if the level is exhausted
 * @throws IOException if reading from the level's skip stream fails
 */
private boolean loadNextSkip(int level) throws IOException {
  // The target is greater than the current skip entry, so remember that entry
  // before this level's state is overwritten.
  setLastSkipData(level);

  numSkipped[level] += skipInterval[level];

  // numSkipped may overflow a signed int, so the comparison has to be unsigned.
  final boolean exhausted = Integer.compareUnsigned(numSkipped[level], docCount) > 0;
  if (exhausted) {
    // Nothing left on this level: park its skip doc at the maximum value and make
    // sure the number of levels in use does not exceed this one.
    skipDoc[level] = Integer.MAX_VALUE;
    if (numberOfSkipLevels > level) {
      numberOfSkipLevels = level;
    }
    return false;
  }

  // Read the next skip entry; the value read is added onto the current skip doc.
  skipDoc[level] += readSkipData(level, skipStream[level]);

  if (level == 0) {
    // The lowest level stores no child pointer.
    return true;
  }

  // Above the lowest level: child pointer = offset read from this level's stream
  // plus the start address of the level below (skipPointer[level - 1]).
  childPointer[level] = skipStream[level].readVLong() + skipPointer[level - 1];
  return true;
}
(此图引用自参考文档)
/**
 * Records the most recently read skip entry of the given level.
 *
 * <p>Copies that level's skip doc into {@code lastDoc} and its child pointer into
 * {@code lastChildPointer}.
 *
 * @param level the skip level whose current entry is recorded
 */
protected void setLastSkipData(int level) {
  this.lastDoc = this.skipDoc[level];
  this.lastChildPointer = this.childPointer[level];
}
初始化操作
org.apache.lucene.backward_codecs.lucene40.blocktree.SegmentTermsEnum postings
org.apache.lucene.backward_codecs.lucene84.Lucene84PostingsReader postings
org.apache.lucene.backward_codecs.lucene84.Lucene84PostingsReader reset
/**
 * Advances to the first buffered document whose id is greater than or equal to
 * {@code target}.
 *
 * <p>When {@code target} is beyond {@code nextSkipDoc}, the skip reader is consulted
 * first (it is created and initialized lazily) and, if it moved past the current
 * position, the postings state is repositioned to the block it points at. The
 * remaining distance is covered by a linear scan over the doc buffer.
 *
 * @param target the document id to advance to
 * @return the document id the scan stopped on, also stored in {@code this.doc}
 * @throws IOException if reading skip data or postings fails
 */
@Override
public int advance(int target) throws IOException {
if (target > nextSkipDoc) {
if (skipper == null) {
// Lazy init: first time this enum has ever been used for skipping
skipper =
new Lucene84SkipReader(
docIn.clone(), MAX_SKIP_LEVELS, true, indexHasOffsets, indexHasPayloads);
}
if (!skipped) {
assert skipOffset != -1;
// This is the first time this enum has skipped
// since reset() was called; load the skip data:
// Initialize the skipper with the skip list location (docTermStartFP + skipOffset)
// and the term's doc/pos/pay start file pointers.
skipper.init(
docTermStartFP + skipOffset, docTermStartFP, posTermStartFP, payTermStartFP, docFreq);
skipped = true;
}
final int newDocUpto = skipper.skipTo(target) + 1;
// Only reposition when the skipper landed beyond the current position.
if (newDocUpto > blockUpto - BLOCK_SIZE + docBufferUpto) {
// Skipper moved
assert newDocUpto % BLOCK_SIZE == 0 : "got " + newDocUpto;
blockUpto = newDocUpto;
// Force to read next block
docBufferUpto = BLOCK_SIZE;
accum = skipper.getDoc();
docIn.seek(skipper.getDocPointer());
posPendingFP = skipper.getPosPointer();
payPendingFP = skipper.getPayPointer();
posPendingCount = skipper.getPosBufferUpto();
lastStartOffset = 0; // new document
payloadByteUpto = skipper.getPayloadByteUpto();
}
nextSkipDoc = skipper.getNextSkipDoc();
}
// The buffer is used up (or was just invalidated above), so load the next block.
if (docBufferUpto == BLOCK_SIZE) {
refillDocs();
}
// Now scan:
// NOTE(review): the loop has no explicit bound on docBufferUpto; presumably the
// buffer always contains an entry >= target - confirm against refillDocs().
long doc;
while (true) {
doc = docBuffer[docBufferUpto];
freq = (int) freqBuffer[docBufferUpto];
posPendingCount += freq;
docBufferUpto++;
if (doc >= target) {
break;
}
}
position = 0;
lastStartOffset = 0;
return this.doc = (int) doc;
}
加载跳表
/**
 * Initializes the reader, for reuse on a new term.
 *
 * <p>Stores the skip data location and document count, clears the per-level state,
 * drops the streams of all levels above the lowest one and then loads the skip levels.
 *
 * @param skipPointer file pointer of the skip data; asserted to be within
 *     {@code [0, skipStream[0].length()]}
 * @param df the document count used for this term
 * @throws IOException if loading the skip levels fails
 */
public void init(long skipPointer, int df) throws IOException {
  this.skipPointer[0] = skipPointer;
  this.docCount = df;
  assert skipPointer >= 0 && skipPointer <= skipStream[0].length()
      : "invalid skip pointer: " + skipPointer + ", length=" + skipStream[0].length();

  // Clear the per-level state.
  Arrays.fill(skipDoc, 0);
  Arrays.fill(numSkipped, 0);
  Arrays.fill(childPointer, 0);

  // Release the stream of every level above the lowest one; loadSkipLevels()
  // assigns streams to the levels it loads.
  int upperLevel = 1;
  while (upperLevel < numberOfSkipLevels) {
    skipStream[upperLevel] = null;
    upperLevel++;
  }

  loadSkipLevels();
}
/**
 * Loads the skip levels.
 *
 * <p>Derives the number of levels from {@code docCount}, {@code skipInterval[0]} and
 * {@code skipMultiplier} (capped at {@code maxNumberOfSkipLevels}), then walks from the
 * highest level down to level 1, recording each level's start pointer and giving it
 * either a buffered copy or a clone of the base stream. Level 0 keeps using the base
 * stream itself.
 *
 * @throws IOException if reading from the base skip stream fails
 */
private void loadSkipLevels() throws IOException {
// With no more documents than one level-0 skip interval, a single level is enough.
if (docCount <= skipInterval[0]) {
numberOfSkipLevels = 1;
} else {
numberOfSkipLevels = 1+MathUtil.log(docCount/skipInterval[0], skipMultiplier);
}
// Never use more levels than the configured maximum.
if (numberOfSkipLevels > maxNumberOfSkipLevels) {
numberOfSkipLevels = maxNumberOfSkipLevels;
}
// Position the base stream at the start of the skip data.
skipStream[0].seek(skipPointer[0]);
// Number of levels that may still be buffered.
int toBuffer = numberOfLevelsToBuffer;
// Levels are read from the highest one down to level 1.
for (int i = numberOfSkipLevels - 1; i > 0; i--) {
// the length of the current level
long length = skipStream[0].readVLong();
// the start pointer of the current level
skipPointer[i] = skipStream[0].getFilePointer();
if (toBuffer > 0) {
// buffer this level
// NOTE(review): the base stream is not advanced explicitly in this branch;
// presumably SkipBuffer consumes `length` bytes from it - confirm.
skipStream[i] = new SkipBuffer(skipStream[0], (int) length);
toBuffer--;
} else {
// clone this stream, it is already at the start of the current level
skipStream[i] = skipStream[0].clone();
// For a level shorter than the default buffer, shrink the clone's buffer to the
// level length (but not below the minimum buffer size).
if (inputIsBuffered && length < BufferedIndexInput.BUFFER_SIZE) {
((BufferedIndexInput) skipStream[i]).setBufferSize(Math.max(BufferedIndexInput.MIN_BUFFER_SIZE, (int) length));
}
// move base stream beyond the current level
// length: the length of this level
// skipStream[0]: currently positioned at the start of this level
// The levels are stored back to back, so start + length is where the next level begins.
skipStream[0].seek(skipStream[0].getFilePointer() + length);
}
}
// use base stream for the lowest level
skipPointer[0] = skipStream[0].getFilePointer();
}
参考
索引文件的生成(四):https://www.amazingkoala.com.cn/Lucene/Index/2020/0106/124.html