2021SC@SDUSC
lucene调用链自上而下,大概可以分三层, 前四个步骤是逻辑调用层,IndexWriter类将整个Document作为参数调用addDocument方法, IndexWriter下的DocWriter再调用对应的updateDocument去更新文档,最后从线程池种拉出一个DocWriterPerThread对象来执行最终的updateDocument逻辑,再这一层实际上并没有什么实质性地发生
中间一层是索引链的处理逻辑,DefaultIndexingChain是一个非常核心的类,负责对当前文档个建索引的核心操作,它定义了什么时候该写倒排拉链,什么时候写DocValue,什么时候写入StoredField 等。 processDocument 是整个索引链个入口方法,它会负责将整个文档按照Field拆开,分别调用下面的processField方法:
DefaultIndexingChain.processDocument源码
@Override
public void processDocument() throws IOException {
// How many indexed field names we've seen (collapses
// multiple field instances by the same name):
// 这个document 有多少field,记住同名地field只算一次
int fieldCount = 0;
// 这个doc 的版本,每次更新就叠加+1
long fieldGen = nextFieldGen++;
termsHash.startDocument();
startStoredFields(docState.docID);
try {
for (IndexableField field : docState.doc) {
fieldCount = processField(field, fieldGen, fieldCount);
}
} finally {
if (docWriter.hasHitAbortingException() == false) {
// Finish each indexed field name seen in the document:
//依次处理所有term
for (int i=0;i<fieldCount;i++) {
fields[i].finish();
}
finishStoredFields();
}
}
try {
termsHash.finishDocument();
} catch (Throwable th) {
// Must abort, on the possibility that on-disk term
// vectors are now corrupt:
docWriter.onAbortingException(th);
throw th;
}
}
真正的索引链核心执行逻辑还是在processField里面:
DefaultIndexingChain.processField源码
private int processField(IndexableField field, long fieldGen, int fieldCount) throws IOException {
String fieldName = field.name();
IndexableFieldType fieldType = field.fieldType();
PerField fp = null;
if (fieldType.indexOptions() == null) {
throw new NullPointerException("IndexOptions must not be null (field: \"" + field.name() + "\")");
}
// Invert indexed fields:
// 在该Field上面建倒排表
if (fieldType.indexOptions() != IndexOptions.NONE) {
fp = getOrAddField(fieldName, fieldType, true);
boolean first = fp.fieldGen != fieldGen;
fp.invert(field, first);
if (first) {
fields[fieldCount++] = fp;
fp.fieldGen = fieldGen;
}
} else {
verifyUnIndexedFieldType(fieldName, fieldType);
}
// Add stored fields: 存储该field的storedField
if (fieldType.stored()) {
if (fp == null) {
fp = getOrAddField(fieldName, fieldType, false);
}
if (fieldType.stored()) {
String value = field.stringValue();
if (value != null && value.length() > IndexWriter.MAX_STORED_STRING_LENGTH) {
throw new IllegalArgumentException("stored field \"" + field.name() + "\" is too large (" + value.length() + " characters) to store");
}
try {
storedFieldsConsumer.writeField(fp.fieldInfo, field);
} catch (Throwable th) {
docWriter.onAbortingException(th);
throw th;
}
}
}
// 建docValue
DocValuesType dvType = fieldType.docValuesType();
if (dvType == null) {
throw new NullPointerException("docValuesType must not be null (field: \"" + fieldName + "\")");
}
if (dvType != DocValuesType.NONE) {
if (fp == null) {
fp = getOrAddField(fieldName, fieldType, false);
}
indexDocValue(fp, dvType, field);
}
if (fieldType.pointDataDimensionCount() != 0) {
if (fp == null) {
fp = getOrAddField(fieldName, fieldType, false);
}
indexPoint(fp, field);
}
return fieldCount;
}