processDocument in DocFieldProcessorPerThread
/** Process the document. If there is
 * something for this document to be done in docID order,
 * you should encapsulate that as a
 * DocumentsWriter.DocWriter and return it.
 * DocumentsWriter then calls finish() on this object
 * when it's its turn.
 *
 * Walks every field of the current document, registers/updates its
 * FieldInfo in the per-thread fieldHash, groups same-named fields
 * together, then hands each distinct field (in sorted order) to its
 * per-field consumer and queues stored fields via fieldsWriter.
 *
 * @return deferred per-doc work from the stored-fields writer and/or
 *         the consumer chain (bundled into a PerDoc when both exist),
 *         or null when neither produced any
 * @throws IOException propagated from the consumers / fields writer
 */
public DocumentsWriter.DocWriter processDocument() throws IOException {
consumer.startDocument();
fieldsWriter.startDocument();
final Document doc = docState.doc;// the document currently being indexed by this thread
assert docFieldProcessor.docWriter.writer
.testPoint("DocumentsWriter.ThreadState.init start");
fieldCount = 0;
// Per-document generation stamp; used below to detect the first
// occurrence of a field name within THIS document.
final int thisFieldGen = fieldGen++;
final List<Fieldable> docFields = doc.getFields();// all Fields of the document
final int numDocFields = docFields.size();
// Absorb any new fields first seen in this document.
// Also absorb any changes to fields we had already
// seen before (eg suddenly turning on norms or
// vectors, etc.):
// Loop over every field in the document.
for (int i = 0; i < numDocFields; i++) {
Fieldable field = docFields.get(i);
final String fieldName = field.name();
// Make sure we have a PerField allocated
// Hash the field name and mask it down to a bucket index.
final int hashPos = fieldName.hashCode() & hashMask;
// fieldHash holds every field name seen so far, bucketed by name hash.
DocFieldProcessorPerField fp = fieldHash[hashPos];
// Each fieldHash bucket is a singly linked chain (like HashMap's
// separate chaining); walk it looking for this field name.
while (fp != null && !fp.fieldInfo.name.equals(fieldName))
fp = fp.next;
// Field name not seen before: register it in fieldHash.
// Otherwise merge this occurrence's flags into the existing FieldInfo.
if (fp == null) {
// TODO FI: we need to genericize the "flags" that a
// field holds, and, how these flags are merged; it
// needs to be more "pluggable" such that if I want
// to have a new "thing" my Fields can do, I can
// easily add it
FieldInfo fi = fieldInfos.add(fieldName, field.isIndexed(),
field.isTermVectorStored(), field
.isStorePositionWithTermVector(), field
.isStoreOffsetWithTermVector(), field
.getOmitNorms(), false, field
.getOmitTermFreqAndPositions());
fp = new DocFieldProcessorPerField(this, fi);
fp.next = fieldHash[hashPos];
fieldHash[hashPos] = fp;
// Buckets are chains, so totalFieldCount is the number of distinct
// field names stored in fieldHash, not the field count of this doc.
totalFieldCount++;
if (totalFieldCount >= fieldHash.length / 2)// grow when half full, keeping load factor <= 0.5 so chains stay short
rehash();
} else
fp.fieldInfo.update(field.isIndexed(), field
.isTermVectorStored(), field
.isStorePositionWithTermVector(), field
.isStoreOffsetWithTermVector(), field.getOmitNorms(),
false, field.getOmitTermFreqAndPositions());
// Record this field for the current doc. lastGen != thisFieldGen
// means this is the first instance of the name in this document.
if (thisFieldGen != fp.lastGen) {
// First time we're seeing this field for this doc
fp.fieldCount = 0;
if (fieldCount == fields.length) {
final int newSize = fields.length * 2;
DocFieldProcessorPerField newArray[] = new DocFieldProcessorPerField[newSize];
System.arraycopy(fields, 0, newArray, 0, fieldCount);
fields = newArray;
}
fields[fieldCount++] = fp;// fields[] collects the distinct fields of the current doc
fp.lastGen = thisFieldGen;
}
// Grow the per-field instance array if this name repeats many times.
if (fp.fieldCount == fp.fields.length) {
Fieldable[] newArray = new Fieldable[fp.fields.length * 2];
System.arraycopy(fp.fields, 0, newArray, 0, fp.fieldCount);
fp.fields = newArray;
}
fp.fields[fp.fieldCount++] = field;
if (field.isStored()) {
fieldsWriter.addField(field, fp.fieldInfo);
}
}
// If we are writing vectors then we must visit
// fields in sorted order so they are written in
// sorted order. TODO: we actually only need to
// sort the subset of fields that have vectors
// enabled; we could save [small amount of] CPU
// here.
quickSort(fields, 0, fieldCount - 1);
// Hand each distinct field (with all its instances) to its consumer chain.
for (int i = 0; i < fieldCount; i++)
fields[i].consumer.processFields(fields[i].fields,
fields[i].fieldCount);
// maxTermPrefix is set by the inverter when a term exceeded
// MAX_TERM_LENGTH and was skipped; surface that as a warning.
if (docState.maxTermPrefix != null && docState.infoStream != null)
docState.infoStream
.println("WARNING: document contains at least one immense term (longer than the max length "
+ DocumentsWriter.MAX_TERM_LENGTH
+ "), all of which were skipped. Please correct the analyzer to not produce such terms. The prefix of the first immense term is: '"
+ docState.maxTermPrefix + "...'");
// Collect deferred per-doc work from both downstream writers and
// return it in the cheapest wrapper that covers what exists.
final DocumentsWriter.DocWriter one = fieldsWriter.finishDocument();
final DocumentsWriter.DocWriter two = consumer.finishDocument();
if (one == null) {
return two;
} else if (two == null) {
return one;
} else {
// Both produced work: bundle them so finish() runs both in docID order.
PerDoc both = getPerDoc();
both.docID = docState.docID;
assert one.docID == docState.docID;
assert two.docID == docState.docID;
both.one = one;
both.two = two;
return both;
}
}