/**
 * Selects the ThreadState the calling thread should use for one document and
 * prepares it for that document.
 *
 * The whole call runs under SCOPED_LOCK_MUTEX(THIS_LOCK). A thread that is
 * already present in threadBindings gets its bound state back; otherwise the
 * state with the fewest attached threads is shared (when it is unused, or when
 * threadStates already holds MAX_THREAD_STATE entries) or a new state is
 * appended, and the choice is recorded in threadBindings.
 *
 * The call then waits on THIS_WAIT_CONDITION until the state is idle and no
 * pause, pending flush or abort is in progress, or until the writer is closed.
 *
 * @param doc      document forwarded to ThreadState::init().
 * @param delTerm  may be NULL; when set it is passed to addDeleteTerm()
 *                 together with state->docID, and state->doFlushAfter is set
 *                 from timeToFlushDeletes().
 * @return the selected state, with isIdle set to false before init() is
 *         attempted. An AbortException raised while preparing the state is
 *         caught and handed to abort(); the state is still returned.
 * @throws CL_ERR_AlreadyClosed (via _CLTHROWA) if `closed` is set once the
 *         wait loop exits.
 */
DocumentsWriter::ThreadState* DocumentsWriter::getThreadState(Document* doc, Term* delTerm) {
  SCOPED_LOCK_MUTEX(THIS_LOCK)

  // Step 1: resolve the ThreadState for the current thread id.
  ThreadState* state = NULL;
  if (threadBindings.find(_LUCENE_CURRTHREADID) != threadBindings.end()) {
    // This thread already has a binding: keep using it.
    state = threadBindings[_LUCENE_CURRTHREADID];
  } else {
    // No binding yet: look for the state with the fewest attached threads.
    ThreadState* leastBusy = NULL;
    for (size_t idx = 0; idx < threadStates.length; ++idx) {
      ThreadState* candidate = threadStates[idx];
      if (leastBusy == NULL || candidate->numThreads < leastBusy->numThreads)
        leastBusy = candidate;
    }

    // Share an existing state only if nobody uses it, or if the pool has
    // already reached MAX_THREAD_STATE entries.
    const bool reuseExisting = leastBusy != NULL
        && (leastBusy->numThreads == 0 || threadStates.length == MAX_THREAD_STATE);
    if (reuseExisting) {
      leastBusy->numThreads++;
      state = leastBusy;
    } else {
      // Grow the pool by one slot and place a brand-new state in it.
      threadStates.resize(threadStates.length + 1);
      ThreadState* created = _CLNEW ThreadState(this);
      threadStates.values[threadStates.length - 1] = created;
      state = created;
    }
    threadBindings.put(_LUCENE_CURRTHREADID, state);
  }

  // Step 2: block while the state is busy (it may be shared), threads are
  // paused, a flush is pending or an abort is running -- unless we got closed.
  // NOTE(review): the macro is deliberately the direct, un-braced loop body;
  // its expansion is defined outside this file section.
  while (!closed && (!state->isIdle || pauseThreads != 0 || flushPending || abortCount > 0))
    CONDITION_WAIT(THIS_LOCK, THIS_WAIT_CONDITION)

  if (closed) {
    _CLTHROWA(CL_ERR_AlreadyClosed, "this IndexWriter is closed");
  }

  // Ask the writer for a segment name if none has been assigned yet.
  if (segment.empty())
    segment = writer->newSegmentName();

  // Step 3: claim the state and set it up for this document.
  state->isIdle = false;
  try {
    bool initialized = false;
    try {
      state->init(doc, nextDocID);

      if (delTerm != NULL) {
        addDeleteTerm(delTerm, state->docID);
        state->doFlushAfter = timeToFlushDeletes();
      }

      // The counters advance only after init() (and the optional delete-term
      // bookkeeping) completed without throwing.
      ++nextDocID;
      ++numDocsInRAM;

      // Commit to a flush right here, so that flushing by document count
      // yields exactly N documents even with several threads adding at once.
      const bool autoFlushByCount = maxBufferedDocs != IndexWriter::DISABLE_AUTO_FLUSH;
      if (!flushPending && autoFlushByCount && numDocsInRAM >= maxBufferedDocs) {
        flushPending = true;
        state->doFlushAfter = true;
      }

      initialized = true;
    } _CLFINALLY (
      if (!initialized) {
        // Setup failed: release the state again and wake any waiters.
        state->isIdle = true;
        CONDITION_NOTIFYALL(THIS_WAIT_CONDITION)
        // Withdraw a flush request this state had already been marked for.
        if (state->doFlushAfter) {
          state->doFlushAfter = false;
          flushPending = false;
        }
      }
    )
  } catch (AbortException& aborted) {
    abort(&aborted);
  }

  return state;
}
在Lucene中,对于同一个索引文件夹,只能有一个IndexWriter打开它。打开后,会在该文件夹中生成文件
write.lock;当其他IndexWriter再试图打开此索引文件夹时,则会报错。
在同一个进程中,对同一个索引文件夹,只能有一个IndexWriter打开它,因而如
果想多线程向此索引文件夹中添加文档,则必须共享一个IndexWriter,而且在以往的实现中,addDocument
函数是同步的(synchronized),也即多线程的索引并不能起到提高性能的效果。
于是为了支持多线程索引,不使IndexWriter成为瓶颈,对于每一个线程都有一个相应的文档集处理对象
(DocumentsWriterThreadState),这样对文档的索引过程可以多线程并行进行,从而增加索引的速度。
getThreadState函数是同步的(synchronized),DocumentsWriter有一个成员变量threadBindings,它是一
个HashMap,键为线程对象(Thread.currentThread()),值为此线程对应的DocumentsWriterThreadState
对象。
如果state.isIdle为false,则此线程等待