CLucene中多线程处理文档集合

/**
 * Binds the calling thread to a ThreadState and prepares that state for
 * indexing `doc`.
 *
 * Everything below runs after SCOPED_LOCK_MUTEX(THIS_LOCK):
 *  1. Look up the current thread id in threadBindings; if absent, either
 *     share the least-loaded existing ThreadState or create a new one, and
 *     record the binding.
 *  2. Wait on THIS_WAIT_CONDITION until the state is idle and no
 *     pause / pending flush / abort is in progress (or the writer is closed).
 *  3. Assign a segment name if none is set, mark the state busy, call
 *     state->init(), register the optional delete term, and bump the
 *     document counters.
 *
 * @param doc      document handed to state->init() together with nextDocID
 * @param delTerm  may be NULL; when non-NULL it is passed to addDeleteTerm()
 *                 with state->docID
 * @return the ThreadState bound to the calling thread. On the success path
 *         its isIdle flag is false. If an AbortException was caught, the
 *         state has been forced back to idle before abort(&ae) is called.
 * @throws CL_ERR_AlreadyClosed (via _CLTHROWA) when `closed` is set once the
 *         wait loop exits.
 */
DocumentsWriter::ThreadState* DocumentsWriter::getThreadState(Document* doc, Term* delTerm) {
    SCOPED_LOCK_MUTEX(THIS_LOCK)

  // First, find a thread state.  If this thread already
  // has affinity to a specific ThreadState, use that one
  // again.
  ThreadState* state = NULL;
  if ( threadBindings.find(_LUCENE_CURRTHREADID) == threadBindings.end() ){
    // First time this thread has called us since last flush
    // Scan for the ThreadState with the smallest numThreads value.
    ThreadState* minThreadState = NULL;
    for(size_t i=0;i<threadStates.length;i++) {
      ThreadState* ts = threadStates[i];
      if (minThreadState == NULL || ts->numThreads < minThreadState->numThreads)
        minThreadState = ts;
    }
    // Reuse the least-loaded state when it has no threads attached, or when
    // the number of states has already reached MAX_THREAD_STATE (in which
    // case the state ends up shared between threads).
    if (minThreadState != NULL && (minThreadState->numThreads == 0 || threadStates.length == MAX_THREAD_STATE)) {
      state = minThreadState;
      state->numThreads++;
    } else {
      // Just create a new "private" thread state
      threadStates.resize(1+threadStates.length);
      //fill the new position
      // NOTE(review): numThreads is not incremented on this path; presumably
      // the ThreadState constructor initializes it - confirm.
      state = threadStates.values[threadStates.length-1] = _CLNEW ThreadState(this);
    }
    threadBindings.put(_LUCENE_CURRTHREADID, state);
  }else{
    state = threadBindings[_LUCENE_CURRTHREADID];
  }

  // Next, wait until my thread state is idle (in case
  // it's shared with other threads) and for threads to
  // not be paused nor a flush pending:
  // The loop also keeps waiting while abortCount > 0, and exits
  // unconditionally once `closed` becomes true.
  while(!closed && (!state->isIdle || pauseThreads != 0 || flushPending || abortCount > 0))
    CONDITION_WAIT(THIS_LOCK, THIS_WAIT_CONDITION)

  if (closed)
    _CLTHROWA(CL_ERR_AlreadyClosed, "this IndexWriter is closed");

  // Ask the writer for a segment name only if none is currently set.
  if (segment.empty())
    segment = writer->newSegmentName();

  // Claim the state: other threads bound to it will block in the wait loop
  // above until isIdle is set back to true.
  state->isIdle = false;

  try {
    // Stays false if anything between here and `success = true` throws, so
    // the _CLFINALLY block knows whether to roll back.
    bool success = false;
    try {
      state->init(doc, nextDocID);
      if (delTerm != NULL) {
        addDeleteTerm(delTerm, state->docID);
        state->doFlushAfter = timeToFlushDeletes();
      }
      // Only increment nextDocID & numDocsInRAM on successful init
      nextDocID++;
      numDocsInRAM++;

      // We must at this point commit to flushing to ensure we
      // always get N docs when we flush by doc count, even if
      // > 1 thread is adding documents:
      if (!flushPending && maxBufferedDocs != IndexWriter::DISABLE_AUTO_FLUSH
          && numDocsInRAM >= maxBufferedDocs) {
        flushPending = true;
        state->doFlushAfter = true;
      }

      success = true;
    } _CLFINALLY (
      // Rollback path, taken only when success is still false.
      if (!success) {
        // Forcefully idle this ThreadState:
        state->isIdle = true;
        // Wake threads blocked in the wait loop above.
        CONDITION_NOTIFYALL(THIS_WAIT_CONDITION)
        // Undo a flush request this call may have raised.
        if (state->doFlushAfter) {
          state->doFlushAfter = false;
          flushPending = false;
        }
      }
    )
  } catch (AbortException& ae) {
    // Only AbortException is handled here; execution then continues to the
    // return below unless abort() itself throws (not visible from here).
    abort(&ae);
  }

  return state;
}

在Lucene中,对于同一个索引文件夹,同一时刻只能有一个IndexWriter打开它。打开之后,会在该文件夹中生成文件
write.lock;当其他IndexWriter再试图打开此索引文件夹时,则会报错。

在同一个进程中,对同一个索引文件夹,只能有一个IndexWriter打开它,因而如
果想多线程向此索引文件夹中添加文档,则必须共享一个IndexWriter,而且在以往的实现中,addDocument
函数是同步的(synchronized),也即多线程的索引并不能起到提高性能的效果。
于是为了支持多线程索引,不使IndexWriter成为瓶颈,对于每一个线程都有一个相应的文档集处理对象
(DocumentsWriterThreadState),这样对文档的索引过程可以多线程并行进行,从而增加索引的速度。

getThreadState函数是同步的(Java中为synchronized,上面的CLucene代码中对应SCOPED_LOCK_MUTEX(THIS_LOCK))。
DocumentsWriter有一个成员变量threadBindings,它是一个哈希表(Java中为HashMap),键为当前线程
(Java中为Thread.currentThread(),CLucene中为_LUCENE_CURRTHREADID),值为此线程对应的DocumentsWriterThreadState对象。

如果state.isIdle为false(即该ThreadState正被其他线程使用),则此线程等待,直到它重新变为空闲。此外,当pauseThreads不为0、flushPending为true或abortCount大于0时,线程同样会等待。

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值