solr源码分析--addDocument

最新推荐文章于 2018-08-03 10:30:26 发布

ferraborghini

最新推荐文章于 2018-08-03 10:30:26 发布

阅读量1k

点赞数

分类专栏：大数据检索　文章标签：solr 源码

本文链接：https://blog.csdn.net/u011426341/article/details/78937173

版权

马上2017就要结束了，今年研究了不少solr相关的问题，年末的时候还是做一个总结吧。明年说不定就开始搞ES了。

一、数据流传递过程

添加数据的API以POST请求的形式发送。此处以SolrJ导入数据为例，服务端接收数据时用到的是JavabinLoader。前面SolrDispatchFilter的分发流程暂时跳过，直接看handleRequestBody的处理过程。
ContentStreamHandlerBase.java

  /**
   * Handles an update request: creates the update processor for the request, runs every
   * content stream through a loader, and then applies any commit/rollback requested through
   * the request parameters. The processor is always finished, even when an exception escapes.
   *
   * @param req the incoming update request
   * @param rsp the response associated with the request
   * @throws Exception if loading, commit or rollback fails; a {@code SolrException} with
   *     {@code BAD_REQUEST} is thrown when the request has no content streams and neither
   *     {@code handleCommit} nor {@code handleRollback} returns {@code true}
   */
  @Override
  public void handleRequestBody(SolrQueryRequest req, SolrQueryResponse rsp) throws Exception {
    final SolrParams params = req.getParams();
    final UpdateRequestProcessor processor =
        req.getCore().getUpdateProcessorChain(params).createProcessor(req, rsp);

    try {
      // In the SolrJ (javabin) import scenario discussed here, this loader is a JavabinLoader.
      final ContentStreamLoader documentLoader = newLoader(req, processor);
      final Iterable<ContentStream> streams = req.getContentStreams();

      if (streams != null) {
        // The loader does the actual processing of each data stream.
        for (ContentStream stream : streams) {
          documentLoader.load(req, rsp, stream, processor);
        }

        // Perhaps commit (or roll back) from the parameters.
        RequestHandlerUtils.handleCommit(req, processor, params, false);
        RequestHandlerUtils.handleRollback(req, processor, params, false);
      } else {
        // No body at all: the request is only valid if it is a commit or a rollback.
        // Short-circuit keeps the original order: rollback is only tried when commit was not handled.
        final boolean handled =
            RequestHandlerUtils.handleCommit(req, processor, params, false)
                || RequestHandlerUtils.handleRollback(req, processor, params, false);
        if (!handled) {
          throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "missing content stream");
        }
      }
    } finally {
      // Finish the request regardless of the outcome above.
      processor.finish();
    }
  }

这些操作只算是数据的预处理。

public class JavabinLoader extends ContentStreamLoader {
   

  /**
   * Opens the content stream and passes it to {@code parseAndLoadDocs}; the stream is closed
   * before this method returns, whether or not parsing succeeded.
   *
   * @param req the update request being served
   * @param rsp the response associated with the request
   * @param stream the content stream whose underlying input stream is read
   * @param processor the update processor that receives the decoded commands
   * @throws Exception if the stream cannot be opened, parsed, or closed
   */
  @Override
  public void load(SolrQueryRequest req, SolrQueryResponse rsp, ContentStream stream, UpdateRequestProcessor processor) throws Exception {
    // try-with-resources skips close() for a null resource and, unlike a manual
    // finally { is.close(); }, keeps the parsing exception as the primary one when
    // close() fails too (the close failure is attached as a suppressed exception).
    try (InputStream is = stream.getStream()) {
      parseAndLoadDocs(req, rsp, is, processor);
    }
  }

  /**
   * Decodes javabin update requests from {@code stream} until end of stream and feeds them to
   * {@code processor}: each decoded document is sent through {@code processAdd}, and any
   * delete-by-id / delete-by-query entries of a decoded request are passed to {@code delete}.
   *
   * @param req the update request being served
   * @param rsp the response associated with the request (not used in this method)
   * @param stream the raw input stream carrying the javabin payload
   * @param processor the update processor that receives add/commit/rollback commands
   * @throws IOException if decoding the stream fails for a reason other than reaching its end
   */
  private void parseAndLoadDocs(final SolrQueryRequest req, SolrQueryResponse rsp, InputStream stream,
                                final UpdateRequestProcessor processor) throws IOException {
    // Anonymous handler defining how each decoded document is processed; see update().
    JavaBinUpdateRequestCodec.StreamingUpdateHandler handler = new JavaBinUpdateRequestCodec.StreamingUpdateHandler() {
      // Created on the first document and then reused for the following ones.
      private AddUpdateCommand addCmd = null;

      @Override
      public void update(SolrInputDocument document, UpdateRequest updateRequest, Integer commitWithin, Boolean overwrite) {
        // SolrJ may submit many documents in one request, but they are handled here
        // serially, one document per call.
        if (document == null) {
          // Perhaps commit from the parameters
          try {
            RequestHandlerUtils.handleCommit(req, processor, updateRequest.getParams(), false);
            RequestHandlerUtils.handleRollback(req, processor, updateRequest.getParams(), false);
          } catch (IOException e) {
            // Keep the cause so the underlying I/O failure is not lost.
            throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "ERROR handling commit/rollback", e);
          }
          return;
        }
        if (addCmd == null) {
          addCmd = getAddCommand(req, updateRequest.getParams());
        }
        addCmd.solrDoc = document;
        if (commitWithin != null) {
          addCmd.commitWithin = commitWithin;
        }
        if (overwrite != null) {
          addCmd.overwrite = overwrite;
        }

        if (updateRequest.isLastDocInBatch()) {
          // this is a hint to downstream code that indicates we've sent the last doc in a batch
          addCmd.isLastDocInBatch = true;
        }

        try {
          // Hand the populated add command to the processor to perform the add.
          processor.processAdd(addCmd);
          addCmd.clear();
        } catch (IOException e) {
          throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "ERROR adding document " + document, e);
        }
      }
    };
    FastInputStream in = FastInputStream.wrap(stream);
    for (; ; ) {
      UpdateRequest update;
      try {
        // Decoding ends up invoking handler.update(...) above for the documents in the stream.
        update = new JavaBinUpdateRequestCodec().unmarshal(in, handler);
      } catch (EOFException e) {
        break; // this is expected
      }
      if (update.getDeleteByIdMap() != null || update.getDeleteQuery() != null) {
        delete(req, update, processor);
      }
    }
  }
......
......
}

　　接下来具体看一下JavaBinUpdateRequestCodec的unmarshal方法的处理逻辑：它主要是定义数据流的读取方式，然后对流中读到的doc调用上面定义的handler。

  public UpdateRequest unmarshal(InputStream is, final StreamingUpdateHandler handler) throws IOException {
    final UpdateRequest updateRequest = new UpdateRequest();
    List<List<NamedList>> doclist;
    List<Entry<SolrInputDocument,Map<Object,Object>>>  docMap;
    List<String> delById;
    Map<String,Map<String,Object>> delByIdMap;
    List<String> delByQ;
    final NamedList[] namedList = new NamedList[1];
    JavaBinCodec codec = new JavaBinCodec() { // 定义url请求传入的stream的读取方式，并遍历stream中的每一个doc.

      // NOTE: this only works because this is an anonymous inner class 
      // which will only ever be used on a single stream -- if this class 
      // is ever refactored, this will not work.
      private boolean seenOuterMostDocIterator = false;

      @Override
      public NamedList readNamedList(DataInputInputStream dis) throws IOException {
        int sz = readSize(dis);
        NamedList nl = new NamedList();
        if (namedList[0] == null) {
          namedList[0] = nl;
        }
        for (int i = 0; i < sz; i++) {
          String name = (String) readVal(dis);
          Object val = readVal(dis);
          nl.add(name, val);
        }
        return nl;
      }

      @Override
      public List readIterator(DataInputInputStream fis) throws IOException {
        // default behavior for reading any regular Iterator in the stream
        if (seenOuterMostDocIterator) return super.readIterator(fis);

        // special treatment for first outermost Iterator 
        // (the list of documents)
        seenOuterMostDocIterator = true;
        return readOuterMostDocIterator(fis);
      }

      private List readOuterMostDocIterator(DataInputInputStream fis) throws IOException {
        NamedList params = (NamedList) namedList[0].get("params");
        updateRequest.setParams(new ModifiableSolrParams(SolrParams.toSolrParams(params)));
        if (handler == null) return super.readIterator(fis);
        Integer commitWithin = null;