Elasticsearch Write Flow Source Code Analysis (Part 2)

Continuing from Part 1 of this source analysis: once the pipeline check has been made, ingest pipeline processing happens here:

        // the pipeline check above is done; the ingest handling is executed here
        // as long as a single request in the bulk carries a pipeline, this branch is taken
        if (hasIndexRequestsWithPipelines) {
            // this method (doExecute) will be called again, but with the bulk requests updated from the ingest node processing
            // and with IngestService.NOOP_PIPELINE_NAME on each request. This ensures that on the second pass through this
            // method, this path is never taken.
            try {
                // check whether the local node is an ingest node
                if (clusterService.localNode().isIngestNode()) {
                    // process the ingest pipelines on this node
                    processBulkIndexIngestRequest(task, bulkRequest, listener);
                } else {
                    // the local node is not an ingest node: forward the request to an ingest node
                    ingestForwarder.forwardIngestRequest(BulkAction.INSTANCE, bulkRequest, listener);
                }
                }
            } catch (Exception e) {
                listener.onFailure(e);
            }
            return;
        }
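
For reference, hasIndexRequestsWithPipelines is computed by scanning every sub-request, and after ingest processing each request's pipeline is reset to IngestService.NOOP_PIPELINE_NAME, which is why the second pass through doExecute never takes this branch again. Below is a minimal standalone sketch of that idea; the Item class and the "_none" constant are stand-ins for DocWriteRequest and the real constant, not the Elasticsearch API:

import java.util.Arrays;
import java.util.List;

public class PipelineCheckSketch {

    // hypothetical stand-in for DocWriteRequest: only the pipeline name matters here
    static class Item {
        final String pipeline;
        Item(String pipeline) { this.pipeline = pipeline; }
    }

    // "_none" mirrors IngestService.NOOP_PIPELINE_NAME; after ingest processing every
    // request carries this value, so the check below is false on the second pass
    static final String NOOP_PIPELINE = "_none";

    static boolean hasIndexRequestsWithPipelines(List<Item> bulk) {
        return bulk.stream()
            .anyMatch(item -> item.pipeline != null && !NOOP_PIPELINE.equals(item.pipeline));
    }

    public static void main(String[] args) {
        // first pass: one item names a pipeline, so the ingest branch is taken
        System.out.println(hasIndexRequestsWithPipelines(
            Arrays.asList(new Item(null), new Item("my-pipeline"))));              // true
        // second pass: every pipeline has been replaced by the noop name
        System.out.println(hasIndexRequestsWithPipelines(
            Arrays.asList(new Item(NOOP_PIPELINE), new Item(NOOP_PIPELINE))));     // false
    }
}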

Next, the code decides whether the target indices can be created automatically:

        // check whether auto index creation needs to be evaluated
        if (needToCheck()) {
            // Attempt to create all the indices that we're going to need during the bulk before we start.
            // Step 1: collect all the indices in the request
            // collect the indices referenced by the requests, filtering out plain deletes
            final Set<String> indices = bulkRequest.requests.stream()
                    // delete requests should not attempt to create the index (if the index does not
                    // exist), unless an external versioning is used
                .filter(request -> request.opType() != DocWriteRequest.OpType.DELETE
                        || request.versionType() == VersionType.EXTERNAL
                        || request.versionType() == VersionType.EXTERNAL_GTE)
                .map(DocWriteRequest::index)
                .collect(Collectors.toSet());
            /* Step 2: filter that to indices that don't exist and we can create. At the same time build a map of indices we can't create
             * that we'll use when we try to run the requests. */
            // map of indices in the request that cannot be created
            final Map<String, IndexNotFoundException> indicesThatCannotBeCreated = new HashMap<>();
            Set<String> autoCreateIndices = new HashSet<>();
            ClusterState state = clusterService.state();
            for (String index : indices) {
                boolean shouldAutoCreate;
                try {
                    // decide whether this index should be auto-created
                    shouldAutoCreate = shouldAutoCreate(index, state);
                } catch (IndexNotFoundException e) {
                    shouldAutoCreate = false;
                    indicesThatCannotBeCreated.put(index, e);
                }
                if (shouldAutoCreate) {
                    autoCreateIndices.add(index);
                }
            }
            // Step 3: create all the indices that are missing, if there are any missing. start the bulk after all the creates come back.
            // nothing needs creating: run the bulk right away, passing along the map of indices that cannot be created
            if (autoCreateIndices.isEmpty()) {
                executeBulk(task, bulkRequest, startTime, listener, responses, indicesThatCannotBeCreated);
            } else {
                // an AtomicInteger used by the create-index listeners as a countdown
                final AtomicInteger counter = new AtomicInteger(autoCreateIndices.size());
                for (String index : autoCreateIndices) {
                    createIndex(index, bulkRequest.timeout(), new ActionListener<CreateIndexResponse>() {
                        @Override
                        public void onResponse(CreateIndexResponse result) {
                            // once every index has been created, run the bulk on the WRITE thread pool
                            // (note that the branch above does not go through the pool)
                            if (counter.decrementAndGet() == 0) {
                                threadPool.executor(ThreadPool.Names.WRITE).execute(
                                    () -> executeBulk(task, bulkRequest, startTime, listener, responses, indicesThatCannotBeCreated));
                            }
                        }

                        @Override
                        public void onFailure(Exception e) {
                            if (!(ExceptionsHelper.unwrapCause(e) instanceof ResourceAlreadyExistsException)) {
                                // fail all requests involving this index, if create didn't work
                                for (int i = 0; i < bulkRequest.requests.size(); i++) {
                                    DocWriteRequest<?> request = bulkRequest.requests.get(i);
                                    // mark the item as failed if its target is the index whose creation failed
                                    if (request != null && setResponseFailureIfIndexMatches(responses, i, request, index, e)) {
                                        // null out the request so it is never processed again
                                        bulkRequest.requests.set(i, null);
                                    }
                                    }
                                }
                            }
                            if (counter.decrementAndGet() == 0) {
                                // all creations have finished: run the bulk, attaching the create failure as suppressed
                                executeBulk(task, bulkRequest, startTime, ActionListener.wrap(listener::onResponse, inner -> {
                                    inner.addSuppressed(e);
                                    listener.onFailure(inner);
                                }), responses, indicesThatCannotBeCreated);
                            }
                        }
                    });
                }
            }
        } else {
            executeBulk(task, bulkRequest, startTime, listener, responses, emptyMap());
        }
    }
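
The AtomicInteger in the branch above is a simple completion latch: it starts at the number of indices to create, every create-index callback (success or failure) decrements it, and only the callback that brings it to zero runs executeBulk, so the bulk starts exactly once. Here is a standalone sketch of the pattern, with CompletableFuture standing in for the asynchronous createIndex call; the names are assumptions, not the Elasticsearch classes:

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.atomic.AtomicInteger;

public class CreateThenBulkSketch {

    public static void main(String[] args) {
        List<String> autoCreateIndices = Arrays.asList("logs-2019.01.01", "metrics-2019.01.01");
        // one tick per index to create
        AtomicInteger counter = new AtomicInteger(autoCreateIndices.size());
        List<CompletableFuture<Void>> pending = new ArrayList<>();

        for (String index : autoCreateIndices) {
            // stand-in for createIndex(index, timeout, listener): any async completion works
            pending.add(CompletableFuture
                .runAsync(() -> System.out.println("created " + index))
                .whenComplete((ok, err) -> {
                    // decrementAndGet() == 0 is true for exactly one callback, the last one,
                    // so the "bulk" runs once no matter how the creations interleave
                    if (counter.decrementAndGet() == 0) {
                        System.out.println("all index creations finished -> executeBulk(...)");
                    }
                }));
        }

        // only needed in this demo so the JVM does not exit before the callbacks run
        pending.forEach(CompletableFuture::join);
    }
}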

The executeBulk method then invokes org.elasticsearch.action.bulk.TransportBulkAction.BulkOperation#doRun, which runs as a task:

        // the bulk operation, executed as a task on a thread pool
        @Override
        protected void doRun() throws Exception {
            // obtain the observed cluster state through the pre-built observer
            final ClusterState clusterState = observer.setAndGetObservedState();
            // do nothing if the cluster is blocked
            if (handleBlockExceptions(clusterState)) {
                return;
            }

            // helper that lazily resolves the concrete indices referenced by the requests
            final ConcreteIndices concreteIndices = new ConcreteIndices(clusterState, indexNameExpressionResolver);
            // cluster metadata
            MetaData metaData = clusterState.metaData();
            for (int i = 0; i < bulkRequest.requests.size(); i++) {
                DocWriteRequest<?> docWriteRequest = bulkRequest.requests.get(i);
                //the request can only be null because we set it to null in the previous step, so it gets ignored
                if (docWriteRequest == null) {
                    continue;
                }
                // if the index is unavailable, record a failure for this item and skip it
                if (addFailureIfIndexIsUnavailable(docWriteRequest, i, concreteIndices, metaData)) {
                    continue;
                }
                Index concreteIndex = concreteIndices.resolveIfAbsent(docWriteRequest);
                try {
                    switch (docWriteRequest.opType()) {
                        case CREATE:
                        case INDEX:
                            IndexRequest indexRequest = (IndexRequest) docWriteRequest;
                            // index metadata for the concrete index
                            final IndexMetaData indexMetaData = metaData.index(concreteIndex);
                            // mapping metadata (or the default mapping)
                            MappingMetaData mappingMd = indexMetaData.mappingOrDefault();
                            Version indexCreated = indexMetaData.getCreationVersion();
                            indexRequest.resolveRouting(metaData);
                            // resolve routing and generate the id if it is missing
                            indexRequest.process(indexCreated, mappingMd, concreteIndex.getName());
                            break;
                        case UPDATE:
                            // update: resolve and validate the routing
                            TransportUpdateAction.resolveAndValidateRouting(metaData, concreteIndex.getName(),
                                (UpdateRequest) docWriteRequest);
                            break;
                        case DELETE:
                            docWriteRequest.routing(metaData.resolveWriteIndexRouting(docWriteRequest.routing(), docWriteRequest.index()));
                            // check if routing is required, if so, throw error if routing wasn't specified
                            if (docWriteRequest.routing() == null && metaData.routingRequired(concreteIndex.getName())) {
                                throw new RoutingMissingException(concreteIndex.getName(), docWriteRequest.type(), docWriteRequest.id());
                            }
                            break;
                        default: throw new AssertionError("request type not supported: [" + docWriteRequest.opType() + "]");
                    }
                } catch (ElasticsearchParseException | IllegalArgumentException | RoutingMissingException e) {
                    BulkItemResponse.Failure failure = new BulkItemResponse.Failure(concreteIndex.getName(), docWriteRequest.type(),
                        docWriteRequest.id(), e);
                    BulkItemResponse bulkItemResponse = new BulkItemResponse(i, docWriteRequest.opType(), failure);
                    responses.set(i, bulkItemResponse);
                    // make sure the request gets never processed again
                    bulkRequest.requests.set(i, null);
                }
            }

            // first, go over all the requests and create a ShardId -> Operations mapping,
            // grouping the items that will execute on the same shard
            Map<ShardId, List<BulkItemRequest>> requestsByShard = new HashMap<>();
            for (int i = 0; i < bulkRequest.requests.size(); i++) {
                DocWriteRequest<?> request = bulkRequest.requests.get(i);
                if (request == null) {
                    continue;
                }
                String concreteIndex = concreteIndices.getConcreteIndex(request.index()).getName();
                ShardId shardId = clusterService.operationRouting().indexShards(clusterState, concreteIndex, request.id(),
                    request.routing()).shardId();
                List<BulkItemRequest> shardRequests = requestsByShard.computeIfAbsent(shardId, shard -> new ArrayList<>());
                shardRequests.add(new BulkItemRequest(i, request));
            }

            // no shard-level requests remain: respond immediately with the per-item results collected so far
            if (requestsByShard.isEmpty()) {
                listener.onResponse(new BulkResponse(responses.toArray(new BulkItemResponse[responses.length()]),
                    buildTookInMillis(startTimeNanos)));
                return;
            }

            // another AtomicInteger countdown, one tick per target shard
            final AtomicInteger counter = new AtomicInteger(requestsByShard.size());
            String nodeId = clusterService.localNode().getId();
            for (Map.Entry<ShardId, List<BulkItemRequest>> entry : requestsByShard.entrySet()) {
                final ShardId shardId = entry.getKey();
                final List<BulkItemRequest> requests = entry.getValue();
                BulkShardRequest bulkShardRequest = new BulkShardRequest(shardId, bulkRequest.getRefreshPolicy(),
                        requests.toArray(new BulkItemRequest[requests.size()]));
                // required number of active shards
                bulkShardRequest.waitForActiveShards(bulkRequest.waitForActiveShards());
                // timeout
                bulkShardRequest.timeout(bulkRequest.timeout());
                // parent task, if any
                if (task != null) {
                    bulkShardRequest.setParentTask(nodeId, task.getId());
                }
                // execute the request on the shard;
                // this goes through org.elasticsearch.action.support.replication.TransportReplicationAction#doExecute
                shardBulkAction.execute(bulkShardRequest, new ActionListener<BulkShardResponse>() {
                    @Override
                    public void onResponse(BulkShardResponse bulkShardResponse) {
                        for (BulkItemResponse bulkItemResponse : bulkShardResponse.getResponses()) {
                            // we may have no response if item failed
                            if (bulkItemResponse.getResponse() != null) {
                                bulkItemResponse.getResponse().setShardInfo(bulkShardResponse.getShardInfo());
                            }
                            // record the per-item response
                            responses.set(bulkItemResponse.getItemId(), bulkItemResponse);
                        }
                        // all shard-level requests have returned
                        if (counter.decrementAndGet() == 0) {
                            finishHim();
                        }
                    }

                    @Override
                    public void onFailure(Exception e) {
                        // create failures for all relevant requests
                        for (BulkItemRequest request : requests) {
                            final String indexName = concreteIndices.getConcreteIndex(request.index()).getName();
                            DocWriteRequest<?> docWriteRequest = request.request();
                            responses.set(request.id(), new BulkItemResponse(request.id(), docWriteRequest.opType(),
                                    new BulkItemResponse.Failure(indexName, docWriteRequest.type(), docWriteRequest.id(), e)));
                        }
                        // all shard-level requests have returned
                        if (counter.decrementAndGet() == 0) {
                            finishHim();
                        }
                    }

                    // complete the overall bulk request
                    private void finishHim() {
                        listener.onResponse(new BulkResponse(responses.toArray(new BulkItemResponse[responses.length()]),
                            buildTookInMillis(startTimeNanos)));
                    }
                });
            }
        }
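
The routing step above is where each item is assigned to a primary shard: the routing value (the document id by default) is hashed and taken modulo the number of primary shards, and items that land on the same shard are collected into one BulkShardRequest. Below is a simplified standalone sketch of that grouping; the real OperationRouting hashes the routing value with Murmur3, so String.hashCode() here is only an illustration, and the class and variable names are assumptions:

import java.util.ArrayList;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;

public class GroupByShardSketch {

    public static void main(String[] args) {
        int numberOfPrimaryShards = 5;

        // document id -> routing value; like Elasticsearch, fall back to the id when
        // no explicit routing is given
        Map<String, String> docs = new LinkedHashMap<>();
        docs.put("1", "1");
        docs.put("2", "user-42");
        docs.put("3", "3");

        // shard number -> ids of the items that will be sent to that shard
        Map<Integer, List<String>> requestsByShard = new HashMap<>();
        for (Map.Entry<String, String> doc : docs.entrySet()) {
            // simplified form of shard_num = hash(_routing) % number_of_primary_shards
            int shard = Math.floorMod(doc.getValue().hashCode(), numberOfPrimaryShards);
            requestsByShard.computeIfAbsent(shard, s -> new ArrayList<>()).add(doc.getKey());
        }

        // one BulkShardRequest would be built and sent per entry of this map
        requestsByShard.forEach((shard, ids) -> System.out.println("shard " + shard + " -> " + ids));
    }
}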

Requests that target the same shard are grouped together and then executed through org.elasticsearch.action.support.replication.TransportReplicationAction.ReroutePhase#doRun:

@Override
        protected void doRun() {
            setPhase(task, "routing");
            // get the observed cluster state
            final ClusterState state = observer.setAndGetObservedState();
            // resolve the concrete index for the request
            final String concreteIndex = concreteIndex(state, request);
            // check whether a cluster block applies to this index
            final ClusterBlockException blockException = blockExceptions(state, concreteIndex);
            if (blockException != null) {
                if (blockException.retryable()) {
                    logger.trace("cluster is blocked, scheduling a retry", blockException);
                    retry(blockException);
                } else {
                    finishAsFailed(blockException);
                }
            } else {
                // request does not have a shardId yet, we need to pass the concrete index to resolve shardId
                // index metadata for the concrete index
                final IndexMetaData indexMetaData = state.metaData().index(concreteIndex);
                // if the index does not exist yet, retry against a newer cluster state
                if (indexMetaData == null) {
                    retry(new IndexNotFoundException(concreteIndex));
                    return;
                }
                // fail if the index is closed
                if (indexMetaData.getState() == IndexMetaData.State.CLOSE) {
                    throw new IndexClosedException(indexMetaData.getIndex());
                }

                // resolve all derived request fields, so we can route and apply it
                // this also resolves the number of active shards required before execution
                resolveRequest(indexMetaData, request);
                assert request.waitForActiveShards() != ActiveShardCount.DEFAULT :
                    "request waitForActiveShards must be set in resolveRequest";

                // look up the routing entry of the primary shard
                final ShardRouting primary = primary(state);
                if (retryIfUnavailable(state, primary)) {
                    return;
                }
                // the node that currently holds the primary shard
                final DiscoveryNode node = state.nodes().get(primary.currentNodeId());
                // if the primary is on the local node, perform the action locally
                if (primary.currentNodeId().equals(state.nodes().getLocalNodeId())) {
                    performLocalAction(state, primary, node, indexMetaData);
                } else {
                    // if the primary is not on the local node, send the action to the remote node
                    performRemoteAction(state, primary, node);
                }
            }
        }
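
The end of ReroutePhase#doRun boils down to comparing the node that holds the primary shard with the local node id: if they are equal the primary phase runs in-process, otherwise the request is sent over the transport layer to that node. A minimal sketch of that decision (the names are assumptions, not the Elasticsearch API):

public class PrimaryRoutingSketch {

    // stand-in for the comparison between primary.currentNodeId() and
    // state.nodes().getLocalNodeId() at the end of ReroutePhase#doRun
    static void route(String primaryNodeId, String localNodeId) {
        if (primaryNodeId.equals(localNodeId)) {
            System.out.println("primary is local -> performLocalAction(...)");
        } else {
            System.out.println("primary is on " + primaryNodeId + " -> performRemoteAction(...)");
        }
    }

    public static void main(String[] args) {
        route("node-1", "node-1"); // local execution path
        route("node-2", "node-1"); // remote execution path
    }
}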
