How Hive Automatically Sets Reduce Task Parallelism: The Execution Flow

Parameter settings

The Hive parameter hive.tez.auto.reducer.parallelism controls whether the reduce task parallelism may be adjusted automatically; it defaults to false.
The automatically chosen parallelism is bounded between defaultReduceNum * 0.25 and defaultReduceNum * 2 (the defaults of hive.tez.min.partition.factor and hive.tez.max.partition.factor). The estimated reducer count is obtained by dividing the total amount of data that this stage's reducers need to shuffle by hive.exec.reducers.bytes.per.reducer.
The upstream stage still writes its output to local disk partitioned by the default number of reducers. For example, if the default reducer count is 10, the upstream tasks still split their shuffle output into 10 partitions. When each upstream task finishes, it reports the size of the shuffle data it produced.
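The arithmetic is easier to follow with concrete numbers. The sketch below is illustration only, not Hive source; the byte sizes and the default reducer count of 10 are assumed values:

// Illustration only: estimate the reduce parallelism from the reported shuffle size.
public class ReducerEstimateSketch {
  public static void main(String[] args) {
    long bytesPerReducer   = 256L * 1024 * 1024;   // hive.exec.reducers.bytes.per.reducer
    long totalShuffleBytes = 1024L * 1024 * 1024;  // total shuffle output reported upstream
    int defaultReducers    = 10;                   // parallelism chosen by the original plan

    // Estimated parallelism = total shuffle bytes / desired bytes per reducer.
    int estimate = (int) Math.ceil((double) totalShuffleBytes / bytesPerReducer); // 4

    // Bound the result between defaultReducers * 0.25 and defaultReducers * 2
    // (hive.tez.min.partition.factor / hive.tez.max.partition.factor).
    int min = Math.max(1, (int) (defaultReducers * 0.25)); // 2
    int max = (int) (defaultReducers * 2.0);               // 20
    int finalReducers = Math.min(max, Math.max(min, estimate));

    System.out.println("estimated reduce parallelism = " + finalReducers); // prints 4
  }
}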

ShuffleVertexManager computes the target partition (task) count from the reported shuffle data size, bounded by the configured maximum and minimum partition counts.
For example, if each source vertex task produces 10 output partitions, the original default parallelism of the target vertex is 10. If the parallelism is now changed to 4, every task except the last consumes 3 output partitions, and the last task consumes the remaining 1.
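The same grouping can be reproduced with a small standalone sketch. The numbers (10 partitions, a base range of 3 partitions per task) are the assumed values of the example above, not values read from Tez:

// Illustration only: group 10 source output partitions into target tasks,
// 3 partitions per task, with the last task taking the remainder.
public class PartitionGroupingSketch {
  public static void main(String[] args) {
    int numPartitions = 10;      // one partition per original target task
    int basePartitionRange = 3;  // partitions consumed by each reconfigured task

    int fullTasks = numPartitions / basePartitionRange;                 // 3 tasks of 3 partitions
    int remainder = numPartitions % basePartitionRange;                 // 1 partition left over
    int finalParallelism = (remainder > 0) ? fullTasks + 1 : fullTasks; // 4 tasks

    for (int task = 0; task < finalParallelism; task++) {
      int start = task * basePartitionRange;
      int end = Math.min(start + basePartitionRange, numPartitions) - 1;
      System.out.println("task " + task + " fetches partitions " + start + " to " + end);
    }
    // task 0 -> 0..2, task 1 -> 3..5, task 2 -> 6..8, task 3 -> 9
  }
}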

GenTezUtils

createReduceWork

if (reduceWork.isAutoReduceParallelism()) {
      edgeProp =
          new TezEdgeProperty(context.conf, edgeType, true, reduceWork.isSlowStart(),
              reduceWork.getMinReduceTasks(), reduceWork.getMaxReduceTasks(), bytesPerReducer);
}

DagUtils#setupAutoReducerParallelism

private void setupAutoReducerParallelism(TezEdgeProperty edgeProp, Vertex v)
    throws IOException {
    if (edgeProp.isAutoReduce()) {
      Configuration pluginConf = new Configuration(false);
      VertexManagerPluginDescriptor desc =
          VertexManagerPluginDescriptor.create(ShuffleVertexManager.class.getName());
      pluginConf.setBoolean(
          ShuffleVertexManager.TEZ_SHUFFLE_VERTEX_MANAGER_ENABLE_AUTO_PARALLEL, true);
      pluginConf.setInt(ShuffleVertexManager.TEZ_SHUFFLE_VERTEX_MANAGER_MIN_TASK_PARALLELISM,
          edgeProp.getMinReducer());
      pluginConf.setLong(
          ShuffleVertexManager.TEZ_SHUFFLE_VERTEX_MANAGER_DESIRED_TASK_INPUT_SIZE,
          edgeProp.getInputSizePerReducer());
      UserPayload payload = TezUtils.createUserPayloadFromConf(pluginConf);
      desc.setUserPayload(payload);
      v.setVertexManagerPlugin(desc);
    }
  }

ShuffleVertexManager#initConfiguration

initConfiguration builds a ShuffleVertexManagerBaseConfig object from the configuration.

ShuffleVertexManagerBaseConfig initConfiguration() {
    float slowStartMinFraction = conf.getFloat(
        TEZ_SHUFFLE_VERTEX_MANAGER_MIN_SRC_FRACTION,
        TEZ_SHUFFLE_VERTEX_MANAGER_MIN_SRC_FRACTION_DEFAULT);

    mgrConfig = new ShuffleVertexManagerConfig(
        conf.getBoolean(
            TEZ_SHUFFLE_VERTEX_MANAGER_ENABLE_AUTO_PARALLEL,
            TEZ_SHUFFLE_VERTEX_MANAGER_ENABLE_AUTO_PARALLEL_DEFAULT),
        conf.getLong(
            TEZ_SHUFFLE_VERTEX_MANAGER_DESIRED_TASK_INPUT_SIZE,
            TEZ_SHUFFLE_VERTEX_MANAGER_DESIRED_TASK_INPUT_SIZE_DEFAULT),
        slowStartMinFraction,
        conf.getFloat(
            TEZ_SHUFFLE_VERTEX_MANAGER_MAX_SRC_FRACTION,
            Math.max(slowStartMinFraction,
            TEZ_SHUFFLE_VERTEX_MANAGER_MAX_SRC_FRACTION_DEFAULT)),
        Math.max(1, conf
            .getInt(TEZ_SHUFFLE_VERTEX_MANAGER_MIN_TASK_PARALLELISM,
            TEZ_SHUFFLE_VERTEX_MANAGER_MIN_TASK_PARALLELISM_DEFAULT)));
    return mgrConfig;
  }

ShuffleVertexManagerBase#processPendingTasks

This method is called from onVertexStarted, onSourceTaskCompleted, and handleVertexStateUpdate.

private void processPendingTasks(TaskAttemptIdentifier completedSourceAttempt) {
  
    if(config.isAutoParallelismEnabled()) {
      if (!determineParallelismAndApply()) {
        //try to determine parallelism later when more info is available.
        return;
      }
    }
    processPendingTasks();
    schedulePendingTasks(completedSourceAttempt);
  }

private boolean determineParallelismAndApply() {
    return determineParallelismAndApply(
        getMinSourceVertexCompletedTaskFraction());
  }

@VisibleForTesting
boolean determineParallelismAndApply(
    float minSourceVertexCompletedTaskFraction) {
    // Omit some lines
    ReconfigVertexParams params = computeRouting();
    if (params != null) {
      reconfigVertex(params.getFinalParallelism());
      updatePendingTasks();
      postReconfigVertex();
    }
    // Omit some lines
    return true;
  }

computeRouting

computeRouting determines the final task parallelism and assigns an EdgeManagerPluginDescriptor to each SourceVertexInfo object.

ReconfigVertexParams computeRouting() {
// Omit some lines
    EdgeManagerPluginDescriptor descriptor =
        EdgeManagerPluginDescriptor.create(CustomShuffleEdgeManager.class.getName());
    descriptor.setUserPayload(edgeManagerConfig.toUserPayload());

    Iterable<Map.Entry<String, SourceVertexInfo>> bipartiteItr = getBipartiteInfo();
    for(Map.Entry<String, SourceVertexInfo> entry : bipartiteItr) {
      entry.getValue().newDescriptor = descriptor;
    }
    ReconfigVertexParams params =
        new ReconfigVertexParams(finalTaskParallelism, null);
    return params;
  }

reconfigVertex

private void reconfigVertex(final int finalTaskParallelism) {
    Map<String, EdgeProperty> edgeProperties =
        new HashMap<String, EdgeProperty>(bipartiteSources);
   // Omit some lines 
    getContext().reconfigureVertex(finalTaskParallelism, null, edgeProperties);
  }

VertexManager$VertexManagerPluginContextImpl#reconfigureVertex

 @Override
public synchronized void reconfigureVertex(int parallelism, VertexLocationHint vertexLocationHint,
     Map<String, EdgeProperty> sourceEdgeProperties,
     Map<String, InputSpecUpdate> rootInputSpecUpdate) {
   checkAndThrowIfDone();
   try {
     managedVertex.reconfigureVertex(parallelism, vertexLocationHint, sourceEdgeProperties,
         rootInputSpecUpdate);
   } catch (AMUserCodeException e) {
     throw new TezUncheckedException(e);
   }
 }

VertexImpl#reconfigureVertex

 @Override
  public void reconfigureVertex(int parallelism,
      @Nullable VertexLocationHint locationHint,
      @Nullable Map<String, EdgeProperty> sourceEdgeProperties) throws AMUserCodeException {
    setParallelismWrapper(parallelism, locationHint, sourceEdgeProperties, null, true);
  }

VertexImpl#setParallelismWrapper

setParallelismWrapper re-adjusts the number of reduce tasks.

private void setParallelismWrapper(int parallelism, VertexLocationHint vertexLocationHint,
      Map<String, EdgeProperty> sourceEdgeProperties,
      Map<String, InputSpecUpdate> rootInputSpecUpdates,
      boolean fromVertexManager) throws AMUserCodeException {
    // Omit some lines
    this.setParallelismCalledFlag = true;
    try {
      // Input initializer/Vertex Manager/1-1 split expected to set parallelism.
      if (numTasks == -1) {
        stateChangeNotifier.stateChanged(vertexId,
            new VertexStateUpdateParallelismUpdated(vertexName, numTasks, oldNumTasks));
        this.createTasks();
        setVertexLocationHint(vertexLocationHint);
        LOG.info("Vertex " + getLogIdentifier() +
            " parallelism set to " + parallelism);
        if (canInitVertex()) {
          getEventHandler().handle(new VertexEvent(getVertexId(), VertexEventType.V_READY_TO_INIT));
        }
      } else {
        if (parallelism > numTasks) {
          addTasks(parallelism);
        } else if (parallelism < numTasks) {
          removeTasks(parallelism);
        }
        // notify listeners
        stateChangeNotifier.stateChanged(vertexId,
            new VertexStateUpdateParallelismUpdated(vertexName, numTasks, oldNumTasks));
      }
    } finally {
      writeLock.unlock();
    }
  }

postReconfigVertex

postReconfigVertex re-maps the source vertex outputs onto the tasks of the reconfigured target vertex.
For example, if the target vertex's original parallelism was 10, each source vertex task splits its output into 10 partitions, one per target task.
After the target vertex's parallelism is reduced to 4, each of the first three tasks consumes 3 output partitions and the last task consumes the remaining 1 (see the sketch after configureTargetMapping below).

@Override
  void postReconfigVertex() {
      configureTargetMapping(pendingTasks.size());
  }

configureTargetMapping records, in the two-dimensional array targetIndexes, which output partitions each task will consume.
The first dimension has one entry per task; targetIndexes[taskId] is the array of partition indices that task will fetch.

private void configureTargetMapping(int tasks) {
    targetIndexes = new int[tasks][];
    for (int idx = 0; idx < tasks; ++idx) {
      int partitionRange = basePartitionRange;
      if (idx == (tasks - 1)) {
        partitionRange = ((remainderRangeForLastShuffler > 0)
            ? remainderRangeForLastShuffler : basePartitionRange);
      }
      // skip the basePartitionRange per destination task
      targetIndexes[idx] = createIndices(partitionRange, idx, basePartitionRange);
      if (LOG.isDebugEnabled()) {
        LOG.debug("targetIdx[{}] to {}", idx,
            Arrays.toString(targetIndexes[idx]));
      }
    }
  }
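
For the running example (4 tasks over 10 partitions, basePartitionRange = 3, remainderRangeForLastShuffler = 1), the resulting targetIndexes layout can be reproduced with the standalone sketch below. It is illustration only; the local createIndices helper is assumed to start each task's range at taskIndex * basePartitionRange, matching the mapping described above:

import java.util.Arrays;

// Illustration only: rebuild the targetIndexes layout for 4 tasks over 10 partitions.
public class TargetMappingSketch {
  static int[] createIndices(int partitionRange, int taskIndex, int offsetPerTask) {
    int startIndex = taskIndex * offsetPerTask;
    int[] indices = new int[partitionRange];
    for (int i = 0; i < partitionRange; i++) {
      indices[i] = startIndex + i;
    }
    return indices;
  }

  public static void main(String[] args) {
    int tasks = 4;
    int basePartitionRange = 3;
    int remainderRangeForLastShuffler = 1;

    int[][] targetIndexes = new int[tasks][];
    for (int idx = 0; idx < tasks; idx++) {
      int partitionRange = (idx == tasks - 1 && remainderRangeForLastShuffler > 0)
          ? remainderRangeForLastShuffler : basePartitionRange;
      targetIndexes[idx] = createIndices(partitionRange, idx, basePartitionRange);
      System.out.println("targetIndexes[" + idx + "] = " + Arrays.toString(targetIndexes[idx]));
    }
    // Prints: [0, 1, 2], [3, 4, 5], [6, 7, 8], [9]
  }
}

Each array lists the partition indices that the corresponding task will fetch from every source task's output.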

TaskAttemptImpl

When a task attempt succeeds, it sends a VertexEventRouteEvent to its vertex.

List<TezEvent> tezEvents = taFinishedEvent.getTAGeneratedEvents();
if (tezEvents != null && !tezEvents.isEmpty()) {
  ta.sendEvent(new VertexEventRouteEvent(ta.getVertexID(), tezEvents));
}

VertexImpl

private static class RouteEventTransition  implements
  MultipleArcTransition<VertexImpl, VertexEvent, VertexState> {
    @Override
    public VertexState transition(VertexImpl vertex, VertexEvent event) {
      VertexEventRouteEvent rEvent = (VertexEventRouteEvent) event;
      List<TezEvent> tezEvents = rEvent.getEvents();
      try {
        vertex.handleRoutedTezEvents(tezEvents, false);
      } catch (AMUserCodeException e) {
       ...
      }
      return vertex.getState();
    }
  }

handleRoutedTezEvents

If the event originated from this vertex, handleRoutedTezEvents forwards it to the destination (target) vertex.

case INPUT_FAILED_EVENT:
case DATA_MOVEMENT_EVENT:
case COMPOSITE_DATA_MOVEMENT_EVENT:
{
  if (isEventFromVertex(this, sourceMeta)) {
    // event from this vertex. send to destination vertex
    TezTaskAttemptID srcTaId = sourceMeta.getTaskAttemptID();
    if (tezEvent.getEventType() == EventType.DATA_MOVEMENT_EVENT) {
      ((DataMovementEvent) tezEvent.getEvent()).setVersion(srcTaId.getId());
    } else if (tezEvent.getEventType() == EventType.COMPOSITE_DATA_MOVEMENT_EVENT) {
      ((CompositeDataMovementEvent) tezEvent.getEvent()).setVersion(srcTaId.getId());
    } else {
      ((InputFailedEvent) tezEvent.getEvent()).setVersion(srcTaId.getId());
    }
    Vertex destVertex = getDAG().getVertex(sourceMeta.getEdgeVertexName());
    Edge destEdge = targetVertices.get(destVertex);
    // Omit some lines
    eventHandler.handle(new VertexEventRouteEvent(destVertex
        .getVertexId(), Collections.singletonList(tezEvent)));
}

handleRoutedTezEvents (on the target vertex)

When the target vertex receives the event, it checks whether its tasks have been scheduled yet. If not, the event is put into the pendingTaskEvents queue and re-routed once scheduling has happened.
If the tasks have already been scheduled, processOnDemandEvent is called (or, for legacy routing, the event is sent directly to the destination tasks).

if (tasksNotYetScheduled) {
  // this is only needed to support mixed mode routing. Else for
  // on demand routing events can be directly added to taskEvents
  // when legacy routing is removed then pending task events can be
  // removed.
  pendingTaskEvents.add(tezEvent);
} else {
  // event not from this vertex. must have come from source vertex.
  int srcTaskIndex = sourceMeta.getTaskAttemptID().getTaskID().getId();
  Vertex edgeVertex = getDAG().getVertex(sourceMeta.getTaskVertexName());
  Edge srcEdge = sourceVertices.get(edgeVertex);
  if (srcEdge.hasOnDemandRouting()) {
    processOnDemandEvent(tezEvent, srcEdge, srcTaskIndex);
  } else {
    // send to tasks            
    srcEdge.sendTezEventToDestinationTasks(tezEvent);
  }
}

VertexImpl#processOnDemandEvent

private void processOnDemandEvent(TezEvent tezEvent, Edge srcEdge, int srcTaskIndex) {
   onDemandRouteEvents.add(new EventInfo(tezEvent, srcEdge, srcTaskIndex));
  }

VertexImpl#getTaskAttemptTezEvents

The target vertex's task attempts fetch their events here.

@Override
  public TaskAttemptEventInfo getTaskAttemptTezEvents(TezTaskAttemptID attemptID,
      int fromEventId, int preRoutedFromEventId, int maxEvents) {
    Task task = getTask(attemptID.getTaskID());
    ArrayList<TezEvent> events = task.getTaskAttemptTezEvents(
        attemptID, preRoutedFromEventId, maxEvents);
    int nextPreRoutedFromEventId = preRoutedFromEventId + events.size();
    int nextFromEventId = fromEventId;
    try {
      int currEventCount = onDemandRouteEvents.size();
      // Omit some lines
      for (nextFromEventId = fromEventId; nextFromEventId < currEventCount; ++nextFromEventId) {
            boolean earlyExit = false;
            if (events.size() == maxEvents) {
              break;
            }
            EventInfo eventInfo = onDemandRouteEvents.get(nextFromEventId);
            if (eventInfo.isObsolete) {
              // ignore obsolete events
              firstEventObsoleted = true;
              continue;
            }
            TezEvent tezEvent = eventInfo.tezEvent;
            switch(tezEvent.getEventType()) {
            case INPUT_FAILED_EVENT:
            case DATA_MOVEMENT_EVENT:
            case COMPOSITE_DATA_MOVEMENT_EVENT:
              {
                int srcTaskIndex = eventInfo.eventTaskIndex;
                Edge srcEdge = eventInfo.eventEdge;
                PendingEventRouteMetadata pendingRoute = null;
                if (isFirstEvent) {
                  // the first event is the one that can have pending routes because its expanded
                  // events had not been completely sent in the last round.
                  isFirstEvent = false;
                  pendingRoute = srcEdge.removePendingEvents(attemptID);
                  if (pendingRoute != null) {
                    // the first event must match the pending route event
                    // the only reason it may not match is if in between rounds that event got
                    // obsoleted
                    if(tezEvent != pendingRoute.getTezEvent()) {
                      Preconditions.checkState(firstEventObsoleted);
                      // pending routes can be ignored for obsoleted events
                      pendingRoute = null;
                    }
                  }
                }
                if (!srcEdge.maybeAddTezEventForDestinationTask(tezEvent, attemptID, srcTaskIndex,
                    events, maxEvents, pendingRoute)) {
                  // not enough space left for this iteration events.
                  // Exit and start from here next time
                  earlyExit = true;
                }
              }
              break;
            // Omit some lines
    return new TaskAttemptEventInfo(nextFromEventId, events, nextPreRoutedFromEventId);
  }

Edge#maybeAddTezEventForDestinationTask

maybeAddTezEventForDestinationTask asks the edge manager how to route the event and, if routing succeeds, expands the composite data movement event into the event the destination task needs.

public boolean maybeAddTezEventForDestinationTask(TezEvent tezEvent, TezTaskAttemptID attemptID,
      int srcTaskIndex, List<TezEvent> listToAdd, int listMaxSize, 
      PendingEventRouteMetadata pendingRoutes) 
          throws AMUserCodeException {

      try {
        EdgeManagerPluginOnDemand edgeManagerOnDemand = (EdgeManagerPluginOnDemand) edgeManager;
        int taskIndex = attemptID.getTaskID().getId();
        switch (tezEvent.getEventType()) {
        case COMPOSITE_DATA_MOVEMENT_EVENT:
          {
            CompositeDataMovementEvent compEvent = (CompositeDataMovementEvent) tezEvent.getEvent(); 
            CompositeEventRouteMetadata routeMeta = edgeManagerOnDemand
                  .routeCompositeDataMovementEventToDestination(srcTaskIndex, taskIndex);

            if (routeMeta != null) {
              CompositeRoutedDataMovementEvent edme = compEvent.expandRouted(routeMeta);
              TezEvent tezEventToSend = new TezEvent(edme, tezEvent.getSourceInfo(), tezEvent.getEventReceivedTime());
              tezEventToSend.setDestinationInfo(destinationMetaInfo);
              listToAdd.add(tezEventToSend);
            }
          }
          break;

ShuffleVertexManager$CustomShuffleEdgeManager

CustomShuffleEdgeManager manages the mapping between the source vertex's output partitions and the reconfigured parallelism of the target vertex.

@Override
    public @Nullable CompositeEventRouteMetadata routeCompositeDataMovementEventToDestination(
        int sourceTaskIndex, int destinationTaskIndex)
        throws Exception {
      int[] targetIndicesToSend;
      int partitionRange;
      if(destinationTaskIndex == (numDestinationTasks-1)) {
        if (remainderRangeForLastShuffler != basePartitionRange) {
          targetIndicesToSend = createTargetIndicesForRemainder(sourceTaskIndex);
        } else {
          targetIndicesToSend = targetIndices[sourceTaskIndex];
        }
        partitionRange = remainderRangeForLastShuffler;
      } else {
        targetIndicesToSend = targetIndices[sourceTaskIndex];
        partitionRange = basePartitionRange;
      }

      return CompositeEventRouteMetadata.create(partitionRange, targetIndicesToSend[0], 
          sourceIndices[destinationTaskIndex][0]);
    }