spark2.3源码分析之server端处理拉取shuffle block请求的流程

ExternalShuffleBlockHandler

概述

服务端的RPC处理器,可以从executor进程的外部提供shuffle block。

处理器注册executor,并且从executor打开shuffle block。shuffle block通过“一对一”策略注册,即传输层的chunk对应于spark应用层的block。

成员变量

final ExternalShuffleBlockResolver blockManager;
private final OneForOneStreamManager streamManager;

 ManagedBufferIterator内部类

ShuffleWriter在输出文件到磁盘时,文件名的格式为(reduceId为0): "shuffle_" + shuffleId + "_" + mapId + "_" + reduceId;

在初始化ManagedBufferIterator时,只是记录本次拉取请求的appId、execId、shuffle file name集合,分别赋值到他的成员变量中。真正拉取文件延迟到next()方法中。

private class ManagedBufferIterator implements Iterator<ManagedBuffer> {

    private int index = 0;
    private final String appId;
    private final String execId;
    private final int shuffleId;
    // An array containing mapId and reduceId pairs.
//记录mapId和reduceId pair对的数组
    private final int[] mapIdAndReduceIds;

/*@param String[] blockIds 请求拉取的shuffle文件名的集合,这些文件名都拥有相同的shuffleId
*/ 
    ManagedBufferIterator(String appId, String execId, String[] blockIds) {
      this.appId = appId;
      this.execId = execId;
      String[] blockId0Parts = blockIds[0].split("_");
      if (blockId0Parts.length != 4 || !blockId0Parts[0].equals("shuffle")) {
        throw new IllegalArgumentException("Unexpected shuffle block id format: " + blockIds[0]);
      }
//将文件名的shuffleId设置到成员变量shuffleId
      this.shuffleId = Integer.parseInt(blockId0Parts[1]);
      mapIdAndReduceIds = new int[2 * blockIds.length];
      for (int i = 0; i < blockIds.length; i++) {
        String[] blockIdParts = blockIds[i].split("_");
        if (blockIdParts.length != 4 || !blockIdParts[0].equals("shuffle")) {
          throw new IllegalArgumentException("Unexpected shuffle block id format: " + blockIds[i]);
        }
        if (Integer.parseInt(blockIdParts[1]) != shuffleId) {
          throw new IllegalArgumentException("Expected shuffleId=" + shuffleId +
            ", got:" + blockIds[i]);
        }
//将文件名的mapId和reduceId设置到成员变量mapId和ReduceIds
        mapIdAndReduceIds[2 * i] = Integer.parseInt(blockIdParts[2]);
        mapIdAndReduceIds[2 * i + 1] = Integer.parseInt(blockIdParts[3]);
      }
    }

    @Override
    public boolean hasNext() {
      return index < mapIdAndReduceIds.length;
    }

    @Override
    public ManagedBuffer next() {
//ExternalShuffleBlockResolver#getBlockData()方法根据shuffleId、mapId、reduceId获取数据
      final ManagedBuffer block = blockManager.getBlockData(appId, execId, shuffleId,
        mapIdAndReduceIds[index], mapIdAndReduceIds[index + 1]);
      index += 2;
      metrics.blockTransferRateBytes.mark(block != null ? block.size() : 0);
      return block;
    }
  }

handleMessage()方法

protected void handleMessage(
      BlockTransferMessage msgObj,
      TransportClient client,
      RpcResponseCallback callback) {
//如果是处理OpenBlocks消息类型
    if (msgObj instanceof OpenBlocks) {
      final Timer.Context responseDelayContext = metrics.openBlockRequestLatencyMillis.time();
      try {
        OpenBlocks msg = (OpenBlocks) msgObj;
        checkAuth(client, msg.appId);
        long streamId = streamManager.registerStream(client.getClientId(),
          new ManagedBufferIterator(msg.appId, msg.execId, msg.blockIds), client.getChannel());
        if (logger.isTraceEnabled()) {
          logger.trace("Registered streamId {} with {} buffers for client {} from host {}",
                       streamId,
                       msg.blockIds.length,
                       client.getClientId(),
                       getRemoteAddress(client.getChannel()));
        }
        callback.onSuccess(new StreamHandle(streamId, msg.blockIds.length).toByteBuffer());
      } finally {
        responseDelayContext.stop();
      }
//如果是处理registerExecutor消息类型
    } else if (msgObj instanceof RegisterExecutor) {
      final Timer.Context responseDelayContext =
        metrics.registerExecutorRequestLatencyMillis.time();
      try {
        RegisterExecutor msg = (RegisterExecutor) msgObj;
        checkAuth(client, msg.appId);
        blockManager.registerExecutor(msg.appId, msg.execId, msg.executorInfo);
        callback.onSuccess(ByteBuffer.wrap(new byte[0]));
      } finally {
        responseDelayContext.stop();
      }

    } else {
      throw new UnsupportedOperationException("Unexpected message: " + msgObj);
    }
  }

ExternalShuffleBlockResolver

getBlockData()方法

/**
   * Obtains a FileSegmentManagedBuffer from (shuffleId, mapId, reduceId). We make assumptions
   * about how the hash and sort based shuffles store their data.
   */
  public ManagedBuffer getBlockData(
      String appId,
      String execId,
      int shuffleId,
      int mapId,
      int reduceId) {
    ExecutorShuffleInfo executor = executors.get(new AppExecId(appId, execId));
    if (executor == null) {
      throw new RuntimeException(
        String.format("Executor is not registered (appId=%s, execId=%s)", appId, execId));
    }
//返回FileSegmentManagedBuffer
    return getSortBasedShuffleBlockData(executor, shuffleId, mapId, reduceId);
  }

getSortBasedShuffledData()方法

server端从存储shuffle file的本地目录,获取shuffle block数据。

 /**
   * Sort-based shuffle data uses an index called "shuffle_ShuffleId_MapId_0.index" into a data file
   * called "shuffle_ShuffleId_MapId_0.data". This logic is from IndexShuffleBlockResolver,
   * and the block id format is from ShuffleDataBlockId and ShuffleIndexBlockId.
   */
  private ManagedBuffer getSortBasedShuffleBlockData(
    ExecutorShuffleInfo executor, int shuffleId, int mapId, int reduceId) {
//ExecutorShuffleInfo保存着shuffle file的本地存储目录信息、本地存储子目录信息
//获取shuffle file的index文件
    File indexFile = getFile(executor.localDirs, executor.subDirsPerLocalDir,
      "shuffle_" + shuffleId + "_" + mapId + "_0.index");

    try {
      ShuffleIndexInformation shuffleIndexInformation = shuffleIndexCache.get(indexFile);
      ShuffleIndexRecord shuffleIndexRecord = shuffleIndexInformation.getIndex(reduceId);
      return new FileSegmentManagedBuffer(
        conf,
        getFile(executor.localDirs, executor.subDirsPerLocalDir,
          "shuffle_" + shuffleId + "_" + mapId + "_0.data"),
        shuffleIndexRecord.getOffset(),
        shuffleIndexRecord.getLength());
    } catch (ExecutionException e) {
      throw new RuntimeException("Failed to open file: " + indexFile, e);
    }
  }

OneForOneStreamManager

概述

该StreamManager允许ManagedBuffer的注册,每一个注册的buffer就是一个chunk,可以被客户端通过网络拉取。

OneForOneStreamManager只能按顺序一个接一个的读取一个流的数据块。它借助于内部的StreamSate类,。要读的块编号必须为当前块编号加1。

成员变量

 private final AtomicLong nextStreamId;
  private final ConcurrentHashMap<Long, StreamState> streams;

StreamState内部类

StreamState类里存放了客户端编号,ManagedBufffer的迭代器,以及当前读的块编号.

final String appId;
final Iterator<ManagedBuffer> buffers;

// The channel associated to the stream
final Channel associatedChannel;
 // Used to keep track of the index of the buffer that the user has retrieved, just to ensure
// that the caller only requests each chunk one at a time, in order.
int curChunk = 0;

registerStream()方法

 /**
   * Registers a stream of ManagedBuffers which are served as individual chunks one at a time to
   * callers. Each ManagedBuffer will be release()'d after it is transferred on the wire. If a
   * client connection is closed before the iterator is fully drained, then the remaining buffers
   * will all be release()'d.
   *
   * If an app ID is provided, only callers who've authenticated with the given app ID will be
   * allowed to fetch from this stream.
   *
   * This method also associates the stream with a single client connection, which is guaranteed
   * to be the only reader of the stream. Once the connection is closed, the stream will never
   * be used again, enabling cleanup by `connectionTerminated`.
   */
  public long registerStream(String appId, Iterator<ManagedBuffer> buffers, Channel channel) {
//生成下一个StreamId
    long myStreamId = nextStreamId.getAndIncrement();
//记录下一个StreamId对应的StreamState,并放入内部成员变量concurrentMap集合中
    streams.put(myStreamId, new StreamState(appId, buffers, channel));
    return myStreamId;
  }

getChunk()方法

@Override
  public ManagedBuffer getChunk(long streamId, int chunkIndex) {
    StreamState state = streams.get(streamId);
    if (chunkIndex != state.curChunk) {
      throw new IllegalStateException(String.format(
        "Received out-of-order chunk index %s (expected %s)", chunkIndex, state.curChunk));
    } else if (!state.buffers.hasNext()) {
      throw new IllegalStateException(String.format(
        "Requested chunk index beyond end %s", chunkIndex));
    }
    state.curChunk += 1;
    ManagedBuffer nextChunk = state.buffers.next();

    if (!state.buffers.hasNext()) {
      logger.trace("Removing stream id {}", streamId);
      streams.remove(streamId);
    }

    return nextChunk;
  }

 

  • 1
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值