Spark-side code

Substrait plan entry point. compute builds the first-stage iterator: when there are no upstream RDDs the backend reads the input partition directly; otherwise the upstream partition iterators are opened and fed into the native pipeline:
  override def compute(split: Partition, context: TaskContext): Iterator[ColumnarBatch] = {
    // Bind this task to a NUMA node if NUMA binding is configured.
    ExecutorManager.tryTaskSet(numaBindingInfo)

    val inputPartition = castNativePartition(split)
    if (rdds.isEmpty) {
      // Source stage: no upstream RDDs, so the backend scans the partition itself.
      BackendsApiManager.getIteratorApiInstance.genFirstStageIterator(
        inputPartition,
        loadNative,
        outputAttributes,
        context,
        pipelineTime,
        updateMetrics,
        updateNativeMetrics)
    } else {
      // Open one iterator per upstream RDD partition and hand them to the
      // backend as inputs of the native pipeline.
      val partitions = split.asInstanceOf[FirstZippedPartitionsPartition].partitions
      val inputIterators = (rdds zip partitions).map {
        case (rdd, partition) => rdd.iterator(partition, context)
      }
      BackendsApiManager.getIteratorApiInstance.genFirstStageIterator(
        inputPartition,
        loadNative,
        outputAttributes,
        context,
        pipelineTime,
        updateMetrics,
        updateNativeMetrics,
        inputIterators)
    }
  }
ArrowWritableColumnVector.java

ArrowRecordBatch => ArrowWritableColumnVector[]. loadColumns materializes a record batch into Spark-readable column vectors:

public static ArrowWritableColumnVector[] loadColumns(int capacity, Schema arrowSchema,
                                                        ArrowRecordBatch recordBatch,
                                                        BufferAllocator allocator) {
    // Materialize the record batch into a fresh VectorSchemaRoot, then wrap
    // the loaded field vectors. Ownership of the vectors passes to the
    // returned column vectors, so the root itself is not closed here.
    VectorSchemaRoot root = VectorSchemaRoot.create(arrowSchema, allocator);
    VectorLoader loader = new VectorLoader(root);
    loader.load(recordBatch);
    return loadColumns(capacity, root.getFieldVectors());
  }
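
A minimal usage sketch (the wrapper toColumnarBatch is illustrative, not a Gluten API; it assumes an ArrowRecordBatch deserialized elsewhere, e.g. read back from shuffle):

import org.apache.arrow.memory.BufferAllocator;
import org.apache.arrow.vector.ipc.message.ArrowRecordBatch;
import org.apache.arrow.vector.types.pojo.Schema;
import org.apache.spark.sql.vectorized.ColumnarBatch;

// Rebuild a Spark ColumnarBatch from a deserialized ArrowRecordBatch.
static ColumnarBatch toColumnarBatch(Schema schema, ArrowRecordBatch recordBatch,
                                     BufferAllocator allocator) {
  int numRows = recordBatch.getLength();
  ArrowWritableColumnVector[] vectors =
      ArrowWritableColumnVector.loadColumns(numRows, schema, recordBatch, allocator);
  return new ColumnarBatch(vectors, numRows);
}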
ArrowColumnarBatches.java
offload: move a JVM Arrow-backed ColumnarBatch into native memory via the Arrow C Data Interface, then swap the native-backed vectors back into the input batch:
  public static ColumnarBatch offload(BufferAllocator allocator, ColumnarBatch input) {
    if (!isArrowColumnarBatch(input)) {
      throw new IllegalArgumentException("batch is not Arrow columnar batch");
    }
    if (input.numCols() == 0) {
      return input;
    }
    try (ArrowArray cArray = ArrowArray.allocateNew(allocator);
         ArrowSchema cSchema = ArrowSchema.allocateNew(allocator)) {
      GlutenArrowAbiUtil.exportFromSparkColumnarBatch(
          ArrowBufferAllocators.contextInstance(), input, cSchema, cArray);
      long handle = ColumnarBatchJniWrapper.INSTANCE.createWithArrowArray(cSchema.memoryAddress(),
          cArray.memoryAddress());
      ColumnarBatch output = GlutenColumnarBatches.create(handle);

      // Mirror the input's reference count onto the output. This could be
      // simplified with automatic clean-up once ColumnarBatch becomes more
      // extensible. All input columns are expected to share one count.
      long refCnt = -1L;
      for (int i = 0; i < input.numCols(); i++) {
        ArrowWritableColumnVector col = ((ArrowWritableColumnVector) input.column(i));
        long colRefCnt = col.refCnt();
        if (refCnt == -1L) {
          refCnt = colRefCnt;
        } else {
          if (colRefCnt != refCnt) {
            throw new IllegalStateException("Arrow columns have diverging reference counts");
          }
        }
      }
      if (refCnt == -1L) {
        throw new IllegalStateException("Input batch has no Arrow columns");
      }
      // Retain the native batch once per outstanding reference (the create
      // call above already holds the first one).
      final GlutenIndicatorVector giv = (GlutenIndicatorVector) output.column(0);
      for (long i = 0; i < (refCnt - 1); i++) {
        giv.retain();
      }

      // Fully release the original JVM-side Arrow buffers.
      for (long i = 0; i < refCnt; i++) {
        input.close();
      }

      // Swap the native-backed vectors into the input batch so existing
      // references to `input` now point at native memory.
      transferVectors(output, input);
      return input;
    }
  }
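
A hedged usage sketch: note that offload mutates and returns the input batch (transferVectors swaps the native-backed vectors in), so existing references stay valid (method name is illustrative):

import org.apache.spark.sql.vectorized.ColumnarBatch;

// Hand a JVM Arrow-backed batch over to native memory and fetch its handle.
static long offloadAndGetHandle(ColumnarBatch arrowBatch) {
  ColumnarBatch offloaded =
      ArrowColumnarBatches.offload(ArrowBufferAllocators.contextInstance(), arrowBatch);
  // offload() returned the mutated input, so offloaded == arrowBatch here.
  return GlutenColumnarBatches.getNativeHandle(offloaded);
}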
JNI: the native side moves the C structs into an ArrowCStructColumnarBatch and registers it in the batch holder:
JNIEXPORT jlong JNICALL Java_io_glutenproject_columnarbatch_ColumnarBatchJniWrapper_createWithArrowArray(
    JNIEnv* env,
    jobject,
    jlong c_schema,
    jlong c_array) {
  JNI_METHOD_START
  // Take ownership of the exported structs. ArrowArrayMove/ArrowSchemaMove
  // copy the struct contents and mark the sources as released, so the Java
  // side's close() on the original cSchema/cArray becomes a no-op.
  std::unique_ptr<ArrowSchema> target_schema = std::make_unique<ArrowSchema>();
  std::unique_ptr<ArrowArray> target_array = std::make_unique<ArrowArray>();
  auto* arrow_schema = reinterpret_cast<ArrowSchema*>(c_schema);
  auto* arrow_array = reinterpret_cast<ArrowArray*>(c_array);
  ArrowArrayMove(arrow_array, target_array.get());
  ArrowSchemaMove(arrow_schema, target_schema.get());
  // Wrap the structs in an ArrowCStructColumnarBatch and return an opaque
  // handle that later JNI calls use to look the batch up.
  std::shared_ptr<ColumnarBatch> batch =
      std::make_shared<ArrowCStructColumnarBatch>(std::move(target_schema), std::move(target_array));
  return gluten_columnarbatch_holder_.Insert(batch);
  JNI_METHOD_END(-1L)
}
Exporting the RecordBatch over the C Data Interface: the batch is first ensured to be JVM-loaded, then converted to an ArrowRecordBatch and exported:
  def exportFromSparkColumnarBatch(allocator: BufferAllocator, columnarBatch: ColumnarBatch,
                                   cSchema: ArrowSchema, cArray: ArrowArray): Unit = {
    val loaded = ArrowColumnarBatches.ensureLoaded(allocator, columnarBatch)
    val schema = GlutenArrowUtil.toSchema(loaded)
    val rb = GlutenArrowUtil.createArrowRecordBatch(loaded)
    try {
      exportFromArrowRecordBatch(allocator, rb, schema, cSchema, cArray)
    } finally {
      GlutenArrowUtil.releaseArrowRecordBatch(rb)
    }
  }
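
For reference, a sketch of what exportFromArrowRecordBatch presumably does under the hood, written against Arrow's stock C Data Interface API (org.apache.arrow.c.Data); this is an assumption based on the standard API, not Gluten's actual code:

import org.apache.arrow.c.ArrowArray;
import org.apache.arrow.c.ArrowSchema;
import org.apache.arrow.c.Data;
import org.apache.arrow.memory.BufferAllocator;
import org.apache.arrow.vector.VectorLoader;
import org.apache.arrow.vector.VectorSchemaRoot;
import org.apache.arrow.vector.ipc.message.ArrowRecordBatch;
import org.apache.arrow.vector.types.pojo.Schema;

// Load the record batch into a VectorSchemaRoot, then export data and schema
// into the caller-provided C structs. Exported buffers are reference-counted,
// so closing the root afterwards is safe.
static void exportRecordBatch(BufferAllocator allocator, ArrowRecordBatch rb, Schema schema,
                              ArrowSchema cSchema, ArrowArray cArray) {
  try (VectorSchemaRoot root = VectorSchemaRoot.create(schema, allocator)) {
    new VectorLoader(root).load(rb);
    Data.exportVectorSchemaRoot(allocator, root, null, cArray, cSchema);
  }
}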
ArrowInIterator.java

next() hands the native side a batch handle, offloading first if the batch is still JVM-backed:

  public long next() {
    final ColumnarBatch batch = nextColumnarBatch();
    final ColumnarBatch offloaded =
        ArrowColumnarBatches.ensureOffloaded(ArrowBufferAllocators.contextInstance(), batch);
    return GlutenColumnarBatches.getNativeHandle(offloaded);
  }
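
Putting the pieces together, a hedged sketch of the consuming side (the loop shape and method name are illustrative):

import java.util.Iterator;
import org.apache.spark.sql.vectorized.ColumnarBatch;

// Feed JVM-produced batches into the native pipeline: ensure each batch is
// offloaded to native memory, then pass its opaque handle across JNI.
static void feedNativePipeline(Iterator<ColumnarBatch> upstream) {
  while (upstream.hasNext()) {
    ColumnarBatch offloaded = ArrowColumnarBatches.ensureOffloaded(
        ArrowBufferAllocators.contextInstance(), upstream.next());
    long handle = GlutenColumnarBatches.getNativeHandle(offloaded);
    // ... hand `handle` to the native operator via its JNI wrapper
  }
}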