Arrow 之 Array Concatenate

ConcatenateImpl
// Prepares the concatenation of the arrays in `in` into one output ArrayData.
//
// The constructor only fills in metadata; no buffer contents are produced here:
//   - the output type is taken from the first input,
//   - the output length is the SafeSignedAdd-accumulated sum of input lengths,
//   - the output null count is the sum of the input null counts, or
//     kUnknownNullCount as soon as any input's null count is unknown,
//   - the output's buffer and child_data vectors are sized like the first
//     input's, and every child slot receives a fresh, empty ArrayData.
//
// `in` is indexed at 0 unconditionally, so it must not be empty.
// NOTE(review): presumably the caller rejects empty input before getting
// here -- confirm.
ConcatenateImpl(const ArrayDataVector& in, MemoryPool* pool)
      : in_(in), pool_(pool), out_(std::make_shared<ArrayData>()) {
    // `in` is a const reference, so wrapping it in std::move() could never
    // move anything; in_ is initialized from it directly and the body reads
    // from in_ only.
    const auto& first = in_[0];
    out_->type = first->type;
    for (const auto& input : in_) {
      out_->length = SafeSignedAdd(out_->length, input->length);
      if (out_->null_count == kUnknownNullCount ||
          input->null_count == kUnknownNullCount) {
        // Once one operand is unknown the total stays unknown.
        out_->null_count = kUnknownNullCount;
        continue;
      }
      out_->null_count =
          SafeSignedAdd(out_->null_count.load(), input->null_count.load());
    }
    out_->buffers.resize(first->buffers.size());
    out_->child_data.resize(first->child_data.size());
    for (auto& child : out_->child_data) {
      child = std::make_shared<ArrayData>();
    }
  }
Concatenate()

Concatenate 的底层实现并没有使用 Builder,而是直接基于 Buffer 进行拼接(参见下方 ConcatenateBuffers 中的 memcpy)。

// Performs the concatenation and stores the finished ArrayData in `*out`.
//
// Rvalue-qualified: out_ is moved into `*out` at the end, so this object's
// out_ is left moved-from once the call succeeds.
// Returns the first non-OK status produced by a step, otherwise Status::OK().
Status Concatenate(std::shared_ptr<ArrayData>* out) && {
    // Buffer 0 (the validity bitmap) is only built when the output null count
    // is not zero and the output type has a validity bitmap at all.
    const bool null_count_is_nonzero = out_->null_count != 0;
    if (null_count_is_nonzero) {
      if (internal::HasValidityBitmap(out_->type->id())) {
        RETURN_NOT_OK(ConcatenateBitmaps(Bitmaps(0), pool_, &out_->buffers[0]));
      }
    }

    // NOTE(review): presumably VisitTypeInline calls the Visit overload that
    // matches the concrete output type -- confirm.
    RETURN_NOT_OK(VisitTypeInline(*out_->type, this));

    *out = std::move(out_);
    return Status::OK();
  }
// Visitor overload for fixed-width types; per the original note this covers
// numbers, decimal128, decimal256 and fixed_size_binary.
//
// Gathers the inputs' buffers at index 1 via Buffers(1, fixed), concatenates
// them and stores the result in the output's buffer slot 1. Any error from
// either step is returned to the caller.
Status Visit(const FixedWidthType& fixed) {
    ARROW_ASSIGN_OR_RAISE(auto value_buffers, Buffers(1, fixed));
    auto& destination = out_->buffers[1];
    return ConcatenateBuffers(value_buffers, pool_).Value(&destination);
  }
// Concatenates the contents of `buffers`, in order, into a single buffer
// newly allocated from `pool`.
//
// The output size is the sum of the input sizes. Every element of `buffers`
// is dereferenced, so none of them may be null.
//
// Returns the concatenated buffer, or propagates the error reported by
// AllocateBuffer.
Result<std::shared_ptr<Buffer>> ConcatenateBuffers(
    const std::vector<std::shared_ptr<Buffer>>& buffers, MemoryPool* pool) {
  int64_t out_length = 0;
  for (const auto& buffer : buffers) {
    out_length += buffer->size();
  }
  ARROW_ASSIGN_OR_RAISE(auto out, AllocateBuffer(out_length, pool));
  auto out_data = out->mutable_data();
  for (const auto& buffer : buffers) {
    // Passing a null pointer to std::memcpy is undefined behavior even when
    // the byte count is zero, so empty buffers are skipped rather than copied.
    if (buffer->size() != 0) {
      std::memcpy(out_data, buffer->data(), buffer->size());
      out_data += buffer->size();
    }
  }
  return std::move(out);
}
Concatenate ListArray
// Demo: concatenates three list<int8> arrays (the second one contains a null
// list) and prints the resulting array and its length to stdout.
TEST(TestConcatenate, ListType) {
  const auto list_type = list(arrow::int8());
  auto first = ArrayFromJSON(list_type, "[[1, 2], [3, 4]]");
  auto second = ArrayFromJSON(list_type, "[[5, 6, 7], null]");
  auto third = ArrayFromJSON(list_type, "[[8], [9, 10]]");

  // ASSERT_OK_AND_ASSIGN is used here rather than ARROW_ASSIGN_OR_RAISE.
  ASSERT_OK_AND_ASSIGN(auto bigArr,
                       Concatenate({first, second, third}, default_memory_pool()));

  std::cout << "bigArr->ToString()\n:" << bigArr->ToString() << std::endl;
  std::cout << "bigArr->length()\n:" << bigArr->length() << std::endl;
}

./arrow-array-test --gtest_filter=TestConcatenate.ListType

bigArr->ToString()
[
  [
    1,
    2
  ],
  [
    3,
    4
  ],
  [
    5,
    6,
    7
  ],
  null,
  [
    8
  ],
  [
    9,
    10
  ]
]
bigArr->length()
:6

Concatenate MapArray
// Demo: concatenates three identical map<utf8, utf8> arrays and prints the
// resulting array and its length to stdout.
TEST(TestConcatenate, MapType) {
  const auto map_type = map(utf8(), utf8());

  // Every input is built from the same JSON: two maps holding one key/value
  // pair each.
  const char* two_maps_json = R"([[["name1", "alice1"]], [["name2", "alice2"]]])";
  auto first = ArrayFromJSON(map_type, two_maps_json);
  auto second = ArrayFromJSON(map_type, two_maps_json);
  auto third = ArrayFromJSON(map_type, two_maps_json);

  // ASSERT_OK_AND_ASSIGN is used here rather than ARROW_ASSIGN_OR_RAISE.
  ASSERT_OK_AND_ASSIGN(auto bigArr,
                       Concatenate({first, second, third}, default_memory_pool()));

  std::cout << "bigArr->ToString()\n:" << bigArr->ToString() << std::endl;
  std::cout << "bigArr->length()\n:" << bigArr->length() << std::endl;
}

./arrow-array-test --gtest_filter=TestConcatenate.MapType

bigArr->ToString()
[
  keys:
  [
    "name1"
  ]
  values:
  [
    "alice1"
  ],
  keys:
  [
    "name2"
  ]
  values:
  [
    "alice2"
  ],
  keys:
  [
    "name1"
  ]
  values:
  [
    "alice1"
  ],
  keys:
  [
    "name2"
  ]
  values:
  [
    "alice2"
  ],
  keys:
  [
    "name1"
  ]
  values:
  [
    "alice1"
  ],
  keys:
  [
    "name2"
  ]
  values:
  [
    "alice2"
  ]
]
bigArr->length()
:6

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值