ConcatenateImpl
ConcatenateImpl(const ArrayDataVector& in, MemoryPool* pool)
: in_(std::move(in)), pool_(pool), out_(std::make_shared<ArrayData>()) {
out_->type = in[0]->type;
for (size_t i = 0; i < in_.size(); ++i) {
out_->length = SafeSignedAdd(out_->length, in[i]->length);
if (out_->null_count == kUnknownNullCount ||
in[i]->null_count == kUnknownNullCount) {
out_->null_count = kUnknownNullCount;
continue;
}
out_->null_count = SafeSignedAdd(out_->null_count.load(), in[i]->null_count.load());
}
out_->buffers.resize(in[0]->buffers.size());
out_->child_data.resize(in[0]->child_data.size());
for (auto& data : out_->child_data) {
data = std::make_shared<ArrayData>();
}
}
Concatenate()
底层并没有通过 Builder，而是直接基于 Buffer 进行操作。
/// Run the concatenation and hand the resulting ArrayData to the caller.
///
/// Rvalue-qualified: on success out_ is moved into *out.
///
/// \param[out] out receives the concatenated ArrayData on success
/// \return the first non-OK Status from concatenating the bitmaps or from
///         visiting the output type, otherwise Status::OK()
Status Concatenate(std::shared_ptr<ArrayData>* out) && {
  // Buffer slot 0 is only filled when the accumulated null count is not zero
  // and the output type reports having a validity bitmap.
  const bool null_count_is_zero = (out_->null_count == 0);
  if (!null_count_is_zero && internal::HasValidityBitmap(out_->type->id())) {
    auto* validity_slot = &out_->buffers[0];
    RETURN_NOT_OK(ConcatenateBitmaps(Bitmaps(0), pool_, validity_slot));
  }

  // Dispatch on the output type, with this object acting as the visitor.
  RETURN_NOT_OK(VisitTypeInline(*out_->type, this));

  *out = std::move(out_);
  return Status::OK();
}
/// Visitor overload for fixed-width types (per the original note: numbers,
/// decimal128, decimal256 and fixed_size_binary).
///
/// Gathers buffer 1 of the inputs via Buffers(1, fixed), concatenates them and
/// stores the result in buffer slot 1 of the output.
Status Visit(const FixedWidthType& fixed) {
  ARROW_ASSIGN_OR_RAISE(auto value_buffers, Buffers(1, fixed));
  auto concatenated = ConcatenateBuffers(value_buffers, pool_);
  return std::move(concatenated).Value(&out_->buffers[1]);
}
/// Copy `buffers` back-to-back, in order, into one newly allocated buffer.
///
/// \param buffers the buffers to append; every element is dereferenced
/// \param pool    memory pool handed to AllocateBuffer
/// \return a buffer whose size is the sum of the input sizes, or the error
///         produced by AllocateBuffer
Result<std::shared_ptr<Buffer>> ConcatenateBuffers(
    const std::vector<std::shared_ptr<Buffer>>& buffers, MemoryPool* pool) {
  int64_t out_length = 0;
  for (const auto& buffer : buffers) {
    out_length += buffer->size();
  }

  ARROW_ASSIGN_OR_RAISE(auto out, AllocateBuffer(out_length, pool));
  auto out_data = out->mutable_data();
  for (const auto& buffer : buffers) {
    // Passing a null pointer to std::memcpy is undefined behavior even when
    // the byte count is zero, and nothing here guarantees that an empty
    // buffer has a non-null data pointer, so empty buffers are skipped.
    if (buffer->size() != 0) {
      std::memcpy(out_data, buffer->data(), buffer->size());
      out_data += buffer->size();
    }
  }
  // NOTE(review): the explicit move is kept from the original; presumably
  // `out` is a different smart-pointer type than the one wrapped by the
  // Result, so a plain `return out;` may not convert on every compiler.
  return std::move(out);
}
Concatenate ListArray
// Concatenates three list<int8> arrays (one of them containing a null list)
// and checks both the length and the contents of the result. The result is
// still printed so the run can be inspected by eye.
TEST(TestConcatenate, ListType) {
  auto nestype = list(arrow::int8());
  auto array1 = ArrayFromJSON(nestype, "[[1, 2], [3, 4]]");
  auto array2 = ArrayFromJSON(nestype, "[[5, 6, 7], null]");
  auto array3 = ArrayFromJSON(nestype, "[[8], [9, 10]]");
  // ARROW_ASSIGN_OR_RAISE(auto bigArr, Concatenate({array1, array2, array3}, default_memory_pool()));
  ASSERT_OK_AND_ASSIGN(auto bigArr, Concatenate({array1, array2, array3}, default_memory_pool()));

  // Without assertions this test would pass whatever Concatenate returned.
  // Two elements from each of the three inputs, in input order.
  ASSERT_EQ(bigArr->length(), 6);
  auto expected =
      ArrayFromJSON(nestype, "[[1, 2], [3, 4], [5, 6, 7], null, [8], [9, 10]]");
  ASSERT_TRUE(bigArr->Equals(expected));

  std::cout << "bigArr->ToString()\n:" << bigArr->ToString() << std::endl;
  std::cout << "bigArr->length()\n:" << bigArr->length() << std::endl;
}
./arrow-array-test --gtest_filter=TestConcatenate.ListType
bigArr->ToString()
[
[
1,
2
],
[
3,
4
],
[
5,
6,
7
],
null,
[
8
],
[
9,
10
]
]
bigArr->length()
:6
Concatenate MapArray
// Concatenates three identical map<utf8, utf8> arrays of two single-entry
// maps each, and checks both the length and the contents of the result. The
// result is still printed so the run can be inspected by eye.
TEST(TestConcatenate, MapType) {
  auto nestype = map(utf8(), utf8());
  // Single-element variants of the inputs, kept for experimentation:
  // auto array1 = ArrayFromJSON(nestype, R"([[["name1", "alice1"]]])");
  // auto array2 = ArrayFromJSON(nestype, R"([[["name2", "alice2"]]])");
  // auto array3 = ArrayFromJSON(nestype, R"([[["name3", "alice3"]]])");
  auto array1 = ArrayFromJSON(nestype, R"([[["name1", "alice1"]], [["name2", "alice2"]]])");
  auto array2 = ArrayFromJSON(nestype, R"([[["name1", "alice1"]], [["name2", "alice2"]]])");
  auto array3 = ArrayFromJSON(nestype, R"([[["name1", "alice1"]], [["name2", "alice2"]]])");
  // ARROW_ASSIGN_OR_RAISE(auto bigArr, Concatenate({array1, array2, array3}, default_memory_pool()));
  ASSERT_OK_AND_ASSIGN(auto bigArr, Concatenate({array1, array2, array3}, default_memory_pool()));

  // Without assertions this test would pass whatever Concatenate returned.
  // Two maps from each of the three inputs, in input order.
  ASSERT_EQ(bigArr->length(), 6);
  auto expected = ArrayFromJSON(
      nestype,
      R"([[["name1", "alice1"]], [["name2", "alice2"]],
          [["name1", "alice1"]], [["name2", "alice2"]],
          [["name1", "alice1"]], [["name2", "alice2"]]])");
  ASSERT_TRUE(bigArr->Equals(expected));

  std::cout << "bigArr->ToString()\n:" << bigArr->ToString() << std::endl;
  std::cout << "bigArr->length()\n:" << bigArr->length() << std::endl;
}
./arrow-array-test --gtest_filter=TestConcatenate.MapType
bigArr->ToString()
[
keys:
[
"name1"
]
values:
[
"alice1"
],
keys:
[
"name2"
]
values:
[
"alice2"
],
keys:
[
"name1"
]
values:
[
"alice1"
],
keys:
[
"name2"
]
values:
[
"alice2"
],
keys:
[
"name1"
]
values:
[
"alice1"
],
keys:
[
"name2"
]
values:
[
"alice2"
]
]
bigArr->length()
:6