主要的 append 函数
cpp/src/arrow/array/builder_binary.h
/// \brief Append one value made of `length` bytes starting at `value`.
///
/// Steps, in order: reserve one slot, append the next offset, copy the bytes
/// (only when there are any), then push `true` onto the bitmap.
///
/// \param[in] value pointer to the bytes to copy
/// \param[in] length number of bytes to copy from `value`
/// \return the first non-OK Status produced by a step, otherwise Status::OK()
Status Append(const uint8_t* value, offset_type length) {
  ARROW_RETURN_NOT_OK(Reserve(1));
  ARROW_RETURN_NOT_OK(AppendNextOffset());

  // Safety check for UBSAN: nothing is validated or copied for an empty value.
  const bool has_payload = length > 0;
  if (ARROW_PREDICT_TRUE(has_payload)) {
    ARROW_RETURN_NOT_OK(ValidateOverflow(length));
    ARROW_RETURN_NOT_OK(value_data_builder_.Append(value, length));
  }

  UnsafeAppendToBitmap(true);
  return Status::OK();
}
cpp/src/arrow/buffer_builder.h
/// \brief Copy `length` bytes from `data` onto the end of the buffer.
///
/// If the bytes would not fit within the current capacity, the buffer is
/// first resized to the capacity returned by GrowByFactor (with `false`
/// passed as Resize's second argument), so callers need not reserve space.
///
/// \param[in] data source bytes
/// \param[in] length number of bytes to copy
/// \return a non-OK Status if the resize fails, otherwise Status::OK()
Status Append(const void* data, const int64_t length) {
  const int64_t required_size = size_ + length;
  if (ARROW_PREDICT_FALSE(required_size > capacity_)) {
    const int64_t grown_capacity = GrowByFactor(capacity_, required_size);
    ARROW_RETURN_NOT_OK(Resize(grown_capacity, false));
  }
  UnsafeAppend(data, length);
  return Status::OK();
}
// Unsafe methods don't check existing size

/// \brief Copy `length` bytes from `data` to offset `size_` of the buffer and
/// advance `size_` by `length`. No capacity check is performed here.
void UnsafeAppend(const void* data, const int64_t length) {
  auto* destination = data_ + size_;
  const auto byte_count = static_cast<size_t>(length);
  memcpy(destination, data, byte_count);
  size_ += length;
}
/// \brief Clear `buffer_` and set both the size and the capacity to zero.
void Reset() {
  buffer_ = NULLPTR;
  size_ = 0;
  capacity_ = 0;
}
Buffer 相关接口
cpp/src/arrow/buffer.cc
/// \brief Set the buffer's size to `new_size`.
///
/// When the buffer is already allocated, `shrink_to_fit` is set and
/// `new_size` does not exceed the current size, the allocation is trimmed to
/// `new_size` rounded up to a multiple of 64. In every other case the request
/// is forwarded to Reserve().
///
/// \param[in] new_size requested size; negative values are rejected
/// \param[in] shrink_to_fit whether a non-growing resize may release capacity
/// \return Status::Invalid for a negative size, any error from the pool or
///         from Reserve(), otherwise Status::OK()
Status Resize(const int64_t new_size, bool shrink_to_fit = true) override {
  if (ARROW_PREDICT_FALSE(new_size < 0)) {
    return Status::Invalid("Negative buffer resize: ", new_size);
  }

  const bool shrink_in_place = mutable_data_ && shrink_to_fit && new_size <= size_;
  if (!shrink_in_place) {
    RETURN_NOT_OK(Reserve(new_size));
  } else {
    // Buffer is non-null and is not growing: drop the excess space, keeping
    // the capacity padded to a multiple of 64.
    const int64_t padded_capacity = BitUtil::RoundUpToMultipleOf64(new_size);
    if (padded_capacity != capacity_) {
      // The allocation does not have the requested capacity yet.
      RETURN_NOT_OK(pool_->Reallocate(capacity_, padded_capacity, &mutable_data_));
      data_ = mutable_data_;
      capacity_ = padded_capacity;
    }
  }

  size_ = new_size;
  return Status::OK();
}
/// \brief Make sure the buffer is allocated and can hold `capacity` bytes.
///
/// Nothing happens when the buffer is already allocated with at least that
/// capacity. Otherwise the capacity is rounded up to a multiple of 64 and the
/// memory is obtained from the pool: a fresh allocation when there is no data
/// yet, a reallocation when there is.
///
/// \param[in] capacity requested capacity; negative values are rejected
/// \return Status::Invalid for a negative capacity, any error from the pool,
///         otherwise Status::OK()
Status Reserve(const int64_t capacity) override {
  if (capacity < 0) {
    return Status::Invalid("Negative buffer capacity: ", capacity);
  }

  // Already allocated and large enough: nothing to do.
  if (mutable_data_ && capacity <= capacity_) {
    return Status::OK();
  }

  const int64_t padded_capacity = BitUtil::RoundUpToMultipleOf64(capacity);
  if (!mutable_data_) {
    uint8_t* fresh_data;
    RETURN_NOT_OK(pool_->Allocate(padded_capacity, &fresh_data));
    mutable_data_ = fresh_data;
  } else {
    RETURN_NOT_OK(pool_->Reallocate(capacity_, padded_capacity, &mutable_data_));
  }

  data_ = mutable_data_;
  capacity_ = padded_capacity;
  return Status::OK();
}
cpp/src/arrow/memory_pool.cc
/// Resize an already allocated memory section.
///
/// Because most platform default allocators do not support aligned
/// reallocation, this function may involve a copy of the underlying data.
///
/// NOTE(review): the parameter descriptions below are read off the names and
/// the signature only; confirm them against a concrete implementation.
/// \param old_size presumably the current size in bytes of the section at *ptr
/// \param new_size presumably the requested size in bytes
/// \param ptr in/out pointer to the section; presumably updated when the
///        data is moved to a new location
virtual Status Reallocate(int64_t old_size, int64_t new_size, uint8_t** ptr) = 0;
/// \brief Resize the section at `*ptr` from `old_size` to `new_size` bytes.
///
/// The size is validated, the work is delegated to
/// Allocator::ReallocateAligned, and the change in size is then reported to
/// `stats_`.
///
/// \param[in] old_size size of the existing section
/// \param[in] new_size requested size; must be non-negative and below the
///            maximum value of size_t
/// \param[in,out] ptr address of the section pointer handed to the allocator
/// \return Status::Invalid for a negative size, Status::CapacityError when the
///         size does not fit in size_t, any allocator error, otherwise
///         Status::OK()
Status Reallocate(int64_t old_size, int64_t new_size, uint8_t** ptr) override {
  if (new_size < 0) {
    return Status::Invalid("negative realloc size");
  }
  const auto requested_bytes = static_cast<uint64_t>(new_size);
  if (requested_bytes >= std::numeric_limits<size_t>::max()) {
    return Status::CapacityError("realloc overflows size_t");
  }

  RETURN_NOT_OK(Allocator::ReallocateAligned(old_size, new_size, ptr));

#ifndef NDEBUG
  // Debug builds only: when the section grew, poison the first and the last
  // byte of the newly added range.
  const bool section_grew = new_size > old_size;
  if (section_grew) {
    DCHECK_NE(*ptr, nullptr);
    uint8_t* const section = *ptr;
    section[old_size] = kReallocPoison;
    section[new_size - 1] = kReallocPoison;
  }
#endif

  const int64_t size_delta = new_size - old_size;
  stats_.UpdateAllocatedBytes(size_delta);
  return Status::OK();
}
/// \brief Move the section at `*ptr` into a newly allocated aligned chunk of
/// `new_size` bytes.
///
/// Three cases are handled:
///  - `*ptr` is the shared `zero_size_area` placeholder: there is nothing to
///    copy or free, so this is a plain allocation;
///  - `new_size` is 0: the old chunk is released and `*ptr` is set to
///    `zero_size_area`;
///  - otherwise: a new chunk is allocated, the first
///    `min(old_size, new_size)` bytes are copied over, and the old chunk is
///    released.
///
/// If allocating the new chunk fails in the third case, `*ptr` is left
/// untouched and the old chunk is not released.
///
/// \param[in] old_size size of the existing section
/// \param[in] new_size requested size
/// \param[in,out] ptr address of the section pointer; updated on success
/// \return any error from AllocateAligned, otherwise Status::OK()
static Status ReallocateAligned(int64_t old_size, int64_t new_size, uint8_t** ptr) {
  uint8_t* previous_ptr = *ptr;
  if (previous_ptr == zero_size_area) {
    DCHECK_EQ(old_size, 0);
    return AllocateAligned(new_size, ptr);
  }
  if (new_size == 0) {
    DeallocateAligned(previous_ptr, old_size);
    *ptr = zero_size_area;
    return Status::OK();
  }

  // Note: We cannot use realloc() here as it doesn't guarantee alignment.

  // Allocate new chunk
  uint8_t* out = nullptr;
  RETURN_NOT_OK(AllocateAligned(new_size, &out));
  DCHECK(out);

  // Copy contents and release old memory chunk. The release goes through
  // DeallocateAligned, the same helper the `new_size == 0` branch uses, so the
  // platform-specific free call is not duplicated here.
  memcpy(out, previous_ptr, static_cast<size_t>(std::min(new_size, old_size)));
  DeallocateAligned(previous_ptr, old_size);
  *ptr = out;
  return Status::OK();
}