// ----------------------------------------------------------------------
// Generic array data container
/// \class ArrayData
/// \brief Mutable container for generic Arrow array data
///
/// This data structure is a self-contained representation of the memory and
/// metadata inside an Arrow array data structure (called vectors in Java). The
/// classes arrow::Array and its subclasses provide strongly-typed accessors
/// with support for the visitor pattern and other affordances.
///
/// This class is designed for easy internal data manipulation, analytical data
/// processing, and data transport to and from IPC messages. For example, we
/// could reinterpret int64 data as float64 (the buffers are shared and the
/// stored values are not converted) like so:
///
/// Int64Array arr = GetMyData();
/// auto new_data = arr.data()->Copy();
/// new_data->type = arrow::float64();
/// DoubleArray double_arr(new_data);
///
/// This object is also useful in an analytics setting where memory may be
/// reused. For example, if we had a group of operations all returning doubles,
/// say:
///
/// Log(Sqrt(Expr(arr)))
///
/// Then the low-level implementations of each of these functions could have
/// the signatures
///
/// void Log(const ArrayData& values, ArrayData* out);
///
/// As another example a function may consume one or more memory buffers in an
/// input array and replace them with newly-allocated data, changing the output
/// data type as well.
struct ARROW_EXPORT ArrayData {
ArrayData() : length(0), null_count(0), offset(0) {}
ArrayData(const std::shared_ptr<DataType>& type, int64_t length,
int64_t null_count = kUnknownNullCount, int64_t offset = 0)
: type(type), length(length), null_count(null_count), offset(offset) {}
ArrayData(const std::shared_ptr<DataType>& type, int64_t length,
std::vector<std::shared_ptr<Buffer>> buffers,
int64_t null_count = kUnknownNullCount, int64_t offset = 0)
: ArrayData(type, length, null_count, offset) {
this->buffers = std::move(buffers);
}
ArrayData(const std::shared_ptr<DataType>& type, int64_t length,
std::vector<std::shared_ptr<Buffer>> buffers,
std::vector<std::shared_ptr<ArrayData>> child_data,
int64_t null_count = kUnknownNullCount, int64_t offset = 0)
: ArrayData(type, length, null_count, offset) {
this->buffers = std::move(buffers);
this->child_data = std::move(child_data);
}
static std::shared_ptr<ArrayData> Make(const std::shared_ptr<DataType>& type,
int64_t length,
std::vector<std::shared_ptr<Buffer>> buffers,
int64_t null_count = kUnknownNullCount,
int64_t offset = 0);
static std::shared_ptr<ArrayData> Make(
const std::shared_ptr<DataType>& type, int64_t length,
std::vector<std::shared_ptr<Buffer>> buffers,
std::vector<std::shared_ptr<ArrayData>> child_data,
int64_t null_count = kUnknownNullCount, int64_t offset = 0);
static std::shared_ptr<ArrayData> Make(
const std::shared_ptr<DataType>& type, int64_t length,
std::vector<std::shared_ptr<Buffer>> buffers,
std::vector<std::shared_ptr<ArrayData>> child_data,
std::shared_ptr<ArrayData> dictionary, int64_t null_count = kUnknownNullCount,
int64_t offset = 0);
static std::shared_ptr<ArrayData> Make(const std::shared_ptr<DataType>& type,
int64_t length,
int64_t null_count = kUnknownNullCount,
int64_t offset = 0);
// Move constructor
ArrayData(ArrayData&& other) noexcept
: type(std::move(other.type)),
length(other.length),
offset(other.offset),
buffers(std::move(other.buffers)),
child_data(std::move(other.child_data)),
dictionary(std::move(other.dictionary)) {
SetNullCount(other.null_count);
}
// Copy constructor
ArrayData(const ArrayData& other) noexcept
: type(other.type),
length(other.length),
offset(other.offset),
buffers(other.buffers),
child_data(other.child_data),
dictionary(other.dictionary) {
SetNullCount(other.null_count);
}
// Move assignment
ArrayData& operator=(ArrayData&& other) {
type = std::move(other.type);
length = other.length;
SetNullCount(other.null_count);
offset = other.offset;
buffers = std::move(other.buffers);
child_data = std::move(other.child_data);
dictionary = std::move(other.dictionary);
return *this;
}
// Copy assignment
ArrayData& operator=(const ArrayData& other) {
type = other.type;
length = other.length;
SetNullCount(other.null_count);
offset = other.offset;
buffers = other.buffers;
child_data = other.child_data;
dictionary = other.dictionary;
return *this;
}
std::shared_ptr<ArrayData> Copy() const { return std::make_shared<ArrayData>(*this); }
// Access a buffer's data as a typed C pointer
template <typename T>
inline const T* GetValues(int i, int64_t absolute_offset) const {
if (buffers[i]) {
return reinterpret_cast<const T*>(buffers[i]->data()) + absolute_offset;
} else {
return NULLPTR;
}
}
template <typename T>
inline const T* GetValues(int i) const {
return GetValues<T>(i, offset);
}
// Like GetValues, but returns NULLPTR instead of aborting if the underlying
// buffer is not a CPU buffer.
template <typename T>
inline const T* GetValuesSafe(int i, int64_t absolute_offset) const {
if (buffers[i] && buffers[i]->is_cpu()) {
return reinterpret_cast<const T*>(buffers[i]->data()) + absolute_offset;
} else {
return NULLPTR;
}
}
template <typename T>
inline const T* GetValuesSafe(int i) const {
return GetValuesSafe<T>(i, offset);
}
// Access a buffer's data as a typed C pointer
template <typename T>
inline T* GetMutableValues(int i, int64_t absolute_offset) {
if (buffers[i]) {
return reinterpret_cast<T*>(buffers[i]->mutable_data()) + absolute_offset;
} else {
return NULLPTR;
}
}
template <typename T>
inline T* GetMutableValues(int i) {
return GetMutableValues<T>(i, offset);
}
/// \brief Construct a zero-copy slice of the data with the given offset and length
std::shared_ptr<ArrayData> Slice(int64_t offset, int64_t length) const;
/// \brief Input-checking variant of Slice
///
/// An Invalid Status is returned if the requested slice falls out of bounds.
/// Note that unlike Slice, `length` isn't clamped to the available buffer size.
Result<std::shared_ptr<ArrayData>> SliceSafe(int64_t offset, int64_t length) const;
void SetNullCount(int64_t v) { null_count.store(v); }
/// \brief Return null count, or compute and set it if it's not known
int64_t GetNullCount() const;
bool MayHaveNulls() const {
// If an ArrayData is slightly malformed it may have kUnknownNullCount set
// but no buffer
return null_count.load() != 0 && buffers[0] != NULLPTR;
}
std::shared_ptr<DataType> type;
int64_t length;
mutable std::atomic<int64_t> null_count;
// The logical start point into the physical buffers (in values, not bytes).
// Note that, for child data, this must be *added* to the child data's own offset.
int64_t offset;
std::vector<std::shared_ptr<Buffer>> buffers;
std::vector<std::shared_ptr<ArrayData>> child_data;
// The dictionary for this Array, if any. Only used for dictionary type
std::shared_ptr<ArrayData> dictionary;
};
ArrayData// ----------------------------------------------------------------------// Generic array data container/// \class ArrayData/// \brief Mutable container for generic Arrow array data////// This data structure is a self-contained representati