// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <cstdint>
#include <cstring>
#include <memory>
#include <optional>
#include <string>
#include <string_view>
#include <utility>
#include <vector>
#include "arrow/device.h"
#include "arrow/status.h"
#include "arrow/type_fwd.h"
#include "arrow/util/macros.h"
#include "arrow/util/span.h"
#include "arrow/util/visibility.h"
namespace arrow {
// ----------------------------------------------------------------------
// Buffer classes
/// \class Buffer
/// \brief Object containing a pointer to a piece of contiguous memory with a
/// particular size.
///
/// Buffers have two related notions of length: size and capacity. Size is
/// the number of bytes that might have valid data. Capacity is the number
/// of bytes that were allocated for the buffer in total.
///
/// The Buffer base class does not own its memory, but subclasses often do.
///
/// The following invariant is always true: Size <= Capacity
class ARROW_EXPORT Buffer {
public:
ARROW_DISALLOW_COPY_AND_ASSIGN(Buffer);
/// \brief Construct from buffer and size without copying memory
///
/// The buffer is marked as immutable and CPU-accessible, its capacity is set
/// equal to its size, and the default CPU memory manager is attached to it.
///
/// \param[in] data a memory buffer
/// \param[in] size buffer size, in bytes
///
/// \note The passed memory must be kept alive through some other means
Buffer(const uint8_t* data, int64_t size)
: is_mutable_(false),
is_cpu_(true),
data_(data),
size_(size),
capacity_(size),
device_type_(DeviceAllocationType::kCPU) {
SetMemoryManager(default_cpu_memory_manager());
}
Buffer(const uint8_t* data, int64_t size, std::shared_ptr<MemoryManager> mm,
std::shared_ptr<Buffer> parent = NULLPTR,
std::optional<DeviceAllocationType> device_type_override = std::nullopt)
: is_mutable_(false),
data_(data),
size_(size),
capacity_(size),
parent_(std::move(parent)) {
// SetMemoryManager will also set device_type_
SetMemoryManager(std::move(mm));
// If a device type is specified, use that instead. Example of when this can be
// useful: the CudaMemoryManager can set device_type_ to kCUDA, but you can specify
// device_type_override=kCUDA_HOST as the device type to override it.
if (device_type_override != std::nullopt) {
device_type_ = *device_type_override;
}
}
/// \brief Construct from a raw address and size without copying memory
///
/// The address is reinterpreted as a `const uint8_t*` and construction is
/// delegated to the pointer-based constructor taking a MemoryManager. No
/// device type override is passed along.
///
/// \param[in] address the address of the memory region
/// \param[in] size buffer size
/// \param[in] mm the memory manager to attach to this buffer
/// \param[in] parent an optional parent buffer (defaults to NULLPTR)
Buffer(uintptr_t address, int64_t size, std::shared_ptr<MemoryManager> mm,
std::shared_ptr<Buffer> parent = NULLPTR)
: Buffer(reinterpret_cast<const uint8_t*>(address), size, std::move(mm),
std::move(parent)) {}
/// \brief Construct from string_view without copying memory
///
/// Delegates to the (data, size) constructor, using the view's character
/// pointer (reinterpreted as bytes) and its length.
///
/// \param[in] data a string_view object
///
/// \note The memory viewed by data must not be deallocated in the lifetime of the
/// Buffer; temporary rvalue strings must be stored in an lvalue somewhere
explicit Buffer(std::string_view data)
: Buffer(reinterpret_cast<const uint8_t*>(data.data()),
static_cast<int64_t>(data.size())) {}
/// Virtual so that subclasses are destroyed correctly when deleted through a
/// pointer to Buffer.
virtual ~Buffer() = default;
/// An offset into data that is owned by another buffer, but we want to be
/// able to retain a valid pointer to it even after other shared_ptr's to the
/// parent buffer have been destroyed
///
/// This method makes no assertions about alignment or padding of the buffer but
/// in general we expected buffers to be aligned and padded to 64 bytes. In the future
/// we might add utility methods to help determine if a buffer satisfies this contract.
Buffer(const std::shared_ptr<Buffer>& parent, const int64_t offset, const int64_t size)
: Buffer(parent->data_ + offset, size) {
parent_ = parent;
SetMemoryManager(parent->memory_manager_);
}
/// \brief Return the byte at position i of the buffer's data
///
/// The index is not range-checked, and the stored data pointer is dereferenced
/// directly, without the CPU check performed by `data()`.
uint8_t operator[](std::size_t i) const { return data_[i]; }
/// \brief Construct a new std::string with a hexadecimal representation of the buffer.
/// \return a std::string holding the hexadecimal representation
/// \note This method is declared non-const.
std::string ToHexString();
/// \brief Return true if both buffers are the same size and contain the same
/// bytes up to the number of compared bytes
///
/// \param[in] other the buffer to compare against
/// \param[in] nbytes the number of bytes to compare
bool Equals(const Buffer& other, int64_t nbytes) const;
/// \brief Return true if both buffers are the same size and contain the same bytes
///
/// \param[in] other the buffer to compare against
bool Equals(const Buffer& other) const;
/// \brief Copy a section of the buffer into a new Buffer.
///
/// \param[in] start where the section begins (presumably a byte offset into
/// this buffer; confirm against the definition)
/// \param[in] nbytes the length of the section (presumably in bytes; confirm
/// against the definition)
/// \param[in] pool the memory pool passed for the copy; defaults to
/// `default_memory_pool()`
/// \return the new Buffer, or an error
Result<std::shared_ptr<Buffer>> CopySlice(
const int64_t start, const int64_t nbytes,
MemoryPool* pool = default_memory_pool()) const;
/// \brief Zero the padding bytes, i.e. the bytes between size_ and capacity_.
///
/// In debug builds the buffer is first checked to be mutable.
void ZeroPadding() {
#ifndef NDEBUG
  CheckMutable();
#endif
  // Nothing to clear for a zero-capacity buffer, whose data pointer can be null
  if (capacity_ == 0) {
    return;
  }
  uint8_t* padding_begin = mutable_data() + size_;
  const auto padding_length = static_cast<size_t>(capacity_ - size_);
  memset(padding_begin, 0, padding_length);
}
/// \brief Construct an immutable buffer that takes ownership of the contents
/// of an std::string (without copying it).
///
/// \param[in] data a string to own; it is taken by value, so pass an rvalue
/// (e.g. via std::move) to avoid copying at the call site
/// \return a new Buffer instance
static std::shared_ptr<Buffer> FromString(std::string data);
/// \brief Construct an immutable buffer that takes ownership of the contents
/// of an std::vector (without copying it). Only vectors of TrivialType objects
/// (integers, floating point numbers, ...) can be wrapped by this function.
///
/// \param[in] vec a vector to own
/// \return a new Buffer instance
template <typename T>
static std::shared_ptr<Buffer> FromVector(std::vector<T> vec) {
static_assert(std::is_trivial_v<T>,
"Buffer::FromVector can only wrap vectors of trivial objects");
if (vec.empty()) {
return std::shared_ptr<Buffer>{new Buffer()};
}
auto* data = reinterpret_cast<uint8_t*>(vec.data());
auto size_in_bytes = static_cast<int64_t>(vec.size() * sizeof(T));
return std::shared_ptr<Buffer>{
new Buffer{data, size_in_bytes},
// Keep the vector's buffer alive inside the shared_ptr's destructor until after
// we have deleted the Buffer. Note we can't use this trick in FromString since
// std::string's data is inline for short strings so moving invalidates pointers
// into the string's buffer.
[vec = std::move(vec)](Buffer* buffer) { delete buffer; }};
}
/// \brief Create a buffer referencing typed memory of a given length, without
/// copying
///
/// The buffer's size in bytes is `sizeof(T) * length`.
///
/// \param[in] data the typed memory as C array
/// \param[in] length the number of values in the array
/// \return a new shared_ptr<Buffer>
template <typename T, typename SizeType = int64_t>
static std::shared_ptr<Buffer> Wrap(const T* data, SizeType length) {
  const auto* bytes = reinterpret_cast<const uint8_t*>(data);
  const auto nbytes = static_cast<int64_t>(sizeof(T) * length);
  return std::make_shared<Buffer>(bytes, nbytes);
}
/// \brief Create a buffer referencing the contents of a std::vector, without
/// copying
///
/// The buffer's size in bytes is `sizeof(T) * data.size()`.
///
/// \param[in] data the vector to be referenced. If this vector is changed,
/// the buffer may become invalid
/// \return a new shared_ptr<Buffer>
template <typename T>
static std::shared_ptr<Buffer> Wrap(const std::vector<T>& data) {
  const auto* bytes = reinterpret_cast<const uint8_t*>(data.data());
  const auto nbytes = static_cast<int64_t>(sizeof(T) * data.size());
  return std::make_shared<Buffer>(bytes, nbytes);
}
/// \brief Copy buffer contents into a new std::string
/// \return a std::string holding a copy of the buffer contents
/// \note Can throw std::bad_alloc if buffer is large
std::string ToString() const;
/// \brief View buffer contents as a std::string_view
///
/// The view spans `size_` characters starting at the stored data pointer; no
/// bytes are copied and no CPU check is performed.
/// \return std::string_view
explicit operator std::string_view() const {
  const auto* chars = reinterpret_cast<const char*>(data_);
  return std::string_view(chars, static_cast<size_t>(size_));
}
/// \brief Return a read-only pointer to the buffer's data
///
/// The buffer has to be a CPU buffer (`is_cpu()` is true). Otherwise, the
/// debug-build check may fail, or a null pointer is returned.
///
/// To get the buffer's data address regardless of its device, call `address()`.
const uint8_t* data() const {
#ifndef NDEBUG
  CheckCPU();
#endif
  if (ARROW_PREDICT_TRUE(is_cpu_)) {
    return data_;
  }
  return NULLPTR;
}
/// \brief Return the buffer's data pointer, reinterpreted as a pointer to const T
///
/// This goes through `data()`, so the buffer has to be a CPU buffer
/// (`is_cpu()` is true). Otherwise, an assertion may be thrown or a null
/// pointer may be returned.
template <typename T>
const T* data_as() const {
  const uint8_t* raw = data();
  return reinterpret_cast<const T*>(raw);
}
/// \brief Return the buffer's data as a read-only span of T
///
/// The span holds `size() / sizeof(T)` elements (integer division, so any
/// trailing bytes that do not fill a whole T are left out). The data pointer
/// is obtained through `data_as<T>()`.
template <typename T>
util::span<const T> span_as() const {
  const auto count = static_cast<size_t>(size() / sizeof(T));
  return util::span(data_as<T>(), count);
}
/// \brief Return a writable pointer to the buffer's data
///
/// The buffer has to be a mutable CPU buffer (`is_cpu()` and `is_mutable()`
/// are true). Otherwise, the debug-build checks may fail, or a null pointer
/// is returned.
///
/// To get the buffer's mutable data address regardless of its device, call
/// `mutable_address()`.
uint8_t* mutable_data() {
#ifndef NDEBUG
  CheckCPU();
  CheckMutable();
#endif
  if (ARROW_PREDICT_TRUE(is_cpu_ && is_mutable_)) {
    // data_ is stored as a pointer to const; constness is dropped only once
    // the buffer is known to be mutable
    return const_cast<uint8_t*>(data_);
  }
  return NULLPTR;
}
/// \brief Return the buffer's writable data pointer, reinterpreted as a pointer to T
///
/// This goes through `mutable_data()`, so the buffer has to be a mutable CPU
/// buffer (`is_cpu()` and `is_mutable()` are true). Otherwise, an assertion
/// may be thrown or a null pointer may be returned.
template <typename T>
T* mutable_data_as() {
  uint8_t* raw = mutable_data();
  return reinterpret_cast<T*>(raw);
}
/// \brief Return the buffer's mutable data as a span of T
///
/// The span holds `size() / sizeof(T)` elements (integer division, so any
/// trailing bytes that do not fill a whole T are left out). The data pointer
/// is obtained through `mutable_data_as<T>()`.
template <typename T>
util::span<T> mutable_span_as() {
  const auto count = static_cast<size_t>(size() / sizeof(T));
  return util::span(mutable_data_as<T>(), count);
}
/// \brief Return the device address of the buffer's data
///
/// Unlike `data()`, no CPU check is performed: the stored pointer is returned
/// as an integer whatever the buffer's device is.
uintptr_t address() const { return reinterpret_cast<uintptr_t>(data_); }
/// \brief Return a writable device address to the buffer's data
///
/// The buffer has to be a mutable buffer (`is_mutable()` is true).
/// Otherwise, the debug-build check may fail, or 0 is returned.
/// No CPU check is performed.
uintptr_t mutable_address() const {
#ifndef NDEBUG
  CheckMutable();
#endif
  if (ARROW_PREDICT_TRUE(is_mutable_)) {
    return reinterpret_cast<uintptr_t>(data_);
  }
  return 0;
}
/// \brief Return the buffer's size in bytes, i.e. the number of bytes that
/// might have valid data
int64_t size() const { return size_; }
/// \brief Return the buffer's capacity (number of allocated bytes)
///
/// The capacity is always greater than or equal to `size()`.
int64_t capacity() const { return capacity_; }
/// \brief Whether the buffer is directly CPU-accessible
///
/// If this function returns true, you can read directly from the buffer's
/// `data()` pointer. Otherwise, you'll have to `View()` or `Copy()` it;
/// `data()` returns a null pointer for such buffers (or asserts in debug builds).
bool is_cpu() const { return is_cpu_; }
/// \brief Whether the buffer is mutable
///
/// If this function returns true, you are allowed to modify buffer contents
/// using the pointer returned by `mutable_data()` or `mutable_address()`.
/// If it returns false, those accessors return a null pointer or 0
/// respectively (or assert in debug builds).
bool is_mutable() const { return is_mutable_; }
/// \brief Return the device of this buffer's memory manager
///
/// The memory manager is dereferenced unconditionally.
const std::shared_ptr<Device>& device() const { return memory_manager_->device(); }
/// \brief Return the memory manager attached to this buffer
const std::shared_ptr<MemoryManager>& memory_manager() const { return memory_manager_; }
/// \brief Return the device allocation type recorded for this buffer
///
/// This is the type assigned when the memory manager was attached, unless an
/// override was supplied at construction.
DeviceAllocationType device_type() const { return device_type_; }
/// \brief Return the parent buffer recorded for this buffer
///
/// The shared_ptr is returned by value.
std::shared_ptr<Buffer> parent() const { return parent_; }
/// \brief Get a RandomAccessFile for reading a buffer
///
/// The returned file object reads from the given buffer's underlying memory.
///
/// \return the file object, or an error
static Result<std::shared_ptr<io::RandomAccessFile>> GetReader(std::shared_ptr<Buffer>);
/// \brief Get an OutputStream for writing to a buffer
///
/// The buffer must be mutable. The returned stream object writes into the given
/// buffer's underlying memory (but it won't resize it).
///
/// \return the stream object, or an error
static Result<std::shared_ptr<io::OutputStream>> GetWriter(std::shared_ptr<Buffer>);
/// \brief Copy buffer
///
/// The buffer contents will be copied into a new buffer allocated by the
/// given MemoryManager. This function supports cross-device copies.
///
/// \param[in] source the buffer whose contents are copied
/// \param[in] to the MemoryManager that allocates the new buffer
/// \return the new buffer, or an error
static Result<std::shared_ptr<Buffer>> Copy(std::shared_ptr<Buffer> source,
const std::shared_ptr<MemoryManager>& to);
/// \brief Copy a non-owned buffer
///
/// This is useful for cases where the source memory area is externally managed
/// (its lifetime not tied to the source Buffer), otherwise please use Copy().
///
/// \param[in] source the buffer whose contents are copied
/// \param[in] to the destination MemoryManager
/// \return the new buffer, held by a unique_ptr, or an error
static Result<std::unique_ptr<Buffer>> CopyNonOwned(
const Buffer& source, const std::shared_ptr<MemoryManager>& to);
/// \brief View buffer
///
/// Return a Buffer that reflects this buffer, seen potentially from another
/// device, without making an explicit copy of the contents. The underlying
/// mechanism is typically implemented by the kernel or device driver, and may
Loading ...