// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#ifndef NANOARROW_IPC_H_INCLUDED
#define NANOARROW_IPC_H_INCLUDED
#include "nanoarrow.h"
#ifdef NANOARROW_NAMESPACE
#define ArrowIpcCheckRuntime NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowIpcCheckRuntime)
#define ArrowIpcSharedBufferIsThreadSafe \
NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowIpcSharedBufferIsThreadSafe)
#define ArrowIpcSharedBufferInit \
NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowIpcSharedBufferInit)
#define ArrowIpcSharedBufferReset \
NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowIpcSharedBufferReset)
#define ArrowIpcDecoderInit NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowIpcDecoderInit)
#define ArrowIpcDecoderReset NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowIpcDecoderReset)
#define ArrowIpcDecoderPeekHeader \
NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowIpcDecoderPeekHeader)
#define ArrowIpcDecoderVerifyHeader \
NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowIpcDecoderVerifyHeader)
#define ArrowIpcDecoderDecodeHeader \
NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowIpcDecoderDecodeHeader)
#define ArrowIpcDecoderDecodeSchema \
NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowIpcDecoderDecodeSchema)
#define ArrowIpcDecoderDecodeArrayView \
NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowIpcDecoderDecodeArrayView)
#define ArrowIpcDecoderDecodeArray \
NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowIpcDecoderDecodeArray)
#define ArrowIpcDecoderDecodeArrayFromShared \
NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowIpcDecoderDecodeArrayFromShared)
#define ArrowIpcDecoderSetSchema \
NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowIpcDecoderSetSchema)
#define ArrowIpcDecoderSetEndianness \
NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowIpcDecoderSetEndianness)
#define ArrowIpcDecoderPeekFooter \
NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowIpcDecoderPeekFooter)
#define ArrowIpcDecoderVerifyFooter \
NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowIpcDecoderVerifyFooter)
#define ArrowIpcDecoderDecodeFooter \
NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowIpcDecoderDecodeFooter)
#define ArrowIpcInputStreamInitBuffer \
NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowIpcInputStreamInitBuffer)
#define ArrowIpcInputStreamInitFile \
NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowIpcInputStreamInitFile)
#define ArrowIpcInputStreamMove \
NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowIpcInputStreamMove)
#define ArrowIpcArrayStreamReaderInit \
NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowIpcArrayStreamReaderInit)
#define ArrowIpcEncoderInit NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowIpcEncoderInit)
#define ArrowIpcEncoderReset NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowIpcEncoderReset)
#define ArrowIpcEncoderFinalizeBuffer \
NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowIpcEncoderFinalizeBuffer)
#define ArrowIpcEncoderEncodeSchema \
NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowIpcEncoderEncodeSchema)
#define ArrowIpcEncoderEncodeSimpleRecordBatch \
NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowIpcEncoderEncodeSimpleRecordBatch)
#define ArrowIpcOutputStreamInitBuffer \
NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowIpcOutputStreamInitBuffer)
#define ArrowIpcOutputStreamInitFile \
NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowIpcOutputStreamInitFile)
#define ArrowIpcOutputStreamWrite \
NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowIpcOutputStreamWrite)
#define ArrowIpcOutputStreamMove \
NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowIpcOutputStreamMove)
#define ArrowIpcWriterInit NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowIpcWriterInit)
#define ArrowIpcWriterReset NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowIpcWriterReset)
#define ArrowIpcWriterWriteSchema \
NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowIpcWriterWriteSchema)
#define ArrowIpcWriterWriteArrayView \
NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowIpcWriterWriteArrayView)
#define ArrowIpcWriterWriteArrayStream \
NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowIpcWriterWriteArrayStream)
#define ArrowIpcWriterStartFile \
NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowIpcWriterStartFile)
#define ArrowIpcWriterFinalizeFile \
NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowIpcWriterFinalizeFile)
#define ArrowIpcFooterInit NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowIpcFooterInit)
#define ArrowIpcFooterReset NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowIpcFooterReset)
#define ArrowIpcEncoderEncodeFooter \
NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowIpcEncoderEncodeFooter)
#endif
#ifdef __cplusplus
extern "C" {
#endif
/// \defgroup nanoarrow_ipc Nanoarrow IPC extension
///
/// Except where noted, objects are not thread-safe and clients should
/// take care to serialize accesses to methods.
///
/// Because this library is intended to be vendored, it provides full type
/// definitions and encourages clients to stack or statically allocate
/// where convenient.
///
/// @{
/// \brief Metadata version enumerator
enum ArrowIpcMetadataVersion {
NANOARROW_IPC_METADATA_VERSION_V1,
NANOARROW_IPC_METADATA_VERSION_V2,
NANOARROW_IPC_METADATA_VERSION_V3,
NANOARROW_IPC_METADATA_VERSION_V4,
NANOARROW_IPC_METADATA_VERSION_V5
};
/// \brief Message type enumerator
enum ArrowIpcMessageType {
NANOARROW_IPC_MESSAGE_TYPE_UNINITIALIZED,
NANOARROW_IPC_MESSAGE_TYPE_SCHEMA,
NANOARROW_IPC_MESSAGE_TYPE_DICTIONARY_BATCH,
NANOARROW_IPC_MESSAGE_TYPE_RECORD_BATCH,
NANOARROW_IPC_MESSAGE_TYPE_TENSOR,
NANOARROW_IPC_MESSAGE_TYPE_SPARSE_TENSOR
};
/// \brief Endianness enumerator
enum ArrowIpcEndianness {
NANOARROW_IPC_ENDIANNESS_UNINITIALIZED,
NANOARROW_IPC_ENDIANNESS_LITTLE,
NANOARROW_IPC_ENDIANNESS_BIG
};
/// \brief Compression type enumerator
enum ArrowIpcCompressionType {
NANOARROW_IPC_COMPRESSION_TYPE_NONE,
NANOARROW_IPC_COMPRESSION_TYPE_LZ4_FRAME,
NANOARROW_IPC_COMPRESSION_TYPE_ZSTD
};
/// \brief Feature flag for a stream that uses dictionary replacement
#define NANOARROW_IPC_FEATURE_DICTIONARY_REPLACEMENT 1
/// \brief Feature flag for a stream that uses compression
#define NANOARROW_IPC_FEATURE_COMPRESSED_BODY 2
/// \brief Checks the nanoarrow runtime to make sure the run/build versions match
ArrowErrorCode ArrowIpcCheckRuntime(struct ArrowError* error);
/// \brief Get the endianness of the current runtime
static inline enum ArrowIpcEndianness ArrowIpcSystemEndianness(void) {
uint32_t check = 1;
char first_byte;
memcpy(&first_byte, &check, sizeof(char));
if (first_byte) {
return NANOARROW_IPC_ENDIANNESS_LITTLE;
} else {
return NANOARROW_IPC_ENDIANNESS_BIG;
}
}
/// \brief A structure representing a reference-counted buffer that may be passed to
/// ArrowIpcDecoderDecodeArrayFromShared().
struct ArrowIpcSharedBuffer {
struct ArrowBuffer private_src;
};
/// \brief Initialize the contents of a ArrowIpcSharedBuffer struct
///
/// If NANOARROW_OK is returned, the ArrowIpcSharedBuffer takes ownership of
/// src.
ArrowErrorCode ArrowIpcSharedBufferInit(struct ArrowIpcSharedBuffer* shared,
struct ArrowBuffer* src);
/// \brief Release the caller's copy of the shared buffer
///
/// When finished, the caller must relinquish its own copy of the shared data
/// using this function. The original buffer will continue to exist until all
/// ArrowArray objects that refer to it have also been released.
void ArrowIpcSharedBufferReset(struct ArrowIpcSharedBuffer* shared);
/// \brief Check for shared buffer thread safety
///
/// Thread-safe shared buffers require C11 and the stdatomic.h header.
/// If either are unavailable, shared buffers are still possible but
/// the resulting arrays must not be passed to other threads to be released.
int ArrowIpcSharedBufferIsThreadSafe(void);
/// \brief Decoder for Arrow IPC messages
///
/// This structure is intended to be allocated by the caller,
/// initialized using ArrowIpcDecoderInit(), and released with
/// ArrowIpcDecoderReset(). These fields should not be modified
/// by the caller but can be read following a call to
/// ArrowIpcDecoderPeekHeader(), ArrowIpcDecoderVerifyHeader(), or
/// ArrowIpcDecoderDecodeHeader().
struct ArrowIpcDecoder {
/// \brief The last verified or decoded message type
enum ArrowIpcMessageType message_type;
/// \brief The metadata version as indicated by the current schema message
enum ArrowIpcMetadataVersion metadata_version;
/// \brief Buffer endianness as indicated by the current schema message
enum ArrowIpcEndianness endianness;
/// \brief Arrow IPC Features used as indicated by the current Schema message
int32_t feature_flags;
/// \brief Compression used by the current RecordBatch message
enum ArrowIpcCompressionType codec;
/// \brief The number of bytes in the current header message
///
/// This value includes the 8 bytes before the start of the header message
/// content and any padding bytes required to make the header message size
/// be a multiple of 8 bytes.
int32_t header_size_bytes;
/// \brief The number of bytes in the forthcoming body message.
int64_t body_size_bytes;
/// \brief The last decoded Footer
///
/// \warning This API is currently only public for use in integration testing;
/// use at your own risk.
struct ArrowIpcFooter* footer;
/// \brief Private resources managed by this library
void* private_data;
};
/// \brief Initialize a decoder
ArrowErrorCode ArrowIpcDecoderInit(struct ArrowIpcDecoder* decoder);
/// \brief Release all resources attached to a decoder
void ArrowIpcDecoderReset(struct ArrowIpcDecoder* decoder);
/// \brief Peek at a message header
///
/// The first 8 bytes of an Arrow IPC message are 0xFFFFFFFF followed by the size
/// of the header as a little-endian 32-bit integer. ArrowIpcDecoderPeekHeader() reads
/// these bytes and returns ESPIPE if there are not enough remaining bytes in data to read
/// the entire header message, EINVAL if the first 8 bytes are not valid, ENODATA if the
/// Arrow end-of-stream indicator has been reached, or NANOARROW_OK otherwise.
ArrowErrorCode ArrowIpcDecoderPeekHeader(struct ArrowIpcDecoder* decoder,
struct ArrowBufferView data,
struct ArrowError* error);
/// \brief Verify a message header
///
/// Runs ArrowIpcDecoderPeekHeader() to ensure data is sufficiently large but additionally
/// runs flatbuffer verification to ensure that decoding the data will not access
/// memory outside of the buffer specified by data. ArrowIpcDecoderVerifyHeader() will
/// also set decoder.header_size_bytes, decoder.body_size_bytes, decoder.metadata_version,
/// and decoder.message_type.
///
/// Returns as ArrowIpcDecoderPeekHeader() and additionally will
/// return EINVAL if flatbuffer verification fails.
ArrowErrorCode ArrowIpcDecoderVerifyHeader(struct ArrowIpcDecoder* decoder,
struct ArrowBufferView data,
struct ArrowError* error);
/// \brief Decode a message header
///
/// Runs ArrowIpcDecoderPeekHeader() to ensure data is sufficiently large and decodes
/// the content of the message header. If data contains a schema message,
/// decoder.endianness and decoder.feature_flags is set and ArrowIpcDecoderDecodeSchema()
/// can be used to obtain the decoded schema. If data contains a record batch message,
/// decoder.codec is set and a successful call can be followed by a call to
/// ArrowIpcDecoderDecodeArray().
///
/// In almost all cases this should be preceded by a call to
/// ArrowIpcDecoderVerifyHeader() to ensure decoding does not access data outside of the
/// specified buffer.
///
/// Returns EINVAL if the content of the message cannot be decoded or ENOTSUP if the
/// content of the message uses features not supported by this library.
ArrowErrorCode ArrowIpcDecoderDecodeHeader(struct ArrowIpcDecoder* decoder,
struct ArrowBufferView data,
struct ArrowError* error);
/// \brief Decode an ArrowSchema
///
/// After a successful call to ArrowIpcDecoderDecodeHeader(), retrieve an ArrowSchema.
/// The caller is responsible for releasing the schema if NANOARROW_OK is returned.
///
/// Returns EINVAL if the decoder did not just decode a schema message or
/// NANOARROW_OK otherwise.
ArrowErrorCode ArrowIpcDecoderDecodeSchema(struct ArrowIpcDecoder* decoder,
struct ArrowSchema* out,
struct ArrowError* error);
/// \brief Set the ArrowSchema used to decode future record batch messages
///
/// Prepares the decoder for future record batch messages
/// of this type. The decoder takes ownership of schema if NANOARROW_OK is returned.
/// Note that you must call this explicitly after decoding a
/// Schema message (i.e., the decoder does not assume that the last-decoded
/// schema message applies to future record batch messages).
///
/// Returns EINVAL if schema validation fails or NANOARROW_OK otherwise.
ArrowErrorCode ArrowIpcDecoderSetSchema(struct ArrowIpcDecoder* decoder,
struct ArrowSchema* schema,
struct ArrowError* error);
/// \brief Set the endianness used to decode future record batch messages
///
/// Prepares the decoder for future record batch messages with the specified
/// endianness. Note that you must call this explicitly after decoding a
/// Schema message (i.e., the decoder does not assume that the last-decoded
/// schema message applies to future record batch messages).
///
/// Returns NANOARROW_OK on success.
ArrowErrorCode ArrowIpcDecoderSetEndianness(struct ArrowIpcDecoder* decoder,
enum ArrowIpcEndianness endianness);
/// \brief Decode an ArrowArrayView
///
/// After a successful call to ArrowIpcDecoderDecodeHeader(), deserialize the content
/// of body into an internally-managed ArrowArrayView and return it. Note that field index
/// does not equate to column index if any columns contain nested types. Use a value of -1
/// to decode the entire array into a struct. The pointed-to ArrowArrayView is owned by
/// the ArrowIpcDecoder and must not be released.
///
/// For streams that match system endianness and do not use compression, this operation
/// will not perform any heap allocations; however, the buffers referred to by the
/// returned ArrowArrayView are only valid as long as the buffer referred to by body stays
/// valid.
ArrowErrorCode ArrowIpcDecoderDecodeArrayView(struct ArrowIpcDecoder* decoder,
struct ArrowBufferView body, int64_t i,
struct ArrowArrayView** out,
struct ArrowError* error);
/// \brief Decode an ArrowArray
///
/// After a successful call to ArrowIpcDecoderDecodeHeader(), assemble an ArrowArray given
Loading ...