Learn more  » Push, build, and install  RubyGems npm packages Python packages Maven artifacts PHP packages Go Modules Bower components Debian packages RPM packages NuGet packages

arrow-nightlies / nanoarrow   python

Repository URL to install this package:

Version: 0.7.0.dev132 

/ vendor / flatcc / flatcc_builder.h

#ifndef FLATCC_BUILDER_H
#define FLATCC_BUILDER_H

#ifdef __cplusplus
extern "C" {
#endif

/**
 * Library for building untyped FlatBuffers. Intended as a support
 * library for generated C code to produce typed builders, but might
 * also be useful in runtime environments and as support for scripting
 * languages.
 *
 * The builder has two API layers: a stack based `start/end` approach,
 * and a direct `create`, and they may be mixed freely. The direct
 * approach may be used as part of more specialized optimizations such
 * as rewriting buffers while the stack approach is convenient for state
 * machine driven parsers without a stack, or with a very simple stack
 * without extra allocations.
 *
 * The builder emits partial buffer sequences to a user provided emitter
 * function and does not require a full buffer representation in memory.
 * For this reason it also does not support sorting or other operations
 * that require representing the buffer, but post-processors can easily
 * do this, and the generated schema specific code can provide functions
 * to handle this.
 *
 * A custom allocator with a default realloc implementation can place
 * restraints on resource consumption and provide initial allocation
 * sizes for various buffers and stacks in use.
 *
 * A buffer under construction uses a virtual address space for the
 * completed part of the buffer, starting at 0 and growing in both
 * directions, or just down depending on whether vtables should be
 * clustered at the end or not. Clustering may help caching and
 * preshipping that part of the buffer.
 *
 * Because an offset cannot be known before its reference location is
 * defined, every completed table, vector, etc. returns a reference into
 * the virtual address range. If the final buffer keeps the 0 offset,
 * these references remain stable and may be used for external references
 * into the buffer.
 *
 * The maximum buffer that can be constructed is in practice limited to
 * half the UOFFSET_MAX size, typically 2^31 bytes, not counting
 * clustered vtables that may consume an additional 2^31 bytes
 * (positive address range), but in practice cannot because vtable
 * references are signed and thus limited to 2^31 bytes (or equivalent
 * depending on the flatbuffer types chosen).
 *
 * CORRECTION: in various places rules are mentioned about nesting and using
 * a reference at most once. In fact, DAGs are also valid flatbuffers.
 * This means a reference may be reused as long as each individual use
 * obeys the rules and, for example, circular references are not
 * constructed (circular types are ok, but object graphs with cycles
 * are not permitted). Be especially aware of the offset vector create
 * call which translates the references into offsets - this can be
 * reverted by noting the references in the vector and calculating the
 * base used for the offsets to restore the original references after
 * the vector has been emitted.
 */

#include <stdlib.h>
#ifndef UINT8_MAX
#include <stdint.h>
#endif

#include "flatcc_flatbuffers.h"
#include "flatcc_emitter.h"
#include "flatcc_refmap.h"

/*
 * Assertion hook for the builder. It is possible to enable logging here
 * by defining this macro before the header is included. The default
 * expansion forwards only `cond` to FLATCC_ASSERT; the `reason` argument
 * (presumably a description of the failed check) is dropped.
 */
#ifndef FLATCC_BUILDER_ASSERT
#define FLATCC_BUILDER_ASSERT(cond, reason) FLATCC_ASSERT(cond)
#endif

/*
 * Error handling is not convenient and correct use should not cause
 * errors beyond possibly memory allocation, but assertions are a
 * good way to trace problems.
 *
 * Note: some internal assertions will remain if disabled.
 *
 * Defaults to 1 unless defined before this header is included.
 */
#ifndef FLATCC_BUILDER_ASSERT_ON_ERROR
#define FLATCC_BUILDER_ASSERT_ON_ERROR 1
#endif

/*
 * Controls whether user input is checked against the builder state.
 * With checks in place an error is returned on violation; with checks
 * skipped errors are ignored (assuming they won't happen).
 * Errors will be asserted if enabled and checks are not skipped.
 *
 * Defaults to 0 unless defined before this header is included.
 * NOTE(review): going by the macro name, a non-zero value skips the
 * checks and 0 keeps them - confirm against the builder implementation.
 */
#ifndef FLATCC_BUILDER_SKIP_CHECKS
#define FLATCC_BUILDER_SKIP_CHECKS 0
#endif


/*
 * When adding the same field to a table twice this is either an error
 * or the existing field is returned, potentially introducing garbage
 * if the type is a vector, table, or string. When implementing parsers
 * it may be convenient to not treat this as an error.
 *
 * Defaults to 0 unless defined before this header is included.
 * NOTE(review): presumably 0 selects the error behavior and non-zero
 * returns the existing field - confirm against the builder implementation.
 */
#ifndef FLATCC_BUILDER_ALLOW_REPEAT_TABLE_ADD
#define FLATCC_BUILDER_ALLOW_REPEAT_TABLE_ADD 0
#endif

/**
 * Builder reference type, an alias of `flatbuffers_soffset_t`.
 *
 * This type must have same size as `flatbuffers_uoffset_t`
 * and must be a signed type.
 */
typedef flatbuffers_soffset_t flatcc_builder_ref_t;
/** Type of the `type` member of `flatcc_builder_union_ref_t`. */
typedef flatbuffers_utype_t flatcc_builder_utype_t;

/**
 * Pairs a union type code with a builder reference.
 *
 * This type must be compatible with code generation that
 * creates union specific ref types.
 */
typedef struct flatcc_builder_union_ref {
    /* Union type code. */
    flatcc_builder_utype_t type;
    /* Builder reference; presumably to the union member value - confirm. */
    flatcc_builder_ref_t value;
} flatcc_builder_union_ref_t;

/**
 * Pair of builder references for a union vector. Unlike
 * `flatcc_builder_union_ref_t`, the `type` member is itself a reference
 * rather than a type code.
 *
 * NOTE(review): presumably `type` refers to the vector of type codes and
 * `value` to the vector of member references - confirm.
 */
typedef struct flatcc_builder_union_vec_ref {
    flatcc_builder_ref_t type;
    flatcc_builder_ref_t value;
} flatcc_builder_union_vec_ref_t;

/**
 * Virtual tables are off by one to avoid being mistaken for error at
 * position 0, and it makes them detectable as such because no other
 * reference is odd. Vtables are emitted at their actual location
 * which is one less than the reference value.
 */
typedef flatbuffers_soffset_t flatcc_builder_vt_ref_t;

/*
 * Identifier value with the same representation as
 * `flatbuffers_uoffset_t`.
 * NOTE(review): presumably holds a buffer's file identifier - confirm.
 */
typedef flatbuffers_uoffset_t flatcc_builder_identifier_t;

/**
 * Hints to custom allocators so they can provide initial alloc sizes
 * etc. There will be at most one buffer for each allocation type per
 * flatcc_builder instance. Buffers containing only structs may avoid
 * allocation altogether using a `create` call. The vs stack must hold
 * vtable entries for all open tables up to their requested max id, but
 * unused max id overlap on the stack. The final vtables only store the
 * largest id actually added. The fs stack must hold stack frames for
 * the nesting levels expected in the buffer, each about 50-100 bytes.
 * The ds stack holds open vectors, table data, and nested buffer state.
 * `create` calls bypass the `ds` and `fs` stack and are thus faster.
 * The vb buffer holds a copy of all vtables seen and emitted since last
 * vtable flush. The patch log holds a uoffset for every table field
 * added to currently open tables. The hash table holds a uoffset entry
 * for each hash slot where the allocator decides how many to provide
 * above a certain minimum. The vd buffer allocates vtable descriptors
 * which is a reference to an emitted vtable, an offset to a cached
 * vtable, and a link to next descriptor with same hash. Calling `reset`
 * after build can either keep the allocation levels for the next
 * buffer, or reduce the buffers already allocated by requesting 1 byte
 * allocations (meaning provide a default).
 *
 * The user stack is not automatically allocated, but when entered
 * explicitly, the boundary is remembered in the current live
 * frame.
 */
enum flatcc_builder_alloc_type {
    /* vs: the stack on which vtables are built. */
    flatcc_builder_alloc_vs = 0,
    /* ds: the stack on which data structures are built. */
    flatcc_builder_alloc_ds = 1,
    /* vb: the virtual table buffer cache, keeps a copy of every vt seen. */
    flatcc_builder_alloc_vb = 2,
    /* pl: the patch log of table fields with outstanding offset refs. */
    flatcc_builder_alloc_pl = 3,
    /* fs: the stack of frames for nested types. */
    flatcc_builder_alloc_fs = 4,
    /* ht: the hash table part of the virtual table cache. */
    flatcc_builder_alloc_ht = 5,
    /* vd: the vtable descriptor buffer, i.e. list elements for emitted vtables. */
    flatcc_builder_alloc_vd = 6,
    /* us: the user stack frame for custom data. */
    flatcc_builder_alloc_us = 7,

    /* Number of allocation buffers; must remain the last enumerator. */
    flatcc_builder_alloc_buffer_count
};

/** Must reflect the `flatcc_builder_alloc_type` enum. */
#define FLATCC_BUILDER_ALLOC_BUFFER_COUNT flatcc_builder_alloc_buffer_count

/*
 * Memory management hooks for the builder. Each macro may be defined
 * before this header is included; otherwise it forwards its arguments
 * unchanged to the FLATCC_* macro of the same kind.
 */
#ifndef FLATCC_BUILDER_ALLOC
#define FLATCC_BUILDER_ALLOC(n) FLATCC_ALLOC(n)
#endif

#ifndef FLATCC_BUILDER_FREE
#define FLATCC_BUILDER_FREE(p) FLATCC_FREE(p)
#endif

#ifndef FLATCC_BUILDER_REALLOC
#define FLATCC_BUILDER_REALLOC(p, n) FLATCC_REALLOC(p, n)
#endif

#ifndef FLATCC_BUILDER_ALIGNED_ALLOC
#define FLATCC_BUILDER_ALIGNED_ALLOC(a, n) FLATCC_ALIGNED_ALLOC(a, n)
#endif

#ifndef FLATCC_BUILDER_ALIGNED_FREE
#define FLATCC_BUILDER_ALIGNED_FREE(p) FLATCC_ALIGNED_FREE(p)
#endif

/**
 * Emits data to a conceptual deque by appending to either front or
 * back, starting from offset 0.
 *
 * Each emit call appends a strictly later or earlier sequence than the
 * last emit with same offset sign. Thus a buffer is gradually grown at
 * both ends. `len` is the combined length of all iov entries such that
 * `offset + len` yields the former offset for negative offsets and
 * `offset + len` yields the next offset for non-negative offsets.
 * The bulk of the data will be in the negative range, possibly all of
 * it. The first emitted range will either start or end at
 * offset 0. If offset 0 is emitted, it indicates the start of clustered
 * vtables. The last positive (non-zero) offset may be zero padding to
 * place the buffer in a full multiple of `block_align`, if set.
 *
 * No iov entry is empty, 0 < iov_count <= FLATCC_IOV_COUNT_MAX.
 *
 * The source data are in general ephemeral and should be consumed
 * immediately, as opposed to caching iov.
 *
 * For high performance applications:
 *
 * The `create` calls may reference longer living data, but header
 * fields etc. will still be short lived. If an emitter wants to
 * reference data in another buffer rather than copying, it should
 * inspect the memory range. The length of an iov entry may also be used
 * since headers are never very long (anything starting at 16 bytes can
 * safely be assumed to be user provided, or static zero padding). It is
 * guaranteed that data pointers in `create` calls receive a unique slot
 * separate from temporary headers, in the iov table which may be used
 * for range checking or hashing (`create_table` is the only call that
 * mutates the data buffer). It is also guaranteed (with the exception
 * of `create_table` and `create_cached_vtable`) that data provided to
 * create calls are not referenced at all by the builder, and these data
 * may therefore de-facto be handles rather than direct pointers when
 * the emitter and data provider can agree on such a protocol. This does
 * NOT apply to any start/end/add/etc. calls which do copy to stack.
 * `flatcc_builder_padding_base` may be used to test if an iov entry is
 * zero padding which always begins at that address.
 *
 * Future: the emit interface could be extended with a type code
 * and return an existing object instead of the emitted if, for
 * example, they are identical. Outside this api level, generated
 * code could provide a table comparison function to help such
 * deduplication. It would be optional because two equal objects
 * are not necessarily identical. The emitter already receives
 * one object at time.
 *
 * Returns 0 on success and otherwise causes the flatcc_builder
 * to fail.
 */
typedef int flatcc_builder_emit_fun(
        void *emit_context,             /* context pointer handed to the emitter */
        const flatcc_iovec_t *iov,      /* entries to emit, none of them empty */
        int iov_count,                  /* number of entries in `iov`, always > 0 */
        flatbuffers_soffset_t offset,   /* offset at which this emit starts */
        size_t len);                    /* combined length of all iov entries */

/*
 * Static padding used in emitter calls; an iov entry that is zero
 * padding always begins at this address. May sometimes also be used
 * for empty defaults such as identifier.
 */
extern const uint8_t flatcc_builder_padding_base[];

/**
 * `request` is a minimum size to be returned, but allocation is
 * expected to grow exponentially or in reasonable chunks. Notably,
 * `alloc_type = flatcc_builder_alloc_ht` will only use highest available
 * power of 2. The allocator may shrink if `request` is well below
 * current size but should avoid repeated resizing on small changes in
 * request sizes. If `zero_fill` is non-zero, allocated data beyond
 * the current size must be zeroed. The buffer `b` may be null with 0
 * length initially. `alloc_context` is completely implementation
 * dependent, and not needed when just relying on realloc. The
 * resulting buffer may be the same or different with moved data, like
 * realloc. Returns -1 with unmodified buffer on failure or 0 on
 * success. The `alloc_type` identifies the buffer type. This may be
 * used to cache buffers between instances of builders, or to decide a
 * default allocation size larger than requested. If `request` is zero
 * the buffer should be deallocated if it is currently allocated, and
 * success (0) returned regardless.
 */
typedef int flatcc_builder_alloc_fun(void *alloc_context,
        flatcc_iovec_t *b, size_t request, int zero_fill, int alloc_type);

/*
 * The minimum number of hash slots that space will be allocated for.
 * The allocator may provide more. The size returned should be
 * `sizeof(flatbuffers_uoffset_t) * count`, where the size is a power of
 * 2 (or the rest is wasted). The hash table can store many more entries
 * than slots using linear search. The table does not resize.
 *
 * Defaults to 64 unless defined before this header is included.
 */
#ifndef FLATCC_BUILDER_MIN_HASH_COUNT
#define FLATCC_BUILDER_MIN_HASH_COUNT 64
#endif

/*
 * Frame state kept for a buffer.
 * NOTE(review): member roles are not spelled out in this header; the
 * hedged notes below are inferred from names - confirm against the
 * builder implementation.
 */
typedef struct __flatcc_builder_buffer_frame {
    flatcc_builder_identifier_t identifier;
    flatcc_builder_ref_t mark;
    /* Presumably an end offset into the vs (vtable) stack. */
    flatbuffers_uoffset_t vs_end;
    flatbuffers_uoffset_t nest_id;
    uint16_t flags;
    /* Presumably the block alignment the buffer is padded to, if set. */
    uint16_t block_align;
} __flatcc_builder_buffer_frame_t;

/*
 * Frame state kept for a vector.
 * NOTE(review): presumably element size in bytes, current element
 * count, and the largest permitted count - confirm against the builder
 * implementation.
 */
typedef struct __flatcc_builder_vector_frame {
    flatbuffers_uoffset_t elem_size;
    flatbuffers_uoffset_t count;
    flatbuffers_uoffset_t max_count;
} __flatcc_builder_vector_frame_t;

/*
 * Frame state kept for a table.
 * NOTE(review): member roles are inferred from names - confirm against
 * the builder implementation.
 */
typedef struct __flatcc_builder_table_frame {
    /* Presumably an end offset into the vs (vtable) stack. */
    flatbuffers_uoffset_t vs_end;
    /* Presumably an end offset into the pl (patch log) buffer. */
    flatbuffers_uoffset_t pl_end;
    uint32_t vt_hash;
    flatbuffers_voffset_t id_end;
} __flatcc_builder_table_frame_t;

/*
 * Store state for nested structures such as buffers, tables and vectors.
 *
 * For less busy data and data where access to a previous state is
 * irrelevant, the frame may store the current state directly. Otherwise
 * the current state is maintained in the flatcc_builder_t structure in a
 * possibly derived form (e.g. ds pointer instead of ds_end offset) and
 * the frame is used to store the previous state when the frame is
 * entered.
 *
 * Most operations have a start/update/end cycle that decides the
 * lifetime of a frame, but these generally also have a direct form
 * (create) that does not use a frame at all. These still do some
 * state updates notably passing min_align to parent which may also be
 * an operation without a frame following the child level operation
 * (e.g. create struct, create buffer). Ending a frame results in the
 * same kind of updates.
 */
typedef struct __flatcc_builder_frame __flatcc_builder_frame_t;
Loading ...