Learn more  » Push, build, and install  RubyGems npm packages Python packages Maven artifacts PHP packages Go Modules Bower components Debian packages RPM packages NuGet packages

arrow-nightlies / nanoarrow   python

Repository URL to install this package:

Version: 0.7.0.dev132 

/ vendor / nanoarrow.c

// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements.  See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership.  The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License.  You may obtain a copy of the License at
//
//   http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied.  See the License for the
// specific language governing permissions and limitations
// under the License.

#include <errno.h>
#include <stdarg.h>
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "nanoarrow.h"

const char* ArrowNanoarrowVersion(void) { return NANOARROW_VERSION; }

int ArrowNanoarrowVersionInt(void) { return NANOARROW_VERSION_INT; }

ArrowErrorCode ArrowErrorSet(struct ArrowError* error, const char* fmt, ...) {
  if (error == NULL) {
    return NANOARROW_OK;
  }

  memset(error->message, 0, sizeof(error->message));

  va_list args;
  va_start(args, fmt);
  int chars_needed = vsnprintf(error->message, sizeof(error->message), fmt, args);
  va_end(args);

  if (chars_needed < 0) {
    return EINVAL;
  } else if (((size_t)chars_needed) >= sizeof(error->message)) {
    return ERANGE;
  } else {
    return NANOARROW_OK;
  }
}

void ArrowLayoutInit(struct ArrowLayout* layout, enum ArrowType storage_type) {
  layout->buffer_type[0] = NANOARROW_BUFFER_TYPE_VALIDITY;
  layout->buffer_data_type[0] = NANOARROW_TYPE_BOOL;
  layout->buffer_type[1] = NANOARROW_BUFFER_TYPE_DATA;
  layout->buffer_data_type[1] = storage_type;
  layout->buffer_type[2] = NANOARROW_BUFFER_TYPE_NONE;
  layout->buffer_data_type[2] = NANOARROW_TYPE_UNINITIALIZED;

  layout->element_size_bits[0] = 1;
  layout->element_size_bits[1] = 0;
  layout->element_size_bits[2] = 0;

  layout->child_size_elements = 0;

  switch (storage_type) {
    case NANOARROW_TYPE_UNINITIALIZED:
    case NANOARROW_TYPE_NA:
    case NANOARROW_TYPE_RUN_END_ENCODED:
      layout->buffer_type[0] = NANOARROW_BUFFER_TYPE_NONE;
      layout->buffer_data_type[0] = NANOARROW_TYPE_UNINITIALIZED;
      layout->buffer_type[1] = NANOARROW_BUFFER_TYPE_NONE;
      layout->buffer_data_type[1] = NANOARROW_TYPE_UNINITIALIZED;
      layout->element_size_bits[0] = 0;
      break;

    case NANOARROW_TYPE_LIST:
    case NANOARROW_TYPE_MAP:
      layout->buffer_type[1] = NANOARROW_BUFFER_TYPE_DATA_OFFSET;
      layout->buffer_data_type[1] = NANOARROW_TYPE_INT32;
      layout->element_size_bits[1] = 32;
      break;

    case NANOARROW_TYPE_LARGE_LIST:
      layout->buffer_type[1] = NANOARROW_BUFFER_TYPE_DATA_OFFSET;
      layout->buffer_data_type[1] = NANOARROW_TYPE_INT64;
      layout->element_size_bits[1] = 64;
      break;

    case NANOARROW_TYPE_STRUCT:
    case NANOARROW_TYPE_FIXED_SIZE_LIST:
      layout->buffer_type[1] = NANOARROW_BUFFER_TYPE_NONE;
      layout->buffer_data_type[1] = NANOARROW_TYPE_UNINITIALIZED;
      break;

    case NANOARROW_TYPE_BOOL:
      layout->element_size_bits[1] = 1;
      break;

    case NANOARROW_TYPE_UINT8:
    case NANOARROW_TYPE_INT8:
      layout->element_size_bits[1] = 8;
      break;

    case NANOARROW_TYPE_UINT16:
    case NANOARROW_TYPE_INT16:
    case NANOARROW_TYPE_HALF_FLOAT:
      layout->element_size_bits[1] = 16;
      break;

    case NANOARROW_TYPE_UINT32:
    case NANOARROW_TYPE_INT32:
    case NANOARROW_TYPE_FLOAT:
      layout->element_size_bits[1] = 32;
      break;
    case NANOARROW_TYPE_INTERVAL_MONTHS:
      layout->buffer_data_type[1] = NANOARROW_TYPE_INT32;
      layout->element_size_bits[1] = 32;
      break;

    case NANOARROW_TYPE_UINT64:
    case NANOARROW_TYPE_INT64:
    case NANOARROW_TYPE_DOUBLE:
    case NANOARROW_TYPE_INTERVAL_DAY_TIME:
      layout->element_size_bits[1] = 64;
      break;

    case NANOARROW_TYPE_DECIMAL128:
    case NANOARROW_TYPE_INTERVAL_MONTH_DAY_NANO:
      layout->element_size_bits[1] = 128;
      break;

    case NANOARROW_TYPE_DECIMAL256:
      layout->element_size_bits[1] = 256;
      break;

    case NANOARROW_TYPE_FIXED_SIZE_BINARY:
      layout->buffer_data_type[1] = NANOARROW_TYPE_BINARY;
      break;

    case NANOARROW_TYPE_DENSE_UNION:
      layout->buffer_type[0] = NANOARROW_BUFFER_TYPE_TYPE_ID;
      layout->buffer_data_type[0] = NANOARROW_TYPE_INT8;
      layout->element_size_bits[0] = 8;
      layout->buffer_type[1] = NANOARROW_BUFFER_TYPE_UNION_OFFSET;
      layout->buffer_data_type[1] = NANOARROW_TYPE_INT32;
      layout->element_size_bits[1] = 32;
      break;

    case NANOARROW_TYPE_SPARSE_UNION:
      layout->buffer_type[0] = NANOARROW_BUFFER_TYPE_TYPE_ID;
      layout->buffer_data_type[0] = NANOARROW_TYPE_INT8;
      layout->element_size_bits[0] = 8;
      layout->buffer_type[1] = NANOARROW_BUFFER_TYPE_NONE;
      layout->buffer_data_type[1] = NANOARROW_TYPE_UNINITIALIZED;
      break;

    case NANOARROW_TYPE_STRING:
    case NANOARROW_TYPE_BINARY:
      layout->buffer_type[1] = NANOARROW_BUFFER_TYPE_DATA_OFFSET;
      layout->buffer_data_type[1] = NANOARROW_TYPE_INT32;
      layout->element_size_bits[1] = 32;
      layout->buffer_type[2] = NANOARROW_BUFFER_TYPE_DATA;
      layout->buffer_data_type[2] = storage_type;
      break;

    case NANOARROW_TYPE_LARGE_STRING:
      layout->buffer_type[1] = NANOARROW_BUFFER_TYPE_DATA_OFFSET;
      layout->buffer_data_type[1] = NANOARROW_TYPE_INT64;
      layout->element_size_bits[1] = 64;
      layout->buffer_type[2] = NANOARROW_BUFFER_TYPE_DATA;
      layout->buffer_data_type[2] = NANOARROW_TYPE_STRING;
      break;
    case NANOARROW_TYPE_LARGE_BINARY:
      layout->buffer_type[1] = NANOARROW_BUFFER_TYPE_DATA_OFFSET;
      layout->buffer_data_type[1] = NANOARROW_TYPE_INT64;
      layout->element_size_bits[1] = 64;
      layout->buffer_type[2] = NANOARROW_BUFFER_TYPE_DATA;
      layout->buffer_data_type[2] = NANOARROW_TYPE_BINARY;
      break;

    case NANOARROW_TYPE_BINARY_VIEW:
      layout->buffer_type[1] = NANOARROW_BUFFER_TYPE_DATA;
      layout->buffer_data_type[1] = NANOARROW_TYPE_BINARY_VIEW;
      layout->element_size_bits[1] = 128;
      break;
    case NANOARROW_TYPE_STRING_VIEW:
      layout->buffer_type[1] = NANOARROW_BUFFER_TYPE_DATA;
      layout->buffer_data_type[1] = NANOARROW_TYPE_STRING_VIEW;
      layout->element_size_bits[1] = 128;

    default:
      break;
  }
}

void* ArrowMalloc(int64_t size) { return malloc(size); }

void* ArrowRealloc(void* ptr, int64_t size) { return realloc(ptr, size); }

void ArrowFree(void* ptr) { free(ptr); }

static uint8_t* ArrowBufferAllocatorMallocReallocate(
    struct ArrowBufferAllocator* allocator, uint8_t* ptr, int64_t old_size,
    int64_t new_size) {
  NANOARROW_UNUSED(allocator);
  NANOARROW_UNUSED(old_size);
  return (uint8_t*)ArrowRealloc(ptr, new_size);
}

static void ArrowBufferAllocatorMallocFree(struct ArrowBufferAllocator* allocator,
                                           uint8_t* ptr, int64_t size) {
  NANOARROW_UNUSED(allocator);
  NANOARROW_UNUSED(size);
  if (ptr != NULL) {
    ArrowFree(ptr);
  }
}

static struct ArrowBufferAllocator ArrowBufferAllocatorMalloc = {
    &ArrowBufferAllocatorMallocReallocate, &ArrowBufferAllocatorMallocFree, NULL};

struct ArrowBufferAllocator ArrowBufferAllocatorDefault(void) {
  return ArrowBufferAllocatorMalloc;
}

static uint8_t* ArrowBufferDeallocatorReallocate(struct ArrowBufferAllocator* allocator,
                                                 uint8_t* ptr, int64_t old_size,
                                                 int64_t new_size) {
  NANOARROW_UNUSED(new_size);

  // Attempting to reallocate a buffer with a custom deallocator is
  // a programming error. In debug mode, crash here.
#if defined(NANOARROW_DEBUG)
  NANOARROW_PRINT_AND_DIE(ENOMEM,
                          "It is an error to reallocate a buffer whose allocator is "
                          "ArrowBufferDeallocator()");
#endif

  // In release mode, ensure the the deallocator is called exactly
  // once using the pointer it was given and return NULL, which
  // will trigger the caller to return ENOMEM.
  allocator->free(allocator, ptr, old_size);
  *allocator = ArrowBufferAllocatorDefault();
  return NULL;
}

struct ArrowBufferAllocator ArrowBufferDeallocator(
    void (*custom_free)(struct ArrowBufferAllocator* allocator, uint8_t* ptr,
                        int64_t size),
    void* private_data) {
  struct ArrowBufferAllocator allocator;
  allocator.reallocate = &ArrowBufferDeallocatorReallocate;
  allocator.free = custom_free;
  allocator.private_data = private_data;
  return allocator;
}

static const int kInt32DecimalDigits = 9;

static const uint64_t kUInt32PowersOfTen[] = {
    1ULL,      10ULL,      100ULL,      1000ULL,      10000ULL,
    100000ULL, 1000000ULL, 10000000ULL, 100000000ULL, 1000000000ULL};

// Adapted from Arrow C++ to use 32-bit words for better C portability
// https://github.com/apache/arrow/blob/cd3321b28b0c9703e5d7105d6146c1270bbadd7f/cpp/src/arrow/util/decimal.cc#L524-L544
static void ShiftAndAdd(struct ArrowStringView value, uint32_t* out, int64_t out_size) {
  // We use strtoll for parsing, which needs input that is null-terminated
  char chunk_string[16];

  for (int64_t posn = 0; posn < value.size_bytes;) {
    int64_t remaining = value.size_bytes - posn;

    int64_t group_size;
    if (remaining > kInt32DecimalDigits) {
      group_size = kInt32DecimalDigits;
    } else {
      group_size = remaining;
    }

    const uint64_t multiple = kUInt32PowersOfTen[group_size];

    memcpy(chunk_string, value.data + posn, group_size);
    chunk_string[group_size] = '\0';
    uint32_t chunk = (uint32_t)strtoll(chunk_string, NULL, 10);

    for (int64_t i = 0; i < out_size; i++) {
      uint64_t tmp = out[i];
      tmp *= multiple;
      tmp += chunk;
      out[i] = (uint32_t)(tmp & 0xFFFFFFFFULL);
      chunk = (uint32_t)(tmp >> 32);
    }
    posn += group_size;
  }
}

ArrowErrorCode ArrowDecimalSetDigits(struct ArrowDecimal* decimal,
                                     struct ArrowStringView value) {
  // Check for sign
  int is_negative = value.data[0] == '-';
  int has_sign = is_negative || value.data[0] == '+';
  value.data += has_sign;
  value.size_bytes -= has_sign;

  // Check all characters are digits that are not the negative sign
  for (int64_t i = 0; i < value.size_bytes; i++) {
    char c = value.data[i];
    if (c < '0' || c > '9') {
      return EINVAL;
    }
  }

  // Skip over leading 0s
  int64_t n_leading_zeroes = 0;
  for (int64_t i = 0; i < value.size_bytes; i++) {
    if (value.data[i] == '0') {
      n_leading_zeroes++;
    } else {
      break;
    }
  }

  value.data += n_leading_zeroes;
  value.size_bytes -= n_leading_zeroes;

  // Use 32-bit words for portability
  uint32_t words32[8];
  int n_words32 = decimal->n_words * 2;
  NANOARROW_DCHECK(n_words32 <= 8);
  memset(words32, 0, sizeof(words32));

  ShiftAndAdd(value, words32, n_words32);

  if (decimal->low_word_index == 0) {
    memcpy(decimal->words, words32, sizeof(uint32_t) * n_words32);
  } else {
    uint64_t lo;
    uint64_t hi;

    for (int i = 0; i < decimal->n_words; i++) {
      lo = (uint64_t)words32[i * 2];
      hi = (uint64_t)words32[i * 2 + 1] << 32;
      decimal->words[decimal->n_words - i - 1] = lo | hi;
    }
Loading ...