// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#include <errno.h>
#include <stdarg.h>
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "nanoarrow.h"
const char* ArrowNanoarrowVersion(void) { return NANOARROW_VERSION; }
int ArrowNanoarrowVersionInt(void) { return NANOARROW_VERSION_INT; }
ArrowErrorCode ArrowErrorSet(struct ArrowError* error, const char* fmt, ...) {
if (error == NULL) {
return NANOARROW_OK;
}
memset(error->message, 0, sizeof(error->message));
va_list args;
va_start(args, fmt);
int chars_needed = vsnprintf(error->message, sizeof(error->message), fmt, args);
va_end(args);
if (chars_needed < 0) {
return EINVAL;
} else if (((size_t)chars_needed) >= sizeof(error->message)) {
return ERANGE;
} else {
return NANOARROW_OK;
}
}
void ArrowLayoutInit(struct ArrowLayout* layout, enum ArrowType storage_type) {
layout->buffer_type[0] = NANOARROW_BUFFER_TYPE_VALIDITY;
layout->buffer_data_type[0] = NANOARROW_TYPE_BOOL;
layout->buffer_type[1] = NANOARROW_BUFFER_TYPE_DATA;
layout->buffer_data_type[1] = storage_type;
layout->buffer_type[2] = NANOARROW_BUFFER_TYPE_NONE;
layout->buffer_data_type[2] = NANOARROW_TYPE_UNINITIALIZED;
layout->element_size_bits[0] = 1;
layout->element_size_bits[1] = 0;
layout->element_size_bits[2] = 0;
layout->child_size_elements = 0;
switch (storage_type) {
case NANOARROW_TYPE_UNINITIALIZED:
case NANOARROW_TYPE_NA:
case NANOARROW_TYPE_RUN_END_ENCODED:
layout->buffer_type[0] = NANOARROW_BUFFER_TYPE_NONE;
layout->buffer_data_type[0] = NANOARROW_TYPE_UNINITIALIZED;
layout->buffer_type[1] = NANOARROW_BUFFER_TYPE_NONE;
layout->buffer_data_type[1] = NANOARROW_TYPE_UNINITIALIZED;
layout->element_size_bits[0] = 0;
break;
case NANOARROW_TYPE_LIST:
case NANOARROW_TYPE_MAP:
layout->buffer_type[1] = NANOARROW_BUFFER_TYPE_DATA_OFFSET;
layout->buffer_data_type[1] = NANOARROW_TYPE_INT32;
layout->element_size_bits[1] = 32;
break;
case NANOARROW_TYPE_LARGE_LIST:
layout->buffer_type[1] = NANOARROW_BUFFER_TYPE_DATA_OFFSET;
layout->buffer_data_type[1] = NANOARROW_TYPE_INT64;
layout->element_size_bits[1] = 64;
break;
case NANOARROW_TYPE_STRUCT:
case NANOARROW_TYPE_FIXED_SIZE_LIST:
layout->buffer_type[1] = NANOARROW_BUFFER_TYPE_NONE;
layout->buffer_data_type[1] = NANOARROW_TYPE_UNINITIALIZED;
break;
case NANOARROW_TYPE_BOOL:
layout->element_size_bits[1] = 1;
break;
case NANOARROW_TYPE_UINT8:
case NANOARROW_TYPE_INT8:
layout->element_size_bits[1] = 8;
break;
case NANOARROW_TYPE_UINT16:
case NANOARROW_TYPE_INT16:
case NANOARROW_TYPE_HALF_FLOAT:
layout->element_size_bits[1] = 16;
break;
case NANOARROW_TYPE_UINT32:
case NANOARROW_TYPE_INT32:
case NANOARROW_TYPE_FLOAT:
layout->element_size_bits[1] = 32;
break;
case NANOARROW_TYPE_INTERVAL_MONTHS:
layout->buffer_data_type[1] = NANOARROW_TYPE_INT32;
layout->element_size_bits[1] = 32;
break;
case NANOARROW_TYPE_UINT64:
case NANOARROW_TYPE_INT64:
case NANOARROW_TYPE_DOUBLE:
case NANOARROW_TYPE_INTERVAL_DAY_TIME:
layout->element_size_bits[1] = 64;
break;
case NANOARROW_TYPE_DECIMAL128:
case NANOARROW_TYPE_INTERVAL_MONTH_DAY_NANO:
layout->element_size_bits[1] = 128;
break;
case NANOARROW_TYPE_DECIMAL256:
layout->element_size_bits[1] = 256;
break;
case NANOARROW_TYPE_FIXED_SIZE_BINARY:
layout->buffer_data_type[1] = NANOARROW_TYPE_BINARY;
break;
case NANOARROW_TYPE_DENSE_UNION:
layout->buffer_type[0] = NANOARROW_BUFFER_TYPE_TYPE_ID;
layout->buffer_data_type[0] = NANOARROW_TYPE_INT8;
layout->element_size_bits[0] = 8;
layout->buffer_type[1] = NANOARROW_BUFFER_TYPE_UNION_OFFSET;
layout->buffer_data_type[1] = NANOARROW_TYPE_INT32;
layout->element_size_bits[1] = 32;
break;
case NANOARROW_TYPE_SPARSE_UNION:
layout->buffer_type[0] = NANOARROW_BUFFER_TYPE_TYPE_ID;
layout->buffer_data_type[0] = NANOARROW_TYPE_INT8;
layout->element_size_bits[0] = 8;
layout->buffer_type[1] = NANOARROW_BUFFER_TYPE_NONE;
layout->buffer_data_type[1] = NANOARROW_TYPE_UNINITIALIZED;
break;
case NANOARROW_TYPE_STRING:
case NANOARROW_TYPE_BINARY:
layout->buffer_type[1] = NANOARROW_BUFFER_TYPE_DATA_OFFSET;
layout->buffer_data_type[1] = NANOARROW_TYPE_INT32;
layout->element_size_bits[1] = 32;
layout->buffer_type[2] = NANOARROW_BUFFER_TYPE_DATA;
layout->buffer_data_type[2] = storage_type;
break;
case NANOARROW_TYPE_LARGE_STRING:
layout->buffer_type[1] = NANOARROW_BUFFER_TYPE_DATA_OFFSET;
layout->buffer_data_type[1] = NANOARROW_TYPE_INT64;
layout->element_size_bits[1] = 64;
layout->buffer_type[2] = NANOARROW_BUFFER_TYPE_DATA;
layout->buffer_data_type[2] = NANOARROW_TYPE_STRING;
break;
case NANOARROW_TYPE_LARGE_BINARY:
layout->buffer_type[1] = NANOARROW_BUFFER_TYPE_DATA_OFFSET;
layout->buffer_data_type[1] = NANOARROW_TYPE_INT64;
layout->element_size_bits[1] = 64;
layout->buffer_type[2] = NANOARROW_BUFFER_TYPE_DATA;
layout->buffer_data_type[2] = NANOARROW_TYPE_BINARY;
break;
case NANOARROW_TYPE_BINARY_VIEW:
layout->buffer_type[1] = NANOARROW_BUFFER_TYPE_DATA;
layout->buffer_data_type[1] = NANOARROW_TYPE_BINARY_VIEW;
layout->element_size_bits[1] = 128;
break;
case NANOARROW_TYPE_STRING_VIEW:
layout->buffer_type[1] = NANOARROW_BUFFER_TYPE_DATA;
layout->buffer_data_type[1] = NANOARROW_TYPE_STRING_VIEW;
layout->element_size_bits[1] = 128;
default:
break;
}
}
void* ArrowMalloc(int64_t size) { return malloc(size); }
void* ArrowRealloc(void* ptr, int64_t size) { return realloc(ptr, size); }
void ArrowFree(void* ptr) { free(ptr); }
static uint8_t* ArrowBufferAllocatorMallocReallocate(
struct ArrowBufferAllocator* allocator, uint8_t* ptr, int64_t old_size,
int64_t new_size) {
NANOARROW_UNUSED(allocator);
NANOARROW_UNUSED(old_size);
return (uint8_t*)ArrowRealloc(ptr, new_size);
}
static void ArrowBufferAllocatorMallocFree(struct ArrowBufferAllocator* allocator,
uint8_t* ptr, int64_t size) {
NANOARROW_UNUSED(allocator);
NANOARROW_UNUSED(size);
if (ptr != NULL) {
ArrowFree(ptr);
}
}
static struct ArrowBufferAllocator ArrowBufferAllocatorMalloc = {
&ArrowBufferAllocatorMallocReallocate, &ArrowBufferAllocatorMallocFree, NULL};
struct ArrowBufferAllocator ArrowBufferAllocatorDefault(void) {
return ArrowBufferAllocatorMalloc;
}
static uint8_t* ArrowBufferDeallocatorReallocate(struct ArrowBufferAllocator* allocator,
uint8_t* ptr, int64_t old_size,
int64_t new_size) {
NANOARROW_UNUSED(new_size);
// Attempting to reallocate a buffer with a custom deallocator is
// a programming error. In debug mode, crash here.
#if defined(NANOARROW_DEBUG)
NANOARROW_PRINT_AND_DIE(ENOMEM,
"It is an error to reallocate a buffer whose allocator is "
"ArrowBufferDeallocator()");
#endif
// In release mode, ensure the the deallocator is called exactly
// once using the pointer it was given and return NULL, which
// will trigger the caller to return ENOMEM.
allocator->free(allocator, ptr, old_size);
*allocator = ArrowBufferAllocatorDefault();
return NULL;
}
struct ArrowBufferAllocator ArrowBufferDeallocator(
void (*custom_free)(struct ArrowBufferAllocator* allocator, uint8_t* ptr,
int64_t size),
void* private_data) {
struct ArrowBufferAllocator allocator;
allocator.reallocate = &ArrowBufferDeallocatorReallocate;
allocator.free = custom_free;
allocator.private_data = private_data;
return allocator;
}
static const int kInt32DecimalDigits = 9;
static const uint64_t kUInt32PowersOfTen[] = {
1ULL, 10ULL, 100ULL, 1000ULL, 10000ULL,
100000ULL, 1000000ULL, 10000000ULL, 100000000ULL, 1000000000ULL};
// Adapted from Arrow C++ to use 32-bit words for better C portability
// https://github.com/apache/arrow/blob/cd3321b28b0c9703e5d7105d6146c1270bbadd7f/cpp/src/arrow/util/decimal.cc#L524-L544
static void ShiftAndAdd(struct ArrowStringView value, uint32_t* out, int64_t out_size) {
// We use strtoll for parsing, which needs input that is null-terminated
char chunk_string[16];
for (int64_t posn = 0; posn < value.size_bytes;) {
int64_t remaining = value.size_bytes - posn;
int64_t group_size;
if (remaining > kInt32DecimalDigits) {
group_size = kInt32DecimalDigits;
} else {
group_size = remaining;
}
const uint64_t multiple = kUInt32PowersOfTen[group_size];
memcpy(chunk_string, value.data + posn, group_size);
chunk_string[group_size] = '\0';
uint32_t chunk = (uint32_t)strtoll(chunk_string, NULL, 10);
for (int64_t i = 0; i < out_size; i++) {
uint64_t tmp = out[i];
tmp *= multiple;
tmp += chunk;
out[i] = (uint32_t)(tmp & 0xFFFFFFFFULL);
chunk = (uint32_t)(tmp >> 32);
}
posn += group_size;
}
}
ArrowErrorCode ArrowDecimalSetDigits(struct ArrowDecimal* decimal,
struct ArrowStringView value) {
// Check for sign
int is_negative = value.data[0] == '-';
int has_sign = is_negative || value.data[0] == '+';
value.data += has_sign;
value.size_bytes -= has_sign;
// Check all characters are digits that are not the negative sign
for (int64_t i = 0; i < value.size_bytes; i++) {
char c = value.data[i];
if (c < '0' || c > '9') {
return EINVAL;
}
}
// Skip over leading 0s
int64_t n_leading_zeroes = 0;
for (int64_t i = 0; i < value.size_bytes; i++) {
if (value.data[i] == '0') {
n_leading_zeroes++;
} else {
break;
}
}
value.data += n_leading_zeroes;
value.size_bytes -= n_leading_zeroes;
// Use 32-bit words for portability
uint32_t words32[8];
int n_words32 = decimal->n_words * 2;
NANOARROW_DCHECK(n_words32 <= 8);
memset(words32, 0, sizeof(words32));
ShiftAndAdd(value, words32, n_words32);
if (decimal->low_word_index == 0) {
memcpy(decimal->words, words32, sizeof(uint32_t) * n_words32);
} else {
uint64_t lo;
uint64_t hi;
for (int i = 0; i < decimal->n_words; i++) {
lo = (uint64_t)words32[i * 2];
hi = (uint64_t)words32[i * 2 + 1] << 32;
decimal->words[decimal->n_words - i - 1] = lo | hi;
}
Loading ...