// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <cstdint>
#include <memory>
#include <string>
#include <vector>
#include "arrow/buffer.h"
#include "arrow/compare.h"
#include "arrow/result.h"
#include "arrow/status.h"
#include "arrow/type.h"
#include "arrow/type_traits.h"
#include "arrow/util/macros.h"
#include "arrow/util/visibility.h"
namespace arrow {
/// \brief Report whether a type id is accepted as a tensor value type
///
/// The accepted ids are the signed and unsigned 8/16/32/64-bit integers plus
/// HALF_FLOAT, FLOAT and DOUBLE. Every other id yields false.
///
/// \param[in] type_id The type id to test
/// \return true when type_id is one of the ids listed above
static inline bool is_tensor_supported(Type::type type_id) {
  switch (type_id) {
    // Signed integers
    case Type::INT8:
    case Type::INT16:
    case Type::INT32:
    case Type::INT64:
    // Unsigned integers
    case Type::UINT8:
    case Type::UINT16:
    case Type::UINT32:
    case Type::UINT64:
    // Floating point
    case Type::HALF_FLOAT:
    case Type::FLOAT:
    case Type::DOUBLE:
      return true;
    default:
      return false;
  }
}
// Helper routines for tensor construction and validation. Only the
// declarations are visible in this header; the definitions live elsewhere.
namespace internal {
/// \brief Compute strides for a row-major layout of the given shape
///
/// \param[in] type The fixed-width value type of the tensor
/// \param[in] shape The shape of the tensor
/// \param[out] strides Receives the computed strides
/// \return A Status describing success or failure
///
/// NOTE(review): presumably the strides are expressed in bytes, matching how
/// Tensor::Value applies them to a byte pointer -- confirm in the definition.
ARROW_EXPORT
Status ComputeRowMajorStrides(const FixedWidthType& type,
const std::vector<int64_t>& shape,
std::vector<int64_t>* strides);
/// \brief Compute strides for a column-major layout of the given shape
///
/// \param[in] type The fixed-width value type of the tensor
/// \param[in] shape The shape of the tensor
/// \param[out] strides Receives the computed strides
/// \return A Status describing success or failure
///
/// NOTE(review): presumably byte strides, as for ComputeRowMajorStrides --
/// confirm in the definition.
ARROW_EXPORT
Status ComputeColumnMajorStrides(const FixedWidthType& type,
const std::vector<int64_t>& shape,
std::vector<int64_t>* strides);
/// \brief Test whether the given strides describe a contiguous layout
///
/// \param[in] type The value type of the tensor
/// \param[in] shape The shape of the tensor
/// \param[in] strides The strides to test
///
/// NOTE(review): presumably "contiguous" means row-major or column-major, as
/// documented on Tensor::is_contiguous -- confirm in the definition.
ARROW_EXPORT
bool IsTensorStridesContiguous(const std::shared_ptr<DataType>& type,
const std::vector<int64_t>& shape,
const std::vector<int64_t>& strides);
/// \brief Check a full set of tensor parameters for consistency
///
/// Called by Tensor::Make and NumericTensor::Make before constructing the
/// object, and by Tensor::Validate on an existing tensor.
///
/// \param[in] type The data type of the tensor values
/// \param[in] data The buffer of the tensor content
/// \param[in] shape The shape of the tensor
/// \param[in] strides The strides of the tensor
/// \param[in] dim_names The names of the tensor dimensions
/// \return A non-OK Status when the parameters are rejected
ARROW_EXPORT
Status ValidateTensorParameters(const std::shared_ptr<DataType>& type,
const std::shared_ptr<Buffer>& data,
const std::vector<int64_t>& shape,
const std::vector<int64_t>& strides,
const std::vector<std::string>& dim_names);
/// \brief Convert a RecordBatch into a Tensor
///
/// \param[in] batch The record batch to convert
/// \param[in] null_to_nan NOTE(review): presumably requests that nulls be
/// written as NaN -- confirm in the definition
/// \param[in] row_major NOTE(review): presumably selects a row-major result
/// layout (column-major otherwise) -- confirm in the definition
/// \param[in] pool The memory pool handed to the conversion
/// \param[out] tensor Receives the resulting tensor
/// \return A Status describing success or failure
ARROW_EXPORT
Status RecordBatchToTensor(const RecordBatch& batch, bool null_to_nan, bool row_major,
MemoryPool* pool, std::shared_ptr<Tensor>* tensor);
} // namespace internal
/// \brief A multi-dimensional view over a Buffer
///
/// A Tensor bundles a value type, a data buffer, a shape, strides and optional
/// dimension names. Strides are applied as byte offsets from the start of the
/// buffer (see Value()).
class ARROW_EXPORT Tensor {
 public:
  /// \brief Validate the given parameters and build a Tensor from them
  ///
  /// This factory function will return Status::Invalid when the parameters are
  /// inconsistent
  ///
  /// \param[in] type The data type of the tensor values
  /// \param[in] data The buffer of the tensor content
  /// \param[in] shape The shape of the tensor
  /// \param[in] strides The strides of the tensor
  ///            (if this is empty, the data is assumed to be row-major)
  /// \param[in] dim_names The names of the tensor dimensions
  /// \return The new tensor, or the validation error
  static inline Result<std::shared_ptr<Tensor>> Make(
      const std::shared_ptr<DataType>& type, const std::shared_ptr<Buffer>& data,
      const std::vector<int64_t>& shape, const std::vector<int64_t>& strides = {},
      const std::vector<std::string>& dim_names = {}) {
    // Reject inconsistent parameters before any object is created.
    ARROW_RETURN_NOT_OK(
        internal::ValidateTensorParameters(type, data, shape, strides, dim_names));
    return std::make_shared<Tensor>(type, data, shape, strides, dim_names);
  }

  virtual ~Tensor() = default;

  /// Constructor with no dimension names or strides, data assumed to be row-major
  Tensor(const std::shared_ptr<DataType>& type, const std::shared_ptr<Buffer>& data,
         const std::vector<int64_t>& shape);

  /// Constructor with non-negative strides
  Tensor(const std::shared_ptr<DataType>& type, const std::shared_ptr<Buffer>& data,
         const std::vector<int64_t>& shape, const std::vector<int64_t>& strides);

  /// Constructor with non-negative strides and dimension names
  Tensor(const std::shared_ptr<DataType>& type, const std::shared_ptr<Buffer>& data,
         const std::vector<int64_t>& shape, const std::vector<int64_t>& strides,
         const std::vector<std::string>& dim_names);

  /// The data type of the tensor values
  std::shared_ptr<DataType> type() const { return type_; }

  /// The buffer holding the tensor content
  std::shared_ptr<Buffer> data() const { return data_; }

  /// Read-only pointer to the first byte of the data buffer
  const uint8_t* raw_data() const { return data_->data(); }

  /// Writable pointer to the first byte of the data buffer
  uint8_t* raw_mutable_data() { return data_->mutable_data(); }

  /// The extent of each dimension
  const std::vector<int64_t>& shape() const { return shape_; }

  /// The stride of each dimension
  const std::vector<int64_t>& strides() const { return strides_; }

  /// The number of dimensions, i.e. the length of shape()
  int ndim() const { return static_cast<int>(shape_.size()); }

  /// The dimension names (may be empty, names are optional)
  const std::vector<std::string>& dim_names() const { return dim_names_; }

  /// The name of the i-th dimension
  const std::string& dim_name(int i) const;

  /// Total number of value cells in the tensor
  int64_t size() const;

  /// Return true if the underlying data buffer is mutable
  bool is_mutable() const { return data_->is_mutable(); }

  /// Either row major or column major
  bool is_contiguous() const;

  /// AKA "C order"
  bool is_row_major() const;

  /// AKA "Fortran order"
  bool is_column_major() const;

  /// The type id of the tensor values
  Type::type type_id() const;

  /// Compare this tensor with another one under the given equality options
  bool Equals(const Tensor& other, const EqualOptions& = EqualOptions::Defaults()) const;

  /// Compute the number of non-zero values in the tensor
  Result<int64_t> CountNonZero() const;

  /// \brief Return the offset of the given index on the given strides
  ///
  /// The result is the sum over all dimensions of index[d] * strides[d].
  /// No bounds checking is done: strides must have at least as many entries
  /// as index.
  static int64_t CalculateValueOffset(const std::vector<int64_t>& strides,
                                      const std::vector<int64_t>& index) {
    int64_t total = 0;
    for (size_t dim = 0; dim < index.size(); ++dim) {
      total += strides[dim] * index[dim];
    }
    return total;
  }

  /// Return the offset of the given index using this tensor's own strides
  int64_t CalculateValueOffset(const std::vector<int64_t>& index) const {
    return Tensor::CalculateValueOffset(strides_, index);
  }

  /// Returns the value at the given index without data-type and bounds checks
  template <typename ValueType>
  const typename ValueType::c_type& Value(const std::vector<int64_t>& index) const {
    using CType = typename ValueType::c_type;
    // The offset is added to a byte pointer, so strides act as byte counts.
    const uint8_t* cell = raw_data() + CalculateValueOffset(index);
    return *reinterpret_cast<const CType*>(cell);
  }

  /// Re-run parameter validation on this tensor's current members
  Status Validate() const {
    return internal::ValidateTensorParameters(type_, data_, shape_, strides_, dim_names_);
  }

 protected:
  /// Leaves every member default-constructed
  Tensor() {}

  std::shared_ptr<DataType> type_;
  std::shared_ptr<Buffer> data_;
  std::vector<int64_t> shape_;
  std::vector<int64_t> strides_;

  /// These names are optional
  std::vector<std::string> dim_names_;

  template <typename SparseIndexType>
  friend class SparseTensorImpl;

 private:
  ARROW_DISALLOW_COPY_AND_ASSIGN(Tensor);
};
/// \brief A Tensor whose value type is fixed at compile time
///
/// The value type is always TypeTraits<TYPE>::type_singleton(), so callers
/// only supply the buffer, shape, strides and dimension names.
template <typename TYPE>
class NumericTensor : public Tensor {
 public:
  using TypeClass = TYPE;
  using value_type = typename TypeClass::c_type;

  /// \brief Validate the given parameters and build a NumericTensor from them
  ///
  /// This factory function will return Status::Invalid when the parameters are
  /// inconsistent
  ///
  /// \param[in] data The buffer of the tensor content
  /// \param[in] shape The shape of the tensor
  /// \param[in] strides The strides of the tensor
  ///            (if this is empty, the data is assumed to be row-major)
  /// \param[in] dim_names The names of the tensor dimensions
  /// \return The new tensor, or the validation error
  static Result<std::shared_ptr<NumericTensor<TYPE>>> Make(
      const std::shared_ptr<Buffer>& data, const std::vector<int64_t>& shape,
      const std::vector<int64_t>& strides = {},
      const std::vector<std::string>& dim_names = {}) {
    // Reject inconsistent parameters before any object is created.
    ARROW_RETURN_NOT_OK(internal::ValidateTensorParameters(
        TypeTraits<TYPE>::type_singleton(), data, shape, strides, dim_names));
    return std::make_shared<NumericTensor<TYPE>>(data, shape, strides, dim_names);
  }

  /// Constructor with no dimension names or strides, data assumed to be row-major
  NumericTensor(const std::shared_ptr<Buffer>& data, const std::vector<int64_t>& shape)
      : NumericTensor(data, shape, std::vector<int64_t>{},
                      std::vector<std::string>{}) {}

  /// Constructor with non-negative strides
  NumericTensor(const std::shared_ptr<Buffer>& data, const std::vector<int64_t>& shape,
                const std::vector<int64_t>& strides)
      : NumericTensor(data, shape, strides, std::vector<std::string>{}) {}

  /// Constructor with non-negative strides and dimension names
  ///
  /// The other constructors delegate here; the value type handed to the
  /// Tensor base is the singleton for TYPE.
  NumericTensor(const std::shared_ptr<Buffer>& data, const std::vector<int64_t>& shape,
                const std::vector<int64_t>& strides,
                const std::vector<std::string>& dim_names)
      : Tensor(TypeTraits<TYPE>::type_singleton(), data, shape, strides, dim_names) {}

  /// Returns the value at the given index, typed as value_type, by forwarding
  /// to Tensor::Value (which performs no data-type or bounds checks)
  const value_type& Value(const std::vector<int64_t>& index) const {
    const value_type& cell = Tensor::Value<TypeClass>(index);
    return cell;
  }
};
} // namespace arrow