#ifndef CAFFE2_CORE_QTENSOR_H_
#define CAFFE2_CORE_QTENSOR_H_
#include <algorithm>
#include <climits>
#include <cstddef>
#include <functional> // std::multiplies, used by Resize()
#include <numeric>    // std::accumulate, used by Resize()
#include <vector>

#include "caffe2/core/common.h"
#include "caffe2/core/context.h"
#include "caffe2/core/tensor.h"

#include <c10/util/typeid.h>
namespace caffe2 {
template <class Context>
class C10_EXPORT QTensor {
public:
QTensor() {}
virtual ~QTensor() {}
/**
* @brief Creates a quantized tensor of the given dimension.
*
* Note that the actual data allocation is not going to be carried out until
* the first time mutable_data() is called.
*
* The underlying storage of the quantized tensor interleaves elements
* by bit depth.
*
* Labeled memory for tensor of size 6, precision 3
* [ E1[0] E2[0] E3[0] E4[0] E5[0] E6[0] ] // Least significant Bits
* [ E1[1] E2[1] E3[1] E4[1] E5[1] E6[1] ]
* [ E1[2] E2[2] E3[2] E4[2] E5[2] E6[2] ]
*
* In the case of sign bits (see enable_sign argument), an extra bit
* per element is added:
*
* Labeled memory for tensor of size 6, precision 3, sign bit enabled
* [ E1[0] E2[0] E3[0] E4[0] E5[0] E6[0] ]
* [ E1[1] E2[1] E3[1] E4[1] E5[1] E6[1] ]
* [ E1[2] E2[2] E3[2] E4[2] E5[2] E6[2] ]
* [ E1[s] E2[s] E3[s] E4[s] E5[s] E6[s] ]
* Where 's' is 1 if E is negative
*
* The reason for this layout is the ability to efficiently multiply
* many low precision integers as a sum of popcnt(A & B) * 1 << bit.
* Explained here: https://arxiv.org/abs/1606.06160
*/
// TODO: changing at::ArrayRef<int> to at::ArrayRef<int64_t>?
explicit QTensor(
at::ArrayRef<int> dims,
const unsigned char precision,
const bool signbit = false)
: precision_(precision), signed_(signbit) {
Resize(dims);
}
void Resize(at::ArrayRef<int> dim_source) {
if (dims_ != dim_source) {
size_t source_size = std::accumulate(
dim_source.begin(), dim_source.end(), 1, std::multiplies<int>());
if ((source_size * (precision_ + signed_)) > capacity_) {
data_ptr_.clear();
capacity_ = 0;
}
dims_ = dim_source.vec();
size_ = source_size;
}
}
void
SetBitAtIndex(const unsigned char bit, const size_t index, const bool value) {
// Get the mutable data at bit depth `bit`.
unsigned char* d = mutable_data();
CAFFE_ENFORCE(
bit < precision_ + signed_,
"Attempted to a set a bit that is not allocated.");
CAFFE_ENFORCE(bit * aligned_size() < capacity_);
auto idx = (aligned_size() * bit) / CHAR_BIT;
d = &d[idx];
idx = index / CHAR_BIT;
auto shift = CHAR_BIT - (index % CHAR_BIT) - 1;
if (value) {
d[idx] |= 1 << shift;
} else {
d[idx] &= ~(1 << shift);
}
}
bool GetBitAtIndex(const unsigned char bit, const size_t index) const {
// Get the data at bit depth `bit`
const unsigned char* d = data();
auto idx = (aligned_size() * bit) / CHAR_BIT;
d = &d[idx];
idx = index / CHAR_BIT;
auto shift = CHAR_BIT - (index % CHAR_BIT) - 1;
return d[idx] & (1 << shift);
}
void SetPrecision(const unsigned char precision) {
precision_ = precision;
data_ptr_.clear();
}
void SetSigned(const bool make_signed = true) {
signed_ = make_signed;
data_ptr_.clear();
}
void SetScale(const double scale) {
scale_ = scale;
}
void SetBias(const double bias) {
bias_ = bias;
}
unsigned char* mutable_data() {
if (!data_ptr_) {
data_ptr_ = Context::New(nbytes());
capacity_ = nbytes() * CHAR_BIT;
}
CAFFE_ENFORCE(capacity_ == nbytes() * CHAR_BIT);
return static_cast<unsigned char*>(data_ptr_.get());
}
inline const unsigned char* data() const {
return static_cast<unsigned char*>(data_ptr_.get());
}
inline size_t size() const {
return size_;
}
inline unsigned char alignment() const {
return alignment_;
}
inline unsigned char precision() const {
return precision_;
}
inline at::ArrayRef<int> sizes() const {
return dims_;
}
// TODO: deprecate?
inline at::ArrayRef<int> dims() const {
return dims_;
}
inline bool is_signed() const {
return signed_;
}
/**
* Returns the number of dimensions of the data.
*/
inline int ndim() const {
return dims_.size();
}
inline size_t aligned_size() const {
return alignment_ * ((size_ + alignment_ - 1) / alignment_);
}
inline size_t nbytes() const {
return (aligned_size() * (precision_ + signed_)) / CHAR_BIT;
}
inline double scale() const {
return scale_;
}
inline double bias() const {
return bias_;
}
/**
* Returns the i-th dimension of the qtensor in int.
*/
inline int dim32(const int i) const {
DCHECK_LT(i, dims_.size()) << "Exceeding ndim limit " << dims_.size();
DCHECK_GE(i, 0) << "Cannot have negative index";
CAFFE_ENFORCE_LT(dims_[i], std::numeric_limits<int>::max());
return static_cast<int>(dims_[i]);
}
/**
* Returns the 'canonical' version of a (usually) user-specified axis,
* allowing for negative indexing (e.g., -1 for the last axis).
*
* @param axis_index the axis index.
* If 0 <= index < ndim(), return index.
* If -ndim <= index <= -1, return (ndim() - (-index)),
* e.g., the last axis index (ndim() - 1) if index == -1,
* the second to last if index == -2, etc.
* Dies on out of range index.
*/
inline int canonical_axis_index(int axis_index) const {
CAFFE_ENFORCE_GE(axis_index, -ndim());
CAFFE_ENFORCE_LT(axis_index, ndim());
if (axis_index < 0) {
return axis_index + ndim();
}
return axis_index;
}
/**
* Return product of all dimensions starting from K.
*/
inline int64_t size_from_dim(int k) const {
int64_t r = 1;
for (int i = k; i < dims_.size(); ++i) {
r *= dims_[i];
}
return r;
}
/**
* Product of all dims up to.
*/
inline int64_t size_to_dim(int k) const {
CAFFE_ENFORCE(k < dims_.size());
int64_t r = 1;
for (int i = 0; i < k; ++i) {
r *= dims_[i];
}
return r;
}
protected:
std::vector<int> dims_;
size_t size_ = 0;
// Precision in bits.
unsigned char precision_ = CHAR_BIT;
// Bit alignment.
unsigned char alignment_ = CHAR_BIT;
// Allocated data.
at::DataPtr data_ptr_;
// value = scale_ * (x + bias_)
double scale_;
double bias_;
bool signed_ = false;
// Capacity in bits.
size_t capacity_ = 0;
};
} // namespace caffe2
#endif // CAFFE2_CORE_QTENSOR_H_