#pragma once
#include <cstdint>
namespace caffe2 {
namespace math {
// Quantizes floating-point inputs and packs the quantized values into a
// compressed byte array.
//
// The "fused" representation stores a header [bitwidth][tail][min][max]
// together with the quantized data in one array. Since 8/bitwidth
// quantized values ("buckets") are stored in one byte, the last buckets
// of some bytes may be unused. In total, *tail* buckets are unused.
// *bitwidth* and *tail* are encoded at the beginning, followed by two
// 32-bit floating-point values representing min and max:
// | bitwidth | tail | min | max | ... int8 data ... |
// | 1B | 1B | 4B | 4B | ...output_data....|
// In output_data: the b-th bucket of the i-th byte stores
// the i-th value of the b-th segment of the input row.
//
// Parameters:
//   input_data    - floating-point values to be quantized.
//   output_data   - destination buffer for the compressed result.
//                   NOTE(review): whether this pointer addresses the
//                   header or only the data section is not visible from
//                   this declaration -- confirm against the definition.
//   input_size    - NOTE(review): presumably the number of floats in
//                   input_data -- confirm against the definition.
//   bitwidth      - number of bits used per quantized value (8/bitwidth
//                   values are packed into each byte).
//   random        - NOTE(review): presumably enables randomized rounding
//                   during quantization -- confirm against the definition.
//   random_buffer - NOTE(review): presumably the random values consumed
//                   when `random` is true; whether it may be null
//                   otherwise is not visible here -- confirm.
void quantize_and_compress(
const float* input_data,
std::uint8_t* output_data,
std::uint64_t input_size,
std::uint64_t bitwidth,
bool random,
const float* random_buffer);
// Unpacks compressed, quantized bytes and converts them back to floats.
//
// No bitwidth/min/max arguments are passed, so the decoder has to obtain
// them from the data itself.
// NOTE(review): presumably input_data is the fused layout produced by
// quantize_and_compress (header followed by packed buckets) -- confirm
// against the definition.
//
// Parameters:
//   input_data  - compressed bytes to decode.
//   output_data - destination buffer for the reconstructed floats.
//   input_size  - NOTE(review): unit is not visible from this declaration
//                 (bytes of input_data vs. number of values) -- confirm.
void decompress_and_dequantize(
const std::uint8_t* input_data,
float* output_data,
std::uint64_t input_size);
} // namespace math
} // namespace caffe2