Learn more  » Push, build, and install  RubyGems npm packages Python packages Maven artifacts PHP packages Go Modules Bower components Debian packages RPM packages NuGet packages

neilisaac / torch   python

Repository URL to install this package:

/ include / caffe2 / serialize / crc_alt.h

#pragma once 

// //////////////////////////////////////////////////////////
// Crc32.h
// Copyright (c) 2011-2019 Stephan Brumme. All rights reserved.
// Slicing-by-16 contributed by Bulat Ziganshin
// Tableless bytewise CRC contributed by Hagai Gold
// see http://create.stephan-brumme.com/disclaimer.html
//

// if running on an embedded system, you might consider shrinking the
// big Crc32Lookup table by undefining these lines:
#define CRC32_USE_LOOKUP_TABLE_BYTE
#define CRC32_USE_LOOKUP_TABLE_SLICING_BY_4
#define CRC32_USE_LOOKUP_TABLE_SLICING_BY_8
#define CRC32_USE_LOOKUP_TABLE_SLICING_BY_16
// - crc32_bitwise  doesn't need it at all
// - crc32_halfbyte has its own small lookup table
// - crc32_1byte_tableless and crc32_1byte_tableless2 don't need it at all
// - crc32_1byte    needs only Crc32Lookup[0]
// - crc32_4bytes   needs only Crc32Lookup[0..3]
// - crc32_8bytes   needs only Crc32Lookup[0..7]
// - crc32_4x8bytes needs only Crc32Lookup[0..7]
// - crc32_16bytes  needs all of Crc32Lookup
// using the aforementioned #defines the table is automatically fitted to your needs

// uint8_t, uint32_t, int32_t
#include <stdint.h>
// size_t
#include <cstddef>

// crc32_fast selects the fastest algorithm depending on flags (CRC32_USE_LOOKUP_...)
/// compute CRC32 using the fastest algorithm for large datasets on modern CPUs
uint32_t crc32_fast    (const void* data, size_t length, uint32_t previousCrc32 = 0);

/// merge two CRC32 such that result = crc32(dataB, lengthB, crc32(dataA, lengthA))
uint32_t crc32_combine (uint32_t crcA, uint32_t crcB, size_t lengthB);

/// compute CRC32 (bitwise algorithm)
uint32_t crc32_bitwise (const void* data, size_t length, uint32_t previousCrc32 = 0);
/// compute CRC32 (half-byte algoritm)
uint32_t crc32_halfbyte(const void* data, size_t length, uint32_t previousCrc32 = 0);

#ifdef CRC32_USE_LOOKUP_TABLE_BYTE
/// compute CRC32 (standard algorithm)
uint32_t crc32_1byte   (const void* data, size_t length, uint32_t previousCrc32 = 0);
#endif

/// compute CRC32 (byte algorithm) without lookup tables
uint32_t crc32_1byte_tableless (const void* data, size_t length, uint32_t previousCrc32 = 0);
/// compute CRC32 (byte algorithm) without lookup tables
uint32_t crc32_1byte_tableless2(const void* data, size_t length, uint32_t previousCrc32 = 0);

#ifdef CRC32_USE_LOOKUP_TABLE_SLICING_BY_4
/// compute CRC32 (Slicing-by-4 algorithm)
uint32_t crc32_4bytes  (const void* data, size_t length, uint32_t previousCrc32 = 0);
#endif

#ifdef CRC32_USE_LOOKUP_TABLE_SLICING_BY_8
/// compute CRC32 (Slicing-by-8 algorithm)
uint32_t crc32_8bytes  (const void* data, size_t length, uint32_t previousCrc32 = 0);
/// compute CRC32 (Slicing-by-8 algorithm), unroll inner loop 4 times
uint32_t crc32_4x8bytes(const void* data, size_t length, uint32_t previousCrc32 = 0);
#endif

#ifdef CRC32_USE_LOOKUP_TABLE_SLICING_BY_16
/// compute CRC32 (Slicing-by-16 algorithm)
uint32_t crc32_16bytes (const void* data, size_t length, uint32_t previousCrc32 = 0);
/// compute CRC32 (Slicing-by-16 algorithm, prefetch upcoming data blocks)
uint32_t crc32_16bytes_prefetch(const void* data, size_t length, uint32_t previousCrc32 = 0, size_t prefetchAhead = 256);
#endif

// //////////////////////////////////////////////////////////
// Crc32.cpp
// Copyright (c) 2011-2019 Stephan Brumme. All rights reserved.
// Slicing-by-16 contributed by Bulat Ziganshin
// Tableless bytewise CRC contributed by Hagai Gold
// see http://create.stephan-brumme.com/disclaimer.html
//

// if running on an embedded system, you might consider shrinking the
// big Crc32Lookup table:
// - crc32_bitwise  doesn't need it at all
// - crc32_halfbyte has its own small lookup table
// - crc32_1byte    needs only Crc32Lookup[0]
// - crc32_4bytes   needs only Crc32Lookup[0..3]
// - crc32_8bytes   needs only Crc32Lookup[0..7]
// - crc32_4x8bytes needs only Crc32Lookup[0..7]
// - crc32_16bytes  needs all of Crc32Lookup


#ifndef __LITTLE_ENDIAN
  #define __LITTLE_ENDIAN 1234
#endif
#ifndef __BIG_ENDIAN
  #define __BIG_ENDIAN    4321
#endif

// define endianess and some integer data types
#if defined(_MSC_VER) || defined(__MINGW32__)
  // Windows always little endian
  #define __BYTE_ORDER __LITTLE_ENDIAN

  // intrinsics / prefetching
  #include <xmmintrin.h>
  #ifdef __MINGW32__
    #define PREFETCH(location) __builtin_prefetch(location)
  #else
    #define PREFETCH(location) _mm_prefetch(location, _MM_HINT_T0)
  #endif
#elif defined(__APPLE__)
  #include <TargetConditionals.h>
    #if TARGET_IPHONE_SIMULATOR
      #define __BYTE_ORDER __LITTLE_ENDIAN
    #elif TARGET_OS_IPHONE
      #define __BYTE_ORDER __LITTLE_ENDIAN
    #elif TARGET_OS_MAC
      #include <machine/endian.h>
      #if defined(__BIG_ENDIAN__)
          #define __BYTE_ORDER __BIG_ENDIAN
      #endif
      #if defined(__LITTLE_ENDIAN__)
        #define __BYTE_ORDER __LITTLE_ENDIAN
      #endif
    #else
      # error "Unknown Apple platform"
    #endif
#elif defined(__ARMEB__)
  #define __BYTE_ORDER __BIG_ENDIAN
#elif defined(__BYTE_ORDER__)
  #if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
      #define __BYTE_ORDER __BIG_ENDIAN
  #else
      #define __BYTE_ORDER __LITTLE_ENDIAN
  #endif
#else
  // defines __BYTE_ORDER as __LITTLE_ENDIAN or __BIG_ENDIAN
  #include <sys/param.h>
#endif

// intrinsics / prefetching
#ifdef __GNUC__
  #define PREFETCH(location) __builtin_prefetch(location)
#else
  // no prefetching
  #define PREFETCH(location) ;
#endif

// abort if byte order is undefined
#ifndef __BYTE_ORDER
#error undefined byte order, compile with -D__BYTE_ORDER=1234 (if little endian) or -D__BYTE_ORDER=4321 (big endian)
#endif


namespace
{
  /// zlib's CRC32 polynomial
  const uint32_t Polynomial = 0xEDB88320;

  /// swap endianess
  static inline uint32_t swap(uint32_t x)
  {
  #if defined(__GNUC__) || defined(__clang__)
    return __builtin_bswap32(x);
  #else
    return (x >> 24) |
          ((x >>  8) & 0x0000FF00) |
          ((x <<  8) & 0x00FF0000) |
           (x << 24);
  #endif
  }

  /// Slicing-By-16
  #ifdef CRC32_USE_LOOKUP_TABLE_SLICING_BY_16
  const size_t MaxSlice = 16;
  #elif defined(CRC32_USE_LOOKUP_TABLE_SLICING_BY_8)
  const size_t MaxSlice = 8;
  #elif defined(CRC32_USE_LOOKUP_TABLE_SLICING_BY_4)
  const size_t MaxSlice = 4;
  #elif defined(CRC32_USE_LOOKUP_TABLE_BYTE)
  const size_t MaxSlice = 1;
  #else
    #define NO_LUT // don't need Crc32Lookup at all
  #endif

} // anonymous namespace

#ifndef NO_LUT
/// forward declaration, table is at the end of this file
extern const uint32_t Crc32Lookup[MaxSlice][256]; // extern is needed to keep compiler happy
#endif


/// compute CRC32 (bitwise algorithm)
uint32_t crc32_bitwise(const void* data, size_t length, uint32_t previousCrc32)
{
  uint32_t crc = ~previousCrc32; // same as previousCrc32 ^ 0xFFFFFFFF
  const uint8_t* current = (const uint8_t*) data;

  while (length-- != 0)
  {
    crc ^= *current++;

    for (int j = 0; j < 8; j++)
    {
      // branch-free
      crc = (crc >> 1) ^ (-int32_t(crc & 1) & Polynomial);

      // branching, much slower:
      //if (crc & 1)
      //  crc = (crc >> 1) ^ Polynomial;
      //else
      //  crc =  crc >> 1;
    }
  }

  return ~crc; // same as crc ^ 0xFFFFFFFF
}


/// compute CRC32 (half-byte algoritm)
uint32_t crc32_halfbyte(const void* data, size_t length, uint32_t previousCrc32)
{
  uint32_t crc = ~previousCrc32; // same as previousCrc32 ^ 0xFFFFFFFF
  const uint8_t* current = (const uint8_t*) data;

  /// look-up table for half-byte, same as crc32Lookup[0][16*i]
  static const uint32_t Crc32Lookup16[16] =
  {
    0x00000000,0x1DB71064,0x3B6E20C8,0x26D930AC,0x76DC4190,0x6B6B51F4,0x4DB26158,0x5005713C,
    0xEDB88320,0xF00F9344,0xD6D6A3E8,0xCB61B38C,0x9B64C2B0,0x86D3D2D4,0xA00AE278,0xBDBDF21C
  };

  while (length-- != 0)
  {
    crc = Crc32Lookup16[(crc ^  *current      ) & 0x0F] ^ (crc >> 4);
    crc = Crc32Lookup16[(crc ^ (*current >> 4)) & 0x0F] ^ (crc >> 4);
    current++;
  }

  return ~crc; // same as crc ^ 0xFFFFFFFF
}


#ifdef CRC32_USE_LOOKUP_TABLE_BYTE
/// compute CRC32 (standard algorithm)
uint32_t crc32_1byte(const void* data, size_t length, uint32_t previousCrc32)
{
  uint32_t crc = ~previousCrc32; // same as previousCrc32 ^ 0xFFFFFFFF
  const uint8_t* current = (const uint8_t*) data;

  while (length-- != 0)
    crc = (crc >> 8) ^ Crc32Lookup[0][(crc & 0xFF) ^ *current++];

  return ~crc; // same as crc ^ 0xFFFFFFFF
}
#endif


/// compute CRC32 (byte algorithm) without lookup tables
uint32_t crc32_1byte_tableless(const void* data, size_t length, uint32_t previousCrc32)
{
  uint32_t crc = ~previousCrc32; // same as previousCrc32 ^ 0xFFFFFFFF
  const uint8_t* current = (const uint8_t*) data;

  while (length-- != 0)
  {
    uint8_t s = uint8_t(crc) ^ *current++;

    // Hagai Gold made me aware of this table-less algorithm and send me code

    // polynomial 0xEDB88320 can be written in binary as 11101101101110001000001100100000b
    // reverse the bits (or just assume bit 0 is the first one)
    // and we have bits set at position 0, 1, 2, 4, 5, 7, 8, 10, 11, 12, 16, 22, 23, 26
    // => those are the shift offsets:
    //crc = (crc >> 8) ^
    //       t ^
    //      (t >>  1) ^ (t >>  2) ^ (t >>  4) ^ (t >>  5) ^  // == y
    //      (t >>  7) ^ (t >>  8) ^ (t >> 10) ^ (t >> 11) ^  // == y >> 6
    //      (t >> 12) ^ (t >> 16) ^                          // == z
    //      (t >> 22) ^ (t >> 26) ^                          // == z >> 10
    //      (t >> 23);

    // the fastest I can come up with:
    uint32_t low = (s ^ (s << 6)) & 0xFF;
    uint32_t a   = (low * ((1 << 23) + (1 << 14) + (1 << 2)));
    crc = (crc >> 8) ^
          (low * ((1 << 24) + (1 << 16) + (1 << 8))) ^
           a ^
          (a >> 1) ^
          (low * ((1 << 20) + (1 << 12)           )) ^
          (low << 19) ^
          (low << 17) ^
          (low >>  2);

    // Hagai's code:
    /*uint32_t t = (s ^ (s << 6)) << 24;
    // some temporaries to optimize XOR
    uint32_t x = (t >> 1) ^ (t >> 2);
    uint32_t y = x ^ (x >> 3);
    uint32_t z = (t >> 12) ^ (t >> 16);
    crc = (crc >> 8) ^
           t ^ (t >> 23) ^
           y ^ (y >>  6) ^
           z ^ (z >> 10);*/
  }

  return ~crc; // same as crc ^ 0xFFFFFFFF
}


/// compute CRC32 (byte algorithm) without lookup tables
uint32_t crc32_1byte_tableless2(const void* data, size_t length, uint32_t previousCrc32)
{
  int32_t crc = ~previousCrc32; // note: signed integer, right shift distributes sign bit into lower bits
  const uint8_t* current = (const uint8_t*) data;

  while (length-- != 0)
  {
    crc = crc ^ *current++;

    uint32_t c = (((crc << 31) >> 31) & ((Polynomial >> 7)  ^ (Polynomial >> 1))) ^
                 (((crc << 30) >> 31) & ((Polynomial >> 6)  ^  Polynomial)) ^
                 (((crc << 29) >> 31) &  (Polynomial >> 5)) ^
                 (((crc << 28) >> 31) &  (Polynomial >> 4)) ^
                 (((crc << 27) >> 31) &  (Polynomial >> 3)) ^
                 (((crc << 26) >> 31) &  (Polynomial >> 2)) ^
                 (((crc << 25) >> 31) &  (Polynomial >> 1)) ^
                 (((crc << 24) >> 31) &   Polynomial);

    crc = ((uint32_t)crc >> 8) ^ c; // convert to unsigned integer before right shift
  }

  return ~crc; // same as crc ^ 0xFFFFFFFF
}


#ifdef CRC32_USE_LOOKUP_TABLE_SLICING_BY_4
/// compute CRC32 (Slicing-by-4 algorithm)
uint32_t crc32_4bytes(const void* data, size_t length, uint32_t previousCrc32)
{
  uint32_t  crc = ~previousCrc32; // same as previousCrc32 ^ 0xFFFFFFFF
  const uint32_t* current = (const uint32_t*) data;

  // process four bytes at once (Slicing-by-4)
Loading ...