#pragma once
// //////////////////////////////////////////////////////////
// Crc32.h
// Copyright (c) 2011-2019 Stephan Brumme. All rights reserved.
// Slicing-by-16 contributed by Bulat Ziganshin
// Tableless bytewise CRC contributed by Hagai Gold
// see http://create.stephan-brumme.com/disclaimer.html
//
// if running on an embedded system, you might consider shrinking the
// big Crc32Lookup table by undefining these lines:
#define CRC32_USE_LOOKUP_TABLE_BYTE
#define CRC32_USE_LOOKUP_TABLE_SLICING_BY_4
#define CRC32_USE_LOOKUP_TABLE_SLICING_BY_8
#define CRC32_USE_LOOKUP_TABLE_SLICING_BY_16
// - crc32_bitwise doesn't need it at all
// - crc32_halfbyte has its own small lookup table
// - crc32_1byte_tableless and crc32_1byte_tableless2 don't need it at all
// - crc32_1byte needs only Crc32Lookup[0]
// - crc32_4bytes needs only Crc32Lookup[0..3]
// - crc32_8bytes needs only Crc32Lookup[0..7]
// - crc32_4x8bytes needs only Crc32Lookup[0..7]
// - crc32_16bytes needs all of Crc32Lookup
// using the aforementioned #defines the table is automatically fitted to your needs
// uint8_t, uint32_t, int32_t
#include <stdint.h>
// size_t
#include <cstddef>
// crc32_fast selects the fastest algorithm depending on flags (CRC32_USE_LOOKUP_...)
/// compute CRC32 using the fastest algorithm for large datasets on modern CPUs
uint32_t crc32_fast (const void* data, size_t length, uint32_t previousCrc32 = 0);
/// merge two CRC32 such that result = crc32(dataB, lengthB, crc32(dataA, lengthA))
uint32_t crc32_combine (uint32_t crcA, uint32_t crcB, size_t lengthB);
/// compute CRC32 (bitwise algorithm)
uint32_t crc32_bitwise (const void* data, size_t length, uint32_t previousCrc32 = 0);
/// compute CRC32 (half-byte algoritm)
uint32_t crc32_halfbyte(const void* data, size_t length, uint32_t previousCrc32 = 0);
#ifdef CRC32_USE_LOOKUP_TABLE_BYTE
/// compute CRC32 (standard algorithm)
uint32_t crc32_1byte (const void* data, size_t length, uint32_t previousCrc32 = 0);
#endif
/// compute CRC32 (byte algorithm) without lookup tables
uint32_t crc32_1byte_tableless (const void* data, size_t length, uint32_t previousCrc32 = 0);
/// compute CRC32 (byte algorithm) without lookup tables
uint32_t crc32_1byte_tableless2(const void* data, size_t length, uint32_t previousCrc32 = 0);
#ifdef CRC32_USE_LOOKUP_TABLE_SLICING_BY_4
/// compute CRC32 (Slicing-by-4 algorithm)
uint32_t crc32_4bytes (const void* data, size_t length, uint32_t previousCrc32 = 0);
#endif
#ifdef CRC32_USE_LOOKUP_TABLE_SLICING_BY_8
/// compute CRC32 (Slicing-by-8 algorithm)
uint32_t crc32_8bytes (const void* data, size_t length, uint32_t previousCrc32 = 0);
/// compute CRC32 (Slicing-by-8 algorithm), unroll inner loop 4 times
uint32_t crc32_4x8bytes(const void* data, size_t length, uint32_t previousCrc32 = 0);
#endif
#ifdef CRC32_USE_LOOKUP_TABLE_SLICING_BY_16
/// compute CRC32 (Slicing-by-16 algorithm)
uint32_t crc32_16bytes (const void* data, size_t length, uint32_t previousCrc32 = 0);
/// compute CRC32 (Slicing-by-16 algorithm, prefetch upcoming data blocks)
uint32_t crc32_16bytes_prefetch(const void* data, size_t length, uint32_t previousCrc32 = 0, size_t prefetchAhead = 256);
#endif
// //////////////////////////////////////////////////////////
// Crc32.cpp
// Copyright (c) 2011-2019 Stephan Brumme. All rights reserved.
// Slicing-by-16 contributed by Bulat Ziganshin
// Tableless bytewise CRC contributed by Hagai Gold
// see http://create.stephan-brumme.com/disclaimer.html
//
// if running on an embedded system, you might consider shrinking the
// big Crc32Lookup table:
// - crc32_bitwise doesn't need it at all
// - crc32_halfbyte has its own small lookup table
// - crc32_1byte needs only Crc32Lookup[0]
// - crc32_4bytes needs only Crc32Lookup[0..3]
// - crc32_8bytes needs only Crc32Lookup[0..7]
// - crc32_4x8bytes needs only Crc32Lookup[0..7]
// - crc32_16bytes needs all of Crc32Lookup
#ifndef __LITTLE_ENDIAN
#define __LITTLE_ENDIAN 1234
#endif
#ifndef __BIG_ENDIAN
#define __BIG_ENDIAN 4321
#endif
// define endianess and some integer data types
#if defined(_MSC_VER) || defined(__MINGW32__)
// Windows always little endian
#define __BYTE_ORDER __LITTLE_ENDIAN
// intrinsics / prefetching
#include <xmmintrin.h>
#ifdef __MINGW32__
#define PREFETCH(location) __builtin_prefetch(location)
#else
#define PREFETCH(location) _mm_prefetch(location, _MM_HINT_T0)
#endif
#elif defined(__APPLE__)
#include <TargetConditionals.h>
#if TARGET_IPHONE_SIMULATOR
#define __BYTE_ORDER __LITTLE_ENDIAN
#elif TARGET_OS_IPHONE
#define __BYTE_ORDER __LITTLE_ENDIAN
#elif TARGET_OS_MAC
#include <machine/endian.h>
#if defined(__BIG_ENDIAN__)
#define __BYTE_ORDER __BIG_ENDIAN
#endif
#if defined(__LITTLE_ENDIAN__)
#define __BYTE_ORDER __LITTLE_ENDIAN
#endif
#else
# error "Unknown Apple platform"
#endif
#elif defined(__ARMEB__)
#define __BYTE_ORDER __BIG_ENDIAN
#elif defined(__BYTE_ORDER__)
#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
#define __BYTE_ORDER __BIG_ENDIAN
#else
#define __BYTE_ORDER __LITTLE_ENDIAN
#endif
#else
// defines __BYTE_ORDER as __LITTLE_ENDIAN or __BIG_ENDIAN
#include <sys/param.h>
#endif
// intrinsics / prefetching
#ifdef __GNUC__
#define PREFETCH(location) __builtin_prefetch(location)
#else
// no prefetching
#define PREFETCH(location) ;
#endif
// abort if byte order is undefined
#ifndef __BYTE_ORDER
#error undefined byte order, compile with -D__BYTE_ORDER=1234 (if little endian) or -D__BYTE_ORDER=4321 (big endian)
#endif
namespace
{
/// zlib's CRC32 polynomial
const uint32_t Polynomial = 0xEDB88320;
/// swap endianess
static inline uint32_t swap(uint32_t x)
{
#if defined(__GNUC__) || defined(__clang__)
return __builtin_bswap32(x);
#else
return (x >> 24) |
((x >> 8) & 0x0000FF00) |
((x << 8) & 0x00FF0000) |
(x << 24);
#endif
}
/// Slicing-By-16
#ifdef CRC32_USE_LOOKUP_TABLE_SLICING_BY_16
const size_t MaxSlice = 16;
#elif defined(CRC32_USE_LOOKUP_TABLE_SLICING_BY_8)
const size_t MaxSlice = 8;
#elif defined(CRC32_USE_LOOKUP_TABLE_SLICING_BY_4)
const size_t MaxSlice = 4;
#elif defined(CRC32_USE_LOOKUP_TABLE_BYTE)
const size_t MaxSlice = 1;
#else
#define NO_LUT // don't need Crc32Lookup at all
#endif
} // anonymous namespace
#ifndef NO_LUT
/// forward declaration, table is at the end of this file
extern const uint32_t Crc32Lookup[MaxSlice][256]; // extern is needed to keep compiler happy
#endif
/// compute CRC32 (bitwise algorithm)
uint32_t crc32_bitwise(const void* data, size_t length, uint32_t previousCrc32)
{
uint32_t crc = ~previousCrc32; // same as previousCrc32 ^ 0xFFFFFFFF
const uint8_t* current = (const uint8_t*) data;
while (length-- != 0)
{
crc ^= *current++;
for (int j = 0; j < 8; j++)
{
// branch-free
crc = (crc >> 1) ^ (-int32_t(crc & 1) & Polynomial);
// branching, much slower:
//if (crc & 1)
// crc = (crc >> 1) ^ Polynomial;
//else
// crc = crc >> 1;
}
}
return ~crc; // same as crc ^ 0xFFFFFFFF
}
/// compute CRC32 (half-byte algoritm)
uint32_t crc32_halfbyte(const void* data, size_t length, uint32_t previousCrc32)
{
uint32_t crc = ~previousCrc32; // same as previousCrc32 ^ 0xFFFFFFFF
const uint8_t* current = (const uint8_t*) data;
/// look-up table for half-byte, same as crc32Lookup[0][16*i]
static const uint32_t Crc32Lookup16[16] =
{
0x00000000,0x1DB71064,0x3B6E20C8,0x26D930AC,0x76DC4190,0x6B6B51F4,0x4DB26158,0x5005713C,
0xEDB88320,0xF00F9344,0xD6D6A3E8,0xCB61B38C,0x9B64C2B0,0x86D3D2D4,0xA00AE278,0xBDBDF21C
};
while (length-- != 0)
{
crc = Crc32Lookup16[(crc ^ *current ) & 0x0F] ^ (crc >> 4);
crc = Crc32Lookup16[(crc ^ (*current >> 4)) & 0x0F] ^ (crc >> 4);
current++;
}
return ~crc; // same as crc ^ 0xFFFFFFFF
}
#ifdef CRC32_USE_LOOKUP_TABLE_BYTE
/// compute CRC32 (standard algorithm)
uint32_t crc32_1byte(const void* data, size_t length, uint32_t previousCrc32)
{
uint32_t crc = ~previousCrc32; // same as previousCrc32 ^ 0xFFFFFFFF
const uint8_t* current = (const uint8_t*) data;
while (length-- != 0)
crc = (crc >> 8) ^ Crc32Lookup[0][(crc & 0xFF) ^ *current++];
return ~crc; // same as crc ^ 0xFFFFFFFF
}
#endif
/// compute CRC32 (byte algorithm) without lookup tables
uint32_t crc32_1byte_tableless(const void* data, size_t length, uint32_t previousCrc32)
{
uint32_t crc = ~previousCrc32; // same as previousCrc32 ^ 0xFFFFFFFF
const uint8_t* current = (const uint8_t*) data;
while (length-- != 0)
{
uint8_t s = uint8_t(crc) ^ *current++;
// Hagai Gold made me aware of this table-less algorithm and send me code
// polynomial 0xEDB88320 can be written in binary as 11101101101110001000001100100000b
// reverse the bits (or just assume bit 0 is the first one)
// and we have bits set at position 0, 1, 2, 4, 5, 7, 8, 10, 11, 12, 16, 22, 23, 26
// => those are the shift offsets:
//crc = (crc >> 8) ^
// t ^
// (t >> 1) ^ (t >> 2) ^ (t >> 4) ^ (t >> 5) ^ // == y
// (t >> 7) ^ (t >> 8) ^ (t >> 10) ^ (t >> 11) ^ // == y >> 6
// (t >> 12) ^ (t >> 16) ^ // == z
// (t >> 22) ^ (t >> 26) ^ // == z >> 10
// (t >> 23);
// the fastest I can come up with:
uint32_t low = (s ^ (s << 6)) & 0xFF;
uint32_t a = (low * ((1 << 23) + (1 << 14) + (1 << 2)));
crc = (crc >> 8) ^
(low * ((1 << 24) + (1 << 16) + (1 << 8))) ^
a ^
(a >> 1) ^
(low * ((1 << 20) + (1 << 12) )) ^
(low << 19) ^
(low << 17) ^
(low >> 2);
// Hagai's code:
/*uint32_t t = (s ^ (s << 6)) << 24;
// some temporaries to optimize XOR
uint32_t x = (t >> 1) ^ (t >> 2);
uint32_t y = x ^ (x >> 3);
uint32_t z = (t >> 12) ^ (t >> 16);
crc = (crc >> 8) ^
t ^ (t >> 23) ^
y ^ (y >> 6) ^
z ^ (z >> 10);*/
}
return ~crc; // same as crc ^ 0xFFFFFFFF
}
/// compute CRC32 (byte algorithm) without lookup tables
uint32_t crc32_1byte_tableless2(const void* data, size_t length, uint32_t previousCrc32)
{
int32_t crc = ~previousCrc32; // note: signed integer, right shift distributes sign bit into lower bits
const uint8_t* current = (const uint8_t*) data;
while (length-- != 0)
{
crc = crc ^ *current++;
uint32_t c = (((crc << 31) >> 31) & ((Polynomial >> 7) ^ (Polynomial >> 1))) ^
(((crc << 30) >> 31) & ((Polynomial >> 6) ^ Polynomial)) ^
(((crc << 29) >> 31) & (Polynomial >> 5)) ^
(((crc << 28) >> 31) & (Polynomial >> 4)) ^
(((crc << 27) >> 31) & (Polynomial >> 3)) ^
(((crc << 26) >> 31) & (Polynomial >> 2)) ^
(((crc << 25) >> 31) & (Polynomial >> 1)) ^
(((crc << 24) >> 31) & Polynomial);
crc = ((uint32_t)crc >> 8) ^ c; // convert to unsigned integer before right shift
}
return ~crc; // same as crc ^ 0xFFFFFFFF
}
#ifdef CRC32_USE_LOOKUP_TABLE_SLICING_BY_4
/// compute CRC32 (Slicing-by-4 algorithm)
uint32_t crc32_4bytes(const void* data, size_t length, uint32_t previousCrc32)
{
uint32_t crc = ~previousCrc32; // same as previousCrc32 ^ 0xFFFFFFFF
const uint32_t* current = (const uint32_t*) data;
// process four bytes at once (Slicing-by-4)
Loading ...