From 4b2b2d4f83ffeaac7708e44409fe34896a01a278 Mon Sep 17 00:00:00 2001 From: Eugene Kliuchnikov Date: Fri, 12 Apr 2019 13:57:42 +0200 Subject: Update (#749) Update: * Bazel: fix MSVC configuration * C: common: extended documentation and helpers around distance codes * C: common: enable BROTLI_DCHECK in "debug" builds * C: common: fix implicit trailing zero in `kPrefixSuffix` * C: dec: fix possible bit reader discharge for "large-window" mode * C: dec: simplify distance decoding via lookup table * C: dec: reuse decoder state members memory via union with lookup table * C: dec: add decoder state diagram * C: enc: clarify access to static dictionary * C: enc: improve static dictionary hash * C: enc: add "stream offset" parameter for parallel encoding * C: enc: reorganize hasher; now Q2-Q3 require exactly 256KiB to avoid global TCMalloc lock * C: enc: fix rare access to uninitialized data in ring-buffer * C: enc: reorganize logging / checks in `write_bits.h` * Java: dec: add "large-window" support * Java: dec: improve speed * Java: dec: debug and 32-bit mode are now activated via system properties * Java: dec: demystify some state variables (use better names) * Dictionary generator: add single input mode * Java: dec: modernize tests * Bazel: js: pick working commit for closure rules --- c/common/constants.h | 120 +++++++++++++++++++++++++++++++++++++++++++++++++++ c/common/platform.h | 4 +- c/common/transform.c | 4 +- 3 files changed, 124 insertions(+), 4 deletions(-) (limited to 'c/common') diff --git a/c/common/constants.h b/c/common/constants.h index d1b88d1..f6e44dc 100644 --- a/c/common/constants.h +++ b/c/common/constants.h @@ -4,9 +4,17 @@ See file LICENSE for detail or copy at https://opensource.org/licenses/MIT */ +/** + * @file + * Common constants used in decoder and encoder API. + */ + #ifndef BROTLI_COMMON_CONSTANTS_H_ #define BROTLI_COMMON_CONSTANTS_H_ +#include "./platform.h" +#include <brotli/types.h> + /* Specification: 7.3. 
Encoding of the context map */ #define BROTLI_CONTEXT_MAP_MAX_RLE 16 @@ -29,12 +37,31 @@ #define BROTLI_INITIAL_REPEATED_CODE_LENGTH 8 /* "Large Window Brotli" */ + +/** + * The theoretical maximum number of distance bits specified for large window + * brotli, for 64-bit encoders and decoders. Even when in practice 32-bit + * encoders and decoders only support up to 30 max distance bits, the value is + * set to 62 because it affects the large window brotli file format. + * Specifically, it affects the encoding of simple huffman tree for distances, + * see Specification RFC 7932 chapter 3.4. + */ #define BROTLI_LARGE_MAX_DISTANCE_BITS 62U #define BROTLI_LARGE_MIN_WBITS 10 +/** + * The maximum supported large brotli window bits by the encoder and decoder. + * Large window brotli allows up to 62 bits, however the current encoder and + * decoder, designed for 32-bit integers, only support up to 30 bits maximum. + */ #define BROTLI_LARGE_MAX_WBITS 30 /* Specification: 4. Encoding of distances */ #define BROTLI_NUM_DISTANCE_SHORT_CODES 16 +/** + * Maximal number of "postfix" bits. + * + * Number of "postfix" bits is stored as 2 bits in meta-block header. + */ #define BROTLI_MAX_NPOSTFIX 3 #define BROTLI_MAX_NDIRECT 120 #define BROTLI_MAX_DISTANCE_BITS 24U @@ -45,7 +72,16 @@ #define BROTLI_NUM_DISTANCE_SYMBOLS \ BROTLI_DISTANCE_ALPHABET_SIZE( \ BROTLI_MAX_NDIRECT, BROTLI_MAX_NPOSTFIX, BROTLI_LARGE_MAX_DISTANCE_BITS) + +/* ((1 << 26) - 4) is the maximal distance that can be expressed in RFC 7932 + brotli stream using NPOSTFIX = 0 and NDIRECT = 0. With other NPOSTFIX and + NDIRECT values distances up to ((1 << 29) + 88) could be expressed. */ #define BROTLI_MAX_DISTANCE 0x3FFFFFC + +/* ((1 << 31) - 4) is the safe distance limit. Using this number as a limit + allows safe distance calculation without overflows, given the distance + alphabet size is limited to corresponding size + (see kLargeWindowDistanceCodeLimits). */ #define BROTLI_MAX_ALLOWED_DISTANCE 0x7FFFFFFC /* 7.1. 
Context modes and context ID lookup for literals */ @@ -61,4 +97,88 @@ #define BROTLI_WINDOW_GAP 16 #define BROTLI_MAX_BACKWARD_LIMIT(W) (((size_t)1 << (W)) - BROTLI_WINDOW_GAP) +typedef struct BrotliDistanceCodeLimit { + uint32_t max_alphabet_size; + uint32_t max_distance; +} BrotliDistanceCodeLimit; + +/* This function calculates the maximal size of the distance alphabet, such that + distances greater than the given max_distance can not be represented. + + These limits are designed to support fast and safe 32-bit decoders. + "32-bit" means that signed integer values up to ((1 << 31) - 1) could be + safely expressed. + + Brotli distance alphabet symbols do not represent consecutive distance + ranges. Each distance alphabet symbol (excluding direct distances and short + codes) represents an interleaved (for NPOSTFIX > 0) range of distances. + A "group" of consecutive (1 << NPOSTFIX) symbols represents a non-interleaved + range. Two consecutive groups require the same amount of "extra bits". + + It is important that the distance alphabet represents complete "groups". + To avoid complex logic on the encoder side about interleaved ranges + it was decided to restrict both sides to complete distance code "groups". The result holds the alphabet size limit and the maximal distance representable with it. + */ +BROTLI_UNUSED_FUNCTION BrotliDistanceCodeLimit BrotliCalculateDistanceCodeLimit( + uint32_t max_distance, uint32_t npostfix, uint32_t ndirect) { + BrotliDistanceCodeLimit result; + /* Marking this function as unused, because not all files + including "constants.h" use it -> compiler warns about that. */ + BROTLI_UNUSED(&BrotliCalculateDistanceCodeLimit); + if (max_distance <= ndirect) { + /* This case never happens / exists only for the sake of completeness. */ + result.max_alphabet_size = max_distance + BROTLI_NUM_DISTANCE_SHORT_CODES; + result.max_distance = max_distance; + return result; + } else { + /* The first prohibited value. */ + uint32_t forbidden_distance = max_distance + 1; + /* Subtract "directly" encoded region. 
*/ + uint32_t offset = forbidden_distance - ndirect - 1; + uint32_t ndistbits = 0; + uint32_t tmp; + uint32_t half; + uint32_t group; + /* Postfix for the last dcode in the group. */ + uint32_t postfix = (1u << npostfix) - 1; + uint32_t extra; + uint32_t start; + /* Remove postfix and "head-start". */ + offset = (offset >> npostfix) + 4; + /* Calculate the number of distance bits. */ + tmp = offset / 2; + /* Poor-man's log2floor, to avoid extra dependencies. */ + while (tmp != 0) {ndistbits++; tmp = tmp >> 1;} + /* One bit is covered with subrange addressing ("half"). */ + ndistbits--; + /* Find subrange. */ + half = (offset >> ndistbits) & 1; + /* Calculate the "group" part of dcode. */ + group = ((ndistbits - 1) << 1) | half; + /* Calculated "group" covers the prohibited distance value. */ + if (group == 0) { + /* This case is added for correctness; does not occur for limit > 128. */ + result.max_alphabet_size = ndirect + BROTLI_NUM_DISTANCE_SHORT_CODES; + result.max_distance = ndirect; + return result; + } + /* Decrement "group", so it is the last permitted "group". */ + group--; + /* After group was decremented, ndistbits must be recalculated; the new "half" is (group & 1), used below. */ + ndistbits = (group >> 1) + 1; + /* The last available distance in the subrange has all extra bits set. */ + extra = (1u << ndistbits) - 1; + /* Calculate region start. NB: ndistbits >= 1. */ + start = (1u << (ndistbits + 1)) - 4; + /* Move to subregion. */ + start += (group & 1) << ndistbits; + /* Calculate the alphabet size. */ + result.max_alphabet_size = ((group << npostfix) | postfix) + ndirect + + BROTLI_NUM_DISTANCE_SHORT_CODES + 1; + /* Calculate the maximal distance representable by alphabet. 
*/ + result.max_distance = ((start + extra) << npostfix) + postfix + ndirect + 1; + return result; + } +} + #endif /* BROTLI_COMMON_CONSTANTS_H_ */ diff --git a/c/common/platform.h b/c/common/platform.h index 84c448c..bf5f97b 100755 --- a/c/common/platform.h +++ b/c/common/platform.h @@ -466,20 +466,20 @@ static BROTLI_INLINE void BROTLI_UNALIGNED_STORE64LE(void* p, uint64_t v) { #endif #if defined(BROTLI_ENABLE_LOG) -#define BROTLI_DCHECK(x) assert(x) #define BROTLI_LOG(x) printf x #else -#define BROTLI_DCHECK(x) #define BROTLI_LOG(x) #endif #if defined(BROTLI_DEBUG) || defined(BROTLI_ENABLE_LOG) +#define BROTLI_DCHECK(x) assert(x) static BROTLI_INLINE void BrotliDump(const char* f, int l, const char* fn) { fprintf(stderr, "%s:%d (%s)\n", f, l, fn); fflush(stderr); } #define BROTLI_DUMP() BrotliDump(__FILE__, __LINE__, __FUNCTION__) #else +#define BROTLI_DCHECK(x) #define BROTLI_DUMP() (void)(0) #endif diff --git a/c/common/transform.c b/c/common/transform.c index c182053..c44f671 100755 --- a/c/common/transform.c +++ b/c/common/transform.c @@ -24,8 +24,8 @@ static const char kPrefixSuffix[217] = /* 8x _0 _ _3 _8 _C _E _ _1 _7 _F */ " not \3er \3al \4ful \4ive \5less \4es" /* Ax _5 _9 _D _2 _7 _D */ - "t \4ize \2\xc2\xa0\4ous \5 the \2e \0"; -/* Cx _2 _7___ ___ _A _F _5 _8 */ + "t \4ize \2\xc2\xa0\4ous \5 the \2e "; /* \0 - implicit trailing zero. */ +/* Cx _2 _7___ ___ _A _F _5 _8 */ static const uint16_t kPrefixSuffixMap[50] = { 0x00, 0x02, 0x05, 0x0E, 0x13, 0x16, 0x18, 0x1E, 0x23, 0x25, -- cgit v1.1