From 35e69fc7cf9421ab04ffc9d52cb36d07fa12984a Mon Sep 17 00:00:00 2001 From: Eugene Kliuchnikov Date: Mon, 26 Feb 2018 09:04:36 -0500 Subject: New feature: "Large Window Brotli" (#640) * New feature: "Large Window Brotli" By setting special encoder/decoder flag it is now possible to extend LZ-window up to 30 bits; though produced stream will not be RFC7932 compliant. Added new dictionary generator - "DSH". It combines speed of "Sieve" and quality of "DM". Plus utilities to prepare train corpora (remove unique strings). Improved compression ratio: now two sub-blocks could be stitched: the last copy command could be extended to span the next sub-block. Fixed compression ineffectiveness caused by floating numbers rounding and wrong cost heuristic. Other C changes: - combined / moved `context.h` to `common` - moved transforms to `common` - unified some aspects of code formatting - added an abstraction for encoder (static) dictionary - moved default allocator/deallocator functions to `common` brotli CLI: - window size is auto-adjusted if not specified explicitly Java: - added "eager" decoding both to JNI wrapper and pure decoder - huge speed-up of `DictionaryData` initialization * Add dictionaryless compressed dictionary * Fix `sources.lst` * Fix `sources.lst` and add a note that `libtool` is also required. * Update setup.py * Fix `EagerStreamTest` * Fix BUILD file * Add missing `libdivsufsort` dependency * Fix "unused parameter" warning. 
--- .gitmodules | 3 + WORKSPACE | 6 + bootstrap | 2 + c/common/constants.h | 19 +- c/common/context.h | 261 ++++++++ c/common/dictionary.bin.br | Bin 0 -> 51687 bytes c/common/platform.h | 22 +- c/common/transform.c | 236 +++++++ c/common/transform.h | 80 +++ c/dec/bit_reader.h | 33 +- c/dec/context.h | 251 -------- c/dec/decode.c | 455 ++++++++----- c/dec/huffman.c | 22 +- c/dec/huffman.h | 20 +- c/dec/prefix.h | 7 +- c/dec/state.c | 37 +- c/dec/state.h | 47 +- c/dec/transform.h | 300 --------- c/enc/backward_references.c | 21 +- c/enc/backward_references.h | 1 - c/enc/backward_references_hq.c | 91 ++- c/enc/backward_references_hq.h | 16 +- c/enc/backward_references_inc.h | 13 +- c/enc/bit_cost.h | 8 +- c/enc/block_encoder_inc.h | 13 +- c/enc/block_splitter.c | 2 +- c/enc/block_splitter_inc.h | 2 +- c/enc/brotli_bit_stream.c | 187 +++--- c/enc/brotli_bit_stream.h | 50 +- c/enc/command.h | 30 +- c/enc/compress_fragment.c | 4 +- c/enc/compress_fragment_two_pass.c | 2 +- c/enc/context.h | 184 ------ c/enc/encode.c | 338 ++++++---- c/enc/encoder_dict.c | 32 + c/enc/encoder_dict.h | 42 ++ c/enc/entropy_encode.c | 22 +- c/enc/entropy_encode.h | 8 +- c/enc/entropy_encode_static.h | 4 +- c/enc/hash.h | 35 +- c/enc/hash_forgetful_chain_inc.h | 15 +- c/enc/hash_longest_match64_inc.h | 16 +- c/enc/hash_longest_match_inc.h | 14 +- c/enc/hash_longest_match_quickly_inc.h | 19 +- c/enc/hash_to_binary_tree_inc.h | 10 +- c/enc/histogram.c | 15 +- c/enc/histogram.h | 7 +- c/enc/histogram_inc.h | 2 +- c/enc/literal_cost.c | 8 +- c/enc/literal_cost.h | 2 +- c/enc/memory.c | 14 +- c/enc/metablock.c | 24 +- c/enc/metablock.h | 7 +- c/enc/params.h | 11 + c/enc/prefix.h | 5 +- c/enc/quality.h | 11 +- c/enc/ringbuffer.h | 24 +- c/enc/static_dict.c | 80 +-- c/enc/static_dict.h | 5 +- c/enc/static_dict_lut.h | 2 +- c/enc/utf8_util.c | 40 +- c/enc/write_bits.h | 18 +- c/include/brotli/decode.h | 15 +- c/include/brotli/encode.h | 11 +- c/tools/brotli.c | 189 ++++-- docs/brotli.1 | 2 +- 
docs/decode.h.3 | 5 +- docs/encode.h.3 | 9 +- docs/types.h.3 | 2 +- java/org/brotli/dec/BUILD | 6 + java/org/brotli/dec/BrotliInputStream.java | 12 +- java/org/brotli/dec/Decode.java | 84 +-- java/org/brotli/dec/DictionaryData.java | 29 +- java/org/brotli/dec/EagerStreamTest.java | 386 +++++++++++ java/org/brotli/dec/State.java | 7 +- java/org/brotli/wrapper/dec/BrotliInputStream.java | 4 + java/org/brotli/wrapper/dec/Decoder.java | 10 + java/org/brotli/wrapper/dec/DecoderJNI.java | 26 +- java/org/brotli/wrapper/dec/EagerStreamTest.java | 75 +++ java/org/brotli/wrapper/dec/decoder_jni.cc | 31 +- research/BUILD | 8 + research/BUILD.libdivsufsort | 55 ++ research/deorummolae.cc | 173 ++--- research/deorummolae.h | 9 +- research/dictionary_generator.cc | 119 +++- research/draw_diff.cc | 21 +- research/durchschlag.cc | 714 +++++++++++++++++++++ research/durchschlag.h | 99 +++ research/libdivsufsort | 1 + research/sieve.cc | 174 +++-- research/sieve.h | 5 +- scripts/sources.lst | 17 +- setup.py | 11 +- 93 files changed, 3692 insertions(+), 1872 deletions(-) create mode 100755 c/common/context.h create mode 100755 c/common/dictionary.bin.br create mode 100755 c/common/transform.c create mode 100755 c/common/transform.h delete mode 100644 c/dec/context.h delete mode 100644 c/dec/transform.h delete mode 100644 c/enc/context.h create mode 100755 c/enc/encoder_dict.c create mode 100755 c/enc/encoder_dict.h create mode 100755 java/org/brotli/dec/EagerStreamTest.java create mode 100755 java/org/brotli/wrapper/dec/EagerStreamTest.java create mode 100644 research/BUILD.libdivsufsort create mode 100755 research/durchschlag.cc create mode 100755 research/durchschlag.h create mode 160000 research/libdivsufsort diff --git a/.gitmodules b/.gitmodules index af7df38..3ec8760 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,3 +1,6 @@ [submodule "research/esaxx"] path = research/esaxx url = https://github.com/hillbig/esaxx +[submodule "research/libdivsufsort"] + path = 
research/libdivsufsort + url = https://github.com/y-256/libdivsufsort.git diff --git a/WORKSPACE b/WORKSPACE index b239745..59c1c4f 100644 --- a/WORKSPACE +++ b/WORKSPACE @@ -82,6 +82,12 @@ filegroup( )""", ) +new_local_repository( + name = "divsufsort", + build_file = "//research:BUILD.libdivsufsort", + path = "research/libdivsufsort", +) + load("@io_bazel_rules_closure//closure:defs.bzl", "closure_repositories") closure_repositories() diff --git a/bootstrap b/bootstrap index dbaea15..64aca2c 100755 --- a/bootstrap +++ b/bootstrap @@ -7,6 +7,8 @@ sed --version >/dev/null 2>&1 || { echo >&2 "'sed' $REQUIRED"; exit 1; } fi autoreconf --version >/dev/null 2>&1 || { echo >&2 "'autoconf' $REQUIRED"; exit 1; } +# If libtool is not installed -> "error: Libtool library used but 'LIBTOOL' is undefined" + mkdir m4 2>/dev/null BROTLI_ABI_HEX=`sed -n 's/#define BROTLI_ABI_VERSION 0x//p' c/common/version.h` diff --git a/c/common/constants.h b/c/common/constants.h index 416ec55..26edcd5 100644 --- a/c/common/constants.h +++ b/c/common/constants.h @@ -28,18 +28,25 @@ /* "code length of 8 is repeated" */ #define BROTLI_INITIAL_REPEATED_CODE_LENGTH 8 +/* "Large Window Brotli" */ +#define BROTLI_LARGE_MAX_DISTANCE_BITS 62U +#define BROTLI_LARGE_MIN_WBITS 10 +#define BROTLI_LARGE_MAX_WBITS 30 + /* Specification: 4. Encoding of distances */ #define BROTLI_NUM_DISTANCE_SHORT_CODES 16 #define BROTLI_MAX_NPOSTFIX 3 #define BROTLI_MAX_NDIRECT 120 #define BROTLI_MAX_DISTANCE_BITS 24U -/* BROTLI_NUM_DISTANCE_SYMBOLS == 520 */ -#define BROTLI_NUM_DISTANCE_SYMBOLS (BROTLI_NUM_DISTANCE_SHORT_CODES + \ - BROTLI_MAX_NDIRECT + \ - (BROTLI_MAX_DISTANCE_BITS << \ - (BROTLI_MAX_NPOSTFIX + 1))) -/* Distance that is guaranteed to be representable in any stream. 
*/ +#define BROTLI_DISTANCE_ALPHABET_SIZE(NDIRECT, NPOSTFIX, MAXNBITS) ( \ + BROTLI_NUM_DISTANCE_SHORT_CODES + (NDIRECT) + \ + ((MAXNBITS) << ((NPOSTFIX) + 1))) +/* BROTLI_NUM_DISTANCE_SYMBOLS == 1128 */ +#define BROTLI_NUM_DISTANCE_SYMBOLS \ + BROTLI_DISTANCE_ALPHABET_SIZE( \ + BROTLI_MAX_NDIRECT, BROTLI_MAX_NPOSTFIX, BROTLI_LARGE_MAX_DISTANCE_BITS) #define BROTLI_MAX_DISTANCE 0x3FFFFFC +#define BROTLI_MAX_ALLOWED_DISTANCE 0x7FFFFFFC /* 7.1. Context modes and context ID lookup for literals */ /* "context IDs for literals are in the range of 0..63" */ diff --git a/c/common/context.h b/c/common/context.h new file mode 100755 index 0000000..24b3eb4 --- /dev/null +++ b/c/common/context.h @@ -0,0 +1,261 @@ +/* Copyright 2013 Google Inc. All Rights Reserved. + + Distributed under MIT license. + See file LICENSE for detail or copy at https://opensource.org/licenses/MIT +*/ + +/* Lookup table to map the previous two bytes to a context id. + + There are four different context modeling modes defined here: + CONTEXT_LSB6: context id is the least significant 6 bits of the last byte, + CONTEXT_MSB6: context id is the most significant 6 bits of the last byte, + CONTEXT_UTF8: second-order context model tuned for UTF8-encoded text, + CONTEXT_SIGNED: second-order context model tuned for signed integers. + + If |p1| and |p2| are the previous two bytes, and |mode| is current context + mode, we calculate the context as: + + context = ContextLut(mode)[p1] | ContextLut(mode)[p2 + 256]. + + For CONTEXT_UTF8 mode, if the previous two bytes are ASCII characters + (i.e. < 128), this will be equivalent to + + context = 4 * context1(p1) + context2(p2), + + where context1 is based on the previous byte in the following way: + + 0 : non-ASCII control + 1 : \t, \n, \r + 2 : space + 3 : other punctuation + 4 : " ' + 5 : % + 6 : ( < [ { + 7 : ) > ] } + 8 : , ; : + 9 : . 
+ 10 : = + 11 : number + 12 : upper-case vowel + 13 : upper-case consonant + 14 : lower-case vowel + 15 : lower-case consonant + + and context2 is based on the second last byte: + + 0 : control, space + 1 : punctuation + 2 : upper-case letter, number + 3 : lower-case letter + + If the last byte is ASCII, and the second last byte is not (in a valid UTF8 + stream it will be a continuation byte, value between 128 and 191), the + context is the same as if the second last byte was an ASCII control or space. + + If the last byte is a UTF8 lead byte (value >= 192), then the next byte will + be a continuation byte and the context id is 2 or 3 depending on the LSB of + the last byte and to a lesser extent on the second last byte if it is ASCII. + + If the last byte is a UTF8 continuation byte, the second last byte can be: + - continuation byte: the next byte is probably ASCII or lead byte (assuming + 4-byte UTF8 characters are rare) and the context id is 0 or 1. + - lead byte (192 - 207): next byte is ASCII or lead byte, context is 0 or 1 + - lead byte (208 - 255): next byte is continuation byte, context is 2 or 3 + + The possible value combinations of the previous two bytes, the range of + context ids and the type of the next byte is summarized in the table below: + + |--------\-----------------------------------------------------------------| + | \ Last byte | + | Second \---------------------------------------------------------------| + | last byte \ ASCII | cont. byte | lead byte | + | \ (0-127) | (128-191) | (192-) | + |=============|===================|=====================|==================| + | ASCII | next: ASCII/lead | not valid | next: cont. | + | (0-127) | context: 4 - 63 | | context: 2 - 3 | + |-------------|-------------------|---------------------|------------------| + | cont. byte | next: ASCII/lead | next: ASCII/lead | next: cont. 
| + | (128-191) | context: 4 - 63 | context: 0 - 1 | context: 2 - 3 | + |-------------|-------------------|---------------------|------------------| + | lead byte | not valid | next: ASCII/lead | not valid | + | (192-207) | | context: 0 - 1 | | + |-------------|-------------------|---------------------|------------------| + | lead byte | not valid | next: cont. | not valid | + | (208-) | | context: 2 - 3 | | + |-------------|-------------------|---------------------|------------------| +*/ + +#ifndef BROTLI_COMMON_CONTEXT_H_ +#define BROTLI_COMMON_CONTEXT_H_ + +#include + +typedef enum ContextType { + CONTEXT_LSB6 = 0, + CONTEXT_MSB6 = 1, + CONTEXT_UTF8 = 2, + CONTEXT_SIGNED = 3 +} ContextType; + +/* Common context lookup table for all context modes. */ +static const uint8_t kContextLookup[2048] = { + /* CONTEXT_LSB6, last byte. */ + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, + 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, + 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, + 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, + 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, + 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, + 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, + 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, + 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, + + /* CONTEXT_LSB6, second last byte, */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + + /* CONTEXT_MSB6, last byte. */ + 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, + 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, + 8, 8, 8, 8, 9, 9, 9, 9, 10, 10, 10, 10, 11, 11, 11, 11, + 12, 12, 12, 12, 13, 13, 13, 13, 14, 14, 14, 14, 15, 15, 15, 15, + 16, 16, 16, 16, 17, 17, 17, 17, 18, 18, 18, 18, 19, 19, 19, 19, + 20, 20, 20, 20, 21, 21, 21, 21, 22, 22, 22, 22, 23, 23, 23, 23, + 24, 24, 24, 24, 25, 25, 25, 25, 26, 26, 26, 26, 27, 27, 27, 27, + 28, 28, 28, 28, 29, 29, 29, 29, 30, 30, 30, 30, 31, 31, 31, 31, + 32, 32, 32, 32, 33, 33, 33, 33, 34, 34, 34, 34, 35, 35, 35, 35, + 36, 36, 36, 36, 37, 37, 37, 37, 38, 38, 38, 38, 39, 39, 39, 39, + 40, 40, 40, 40, 41, 41, 41, 41, 42, 42, 42, 42, 43, 43, 43, 43, + 44, 44, 44, 44, 45, 45, 45, 45, 46, 46, 46, 46, 47, 47, 47, 47, + 48, 48, 48, 48, 49, 49, 49, 49, 50, 50, 50, 50, 51, 51, 51, 51, + 52, 52, 52, 52, 53, 53, 53, 53, 54, 54, 54, 54, 55, 55, 55, 55, + 56, 56, 56, 56, 57, 57, 57, 57, 58, 58, 58, 58, 59, 59, 59, 59, + 60, 60, 60, 60, 61, 61, 61, 61, 62, 62, 62, 62, 63, 63, 63, 63, + + /* CONTEXT_MSB6, second last byte, */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + + /* CONTEXT_UTF8, last byte. */ + /* ASCII range. */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 0, 0, 4, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 8, 12, 16, 12, 12, 20, 12, 16, 24, 28, 12, 12, 32, 12, 36, 12, + 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 32, 32, 24, 40, 28, 12, + 12, 48, 52, 52, 52, 48, 52, 52, 52, 48, 52, 52, 52, 52, 52, 48, + 52, 52, 52, 52, 52, 48, 52, 52, 52, 52, 52, 24, 12, 28, 12, 12, + 12, 56, 60, 60, 60, 56, 60, 60, 60, 56, 60, 60, 60, 60, 60, 56, + 60, 60, 60, 60, 60, 56, 60, 60, 60, 60, 60, 24, 12, 28, 12, 0, + /* UTF8 continuation byte range. */ + 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, + 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, + 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, + 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, + /* UTF8 lead byte range. */ + 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, + 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, + 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, + 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, + + /* CONTEXT_UTF8 second last byte. */ + /* ASCII range. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, + 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, + 1, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 1, 1, 1, 1, 0, + /* UTF8 continuation byte range. */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* UTF8 lead byte range. */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + + /* CONTEXT_SIGNED, last byte, same as the above values shifted by 3 bits. */ + 0, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, + 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, + 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, + 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, + 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, + 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, + 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, + 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 56, + + /* 
CONTEXT_SIGNED, second last byte. */ + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7, +}; + +typedef const uint8_t* ContextLut; + +/* typeof(MODE) == ContextType; returns ContextLut */ +#define BROTLI_CONTEXT_LUT(MODE) (&kContextLookup[(MODE) << 9]) + +/* typeof(LUT) == ContextLut */ +#define BROTLI_CONTEXT(P1, P2, LUT) ((LUT)[P1] | ((LUT) + 256)[P2]) + +#endif /* BROTLI_COMMON_CONTEXT_H_ */ diff --git a/c/common/dictionary.bin.br b/c/common/dictionary.bin.br new file mode 100755 index 0000000..6a55d42 Binary files /dev/null and b/c/common/dictionary.bin.br differ diff --git a/c/common/platform.h b/c/common/platform.h index 804fd25..d6fd3ee 100755 --- a/c/common/platform.h +++ b/c/common/platform.h @@ -10,6 +10,7 @@ #define BROTLI_COMMON_PLATFORM_H_ #include /* memcpy */ +#include /* malloc, free */ #include #include @@ -204,7 +205,7 @@ static BROTLI_INLINE uint64_t BrotliUnalignedRead64(const void* p) { static BROTLI_INLINE void BrotliUnalignedWrite64(void* p, uint64_t v) { memcpy(p, &v, sizeof v); } -#else /* BROTLI_ALIGNED_READ */ +#else /* BROTLI_ALIGNED_READ */ /* Unaligned memory access is allowed: just cast pointer to requested type. 
*/ static BROTLI_INLINE uint16_t BrotliUnalignedRead16(const void* p) { return *(const uint16_t*)p; @@ -218,7 +219,7 @@ static BROTLI_INLINE uint64_t BrotliUnalignedRead64(const void* p) { static BROTLI_INLINE void BrotliUnalignedWrite64(void* p, uint64_t v) { *(uint64_t*)p = v; } -#endif /* BROTLI_ALIGNED_READ */ +#endif /* BROTLI_ALIGNED_READ */ #if BROTLI_LITTLE_ENDIAN /* Straight endianness. Just read / write values. */ @@ -390,6 +391,18 @@ BROTLI_MIN_MAX(size_t) BROTLI_MIN_MAX(uint32_t) BROTLI_MIN_MAX(uint8_t) (A)[(J)] = __brotli_swap_tmp; \ } +/* Default brotli_alloc_func */ +static void* BrotliDefaultAllocFunc(void* opaque, size_t size) { + BROTLI_UNUSED(opaque); + return malloc(size); +} + +/* Default brotli_free_func */ +static void BrotliDefaultFreeFunc(void* opaque, void* address) { + BROTLI_UNUSED(opaque); + free(address); +} + BROTLI_UNUSED_FUNCTION void BrotliSuppressUnusedFunctions(void) { BROTLI_UNUSED(BrotliSuppressUnusedFunctions); BROTLI_UNUSED(BrotliUnalignedRead16); @@ -413,6 +426,11 @@ BROTLI_UNUSED_FUNCTION void BrotliSuppressUnusedFunctions(void) { BROTLI_UNUSED(brotli_max_uint32_t); BROTLI_UNUSED(brotli_min_uint8_t); BROTLI_UNUSED(brotli_max_uint8_t); + BROTLI_UNUSED(BrotliDefaultAllocFunc); + BROTLI_UNUSED(BrotliDefaultFreeFunc); +#if defined(BROTLI_DEBUG) || defined(BROTLI_ENABLE_LOG) + BROTLI_UNUSED(BrotliDump); +#endif } #endif /* BROTLI_COMMON_PLATFORM_H_ */ diff --git a/c/common/transform.c b/c/common/transform.c new file mode 100755 index 0000000..53fe4f6 --- /dev/null +++ b/c/common/transform.c @@ -0,0 +1,236 @@ +/* Copyright 2013 Google Inc. All Rights Reserved. + + Distributed under MIT license. 
+ See file LICENSE for detail or copy at https://opensource.org/licenses/MIT +*/ + +#include "./platform.h" +#include "./transform.h" + +#if defined(__cplusplus) || defined(c_plusplus) +extern "C" { +#endif + +/* RFC 7932 transforms string data */ +static const char kPrefixSuffix[217] = + "\1 \2, \10 of the \4 of \2s \1.\5 and \4 " +/* 0x _0 _2 __5 _E _3 _6 _8 _E */ + "in \1\"\4 to \2\">\1\n\2. \1]\5 for \3 a \6 " +/* 2x _3_ _5 _A_ _D_ _F _2 _4 _A _E */ + "that \1\'\6 with \6 from \4 by \1(\6. T" +/* 4x _5_ _7 _E _5 _A _C */ + "he \4 on \4 as \4 is \4ing \2\n\t\1:\3ed " +/* 6x _3 _8 _D _2 _7_ _ _A _C */ + "\2=\"\4 at \3ly \1,\2=\'\5.com/\7. This \5" +/* 8x _0 _ _3 _8 _C _E _ _1 _7 _F */ + " not \3er \3al \4ful \4ive \5less \4es" +/* Ax _5 _9 _D _2 _7 _D */ + "t \4ize \2\xc2\xa0\4ous \5 the \2e \0"; +/* Cx _2 _7___ ___ _A _F _5 _8 */ + +static const uint16_t kPrefixSuffixMap[50] = { + 0x00, 0x02, 0x05, 0x0E, 0x13, 0x16, 0x18, 0x1E, 0x23, 0x25, + 0x2A, 0x2D, 0x2F, 0x32, 0x34, 0x3A, 0x3E, 0x45, 0x47, 0x4E, + 0x55, 0x5A, 0x5C, 0x63, 0x68, 0x6D, 0x72, 0x77, 0x7A, 0x7C, + 0x80, 0x83, 0x88, 0x8C, 0x8E, 0x91, 0x97, 0x9F, 0xA5, 0xA9, + 0xAD, 0xB2, 0xB7, 0xBD, 0xC2, 0xC7, 0xCA, 0xCF, 0xD5, 0xD8 +}; + +/* RFC 7932 transforms */ +static const uint8_t kTransformsData[] = { + 49, BROTLI_TRANSFORM_IDENTITY, 49, + 49, BROTLI_TRANSFORM_IDENTITY, 0, + 0, BROTLI_TRANSFORM_IDENTITY, 0, + 49, BROTLI_TRANSFORM_OMIT_FIRST_1, 49, + 49, BROTLI_TRANSFORM_UPPERCASE_FIRST, 0, + 49, BROTLI_TRANSFORM_IDENTITY, 47, + 0, BROTLI_TRANSFORM_IDENTITY, 49, + 4, BROTLI_TRANSFORM_IDENTITY, 0, + 49, BROTLI_TRANSFORM_IDENTITY, 3, + 49, BROTLI_TRANSFORM_UPPERCASE_FIRST, 49, + 49, BROTLI_TRANSFORM_IDENTITY, 6, + 49, BROTLI_TRANSFORM_OMIT_FIRST_2, 49, + 49, BROTLI_TRANSFORM_OMIT_LAST_1, 49, + 1, BROTLI_TRANSFORM_IDENTITY, 0, + 49, BROTLI_TRANSFORM_IDENTITY, 1, + 0, BROTLI_TRANSFORM_UPPERCASE_FIRST, 0, + 49, BROTLI_TRANSFORM_IDENTITY, 7, + 49, BROTLI_TRANSFORM_IDENTITY, 9, + 48, BROTLI_TRANSFORM_IDENTITY, 0, 
+ 49, BROTLI_TRANSFORM_IDENTITY, 8, + 49, BROTLI_TRANSFORM_IDENTITY, 5, + 49, BROTLI_TRANSFORM_IDENTITY, 10, + 49, BROTLI_TRANSFORM_IDENTITY, 11, + 49, BROTLI_TRANSFORM_OMIT_LAST_3, 49, + 49, BROTLI_TRANSFORM_IDENTITY, 13, + 49, BROTLI_TRANSFORM_IDENTITY, 14, + 49, BROTLI_TRANSFORM_OMIT_FIRST_3, 49, + 49, BROTLI_TRANSFORM_OMIT_LAST_2, 49, + 49, BROTLI_TRANSFORM_IDENTITY, 15, + 49, BROTLI_TRANSFORM_IDENTITY, 16, + 0, BROTLI_TRANSFORM_UPPERCASE_FIRST, 49, + 49, BROTLI_TRANSFORM_IDENTITY, 12, + 5, BROTLI_TRANSFORM_IDENTITY, 49, + 0, BROTLI_TRANSFORM_IDENTITY, 1, + 49, BROTLI_TRANSFORM_OMIT_FIRST_4, 49, + 49, BROTLI_TRANSFORM_IDENTITY, 18, + 49, BROTLI_TRANSFORM_IDENTITY, 17, + 49, BROTLI_TRANSFORM_IDENTITY, 19, + 49, BROTLI_TRANSFORM_IDENTITY, 20, + 49, BROTLI_TRANSFORM_OMIT_FIRST_5, 49, + 49, BROTLI_TRANSFORM_OMIT_FIRST_6, 49, + 47, BROTLI_TRANSFORM_IDENTITY, 49, + 49, BROTLI_TRANSFORM_OMIT_LAST_4, 49, + 49, BROTLI_TRANSFORM_IDENTITY, 22, + 49, BROTLI_TRANSFORM_UPPERCASE_ALL, 49, + 49, BROTLI_TRANSFORM_IDENTITY, 23, + 49, BROTLI_TRANSFORM_IDENTITY, 24, + 49, BROTLI_TRANSFORM_IDENTITY, 25, + 49, BROTLI_TRANSFORM_OMIT_LAST_7, 49, + 49, BROTLI_TRANSFORM_OMIT_LAST_1, 26, + 49, BROTLI_TRANSFORM_IDENTITY, 27, + 49, BROTLI_TRANSFORM_IDENTITY, 28, + 0, BROTLI_TRANSFORM_IDENTITY, 12, + 49, BROTLI_TRANSFORM_IDENTITY, 29, + 49, BROTLI_TRANSFORM_OMIT_FIRST_9, 49, + 49, BROTLI_TRANSFORM_OMIT_FIRST_7, 49, + 49, BROTLI_TRANSFORM_OMIT_LAST_6, 49, + 49, BROTLI_TRANSFORM_IDENTITY, 21, + 49, BROTLI_TRANSFORM_UPPERCASE_FIRST, 1, + 49, BROTLI_TRANSFORM_OMIT_LAST_8, 49, + 49, BROTLI_TRANSFORM_IDENTITY, 31, + 49, BROTLI_TRANSFORM_IDENTITY, 32, + 47, BROTLI_TRANSFORM_IDENTITY, 3, + 49, BROTLI_TRANSFORM_OMIT_LAST_5, 49, + 49, BROTLI_TRANSFORM_OMIT_LAST_9, 49, + 0, BROTLI_TRANSFORM_UPPERCASE_FIRST, 1, + 49, BROTLI_TRANSFORM_UPPERCASE_FIRST, 8, + 5, BROTLI_TRANSFORM_IDENTITY, 21, + 49, BROTLI_TRANSFORM_UPPERCASE_ALL, 0, + 49, BROTLI_TRANSFORM_UPPERCASE_FIRST, 10, + 49, 
BROTLI_TRANSFORM_IDENTITY, 30, + 0, BROTLI_TRANSFORM_IDENTITY, 5, + 35, BROTLI_TRANSFORM_IDENTITY, 49, + 47, BROTLI_TRANSFORM_IDENTITY, 2, + 49, BROTLI_TRANSFORM_UPPERCASE_FIRST, 17, + 49, BROTLI_TRANSFORM_IDENTITY, 36, + 49, BROTLI_TRANSFORM_IDENTITY, 33, + 5, BROTLI_TRANSFORM_IDENTITY, 0, + 49, BROTLI_TRANSFORM_UPPERCASE_FIRST, 21, + 49, BROTLI_TRANSFORM_UPPERCASE_FIRST, 5, + 49, BROTLI_TRANSFORM_IDENTITY, 37, + 0, BROTLI_TRANSFORM_IDENTITY, 30, + 49, BROTLI_TRANSFORM_IDENTITY, 38, + 0, BROTLI_TRANSFORM_UPPERCASE_ALL, 0, + 49, BROTLI_TRANSFORM_IDENTITY, 39, + 0, BROTLI_TRANSFORM_UPPERCASE_ALL, 49, + 49, BROTLI_TRANSFORM_IDENTITY, 34, + 49, BROTLI_TRANSFORM_UPPERCASE_ALL, 8, + 49, BROTLI_TRANSFORM_UPPERCASE_FIRST, 12, + 0, BROTLI_TRANSFORM_IDENTITY, 21, + 49, BROTLI_TRANSFORM_IDENTITY, 40, + 0, BROTLI_TRANSFORM_UPPERCASE_FIRST, 12, + 49, BROTLI_TRANSFORM_IDENTITY, 41, + 49, BROTLI_TRANSFORM_IDENTITY, 42, + 49, BROTLI_TRANSFORM_UPPERCASE_ALL, 17, + 49, BROTLI_TRANSFORM_IDENTITY, 43, + 0, BROTLI_TRANSFORM_UPPERCASE_FIRST, 5, + 49, BROTLI_TRANSFORM_UPPERCASE_ALL, 10, + 0, BROTLI_TRANSFORM_IDENTITY, 34, + 49, BROTLI_TRANSFORM_UPPERCASE_FIRST, 33, + 49, BROTLI_TRANSFORM_IDENTITY, 44, + 49, BROTLI_TRANSFORM_UPPERCASE_ALL, 5, + 45, BROTLI_TRANSFORM_IDENTITY, 49, + 0, BROTLI_TRANSFORM_IDENTITY, 33, + 49, BROTLI_TRANSFORM_UPPERCASE_FIRST, 30, + 49, BROTLI_TRANSFORM_UPPERCASE_ALL, 30, + 49, BROTLI_TRANSFORM_IDENTITY, 46, + 49, BROTLI_TRANSFORM_UPPERCASE_ALL, 1, + 49, BROTLI_TRANSFORM_UPPERCASE_FIRST, 34, + 0, BROTLI_TRANSFORM_UPPERCASE_FIRST, 33, + 0, BROTLI_TRANSFORM_UPPERCASE_ALL, 30, + 0, BROTLI_TRANSFORM_UPPERCASE_ALL, 1, + 49, BROTLI_TRANSFORM_UPPERCASE_ALL, 33, + 49, BROTLI_TRANSFORM_UPPERCASE_ALL, 21, + 49, BROTLI_TRANSFORM_UPPERCASE_ALL, 12, + 0, BROTLI_TRANSFORM_UPPERCASE_ALL, 5, + 49, BROTLI_TRANSFORM_UPPERCASE_ALL, 34, + 0, BROTLI_TRANSFORM_UPPERCASE_ALL, 12, + 0, BROTLI_TRANSFORM_UPPERCASE_FIRST, 30, + 0, BROTLI_TRANSFORM_UPPERCASE_ALL, 34, + 0, 
BROTLI_TRANSFORM_UPPERCASE_FIRST, 34, +}; + +static BrotliTransforms kBrotliTransforms = { + sizeof(kPrefixSuffix), + (const uint8_t*)kPrefixSuffix, + kPrefixSuffixMap, + sizeof(kTransformsData) / (3 * sizeof(kTransformsData[0])), + kTransformsData, + {0, 12, 27, 23, 42, 63, 56, 48, 59, 64} +}; + +const BrotliTransforms* BrotliGetTransforms(void) { + return &kBrotliTransforms; +} + +static int ToUpperCase(uint8_t* p) { + if (p[0] < 0xC0) { + if (p[0] >= 'a' && p[0] <= 'z') { + p[0] ^= 32; + } + return 1; + } + /* An overly simplified uppercasing model for UTF-8. */ + if (p[0] < 0xE0) { + p[1] ^= 32; + return 2; + } + /* An arbitrary transform for three byte characters. */ + p[2] ^= 5; + return 3; +} + +int BrotliTransformDictionaryWord(uint8_t* dst, const uint8_t* word, int len, + const BrotliTransforms* BROTLI_RESTRICT transforms, int transfom_idx) { + int idx = 0; + const uint8_t* prefix = BROTLI_TRANSFORM_PREFIX(transforms, transfom_idx); + uint8_t type = BROTLI_TRANSFORM_TYPE(transforms, transfom_idx); + const uint8_t* suffix = BROTLI_TRANSFORM_SUFFIX(transforms, transfom_idx); + { + int prefix_len = *prefix++; + while (prefix_len--) { dst[idx++] = *prefix++; } + } + { + const int t = type; + int i = 0; + if (t <= BROTLI_TRANSFORM_OMIT_LAST_9) { + len -= t; + } else if (t >= BROTLI_TRANSFORM_OMIT_FIRST_1 + && t <= BROTLI_TRANSFORM_OMIT_FIRST_9) { + int skip = t - (BROTLI_TRANSFORM_OMIT_FIRST_1 - 1); + word += skip; + len -= skip; + } + while (i < len) { dst[idx++] = word[i++]; } + if (t == BROTLI_TRANSFORM_UPPERCASE_FIRST) { + ToUpperCase(&dst[idx - len]); + } else if (t == BROTLI_TRANSFORM_UPPERCASE_ALL) { + uint8_t* uppercase = &dst[idx - len]; + while (len > 0) { + int step = ToUpperCase(uppercase); + uppercase += step; + len -= step; + } + } + } + { + int suffix_len = *suffix++; + while (suffix_len--) { dst[idx++] = *suffix++; } + return idx; + } +} + +#if defined(__cplusplus) || defined(c_plusplus) +} /* extern "C" */ +#endif diff --git 
a/c/common/transform.h b/c/common/transform.h new file mode 100755 index 0000000..b279c04 --- /dev/null +++ b/c/common/transform.h @@ -0,0 +1,80 @@ +/* transforms is a part of ABI, but not API. + + It means that there are some functions that are supposed to be in "common" + library, but header itself is not placed into include/brotli. This way, + aforementioned functions will be available only to brotli internals. + */ + +#ifndef BROTLI_COMMON_TRANSFORM_H_ +#define BROTLI_COMMON_TRANSFORM_H_ + +#include +#include + +#if defined(__cplusplus) || defined(c_plusplus) +extern "C" { +#endif + +enum BrotliWordTransformType { + BROTLI_TRANSFORM_IDENTITY = 0, + BROTLI_TRANSFORM_OMIT_LAST_1 = 1, + BROTLI_TRANSFORM_OMIT_LAST_2 = 2, + BROTLI_TRANSFORM_OMIT_LAST_3 = 3, + BROTLI_TRANSFORM_OMIT_LAST_4 = 4, + BROTLI_TRANSFORM_OMIT_LAST_5 = 5, + BROTLI_TRANSFORM_OMIT_LAST_6 = 6, + BROTLI_TRANSFORM_OMIT_LAST_7 = 7, + BROTLI_TRANSFORM_OMIT_LAST_8 = 8, + BROTLI_TRANSFORM_OMIT_LAST_9 = 9, + BROTLI_TRANSFORM_UPPERCASE_FIRST = 10, + BROTLI_TRANSFORM_UPPERCASE_ALL = 11, + BROTLI_TRANSFORM_OMIT_FIRST_1 = 12, + BROTLI_TRANSFORM_OMIT_FIRST_2 = 13, + BROTLI_TRANSFORM_OMIT_FIRST_3 = 14, + BROTLI_TRANSFORM_OMIT_FIRST_4 = 15, + BROTLI_TRANSFORM_OMIT_FIRST_5 = 16, + BROTLI_TRANSFORM_OMIT_FIRST_6 = 17, + BROTLI_TRANSFORM_OMIT_FIRST_7 = 18, + BROTLI_TRANSFORM_OMIT_FIRST_8 = 19, + BROTLI_TRANSFORM_OMIT_FIRST_9 = 20, + BROTLI_NUM_TRANSFORM_TYPES /* Counts transforms, not a transform itself. */ +}; + +#define BROTLI_TRANSFORMS_MAX_CUT_OFF BROTLI_TRANSFORM_OMIT_LAST_9 + +typedef struct BrotliTransforms { + uint16_t prefix_suffix_size; + /* Last character must be null, so prefix_suffix_size must be at least 1. */ + const uint8_t* prefix_suffix; + const uint16_t* prefix_suffix_map; + uint32_t num_transforms; + /* Each entry is a [prefix_id, transform, suffix_id] triplet. */ + const uint8_t* transforms; + /* Indices of transforms like ["", BROTLI_TRANSFORM_OMIT_LAST_#, ""]. 
+ 0-th element corresponds to ["", BROTLI_TRANSFORM_IDENTITY, ""]. + -1, if cut-off transform does not exist. */ + int16_t cutOffTransforms[BROTLI_TRANSFORMS_MAX_CUT_OFF + 1]; +} BrotliTransforms; + +/* T is BrotliTransforms*; result is uint8_t. */ +#define BROTLI_TRANSFORM_PREFIX_ID(T, I) ((T)->transforms[((I) * 3) + 0]) +#define BROTLI_TRANSFORM_TYPE(T, I) ((T)->transforms[((I) * 3) + 1]) +#define BROTLI_TRANSFORM_SUFFIX_ID(T, I) ((T)->transforms[((I) * 3) + 2]) + +/* T is BrotliTransforms*; result is const uint8_t*. */ +#define BROTLI_TRANSFORM_PREFIX(T, I) (&(T)->prefix_suffix[ \ + (T)->prefix_suffix_map[BROTLI_TRANSFORM_PREFIX_ID(T, I)]]) +#define BROTLI_TRANSFORM_SUFFIX(T, I) (&(T)->prefix_suffix[ \ + (T)->prefix_suffix_map[BROTLI_TRANSFORM_SUFFIX_ID(T, I)]]) + +BROTLI_COMMON_API const BrotliTransforms* BrotliGetTransforms(void); + +BROTLI_COMMON_API int BrotliTransformDictionaryWord( + uint8_t* dst, const uint8_t* word, int len, + const BrotliTransforms* transforms, int transform_idx); + +#if defined(__cplusplus) || defined(c_plusplus) +} /* extern "C" */ +#endif + +#endif /* BROTLI_COMMON_TRANSFORM_H_ */ diff --git a/c/dec/bit_reader.h b/c/dec/bit_reader.h index 21c59d7..39e4873 100644 --- a/c/dec/bit_reader.h +++ b/c/dec/bit_reader.h @@ -20,7 +20,7 @@ extern "C" { #define BROTLI_SHORT_FILL_BIT_WINDOW_READ (sizeof(brotli_reg_t) >> 1) -static const uint32_t kBitMask[33] = { 0x0000, +static const uint32_t kBitMask[33] = { 0x00000000, 0x00000001, 0x00000003, 0x00000007, 0x0000000F, 0x0000001F, 0x0000003F, 0x0000007F, 0x000000FF, 0x000001FF, 0x000003FF, 0x000007FF, 0x00000FFF, @@ -35,7 +35,7 @@ static BROTLI_INLINE uint32_t BitMask(uint32_t n) { if (BROTLI_IS_CONSTANT(n) || BROTLI_HAS_UBFX) { /* Masking with this expression turns to a single "Unsigned Bit Field Extract" UBFX instruction on ARM. 
*/ - return ~((0xffffffffU) << n); + return ~((0xFFFFFFFFu) << n); } else { return kBitMask[n]; } @@ -58,8 +58,9 @@ typedef struct { /* Initializes the BrotliBitReader fields. */ BROTLI_INTERNAL void BrotliInitBitReader(BrotliBitReader* const br); -/* Ensures that accumulator is not empty. May consume one byte of input. - Returns 0 if data is required but there is no input available. +/* Ensures that accumulator is not empty. + May consume up to sizeof(brotli_reg_t) - 1 bytes of input. + Returns BROTLI_FALSE if data is required but there is no input available. For BROTLI_ALIGNED_READ this function also prepares bit reader for aligned reading. */ BROTLI_INTERNAL BROTLI_BOOL BrotliWarmupBitReader(BrotliBitReader* const br); @@ -98,9 +99,9 @@ static BROTLI_INLINE BROTLI_BOOL BrotliCheckInputAmount( return TO_BROTLI_BOOL(br->avail_in >= num); } -/* Guarantees that there are at least n_bits + 1 bits in accumulator. +/* Guarantees that there are at least |n_bits| + 1 bits in accumulator. Precondition: accumulator contains at least 1 bit. - n_bits should be in the range [1..24] for regular build. For portable + |n_bits| should be in the range [1..24] for regular build. For portable non-64-bit little-endian build only 16 bits are safe to request. */ static BROTLI_INLINE void BrotliFillBitWindow( BrotliBitReader* const br, uint32_t n_bits) { @@ -158,7 +159,8 @@ static BROTLI_INLINE void BrotliFillBitWindow16(BrotliBitReader* const br) { BrotliFillBitWindow(br, 17); } -/* Pulls one byte of input to accumulator. */ +/* Tries to pull one byte of input to accumulator. + Returns BROTLI_FALSE if there is no input available. */ static BROTLI_INLINE BROTLI_BOOL BrotliPullByte(BrotliBitReader* const br) { if (br->avail_in == 0) { return BROTLI_FALSE; @@ -190,15 +192,16 @@ static BROTLI_INLINE uint32_t BrotliGet16BitsUnmasked( return (uint32_t)BrotliGetBitsUnmasked(br); } -/* Returns the specified number of bits from |br| without advancing bit pos. 
*/ +/* Returns the specified number of bits from |br| without advancing bit + position. */ static BROTLI_INLINE uint32_t BrotliGetBits( BrotliBitReader* const br, uint32_t n_bits) { BrotliFillBitWindow(br, n_bits); return (uint32_t)BrotliGetBitsUnmasked(br) & BitMask(n_bits); } -/* Tries to peek the specified amount of bits. Returns 0, if there is not - enough input. */ +/* Tries to peek the specified amount of bits. Returns BROTLI_FALSE, if there + is not enough input. */ static BROTLI_INLINE BROTLI_BOOL BrotliSafeGetBits( BrotliBitReader* const br, uint32_t n_bits, uint32_t* val) { while (BrotliGetAvailableBits(br) < n_bits) { @@ -210,7 +213,7 @@ static BROTLI_INLINE BROTLI_BOOL BrotliSafeGetBits( return BROTLI_TRUE; } -/* Advances the bit pos by n_bits. */ +/* Advances the bit pos by |n_bits|. */ static BROTLI_INLINE void BrotliDropBits( BrotliBitReader* const br, uint32_t n_bits) { br->bit_pos_ += n_bits; @@ -230,7 +233,7 @@ static BROTLI_INLINE void BrotliBitReaderUnload(BrotliBitReader* br) { } /* Reads the specified number of bits from |br| and advances the bit pos. - Precondition: accumulator MUST contain at least n_bits. */ + Precondition: accumulator MUST contain at least |n_bits|. */ static BROTLI_INLINE void BrotliTakeBits( BrotliBitReader* const br, uint32_t n_bits, uint32_t* val) { *val = (uint32_t)BrotliGetBitsUnmasked(br) & BitMask(n_bits); @@ -259,8 +262,8 @@ static BROTLI_INLINE uint32_t BrotliReadBits( } } -/* Tries to read the specified amount of bits. Returns 0, if there is not - enough input. n_bits MUST be positive. */ +/* Tries to read the specified amount of bits. Returns BROTLI_FALSE, if there + is not enough input. |n_bits| MUST be positive. 
*/ static BROTLI_INLINE BROTLI_BOOL BrotliSafeReadBits( BrotliBitReader* const br, uint32_t n_bits, uint32_t* val) { while (BrotliGetAvailableBits(br) < n_bits) { @@ -284,7 +287,7 @@ static BROTLI_INLINE BROTLI_BOOL BrotliJumpToByteBoundary(BrotliBitReader* br) { } /* Copies remaining input bytes stored in the bit reader to the output. Value - num may not be larger than BrotliGetRemainingBytes. The bit reader must be + |num| may not be larger than BrotliGetRemainingBytes. The bit reader must be warmed up again after this. */ static BROTLI_INLINE void BrotliCopyBytes(uint8_t* dest, BrotliBitReader* br, size_t num) { diff --git a/c/dec/context.h b/c/dec/context.h deleted file mode 100644 index 9402cbe..0000000 --- a/c/dec/context.h +++ /dev/null @@ -1,251 +0,0 @@ -/* Copyright 2013 Google Inc. All Rights Reserved. - - Distributed under MIT license. - See file LICENSE for detail or copy at https://opensource.org/licenses/MIT -*/ - -/* Lookup table to map the previous two bytes to a context id. - - There are four different context modeling modes defined here: - CONTEXT_LSB6: context id is the least significant 6 bits of the last byte, - CONTEXT_MSB6: context id is the most significant 6 bits of the last byte, - CONTEXT_UTF8: second-order context model tuned for UTF8-encoded text, - CONTEXT_SIGNED: second-order context model tuned for signed integers. - - The context id for the UTF8 context model is calculated as follows. If p1 - and p2 are the previous two bytes, we calculate the context as - - context = kContextLookup[p1] | kContextLookup[p2 + 256]. - - If the previous two bytes are ASCII characters (i.e. < 128), this will be - equivalent to - - context = 4 * context1(p1) + context2(p2), - - where context1 is based on the previous byte in the following way: - - 0 : non-ASCII control - 1 : \t, \n, \r - 2 : space - 3 : other punctuation - 4 : " ' - 5 : % - 6 : ( < [ { - 7 : ) > ] } - 8 : , ; : - 9 : . 
- 10 : = - 11 : number - 12 : upper-case vowel - 13 : upper-case consonant - 14 : lower-case vowel - 15 : lower-case consonant - - and context2 is based on the second last byte: - - 0 : control, space - 1 : punctuation - 2 : upper-case letter, number - 3 : lower-case letter - - If the last byte is ASCII, and the second last byte is not (in a valid UTF8 - stream it will be a continuation byte, value between 128 and 191), the - context is the same as if the second last byte was an ASCII control or space. - - If the last byte is a UTF8 lead byte (value >= 192), then the next byte will - be a continuation byte and the context id is 2 or 3 depending on the LSB of - the last byte and to a lesser extent on the second last byte if it is ASCII. - - If the last byte is a UTF8 continuation byte, the second last byte can be: - - continuation byte: the next byte is probably ASCII or lead byte (assuming - 4-byte UTF8 characters are rare) and the context id is 0 or 1. - - lead byte (192 - 207): next byte is ASCII or lead byte, context is 0 or 1 - - lead byte (208 - 255): next byte is continuation byte, context is 2 or 3 - - The possible value combinations of the previous two bytes, the range of - context ids and the type of the next byte is summarized in the table below: - - |--------\-----------------------------------------------------------------| - | \ Last byte | - | Second \---------------------------------------------------------------| - | last byte \ ASCII | cont. byte | lead byte | - | \ (0-127) | (128-191) | (192-) | - |=============|===================|=====================|==================| - | ASCII | next: ASCII/lead | not valid | next: cont. | - | (0-127) | context: 4 - 63 | | context: 2 - 3 | - |-------------|-------------------|---------------------|------------------| - | cont. byte | next: ASCII/lead | next: ASCII/lead | next: cont. 
| - | (128-191) | context: 4 - 63 | context: 0 - 1 | context: 2 - 3 | - |-------------|-------------------|---------------------|------------------| - | lead byte | not valid | next: ASCII/lead | not valid | - | (192-207) | | context: 0 - 1 | | - |-------------|-------------------|---------------------|------------------| - | lead byte | not valid | next: cont. | not valid | - | (208-) | | context: 2 - 3 | | - |-------------|-------------------|---------------------|------------------| - - The context id for the signed context mode is calculated as: - - context = (kContextLookup[512 + p1] << 3) | kContextLookup[512 + p2]. - - For any context modeling modes, the context ids can be calculated by |-ing - together two lookups from one table using context model dependent offsets: - - context = kContextLookup[offset1 + p1] | kContextLookup[offset2 + p2]. - - where offset1 and offset2 are dependent on the context mode. -*/ - -#ifndef BROTLI_DEC_CONTEXT_H_ -#define BROTLI_DEC_CONTEXT_H_ - -#include - -enum ContextType { - CONTEXT_LSB6 = 0, - CONTEXT_MSB6 = 1, - CONTEXT_UTF8 = 2, - CONTEXT_SIGNED = 3 -}; - -/* Common context lookup table for all context modes. */ -static const uint8_t kContextLookup[1792] = { - /* CONTEXT_UTF8, last byte. */ - /* ASCII range. */ - 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 0, 0, 4, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 8, 12, 16, 12, 12, 20, 12, 16, 24, 28, 12, 12, 32, 12, 36, 12, - 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 32, 32, 24, 40, 28, 12, - 12, 48, 52, 52, 52, 48, 52, 52, 52, 48, 52, 52, 52, 52, 52, 48, - 52, 52, 52, 52, 52, 48, 52, 52, 52, 52, 52, 24, 12, 28, 12, 12, - 12, 56, 60, 60, 60, 56, 60, 60, 60, 56, 60, 60, 60, 60, 60, 56, - 60, 60, 60, 60, 60, 56, 60, 60, 60, 60, 60, 24, 12, 28, 12, 0, - /* UTF8 continuation byte range. 
*/ - 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, - 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, - 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, - 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, - /* UTF8 lead byte range. */ - 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, - 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, - 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, - 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, - /* CONTEXT_UTF8 second last byte. */ - /* ASCII range. */ - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, - 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, - 1, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, - 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 1, 1, 1, 1, 0, - /* UTF8 continuation byte range. */ - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - /* UTF8 lead byte range. */ - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - /* CONTEXT_SIGNED, second last byte. 
*/ - 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, - 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, - 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, - 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7, - /* CONTEXT_SIGNED, last byte, same as the above values shifted by 3 bits. */ - 0, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, - 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, - 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, - 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, - 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, - 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, - 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, - 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 56, - /* CONTEXT_LSB6, last byte. 
*/ - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, - 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, - 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, - 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, - 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, - 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, - 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, - 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, - 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, - 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, - 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, - 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, - 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, - /* CONTEXT_MSB6, last byte. 
*/ - 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, - 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, - 8, 8, 8, 8, 9, 9, 9, 9, 10, 10, 10, 10, 11, 11, 11, 11, - 12, 12, 12, 12, 13, 13, 13, 13, 14, 14, 14, 14, 15, 15, 15, 15, - 16, 16, 16, 16, 17, 17, 17, 17, 18, 18, 18, 18, 19, 19, 19, 19, - 20, 20, 20, 20, 21, 21, 21, 21, 22, 22, 22, 22, 23, 23, 23, 23, - 24, 24, 24, 24, 25, 25, 25, 25, 26, 26, 26, 26, 27, 27, 27, 27, - 28, 28, 28, 28, 29, 29, 29, 29, 30, 30, 30, 30, 31, 31, 31, 31, - 32, 32, 32, 32, 33, 33, 33, 33, 34, 34, 34, 34, 35, 35, 35, 35, - 36, 36, 36, 36, 37, 37, 37, 37, 38, 38, 38, 38, 39, 39, 39, 39, - 40, 40, 40, 40, 41, 41, 41, 41, 42, 42, 42, 42, 43, 43, 43, 43, - 44, 44, 44, 44, 45, 45, 45, 45, 46, 46, 46, 46, 47, 47, 47, 47, - 48, 48, 48, 48, 49, 49, 49, 49, 50, 50, 50, 50, 51, 51, 51, 51, - 52, 52, 52, 52, 53, 53, 53, 53, 54, 54, 54, 54, 55, 55, 55, 55, - 56, 56, 56, 56, 57, 57, 57, 57, 58, 58, 58, 58, 59, 59, 59, 59, - 60, 60, 60, 60, 61, 61, 61, 61, 62, 62, 62, 62, 63, 63, 63, 63, - /* CONTEXT_{M,L}SB6, second last byte, */ - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -}; - -static const int kContextLookupOffsets[8] = { - /* CONTEXT_LSB6 */ - 1024, 1536, - /* CONTEXT_MSB6 */ - 1280, 1536, - /* 
CONTEXT_UTF8 */ - 0, 256, - /* CONTEXT_SIGNED */ - 768, 512, -}; - -#endif /* BROTLI_DEC_CONTEXT_H_ */ diff --git a/c/dec/decode.c b/c/dec/decode.c index 846557a..630edeb 100644 --- a/c/dec/decode.c +++ b/c/dec/decode.c @@ -14,15 +14,15 @@ #include /* memcpy, memset */ #include "../common/constants.h" +#include "../common/context.h" #include "../common/dictionary.h" #include "../common/platform.h" +#include "../common/transform.h" #include "../common/version.h" #include "./bit_reader.h" -#include "./context.h" #include "./huffman.h" #include "./prefix.h" #include "./state.h" -#include "./transform.h" #if defined(__cplusplus) || defined(c_plusplus) extern "C" { @@ -37,7 +37,7 @@ extern "C" { (unsigned long)(idx), (unsigned long)array_name[idx])) #define HUFFMAN_TABLE_BITS 8U -#define HUFFMAN_TABLE_MASK 0xff +#define HUFFMAN_TABLE_MASK 0xFF /* We need the slack region for the following reasons: - doing up to two 16-byte copies for fast backward copying @@ -59,11 +59,16 @@ static const uint8_t kCodeLengthPrefixValue[16] = { BROTLI_BOOL BrotliDecoderSetParameter( BrotliDecoderState* state, BrotliDecoderParameter p, uint32_t value) { + if (state->state != BROTLI_STATE_UNINITED) return BROTLI_FALSE; switch (p) { case BROTLI_DECODER_PARAM_DISABLE_RING_BUFFER_REALLOCATION: state->canny_ringbuffer_allocation = !!value ? 
0 : 1; return BROTLI_TRUE; + case BROTLI_DECODER_PARAM_LARGE_WINDOW: + state->large_window = TO_BROTLI_BOOL(!!value); + return BROTLI_TRUE; + default: return BROTLI_FALSE; } } @@ -80,8 +85,15 @@ BrotliDecoderState* BrotliDecoderCreateInstance( BROTLI_DUMP(); return 0; } - BrotliDecoderStateInitWithCustomAllocators( - state, alloc_func, free_func, opaque); + if (!BrotliDecoderStateInit(state, alloc_func, free_func, opaque)) { + BROTLI_DUMP(); + if (!alloc_func && !free_func) { + free(state); + } else if (alloc_func && free_func) { + free_func(opaque, state); + } + return 0; + } return state; } @@ -97,39 +109,61 @@ void BrotliDecoderDestroyInstance(BrotliDecoderState* state) { } } -/* Saves error code and converts it to BrotliDecoderResult */ +/* Saves error code and converts it to BrotliDecoderResult. */ static BROTLI_NOINLINE BrotliDecoderResult SaveErrorCode( BrotliDecoderState* s, BrotliDecoderErrorCode e) { s->error_code = (int)e; switch (e) { case BROTLI_DECODER_SUCCESS: return BROTLI_DECODER_RESULT_SUCCESS; + case BROTLI_DECODER_NEEDS_MORE_INPUT: return BROTLI_DECODER_RESULT_NEEDS_MORE_INPUT; + case BROTLI_DECODER_NEEDS_MORE_OUTPUT: return BROTLI_DECODER_RESULT_NEEDS_MORE_OUTPUT; + default: return BROTLI_DECODER_RESULT_ERROR; } } -/* Decodes a number in the range [9..24], by reading 1 - 7 bits. - Precondition: bit-reader accumulator has at least 7 bits. */ -static uint32_t DecodeWindowBits(BrotliBitReader* br) { +/* Decodes WBITS by reading 1 - 7 bits, or 0x11 for "Large Window Brotli". + Precondition: bit-reader accumulator has at least 8 bits. 
*/ +static BrotliDecoderErrorCode DecodeWindowBits(BrotliDecoderState* s, + BrotliBitReader* br) { uint32_t n; + BROTLI_BOOL large_window = s->large_window; + s->large_window = BROTLI_FALSE; BrotliTakeBits(br, 1, &n); if (n == 0) { - return 16; + s->window_bits = 16; + return BROTLI_DECODER_SUCCESS; } BrotliTakeBits(br, 3, &n); if (n != 0) { - return 17 + n; + s->window_bits = 17 + n; + return BROTLI_DECODER_SUCCESS; } BrotliTakeBits(br, 3, &n); + if (n == 1) { + if (large_window) { + BrotliTakeBits(br, 1, &n); + if (n == 1) { + return BROTLI_FAILURE(BROTLI_DECODER_ERROR_FORMAT_WINDOW_BITS); + } + s->large_window = BROTLI_TRUE; + return BROTLI_DECODER_SUCCESS; + } else { + return BROTLI_FAILURE(BROTLI_DECODER_ERROR_FORMAT_WINDOW_BITS); + } + } if (n != 0) { - return 8 + n; + s->window_bits = 8 + n; + return BROTLI_DECODER_SUCCESS; } - return 17; + s->window_bits = 17; + return BROTLI_DECODER_SUCCESS; } static BROTLI_INLINE void memmove16(uint8_t* dst, uint8_t* src) { @@ -342,7 +376,7 @@ static BROTLI_NOINLINE BROTLI_BOOL SafeDecodeSymbol( *result = table->value; return BROTLI_TRUE; } - return BROTLI_FALSE; /* No valid bits at all. */ + return BROTLI_FALSE; /* No valid bits at all. */ } val = (uint32_t)BrotliGetBitsUnmasked(br); table += val & HUFFMAN_TABLE_MASK; @@ -352,11 +386,11 @@ static BROTLI_NOINLINE BROTLI_BOOL SafeDecodeSymbol( *result = table->value; return BROTLI_TRUE; } else { - return BROTLI_FALSE; /* Not enough bits for the first level. */ + return BROTLI_FALSE; /* Not enough bits for the first level. */ } } if (available_bits <= HUFFMAN_TABLE_BITS) { - return BROTLI_FALSE; /* Not enough bits to move to the second level. */ + return BROTLI_FALSE; /* Not enough bits to move to the second level. */ } /* Speculatively drop HUFFMAN_TABLE_BITS. 
*/ @@ -364,7 +398,7 @@ static BROTLI_NOINLINE BROTLI_BOOL SafeDecodeSymbol( available_bits -= HUFFMAN_TABLE_BITS; table += table->value + val; if (available_bits < table->bits) { - return BROTLI_FALSE; /* Not enough bits for the second level. */ + return BROTLI_FALSE; /* Not enough bits for the second level. */ } BrotliDropBits(br, HUFFMAN_TABLE_BITS + table->bits); @@ -428,12 +462,11 @@ static BROTLI_INLINE uint32_t Log2Floor(uint32_t x) { } /* Reads (s->symbol + 1) symbols. - Totally 1..4 symbols are read, 1..10 bits each. - The list of symbols MUST NOT contain duplicates. - */ + Totally 1..4 symbols are read, 1..11 bits each. + The list of symbols MUST NOT contain duplicates. */ static BrotliDecoderErrorCode ReadSimpleHuffmanSymbols( - uint32_t alphabet_size, BrotliDecoderState* s) { - /* max_bits == 1..10; symbol == 0..3; 1..40 bits will be read. */ + uint32_t alphabet_size, uint32_t max_symbol, BrotliDecoderState* s) { + /* max_bits == 1..11; symbol == 0..3; 1..44 bits will be read. 
*/ BrotliBitReader* br = &s->br; uint32_t max_bits = Log2Floor(alphabet_size - 1); uint32_t i = s->sub_loop_counter; @@ -445,7 +478,7 @@ static BrotliDecoderErrorCode ReadSimpleHuffmanSymbols( s->substate_huffman = BROTLI_STATE_HUFFMAN_SIMPLE_READ; return BROTLI_DECODER_NEEDS_MORE_INPUT; } - if (v >= alphabet_size) { + if (v >= max_symbol) { return BROTLI_FAILURE(BROTLI_DECODER_ERROR_FORMAT_SIMPLE_HUFFMAN_ALPHABET); } @@ -471,14 +504,13 @@ static BrotliDecoderErrorCode ReadSimpleHuffmanSymbols( B) remember code length (if it is not 0) C) extend corresponding index-chain D) reduce the Huffman space - E) update the histogram - */ + E) update the histogram */ static BROTLI_INLINE void ProcessSingleCodeLength(uint32_t code_len, uint32_t* symbol, uint32_t* repeat, uint32_t* space, uint32_t* prev_code_len, uint16_t* symbol_lists, uint16_t* code_length_histo, int* next_symbol) { *repeat = 0; - if (code_len != 0) { /* code_len == 1..15 */ + if (code_len != 0) { /* code_len == 1..15 */ symbol_lists[next_symbol[code_len]] = (uint16_t)(*symbol); next_symbol[code_len] = (int)(*symbol); *prev_code_len = code_len; @@ -498,8 +530,7 @@ static BROTLI_INLINE void ProcessSingleCodeLength(uint32_t code_len, D) For each symbol do the same operations as in ProcessSingleCodeLength PRECONDITION: code_len == BROTLI_REPEAT_PREVIOUS_CODE_LENGTH or - code_len == BROTLI_REPEAT_ZERO_CODE_LENGTH - */ + code_len == BROTLI_REPEAT_ZERO_CODE_LENGTH */ static BROTLI_INLINE void ProcessRepeatedCodeLength(uint32_t code_len, uint32_t repeat_delta, uint32_t alphabet_size, uint32_t* symbol, uint32_t* repeat, uint32_t* space, uint32_t* prev_code_len, @@ -576,12 +607,12 @@ static BrotliDecoderErrorCode ReadSymbolCodeLengths( BrotliFillBitWindow16(br); p += BrotliGetBitsUnmasked(br) & BitMask(BROTLI_HUFFMAN_MAX_CODE_LENGTH_CODE_LENGTH); - BrotliDropBits(br, p->bits); /* Use 1..5 bits */ + BrotliDropBits(br, p->bits); /* Use 1..5 bits. 
*/ code_len = p->value; /* code_len == 0..17 */ if (code_len < BROTLI_REPEAT_PREVIOUS_CODE_LENGTH) { ProcessSingleCodeLength(code_len, &symbol, &repeat, &space, &prev_code_len, symbol_lists, code_length_histo, next_symbol); - } else { /* code_len == 16..17, extra_bits == 2..3 */ + } else { /* code_len == 16..17, extra_bits == 2..3 */ uint32_t extra_bits = (code_len == BROTLI_REPEAT_PREVIOUS_CODE_LENGTH) ? 2 : 3; uint32_t repeat_delta = @@ -616,13 +647,13 @@ static BrotliDecoderErrorCode SafeReadSymbolCodeLengths( get_byte = BROTLI_TRUE; continue; } - code_len = p->value; /* code_len == 0..17 */ + code_len = p->value; /* code_len == 0..17 */ if (code_len < BROTLI_REPEAT_PREVIOUS_CODE_LENGTH) { BrotliDropBits(br, p->bits); ProcessSingleCodeLength(code_len, &s->symbol, &s->repeat, &s->space, &s->prev_code_len, s->symbol_lists, s->code_length_histo, s->next_symbol); - } else { /* code_len == 16..17, extra_bits == 2..3 */ + } else { /* code_len == 16..17, extra_bits == 2..3 */ uint32_t extra_bits = code_len - 14U; uint32_t repeat_delta = (bits >> p->bits) & BitMask(extra_bits); if (available_bits < p->bits + extra_bits) { @@ -674,7 +705,7 @@ static BrotliDecoderErrorCode ReadCodeLengthCodeLengths(BrotliDecoderState* s) { ++num_codes; ++s->code_length_histo[v]; if (space - 1U >= 32U) { - /* space is 0 or wrapped around */ + /* space is 0 or wrapped around. */ break; } } @@ -689,22 +720,22 @@ static BrotliDecoderErrorCode ReadCodeLengthCodeLengths(BrotliDecoderState* s) { There are 2 scenarios: A) Huffman code contains only few symbols (1..4). Those symbols are read directly; their code lengths are defined by the number of symbols. - For this scenario 4 - 45 bits will be read. + For this scenario 4 - 49 bits will be read. B) 2-phase decoding: B.1) Small Huffman table is decoded; it is specified with code lengths encoded with predefined entropy code. 32 - 74 bits are used. B.2) Decoded table is used to decode code lengths of symbols in resulting - Huffman table. 
In worst case 3520 bits are read. -*/ + Huffman table. In worst case 3520 bits are read. */ static BrotliDecoderErrorCode ReadHuffmanCode(uint32_t alphabet_size, + uint32_t max_symbol, HuffmanCode* table, uint32_t* opt_table_size, BrotliDecoderState* s) { BrotliBitReader* br = &s->br; /* Unnecessary masking, but might be good for safety. */ - alphabet_size &= 0x3ff; - /* State machine */ + alphabet_size &= 0x7FF; + /* State machine. */ for (;;) { switch (s->substate_huffman) { case BROTLI_STATE_HUFFMAN_NONE: @@ -717,7 +748,7 @@ static BrotliDecoderErrorCode ReadHuffmanCode(uint32_t alphabet_size, 0 for no skipping, 2 skips 2 code lengths, 3 skips 3 code lengths */ if (s->sub_loop_counter != 1) { s->space = 32; - s->repeat = 0; /* num_codes */ + s->repeat = 0; /* num_codes */ memset(&s->code_length_histo[0], 0, sizeof(s->code_length_histo[0]) * (BROTLI_HUFFMAN_MAX_CODE_LENGTH_CODE_LENGTH + 1)); memset(&s->code_length_code_lengths[0], 0, @@ -729,20 +760,22 @@ static BrotliDecoderErrorCode ReadHuffmanCode(uint32_t alphabet_size, case BROTLI_STATE_HUFFMAN_SIMPLE_SIZE: /* Read symbols, codes & code lengths directly. */ - if (!BrotliSafeReadBits(br, 2, &s->symbol)) { /* num_symbols */ + if (!BrotliSafeReadBits(br, 2, &s->symbol)) { /* num_symbols */ s->substate_huffman = BROTLI_STATE_HUFFMAN_SIMPLE_SIZE; return BROTLI_DECODER_NEEDS_MORE_INPUT; } s->sub_loop_counter = 0; /* No break, transit to the next state. */ + case BROTLI_STATE_HUFFMAN_SIMPLE_READ: { BrotliDecoderErrorCode result = - ReadSimpleHuffmanSymbols(alphabet_size, s); + ReadSimpleHuffmanSymbols(alphabet_size, max_symbol, s); if (result != BROTLI_DECODER_SUCCESS) { return result; } /* No break, transit to the next state. */ } + case BROTLI_STATE_HUFFMAN_SIMPLE_BUILD: { uint32_t table_size; if (s->symbol == 3) { @@ -787,11 +820,12 @@ static BrotliDecoderErrorCode ReadHuffmanCode(uint32_t alphabet_size, s->substate_huffman = BROTLI_STATE_HUFFMAN_LENGTH_SYMBOLS; /* No break, transit to the next state. 
*/ } + case BROTLI_STATE_HUFFMAN_LENGTH_SYMBOLS: { uint32_t table_size; - BrotliDecoderErrorCode result = ReadSymbolCodeLengths(alphabet_size, s); + BrotliDecoderErrorCode result = ReadSymbolCodeLengths(max_symbol, s); if (result == BROTLI_DECODER_NEEDS_MORE_INPUT) { - result = SafeReadSymbolCodeLengths(alphabet_size, s); + result = SafeReadSymbolCodeLengths(max_symbol, s); } if (result != BROTLI_DECODER_SUCCESS) { return result; @@ -823,7 +857,7 @@ static BROTLI_INLINE uint32_t ReadBlockLength(const HuffmanCode* table, uint32_t code; uint32_t nbits; code = ReadSymbol(table, br); - nbits = kBlockLengthPrefixCode[code].nbits; /* nbits == 2..24 */ + nbits = kBlockLengthPrefixCode[code].nbits; /* nbits == 2..24 */ return kBlockLengthPrefixCode[code].offset + BrotliReadBits(br, nbits); } @@ -842,7 +876,7 @@ static BROTLI_INLINE BROTLI_BOOL SafeReadBlockLength( } { uint32_t bits; - uint32_t nbits = kBlockLengthPrefixCode[index].nbits; /* nbits == 2..24 */ + uint32_t nbits = kBlockLengthPrefixCode[index].nbits; /* nbits == 2..24 */ if (!BrotliSafeReadBits(br, nbits, &bits)) { s->block_length_index = index; s->substate_read_block_length = BROTLI_STATE_READ_BLOCK_LENGTH_SUFFIX; @@ -867,8 +901,7 @@ static BROTLI_INLINE BROTLI_BOOL SafeReadBlockLength( of Y values, and reinitialize only first elements in L. Most of input values are 0 and 1. To reduce number of branches, we replace - inner for loop with do-while. - */ + inner for loop with do-while. */ static BROTLI_NOINLINE void InverseMoveToFrontTransform( uint8_t* v, uint32_t v_len, BrotliDecoderState* state) { /* Reinitialize elements that could have been changed. */ @@ -884,7 +917,7 @@ static BROTLI_NOINLINE void InverseMoveToFrontTransform( /* Initialize list using 4 consequent values pattern. */ mtf[0] = pattern; do { - pattern += 0x04040404; /* Advance all 4 values by 4. */ + pattern += 0x04040404; /* Advance all 4 values by 4. 
*/ mtf[i] = pattern; i++; } while (i <= upper_bound); @@ -917,7 +950,8 @@ static BrotliDecoderErrorCode HuffmanTreeGroupDecode( while (s->htree_index < group->num_htrees) { uint32_t table_size; BrotliDecoderErrorCode result = - ReadHuffmanCode(group->alphabet_size, s->next, &table_size, s); + ReadHuffmanCode(group->alphabet_size, group->max_symbol, + s->next, &table_size, s); if (result != BROTLI_DECODER_SUCCESS) return result; group->htrees[s->htree_index] = s->next; s->next += table_size; @@ -934,8 +968,7 @@ static BrotliDecoderErrorCode HuffmanTreeGroupDecode( 2) Decode Huffman table using ReadHuffmanCode function. This table will be used for reading context map items. 3) Read context map items; "0" values could be run-length encoded. - 4) Optionally, apply InverseMoveToFront transform to the resulting map. - */ + 4) Optionally, apply InverseMoveToFront transform to the resulting map. */ static BrotliDecoderErrorCode DecodeContextMap(uint32_t context_map_size, uint32_t* num_htrees, uint8_t** context_map_arg, @@ -964,6 +997,7 @@ static BrotliDecoderErrorCode DecodeContextMap(uint32_t context_map_size, } s->substate_context_map = BROTLI_STATE_CONTEXT_MAP_READ_PREFIX; /* No break, continue to next state. */ + case BROTLI_STATE_CONTEXT_MAP_READ_PREFIX: { uint32_t bits; /* In next stage ReadHuffmanCode uses at least 4 bits, so it is safe @@ -982,13 +1016,17 @@ static BrotliDecoderErrorCode DecodeContextMap(uint32_t context_map_size, s->substate_context_map = BROTLI_STATE_CONTEXT_MAP_HUFFMAN; /* No break, continue to next state. 
*/ } - case BROTLI_STATE_CONTEXT_MAP_HUFFMAN: - result = ReadHuffmanCode(*num_htrees + s->max_run_length_prefix, + + case BROTLI_STATE_CONTEXT_MAP_HUFFMAN: { + uint32_t alphabet_size = *num_htrees + s->max_run_length_prefix; + result = ReadHuffmanCode(alphabet_size, alphabet_size, s->context_map_table, NULL, s); if (result != BROTLI_DECODER_SUCCESS) return result; s->code = 0xFFFF; s->substate_context_map = BROTLI_STATE_CONTEXT_MAP_DECODE; /* No break, continue to next state. */ + } + case BROTLI_STATE_CONTEXT_MAP_DECODE: { uint32_t context_index = s->context_index; uint32_t max_run_length_prefix = s->max_run_length_prefix; @@ -1037,6 +1075,7 @@ static BrotliDecoderErrorCode DecodeContextMap(uint32_t context_map_size, } /* No break, continue to next state. */ } + case BROTLI_STATE_CONTEXT_MAP_TRANSFORM: { uint32_t bits; if (!BrotliSafeReadBits(br, 1, &bits)) { @@ -1049,6 +1088,7 @@ static BrotliDecoderErrorCode DecodeContextMap(uint32_t context_map_size, s->substate_context_map = BROTLI_STATE_CONTEXT_MAP_NONE; return BROTLI_DECODER_SUCCESS; } + default: return BROTLI_FAILURE(BROTLI_DECODER_ERROR_UNREACHABLE); @@ -1067,8 +1107,11 @@ static BROTLI_INLINE BROTLI_BOOL DecodeBlockTypeAndLength( BrotliBitReader* br = &s->br; uint32_t* ringbuffer = &s->block_type_rb[tree_type * 2]; uint32_t block_type; + if (max_block_type <= 1) { + return BROTLI_FALSE; + } - /* Read 0..15 + 3..39 bits */ + /* Read 0..15 + 3..39 bits. 
*/ if (!safe) { block_type = ReadSymbol(type_tree, br); s->block_length[tree_type] = ReadBlockLength(len_tree, br); @@ -1125,9 +1168,8 @@ static BROTLI_INLINE void PrepareLiteralDecoding(BrotliDecoderState* s) { trivial = s->trivial_literal_contexts[block_type >> 5]; s->trivial_literal_context = (trivial >> (block_type & 31)) & 1; s->literal_htree = s->literal_hgroup.htrees[s->context_map_slice[0]]; - context_mode = s->context_modes[block_type]; - s->context_lookup1 = &kContextLookup[kContextLookupOffsets[context_mode]]; - s->context_lookup2 = &kContextLookup[kContextLookupOffsets[context_mode + 1]]; + context_mode = s->context_modes[block_type] & 3; + s->context_lookup = BROTLI_CONTEXT_LUT(context_mode); } /* Decodes the block type and updates the state for literal context. @@ -1164,6 +1206,7 @@ static BROTLI_INLINE BROTLI_BOOL DecodeCommandBlockSwitchInternal( static void BROTLI_NOINLINE DecodeCommandBlockSwitch(BrotliDecoderState* s) { DecodeCommandBlockSwitchInternal(0, s); } + static BROTLI_BOOL BROTLI_NOINLINE SafeDecodeCommandBlockSwitch( BrotliDecoderState* s) { return DecodeCommandBlockSwitchInternal(1, s); @@ -1200,8 +1243,7 @@ static size_t UnwrittenBytes(const BrotliDecoderState* s, BROTLI_BOOL wrap) { /* Dumps output. Returns BROTLI_DECODER_NEEDS_MORE_OUTPUT only if there is more output to push - and either ring-buffer is as big as window size, or |force| is true. - */ + and either ring-buffer is as big as window size, or |force| is true. */ static BrotliDecoderErrorCode BROTLI_NOINLINE WriteRingBuffer( BrotliDecoderState* s, size_t* available_out, uint8_t** next_out, size_t* total_out, BROTLI_BOOL force) { @@ -1260,8 +1302,7 @@ static void BROTLI_NOINLINE WrapRingBuffer(BrotliDecoderState* s) { this function is called. Last two bytes of ring-buffer are initialized to 0, so context calculation - could be done uniformly for the first two and all other positions. -*/ + could be done uniformly for the first two and all other positions. 
*/ static BROTLI_BOOL BROTLI_NOINLINE BrotliEnsureRingBuffer( BrotliDecoderState* s) { uint8_t* old_ringbuffer = s->ringbuffer; @@ -1321,8 +1362,9 @@ static BrotliDecoderErrorCode BROTLI_NOINLINE CopyUncompressedBlockToOutput( return BROTLI_DECODER_NEEDS_MORE_INPUT; } s->substate_uncompressed = BROTLI_STATE_UNCOMPRESSED_WRITE; - /* No break, continue to next state */ + /* No break, continue to next state. */ } + case BROTLI_STATE_UNCOMPRESSED_WRITE: { BrotliDecoderErrorCode result; result = WriteRingBuffer( @@ -1346,8 +1388,7 @@ static BrotliDecoderErrorCode BROTLI_NOINLINE CopyUncompressedBlockToOutput( If we know the data size is small, do not allocate more ring buffer size than needed to reduce memory usage. - When this method is called, metablock size and flags MUST be decoded. -*/ + When this method is called, metablock size and flags MUST be decoded. */ static void BROTLI_NOINLINE BrotliCalculateRingBufferSize( BrotliDecoderState* s) { int window_size = 1 << s->window_bits; @@ -1378,7 +1419,7 @@ static void BROTLI_NOINLINE BrotliCalculateRingBufferSize( if (!!s->canny_ringbuffer_allocation) { /* Reduce ring buffer size to save memory when server is unscrupulous. In worst case memory usage might be 1.5x bigger for a short period of - ring buffer reallocation.*/ + ring buffer reallocation. 
*/ while ((new_ringbuffer_size >> 1) >= min_size) { new_ringbuffer_size >>= 1; } @@ -1398,7 +1439,7 @@ static BrotliDecoderErrorCode ReadContextModes(BrotliDecoderState* s) { s->loop_counter = i; return BROTLI_DECODER_NEEDS_MORE_INPUT; } - s->context_modes[i] = (uint8_t)(bits << 1); + s->context_modes[i] = (uint8_t)bits; BROTLI_LOG_ARRAY_INDEX(s->context_modes, i); i++; } @@ -1413,12 +1454,12 @@ static BROTLI_INLINE void TakeDistanceFromRingBuffer(BrotliDecoderState* s) { s->distance_context = 1; } else { int distance_code = s->distance_code << 1; - /* kDistanceShortCodeIndexOffset has 2-bit values from LSB: */ - /* 3, 2, 1, 0, 3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2 */ - const uint32_t kDistanceShortCodeIndexOffset = 0xaaafff1b; - /* kDistanceShortCodeValueOffset has 2-bit values from LSB: */ - /*-0, 0,-0, 0,-1, 1,-2, 2,-3, 3,-1, 1,-2, 2,-3, 3 */ - const uint32_t kDistanceShortCodeValueOffset = 0xfa5fa500; + /* kDistanceShortCodeIndexOffset has 2-bit values from LSB: + 3, 2, 1, 0, 3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2 */ + const uint32_t kDistanceShortCodeIndexOffset = 0xAAAFFF1B; + /* kDistanceShortCodeValueOffset has 2-bit values from LSB: + -0, 0,-0, 0,-1, 1,-2, 2,-3, 3,-1, 1,-2, 2,-3, 3 */ + const uint32_t kDistanceShortCodeValueOffset = 0xFA5FA500; int v = (s->dist_rb_idx + (int)(kDistanceShortCodeIndexOffset >> distance_code)) & 0x3; s->distance_code = s->dist_rb[v]; @@ -1428,9 +1469,9 @@ static BROTLI_INLINE void TakeDistanceFromRingBuffer(BrotliDecoderState* s) { } else { s->distance_code -= v; if (s->distance_code <= 0) { - /* A huge distance will cause a BROTLI_FAILURE() soon. */ - /* This is a little faster than failing here. */ - s->distance_code = 0x0fffffff; + /* A huge distance will cause a BROTLI_FAILURE() soon. + This is a little faster than failing here. */ + s->distance_code = 0x7FFFFFFF; } } } @@ -1446,7 +1487,7 @@ static BROTLI_INLINE BROTLI_BOOL SafeReadBits( } } -/* Precondition: s->distance_code < 0 */ +/* Precondition: s->distance_code < 0. 
*/ static BROTLI_INLINE BROTLI_BOOL ReadDistanceInternal( int safe, BrotliDecoderState* s, BrotliBitReader* br) { int distval; @@ -1462,10 +1503,10 @@ static BROTLI_INLINE BROTLI_BOOL ReadDistanceInternal( } s->distance_code = (int)code; } - /* Convert the distance code to the actual distance by possibly */ - /* looking up past distances from the s->ringbuffer. */ + /* Convert the distance code to the actual distance by possibly + looking up past distances from the s->ringbuffer. */ s->distance_context = 0; - if ((s->distance_code & ~0xf) == 0) { + if ((s->distance_code & ~0xF) == 0) { TakeDistanceFromRingBuffer(s); --s->block_length[2]; return BROTLI_TRUE; @@ -1481,14 +1522,14 @@ static BROTLI_INLINE BROTLI_BOOL ReadDistanceInternal( s->distance_code = (int)s->num_direct_distance_codes + offset + (int)BrotliReadBits(br, nbits); } else { - /* This branch also works well when s->distance_postfix_bits == 0 */ + /* This branch also works well when s->distance_postfix_bits == 0. */ uint32_t bits; postfix = distval & s->distance_postfix_mask; distval >>= s->distance_postfix_bits; nbits = ((uint32_t)distval >> 1) + 1; if (safe) { if (!SafeReadBits(br, nbits, &bits)) { - s->distance_code = -1; /* Restore precondition. */ + s->distance_code = -1; /* Restore precondition. */ BrotliBitReaderRestoreState(br, &memento); return BROTLI_FALSE; } @@ -1615,7 +1656,7 @@ CommandBegin: if (safe) { s->state = BROTLI_STATE_COMMAND_BEGIN; } - if (!CheckInputAmount(safe, br, 28)) { /* 156 bits + 7 bytes */ + if (!CheckInputAmount(safe, br, 28)) { /* 156 bits + 7 bytes */ s->state = BROTLI_STATE_COMMAND_BEGIN; result = BROTLI_DECODER_NEEDS_MORE_INPUT; goto saveStateAndReturn; @@ -1624,7 +1665,7 @@ CommandBegin: BROTLI_SAFE(DecodeCommandBlockSwitch(s)); goto CommandBegin; } - /* Read the insert/copy length in the command */ + /* Read the insert/copy length in the command. 
*/ BROTLI_SAFE(ReadCommand(s, br, &i)); BROTLI_LOG(("[ProcessCommandsInternal] pos = %d insert = %d copy = %d\n", pos, i, s->copy_length)); @@ -1637,13 +1678,13 @@ CommandInner: if (safe) { s->state = BROTLI_STATE_COMMAND_INNER; } - /* Read the literals in the command */ + /* Read the literals in the command. */ if (s->trivial_literal_context) { uint32_t bits; uint32_t value; PreloadSymbol(safe, s->literal_htree, br, &bits, &value); do { - if (!CheckInputAmount(safe, br, 28)) { /* 162 bits + 7 bytes */ + if (!CheckInputAmount(safe, br, 28)) { /* 162 bits + 7 bytes */ s->state = BROTLI_STATE_COMMAND_INNER; result = BROTLI_DECODER_NEEDS_MORE_INPUT; goto saveStateAndReturn; @@ -1679,7 +1720,7 @@ CommandInner: do { const HuffmanCode* hc; uint8_t context; - if (!CheckInputAmount(safe, br, 28)) { /* 162 bits + 7 bytes */ + if (!CheckInputAmount(safe, br, 28)) { /* 162 bits + 7 bytes */ s->state = BROTLI_STATE_COMMAND_INNER; result = BROTLI_DECODER_NEEDS_MORE_INPUT; goto saveStateAndReturn; @@ -1688,7 +1729,7 @@ CommandInner: BROTLI_SAFE(DecodeLiteralBlockSwitch(s)); if (s->trivial_literal_context) goto CommandInner; } - context = s->context_lookup1[p1] | s->context_lookup2[p2]; + context = BROTLI_CONTEXT(p1, p2, s->context_lookup); BROTLI_LOG_UINT(context); hc = s->literal_hgroup.htrees[s->context_map_slice[context]]; p2 = p1; @@ -1744,14 +1785,25 @@ CommandPostDecodeLiterals: } i = s->copy_length; /* Apply copy of LZ77 back-reference, or static dictionary reference if - the distance is larger than the max LZ77 distance */ + the distance is larger than the max LZ77 distance */ if (s->distance_code > s->max_distance) { - int address = s->distance_code - s->max_distance - 1; + /* The maximum allowed distance is BROTLI_MAX_ALLOWED_DISTANCE = 0x7FFFFFFC. + With this choice, no signed overflow can occur after decoding + a special distance code (e.g., after adding 3 to the last distance). 
*/ + if (s->distance_code > BROTLI_MAX_ALLOWED_DISTANCE) { + BROTLI_LOG(("Invalid backward reference. pos: %d distance: %d " + "len: %d bytes left: %d\n", + pos, s->distance_code, i, s->meta_block_remaining_len)); + return BROTLI_FAILURE(BROTLI_DECODER_ERROR_FORMAT_DISTANCE); + } if (i >= BROTLI_MIN_DICTIONARY_WORD_LENGTH && i <= BROTLI_MAX_DICTIONARY_WORD_LENGTH) { + int address = s->distance_code - s->max_distance - 1; const BrotliDictionary* words = s->dictionary; + const BrotliTransforms* transforms = s->transforms; int offset = (int)s->dictionary->offsets_by_length[i]; uint32_t shift = s->dictionary->size_bits_by_length[i]; + int mask = (int)BitMask(shift); int word_idx = address & mask; int transform_idx = address >> shift; @@ -1761,16 +1813,16 @@ CommandPostDecodeLiterals: if (BROTLI_PREDICT_FALSE(!words->data)) { return BROTLI_FAILURE(BROTLI_DECODER_ERROR_DICTIONARY_NOT_SET); } - if (transform_idx < kNumTransforms) { + if (transform_idx < (int)transforms->num_transforms) { const uint8_t* word = &words->data[offset]; int len = i; - if (transform_idx == 0) { + if (transform_idx == transforms->cutOffTransforms[0]) { memcpy(&s->ringbuffer[pos], word, (size_t)len); BROTLI_LOG(("[ProcessCommandsInternal] dictionary word: [%.*s]\n", len, word)); } else { len = BrotliTransformDictionaryWord(&s->ringbuffer[pos], word, len, - transform_idx); + transforms, transform_idx); BROTLI_LOG(("[ProcessCommandsInternal] dictionary word: [%.*s]," " transform_idx = %d, transformed: [%.*s]\n", i, word, transform_idx, len, &s->ringbuffer[pos])); @@ -1778,7 +1830,6 @@ CommandPostDecodeLiterals: pos += len; s->meta_block_remaining_len -= len; if (pos >= s->ringbuffer_size) { - /*s->partial_pos_rb += (size_t)s->ringbuffer_size;*/ s->state = BROTLI_STATE_COMMAND_POST_WRITE_1; goto saveStateAndReturn; } @@ -1800,14 +1851,13 @@ CommandPostDecodeLiterals: uint8_t* copy_src = &s->ringbuffer[src_start]; int dst_end = pos + i; int src_end = src_start + i; - /* update the recent distances 
cache */ + /* Update the recent distances cache. */ s->dist_rb[s->dist_rb_idx & 3] = s->distance_code; ++s->dist_rb_idx; s->meta_block_remaining_len -= i; /* There are 32+ bytes of slack in the ring-buffer allocation. Also, we have 16 short codes, that make these 16 bytes irrelevant - in the ring-buffer. Let's copy over them as a first guess. - */ + in the ring-buffer. Let's copy over them as a first guess. */ memmove16(copy_dst, copy_src); if (src_end > pos && dst_end > src_start) { /* Regions intersect. */ @@ -1830,7 +1880,7 @@ CommandPostDecodeLiterals: } BROTLI_LOG_UINT(s->meta_block_remaining_len); if (s->meta_block_remaining_len <= 0) { - /* Next metablock, if any */ + /* Next metablock, if any. */ s->state = BROTLI_STATE_METABLOCK_DONE; goto saveStateAndReturn; } else { @@ -1850,7 +1900,7 @@ CommandPostWrapCopy: } } if (s->meta_block_remaining_len <= 0) { - /* Next metablock, if any */ + /* Next metablock, if any. */ s->state = BROTLI_STATE_METABLOCK_DONE; goto saveStateAndReturn; } else { @@ -1875,6 +1925,21 @@ static BROTLI_NOINLINE BrotliDecoderErrorCode SafeProcessCommands( return ProcessCommandsInternal(1, s); } +/* Returns the maximum number of distance symbols which can only represent + distances not exceeding BROTLI_MAX_ALLOWED_DISTANCE. 
*/ +static uint32_t BrotliMaxDistanceSymbol(uint32_t ndirect, uint32_t npostfix) { + static const uint32_t bound[BROTLI_MAX_NPOSTFIX + 1] = {0, 4, 12, 28}; + static const uint32_t diff[BROTLI_MAX_NPOSTFIX + 1] = {73, 126, 228, 424}; + uint32_t postfix = 1U << npostfix; + if (ndirect < bound[npostfix]) { + return ndirect + diff[npostfix] + postfix; + } else if (ndirect > bound[npostfix] + postfix) { + return ndirect + diff[npostfix]; + } else { + return bound[npostfix] + diff[npostfix] + postfix; + } +} + BrotliDecoderResult BrotliDecoderDecompress( size_t encoded_size, const uint8_t* encoded_buffer, size_t* decoded_size, uint8_t* decoded_buffer) { @@ -1885,7 +1950,9 @@ BrotliDecoderResult BrotliDecoderDecompress( const uint8_t* next_in = encoded_buffer; size_t available_out = *decoded_size; uint8_t* next_out = decoded_buffer; - BrotliDecoderStateInit(&s); + if (!BrotliDecoderStateInit(&s, 0, 0, 0)) { + return BROTLI_DECODER_RESULT_ERROR; + } result = BrotliDecoderDecompressStream( &s, &available_in, &next_in, &available_out, &next_out, &total_out); *decoded_size = total_out; @@ -1897,23 +1964,22 @@ BrotliDecoderResult BrotliDecoderDecompress( } /* Invariant: input stream is never overconsumed: - * invalid input implies that the whole stream is invalid -> any amount of + - invalid input implies that the whole stream is invalid -> any amount of input could be read and discarded - * when result is "needs more input", then at least one more byte is REQUIRED + - when result is "needs more input", then at least one more byte is REQUIRED to complete decoding; all input data MUST be consumed by decoder, so client could swap the input buffer - * when result is "needs more output" decoder MUST ensure that it doesn't + - when result is "needs more output" decoder MUST ensure that it doesn't hold more than 7 bits in bit reader; this saves client from swapping input buffer ahead of time - * when result is "success" decoder MUST return all unused data back to input - buffer; 
this is possible because the invariant is hold on enter -*/ + - when result is "success" decoder MUST return all unused data back to input + buffer; this is possible because the invariant is held on enter */ BrotliDecoderResult BrotliDecoderDecompressStream( BrotliDecoderState* s, size_t* available_in, const uint8_t** next_in, size_t* available_out, uint8_t** next_out, size_t* total_out) { BrotliDecoderErrorCode result = BROTLI_DECODER_SUCCESS; BrotliBitReader* br = &s->br; - /* Ensure that *total_out is set, even if no data will ever be pushed out. */ + /* Ensure that |total_out| is set, even if no data will ever be pushed out. */ if (total_out) { *total_out = s->partial_pos_out; } @@ -1926,7 +1992,7 @@ BrotliDecoderResult BrotliDecoderDecompressStream( s, BROTLI_FAILURE(BROTLI_DECODER_ERROR_INVALID_ARGUMENTS)); } if (!*available_out) next_out = 0; - if (s->buffer_length == 0) { /* Just connect bit reader to input stream. */ + if (s->buffer_length == 0) { /* Just connect bit reader to input stream. */ br->avail_in = *available_in; br->next_in = *next_in; } else { @@ -1938,9 +2004,10 @@ BrotliDecoderResult BrotliDecoderDecompressStream( } /* State machine */ for (;;) { - if (result != BROTLI_DECODER_SUCCESS) { /* Error, needs more input/output */ + if (result != BROTLI_DECODER_SUCCESS) { + /* Error, needs more input/output. */ if (result == BROTLI_DECODER_NEEDS_MORE_INPUT) { - if (s->ringbuffer != 0) { /* Pro-actively push output. */ + if (s->ringbuffer != 0) { /* Pro-actively push output. */ BrotliDecoderErrorCode intermediate_result = WriteRingBuffer(s, available_out, next_out, total_out, BROTLI_TRUE); /* WriteRingBuffer checks s->meta_block_remaining_len validity. */ @@ -1949,9 +2016,10 @@ BrotliDecoderResult BrotliDecoderDecompressStream( break; } } - if (s->buffer_length != 0) { /* Used with internal buffer. */ - if (br->avail_in == 0) { /* Successfully finished read transaction. 
*/ - /* Accumulator contains less than 8 bits, because internal buffer + if (s->buffer_length != 0) { /* Used with internal buffer. */ + if (br->avail_in == 0) { + /* Successfully finished read transaction. + Accumulator contains less than 8 bits, because internal buffer is expanded byte-by-byte until it is enough to complete read. */ s->buffer_length = 0; /* Switch to input stream and restart. */ @@ -1971,9 +2039,9 @@ BrotliDecoderResult BrotliDecoderDecompressStream( /* Retry with more data in buffer. */ continue; } - /* Can't finish reading and no more input.*/ + /* Can't finish reading and no more input. */ break; - } else { /* Input stream doesn't contain enough input. */ + } else { /* Input stream doesn't contain enough input. */ /* Copy tail to internal buffer and return. */ *next_in = br->next_in; *available_in = br->avail_in; @@ -1992,7 +2060,7 @@ BrotliDecoderResult BrotliDecoderDecompressStream( if (s->buffer_length != 0) { /* Just consumed the buffered input and produced some output. Otherwise - it would result in "needs more input". Reset internal buffer.*/ + it would result in "needs more input". Reset internal buffer. */ s->buffer_length = 0; } else { /* Using input stream in last iteration. When decoder switches to input @@ -2012,13 +2080,32 @@ BrotliDecoderResult BrotliDecoderDecompressStream( break; } /* Decode window size. */ - s->window_bits = DecodeWindowBits(br); /* Reads 1..7 bits. */ - BROTLI_LOG_UINT(s->window_bits); - if (s->window_bits == 9) { - /* Value 9 is reserved for future use. */ + result = DecodeWindowBits(s, br); /* Reads 1..8 bits. 
*/ + if (result != BROTLI_DECODER_SUCCESS) { + break; + } + if (s->large_window) { + s->state = BROTLI_STATE_LARGE_WINDOW_BITS; + break; + } + s->state = BROTLI_STATE_INITIALIZE; + break; + + case BROTLI_STATE_LARGE_WINDOW_BITS: + if (!BrotliSafeReadBits(br, 6, &s->window_bits)) { + result = BROTLI_DECODER_NEEDS_MORE_INPUT; + break; + } + if (s->window_bits < BROTLI_LARGE_MIN_WBITS || + s->window_bits > BROTLI_LARGE_MAX_WBITS) { result = BROTLI_FAILURE(BROTLI_DECODER_ERROR_FORMAT_WINDOW_BITS); break; } + s->state = BROTLI_STATE_INITIALIZE; + /* No break, continue to next state */ + + case BROTLI_STATE_INITIALIZE: + BROTLI_LOG_UINT(s->window_bits); /* Maximum distance, see section 9.1. of the spec. */ s->max_backward_distance = (1 << s->window_bits) - BROTLI_WINDOW_GAP; @@ -2034,14 +2121,16 @@ BrotliDecoderResult BrotliDecoderDecompressStream( s->block_type_trees + 3 * BROTLI_HUFFMAN_MAX_SIZE_258; s->state = BROTLI_STATE_METABLOCK_BEGIN; - /* No break, continue to next state */ + /* No break, continue to next state. */ + case BROTLI_STATE_METABLOCK_BEGIN: BrotliDecoderStateMetablockBegin(s); BROTLI_LOG_UINT(s->pos); s->state = BROTLI_STATE_METABLOCK_HEADER; - /* No break, continue to next state */ + /* No break, continue to next state. */ + case BROTLI_STATE_METABLOCK_HEADER: - result = DecodeMetaBlockLength(s, br); /* Reads 2 - 31 bits. */ + result = DecodeMetaBlockLength(s, br); /* Reads 2 - 31 bits. 
*/ if (result != BROTLI_DECODER_SUCCESS) { break; } @@ -2071,6 +2160,7 @@ BrotliDecoderResult BrotliDecoderDecompressStream( s->loop_counter = 0; s->state = BROTLI_STATE_HUFFMAN_CODE_0; break; + case BROTLI_STATE_UNCOMPRESSED: { result = CopyUncompressedBlockToOutput( available_out, next_out, total_out, s); @@ -2080,6 +2170,7 @@ BrotliDecoderResult BrotliDecoderDecompressStream( s->state = BROTLI_STATE_METABLOCK_DONE; break; } + case BROTLI_STATE_METADATA: for (; s->meta_block_remaining_len > 0; --s->meta_block_remaining_len) { uint32_t bits; @@ -2093,6 +2184,7 @@ BrotliDecoderResult BrotliDecoderDecompressStream( s->state = BROTLI_STATE_METABLOCK_DONE; } break; + case BROTLI_STATE_HUFFMAN_CODE_0: if (s->loop_counter >= 3) { s->state = BROTLI_STATE_METABLOCK_HEADER_2; @@ -2110,23 +2202,28 @@ BrotliDecoderResult BrotliDecoderDecompressStream( break; } s->state = BROTLI_STATE_HUFFMAN_CODE_1; - /* No break, continue to next state */ + /* No break, continue to next state. */ + case BROTLI_STATE_HUFFMAN_CODE_1: { + uint32_t alphabet_size = s->num_block_types[s->loop_counter] + 2; int tree_offset = s->loop_counter * BROTLI_HUFFMAN_MAX_SIZE_258; - result = ReadHuffmanCode(s->num_block_types[s->loop_counter] + 2, + result = ReadHuffmanCode(alphabet_size, alphabet_size, &s->block_type_trees[tree_offset], NULL, s); if (result != BROTLI_DECODER_SUCCESS) break; s->state = BROTLI_STATE_HUFFMAN_CODE_2; - /* No break, continue to next state */ + /* No break, continue to next state. */ } + case BROTLI_STATE_HUFFMAN_CODE_2: { + uint32_t alphabet_size = BROTLI_NUM_BLOCK_LEN_SYMBOLS; int tree_offset = s->loop_counter * BROTLI_HUFFMAN_MAX_SIZE_26; - result = ReadHuffmanCode(BROTLI_NUM_BLOCK_LEN_SYMBOLS, + result = ReadHuffmanCode(alphabet_size, alphabet_size, &s->block_len_trees[tree_offset], NULL, s); if (result != BROTLI_DECODER_SUCCESS) break; s->state = BROTLI_STATE_HUFFMAN_CODE_3; - /* No break, continue to next state */ + /* No break, continue to next state. 
*/ } + case BROTLI_STATE_HUFFMAN_CODE_3: { int tree_offset = s->loop_counter * BROTLI_HUFFMAN_MAX_SIZE_26; if (!SafeReadBlockLength(s, &s->block_length[s->loop_counter], @@ -2139,6 +2236,7 @@ BrotliDecoderResult BrotliDecoderDecompressStream( s->state = BROTLI_STATE_HUFFMAN_CODE_0; break; } + case BROTLI_STATE_METABLOCK_HEADER_2: { uint32_t bits; if (!BrotliSafeReadBits(br, 6, &bits)) { @@ -2160,15 +2258,17 @@ BrotliDecoderResult BrotliDecoderDecompressStream( } s->loop_counter = 0; s->state = BROTLI_STATE_CONTEXT_MODES; - /* No break, continue to next state */ + /* No break, continue to next state. */ } + case BROTLI_STATE_CONTEXT_MODES: result = ReadContextModes(s); if (result != BROTLI_DECODER_SUCCESS) { break; } s->state = BROTLI_STATE_CONTEXT_MAP_1; - /* No break, continue to next state */ + /* No break, continue to next state. */ + case BROTLI_STATE_CONTEXT_MAP_1: result = DecodeContextMap( s->num_block_types[0] << BROTLI_LITERAL_CONTEXT_BITS, @@ -2178,54 +2278,54 @@ BrotliDecoderResult BrotliDecoderDecompressStream( } DetectTrivialLiteralBlockTypes(s); s->state = BROTLI_STATE_CONTEXT_MAP_2; - /* No break, continue to next state */ - case BROTLI_STATE_CONTEXT_MAP_2: - { - uint32_t num_distance_codes = s->num_direct_distance_codes + - ((2 * BROTLI_MAX_DISTANCE_BITS) << s->distance_postfix_bits); - BROTLI_BOOL allocation_success = BROTLI_TRUE; - result = DecodeContextMap( - s->num_block_types[2] << BROTLI_DISTANCE_CONTEXT_BITS, - &s->num_dist_htrees, &s->dist_context_map, s); - if (result != BROTLI_DECODER_SUCCESS) { - break; - } - allocation_success &= BrotliDecoderHuffmanTreeGroupInit( - s, &s->literal_hgroup, BROTLI_NUM_LITERAL_SYMBOLS, - s->num_literal_htrees); - allocation_success &= BrotliDecoderHuffmanTreeGroupInit( - s, &s->insert_copy_hgroup, BROTLI_NUM_COMMAND_SYMBOLS, - s->num_block_types[1]); - allocation_success &= BrotliDecoderHuffmanTreeGroupInit( - s, &s->distance_hgroup, num_distance_codes, - s->num_dist_htrees); - if (!allocation_success) { - 
return SaveErrorCode(s, - BROTLI_FAILURE(BROTLI_DECODER_ERROR_ALLOC_TREE_GROUPS)); - } + /* No break, continue to next state. */ + + case BROTLI_STATE_CONTEXT_MAP_2: { + uint32_t num_direct_codes = + s->num_direct_distance_codes - BROTLI_NUM_DISTANCE_SHORT_CODES; + uint32_t num_distance_codes = BROTLI_DISTANCE_ALPHABET_SIZE( + num_direct_codes, s->distance_postfix_bits, + (s->large_window ? BROTLI_LARGE_MAX_DISTANCE_BITS : + BROTLI_MAX_DISTANCE_BITS)); + uint32_t max_distance_symbol = (s->large_window ? + BrotliMaxDistanceSymbol( + num_direct_codes, s->distance_postfix_bits) : + num_distance_codes); + BROTLI_BOOL allocation_success = BROTLI_TRUE; + result = DecodeContextMap( + s->num_block_types[2] << BROTLI_DISTANCE_CONTEXT_BITS, + &s->num_dist_htrees, &s->dist_context_map, s); + if (result != BROTLI_DECODER_SUCCESS) { + break; + } + allocation_success &= BrotliDecoderHuffmanTreeGroupInit( + s, &s->literal_hgroup, BROTLI_NUM_LITERAL_SYMBOLS, + BROTLI_NUM_LITERAL_SYMBOLS, s->num_literal_htrees); + allocation_success &= BrotliDecoderHuffmanTreeGroupInit( + s, &s->insert_copy_hgroup, BROTLI_NUM_COMMAND_SYMBOLS, + BROTLI_NUM_COMMAND_SYMBOLS, s->num_block_types[1]); + allocation_success &= BrotliDecoderHuffmanTreeGroupInit( + s, &s->distance_hgroup, num_distance_codes, + max_distance_symbol, s->num_dist_htrees); + if (!allocation_success) { + return SaveErrorCode(s, + BROTLI_FAILURE(BROTLI_DECODER_ERROR_ALLOC_TREE_GROUPS)); } s->loop_counter = 0; s->state = BROTLI_STATE_TREE_GROUP; - /* No break, continue to next state */ - case BROTLI_STATE_TREE_GROUP: - { - HuffmanTreeGroup* hgroup = NULL; - switch (s->loop_counter) { - case 0: - hgroup = &s->literal_hgroup; - break; - case 1: - hgroup = &s->insert_copy_hgroup; - break; - case 2: - hgroup = &s->distance_hgroup; - break; - default: - return SaveErrorCode(s, BROTLI_FAILURE( - BROTLI_DECODER_ERROR_UNREACHABLE)); - } - result = HuffmanTreeGroupDecode(hgroup, s); + /* No break, continue to next state. 
*/ + } + + case BROTLI_STATE_TREE_GROUP: { + HuffmanTreeGroup* hgroup = NULL; + switch (s->loop_counter) { + case 0: hgroup = &s->literal_hgroup; break; + case 1: hgroup = &s->insert_copy_hgroup; break; + case 2: hgroup = &s->distance_hgroup; break; + default: return SaveErrorCode(s, BROTLI_FAILURE( + BROTLI_DECODER_ERROR_UNREACHABLE)); } + result = HuffmanTreeGroupDecode(hgroup, s); if (result != BROTLI_DECODER_SUCCESS) break; s->loop_counter++; if (s->loop_counter >= 3) { @@ -2239,6 +2339,8 @@ BrotliDecoderResult BrotliDecoderDecompressStream( s->state = BROTLI_STATE_COMMAND_BEGIN; } break; + } + case BROTLI_STATE_COMMAND_BEGIN: case BROTLI_STATE_COMMAND_INNER: case BROTLI_STATE_COMMAND_POST_DECODE_LITERALS: @@ -2248,6 +2350,7 @@ BrotliDecoderResult BrotliDecoderDecompressStream( result = SafeProcessCommands(s); } break; + case BROTLI_STATE_COMMAND_INNER_WRITE: case BROTLI_STATE_COMMAND_POST_WRITE_1: case BROTLI_STATE_COMMAND_POST_WRITE_2: @@ -2262,7 +2365,7 @@ BrotliDecoderResult BrotliDecoderDecompressStream( } if (s->state == BROTLI_STATE_COMMAND_POST_WRITE_1) { if (s->meta_block_remaining_len == 0) { - /* Next metablock, if any */ + /* Next metablock, if any. */ s->state = BROTLI_STATE_METABLOCK_DONE; } else { s->state = BROTLI_STATE_COMMAND_BEGIN; @@ -2282,6 +2385,7 @@ BrotliDecoderResult BrotliDecoderDecompressStream( s->state = BROTLI_STATE_COMMAND_INNER; } break; + case BROTLI_STATE_METABLOCK_DONE: if (s->meta_block_remaining_len < 0) { result = BROTLI_FAILURE(BROTLI_DECODER_ERROR_FORMAT_BLOCK_LENGTH_2); @@ -2302,7 +2406,8 @@ BrotliDecoderResult BrotliDecoderDecompressStream( *next_in = br->next_in; } s->state = BROTLI_STATE_DONE; - /* No break, continue to next state */ + /* No break, continue to next state. 
*/ + case BROTLI_STATE_DONE: if (s->ringbuffer != 0) { result = WriteRingBuffer( diff --git a/c/dec/huffman.c b/c/dec/huffman.c index 4fe7bfa..f142442 100644 --- a/c/dec/huffman.c +++ b/c/dec/huffman.c @@ -86,9 +86,9 @@ static BROTLI_INLINE void ReplicateValue(HuffmanCode* table, } while (end > 0); } -/* Returns the table width of the next 2nd level table. count is the histogram - of bit lengths for the remaining symbols, len is the code length of the next - processed symbol */ +/* Returns the table width of the next 2nd level table. |count| is the histogram + of bit lengths for the remaining symbols, |len| is the code length of the + next processed symbol. */ static BROTLI_INLINE int NextTableBitSize(const uint16_t* const count, int len, int root_bits) { int left = 1 << (len - root_bits); @@ -118,7 +118,7 @@ void BrotliBuildCodeLengthsHuffmanTable(HuffmanCode* table, BROTLI_DCHECK(BROTLI_HUFFMAN_MAX_CODE_LENGTH_CODE_LENGTH <= BROTLI_REVERSE_BITS_MAX); - /* generate offsets into sorted symbol table by code length */ + /* Generate offsets into sorted symbol table by code length. */ symbol = -1; bits = 1; BROTLI_REPEAT(BROTLI_HUFFMAN_MAX_CODE_LENGTH_CODE_LENGTH, { @@ -129,7 +129,7 @@ void BrotliBuildCodeLengthsHuffmanTable(HuffmanCode* table, /* Symbols with code length 0 are placed after all other symbols. */ offset[0] = BROTLI_CODE_LENGTH_CODES - 1; - /* sort symbols by length, by symbol order within each length */ + /* Sort symbols by length, by symbol order within each length. */ symbol = BROTLI_CODE_LENGTH_CODES; do { BROTLI_REPEAT(6, { @@ -150,7 +150,7 @@ void BrotliBuildCodeLengthsHuffmanTable(HuffmanCode* table, return; } - /* fill in table */ + /* Fill in table. 
*/ key = 0; key_step = BROTLI_REVERSE_BITS_LOWEST; symbol = 0; @@ -200,9 +200,8 @@ uint32_t BrotliBuildHuffmanTable(HuffmanCode* root_table, table_size = 1 << table_bits; total_size = table_size; - /* fill in root table */ - /* let's reduce the table size to a smaller size if possible, and */ - /* create the repetitions by memcpy if possible in the coming loop */ + /* Fill in the root table. Reduce the table size to if possible, + and create the repetitions by memcpy. */ if (table_bits > max_length) { table_bits = max_length; table_size = 1 << table_bits; @@ -224,15 +223,14 @@ uint32_t BrotliBuildHuffmanTable(HuffmanCode* root_table, key_step >>= 1; } while (++bits <= table_bits); - /* if root_bits != table_bits we only created one fraction of the */ - /* table, and we need to replicate it now. */ + /* If root_bits != table_bits then replicate to fill the remaining slots. */ while (total_size != table_size) { memcpy(&table[table_size], &table[0], (size_t)table_size * sizeof(table[0])); table_size <<= 1; } - /* fill in 2nd level tables and add pointers to root table */ + /* Fill in 2nd level tables and add pointers to root table. */ key_step = BROTLI_REVERSE_BITS_LOWEST >> (root_bits - 1); sub_key = (BROTLI_REVERSE_BITS_LOWEST << 1); sub_key_step = BROTLI_REVERSE_BITS_LOWEST; diff --git a/c/dec/huffman.h b/c/dec/huffman.h index 730af88..521ec6e 100644 --- a/c/dec/huffman.h +++ b/c/dec/huffman.h @@ -19,10 +19,11 @@ extern "C" { #define BROTLI_HUFFMAN_MAX_CODE_LENGTH 15 /* Maximum possible Huffman table size for an alphabet size of (index * 32), - * max code length 15 and root table bits 8. */ + max code length 15 and root table bits 8. 
*/ static const uint16_t kMaxHuffmanTableSize[] = { 256, 402, 436, 468, 500, 534, 566, 598, 630, 662, 694, 726, 758, 790, 822, - 854, 886, 920, 952, 984, 1016, 1048, 1080}; + 854, 886, 920, 952, 984, 1016, 1048, 1080, 1112, 1144, 1176, 1208, 1240, 1272, + 1304, 1336, 1368, 1400, 1432, 1464, 1496, 1528}; /* BROTLI_NUM_BLOCK_LEN_SYMBOLS == 26 */ #define BROTLI_HUFFMAN_MAX_SIZE_26 396 /* BROTLI_MAX_BLOCK_TYPE_SYMBOLS == 258 */ @@ -41,23 +42,26 @@ typedef struct { BROTLI_INTERNAL void BrotliBuildCodeLengthsHuffmanTable(HuffmanCode* root_table, const uint8_t* const code_lengths, uint16_t* count); -/* Builds Huffman lookup table assuming code lengths are in symbol order. */ -/* Returns size of resulting table. */ +/* Builds Huffman lookup table assuming code lengths are in symbol order. + Returns size of resulting table. */ BROTLI_INTERNAL uint32_t BrotliBuildHuffmanTable(HuffmanCode* root_table, int root_bits, const uint16_t* const symbol_lists, uint16_t* count_arg); -/* Builds a simple Huffman table. The num_symbols parameter is to be */ -/* interpreted as follows: 0 means 1 symbol, 1 means 2 symbols, 2 means 3 */ -/* symbols, 3 means 4 symbols with lengths 2,2,2,2, 4 means 4 symbols with */ -/* lengths 1,2,3,3. */ +/* Builds a simple Huffman table. The |num_symbols| parameter is to be + interpreted as follows: 0 means 1 symbol, 1 means 2 symbols, + 2 means 3 symbols, 3 means 4 symbols with lengths [2, 2, 2, 2], + 4 means 4 symbols with lengths [1, 2, 3, 3]. */ BROTLI_INTERNAL uint32_t BrotliBuildSimpleHuffmanTable(HuffmanCode* table, int root_bits, uint16_t* symbols, uint32_t num_symbols); /* Contains a collection of Huffman trees with the same alphabet size. */ +/* max_symbol is needed due to simple codes since log2(alphabet_size) could be + greater than log2(max_symbol). 
*/ typedef struct { HuffmanCode** htrees; HuffmanCode* codes; uint16_t alphabet_size; + uint16_t max_symbol; uint16_t num_htrees; } HuffmanTreeGroup; diff --git a/c/dec/prefix.h b/c/dec/prefix.h index aa776c7..3ea062d 100644 --- a/c/dec/prefix.h +++ b/c/dec/prefix.h @@ -5,8 +5,7 @@ */ /* Lookup tables to map prefix codes to value ranges. This is used during - decoding of the block lengths, literal insertion lengths and copy lengths. -*/ + decoding of the block lengths, literal insertion lengths and copy lengths. */ #ifndef BROTLI_DEC_PREFIX_H_ #define BROTLI_DEC_PREFIX_H_ @@ -14,8 +13,8 @@ #include "../common/constants.h" #include -/* Represents the range of values belonging to a prefix code: */ -/* [offset, offset + 2^nbits) */ +/* Represents the range of values belonging to a prefix code: + [offset, offset + 2^nbits) */ struct PrefixCodeRange { uint16_t offset; uint8_t nbits; diff --git a/c/dec/state.c b/c/dec/state.c index eaec823..e0b37c2 100644 --- a/c/dec/state.c +++ b/c/dec/state.c @@ -15,25 +15,11 @@ extern "C" { #endif -static void* DefaultAllocFunc(void* opaque, size_t size) { - BROTLI_UNUSED(opaque); - return malloc(size); -} - -static void DefaultFreeFunc(void* opaque, void* address) { - BROTLI_UNUSED(opaque); - free(address); -} - -void BrotliDecoderStateInit(BrotliDecoderState* s) { - BrotliDecoderStateInitWithCustomAllocators(s, 0, 0, 0); -} - -void BrotliDecoderStateInitWithCustomAllocators(BrotliDecoderState* s, +BROTLI_BOOL BrotliDecoderStateInit(BrotliDecoderState* s, brotli_alloc_func alloc_func, brotli_free_func free_func, void* opaque) { if (!alloc_func) { - s->alloc_func = DefaultAllocFunc; - s->free_func = DefaultFreeFunc; + s->alloc_func = BrotliDefaultAllocFunc; + s->free_func = BrotliDefaultFreeFunc; s->memory_manager_opaque = 0; } else { s->alloc_func = alloc_func; @@ -45,6 +31,7 @@ void BrotliDecoderStateInitWithCustomAllocators(BrotliDecoderState* s, BrotliInitBitReader(&s->br); s->state = BROTLI_STATE_UNINITED; + s->large_window = 0; 
s->substate_metablock_header = BROTLI_STATE_METABLOCK_HEADER_NONE; s->substate_tree_group = BROTLI_STATE_TREE_GROUP_NONE; s->substate_context_map = BROTLI_STATE_CONTEXT_MAP_NONE; @@ -103,13 +90,16 @@ void BrotliDecoderStateInitWithCustomAllocators(BrotliDecoderState* s, s->mtf_upper_bound = 63; s->dictionary = BrotliGetDictionary(); + s->transforms = BrotliGetTransforms(); + + return BROTLI_TRUE; } void BrotliDecoderStateMetablockBegin(BrotliDecoderState* s) { s->meta_block_remaining_len = 0; - s->block_length[0] = 1U << 28; - s->block_length[1] = 1U << 28; - s->block_length[2] = 1U << 28; + s->block_length[0] = 1U << 24; + s->block_length[1] = 1U << 24; + s->block_length[2] = 1U << 24; s->num_block_types[0] = 1; s->num_block_types[1] = 1; s->num_block_types[2] = 1; @@ -126,8 +116,7 @@ void BrotliDecoderStateMetablockBegin(BrotliDecoderState* s) { s->literal_htree = NULL; s->dist_context_map_slice = NULL; s->dist_htree_index = 0; - s->context_lookup1 = NULL; - s->context_lookup2 = NULL; + s->context_lookup = NULL; s->literal_hgroup.codes = NULL; s->literal_hgroup.htrees = NULL; s->insert_copy_hgroup.codes = NULL; @@ -153,7 +142,8 @@ void BrotliDecoderStateCleanup(BrotliDecoderState* s) { } BROTLI_BOOL BrotliDecoderHuffmanTreeGroupInit(BrotliDecoderState* s, - HuffmanTreeGroup* group, uint32_t alphabet_size, uint32_t ntrees) { + HuffmanTreeGroup* group, uint32_t alphabet_size, uint32_t max_symbol, + uint32_t ntrees) { /* Pack two allocations into one */ const size_t max_table_size = kMaxHuffmanTableSize[(alphabet_size + 31) >> 5]; const size_t code_size = sizeof(HuffmanCode) * ntrees * max_table_size; @@ -162,6 +152,7 @@ BROTLI_BOOL BrotliDecoderHuffmanTreeGroupInit(BrotliDecoderState* s, HuffmanCode** p = (HuffmanCode**)BROTLI_DECODER_ALLOC(s, code_size + htree_size); group->alphabet_size = (uint16_t)alphabet_size; + group->max_symbol = (uint16_t)max_symbol; group->num_htrees = (uint16_t)ntrees; group->htrees = p; group->codes = (HuffmanCode*)(&p[ntrees]); diff 
--git a/c/dec/state.h b/c/dec/state.h index 069beca..d28b639 100644 --- a/c/dec/state.h +++ b/c/dec/state.h @@ -12,6 +12,7 @@ #include "../common/constants.h" #include "../common/dictionary.h" #include "../common/platform.h" +#include "../common/transform.h" #include #include "./bit_reader.h" #include "./huffman.h" @@ -22,6 +23,8 @@ extern "C" { typedef enum { BROTLI_STATE_UNINITED, + BROTLI_STATE_LARGE_WINDOW_BITS, + BROTLI_STATE_INITIALIZE, BROTLI_STATE_METABLOCK_BEGIN, BROTLI_STATE_METABLOCK_HEADER, BROTLI_STATE_METABLOCK_HEADER_2, @@ -126,23 +129,22 @@ struct BrotliDecoderStateStruct { uint8_t* ringbuffer; uint8_t* ringbuffer_end; HuffmanCode* htree_command; - const uint8_t* context_lookup1; - const uint8_t* context_lookup2; + const uint8_t* context_lookup; uint8_t* context_map_slice; uint8_t* dist_context_map_slice; - /* This ring buffer holds a few past copy distances that will be used by */ - /* some special distance codes. */ + /* This ring buffer holds a few past copy distances that will be used by + some special distance codes. */ HuffmanTreeGroup literal_hgroup; HuffmanTreeGroup insert_copy_hgroup; HuffmanTreeGroup distance_hgroup; HuffmanCode* block_type_trees; HuffmanCode* block_len_trees; /* This is true if the literal context map histogram type always matches the - block type. It is then not needed to keep the context (faster decoding). */ + block type. It is then not needed to keep the context (faster decoding). */ int trivial_literal_context; - /* Distance context is actual after command is decoded and before distance - is computed. After distance computation it is used as a temporary variable. */ + /* Distance context is actual after command is decoded and before distance is + computed. After distance computation it is used as a temporary variable. 
*/ int distance_context; int meta_block_remaining_len; uint32_t block_length_index; @@ -162,11 +164,11 @@ struct BrotliDecoderStateStruct { int copy_length; int distance_code; - /* For partial write operations */ - size_t rb_roundtrips; /* How many times we went around the ring-buffer */ - size_t partial_pos_out; /* How much output to the user in total */ + /* For partial write operations. */ + size_t rb_roundtrips; /* how many times we went around the ring-buffer */ + size_t partial_pos_out; /* how much output to the user in total */ - /* For ReadHuffmanCode */ + /* For ReadHuffmanCode. */ uint32_t symbol; uint32_t repeat; uint32_t space; @@ -180,25 +182,26 @@ struct BrotliDecoderStateStruct { /* Tails of symbol chains. */ int next_symbol[32]; uint8_t code_length_code_lengths[BROTLI_CODE_LENGTH_CODES]; - /* Population counts for the code lengths */ + /* Population counts for the code lengths. */ uint16_t code_length_histo[16]; - /* For HuffmanTreeGroupDecode */ + /* For HuffmanTreeGroupDecode. */ int htree_index; HuffmanCode* next; - /* For DecodeContextMap */ + /* For DecodeContextMap. */ uint32_t context_index; uint32_t max_run_length_prefix; uint32_t code; HuffmanCode context_map_table[BROTLI_HUFFMAN_MAX_SIZE_272]; - /* For InverseMoveToFrontTransform */ + /* For InverseMoveToFrontTransform. */ uint32_t mtf_upper_bound; uint32_t mtf[64 + 1]; - /* less used attributes are in the end of this struct */ - /* States inside function calls */ + /* Less used attributes are at the end of this struct. */ + + /* States inside function calls. 
*/ BrotliRunningMetablockHeaderState substate_metablock_header; BrotliRunningTreeGroupState substate_tree_group; BrotliRunningContextMapState substate_context_map; @@ -212,6 +215,7 @@ struct BrotliDecoderStateStruct { unsigned int is_metadata : 1; unsigned int should_wrap_ringbuffer : 1; unsigned int canny_ringbuffer_allocation : 1; + unsigned int large_window : 1; unsigned int size_nibbles : 8; uint32_t window_bits; @@ -222,6 +226,7 @@ struct BrotliDecoderStateStruct { uint8_t* context_modes; const BrotliDictionary* dictionary; + const BrotliTransforms* transforms; uint32_t trivial_literal_contexts[8]; /* 256 bits */ }; @@ -229,17 +234,15 @@ struct BrotliDecoderStateStruct { typedef struct BrotliDecoderStateStruct BrotliDecoderStateInternal; #define BrotliDecoderState BrotliDecoderStateInternal -BROTLI_INTERNAL void BrotliDecoderStateInit(BrotliDecoderState* s); -BROTLI_INTERNAL void BrotliDecoderStateInitWithCustomAllocators( - BrotliDecoderState* s, brotli_alloc_func alloc_func, - brotli_free_func free_func, void* opaque); +BROTLI_INTERNAL BROTLI_BOOL BrotliDecoderStateInit(BrotliDecoderState* s, + brotli_alloc_func alloc_func, brotli_free_func free_func, void* opaque); BROTLI_INTERNAL void BrotliDecoderStateCleanup(BrotliDecoderState* s); BROTLI_INTERNAL void BrotliDecoderStateMetablockBegin(BrotliDecoderState* s); BROTLI_INTERNAL void BrotliDecoderStateCleanupAfterMetablock( BrotliDecoderState* s); BROTLI_INTERNAL BROTLI_BOOL BrotliDecoderHuffmanTreeGroupInit( BrotliDecoderState* s, HuffmanTreeGroup* group, uint32_t alphabet_size, - uint32_t ntrees); + uint32_t max_symbol, uint32_t ntrees); #define BROTLI_DECODER_ALLOC(S, L) S->alloc_func(S->memory_manager_opaque, L) diff --git a/c/dec/transform.h b/c/dec/transform.h deleted file mode 100644 index e1d96ff..0000000 --- a/c/dec/transform.h +++ /dev/null @@ -1,300 +0,0 @@ -/* Copyright 2013 Google Inc. All Rights Reserved. - - Distributed under MIT license. 
- See file LICENSE for detail or copy at https://opensource.org/licenses/MIT -*/ - -/* Transformations on dictionary words. */ - -#ifndef BROTLI_DEC_TRANSFORM_H_ -#define BROTLI_DEC_TRANSFORM_H_ - -#include "../common/platform.h" -#include - -#if defined(__cplusplus) || defined(c_plusplus) -extern "C" { -#endif - -enum WordTransformType { - kIdentity = 0, - kOmitLast1 = 1, - kOmitLast2 = 2, - kOmitLast3 = 3, - kOmitLast4 = 4, - kOmitLast5 = 5, - kOmitLast6 = 6, - kOmitLast7 = 7, - kOmitLast8 = 8, - kOmitLast9 = 9, - kUppercaseFirst = 10, - kUppercaseAll = 11, - kOmitFirst1 = 12, - kOmitFirst2 = 13, - kOmitFirst3 = 14, - kOmitFirst4 = 15, - kOmitFirst5 = 16, - kOmitFirst6 = 17, - kOmitFirst7 = 18, - kOmitFirst8 = 19, - kOmitFirst9 = 20 -}; - -typedef struct { - const uint8_t prefix_id; - const uint8_t transform; - const uint8_t suffix_id; -} Transform; - -static const char kPrefixSuffix[208] = - "\0 \0, \0 of the \0 of \0s \0.\0 and \0 in \0\"\0 to \0\">\0\n\0. \0]\0" - " for \0 a \0 that \0\'\0 with \0 from \0 by \0(\0. The \0 on \0 as \0" - " is \0ing \0\n\t\0:\0ed \0=\"\0 at \0ly \0,\0=\'\0.com/\0. 
This \0" - " not \0er \0al \0ful \0ive \0less \0est \0ize \0\xc2\xa0\0ous "; - -enum { - /* EMPTY = "" - SP = " " - DQUOT = "\"" - SQUOT = "'" - CLOSEBR = "]" - OPEN = "(" - SLASH = "/" - NBSP = non-breaking space "\0xc2\xa0" - */ - kPFix_EMPTY = 0, - kPFix_SP = 1, - kPFix_COMMASP = 3, - kPFix_SPofSPtheSP = 6, - kPFix_SPtheSP = 9, - kPFix_eSP = 12, - kPFix_SPofSP = 15, - kPFix_sSP = 20, - kPFix_DOT = 23, - kPFix_SPandSP = 25, - kPFix_SPinSP = 31, - kPFix_DQUOT = 36, - kPFix_SPtoSP = 38, - kPFix_DQUOTGT = 43, - kPFix_NEWLINE = 46, - kPFix_DOTSP = 48, - kPFix_CLOSEBR = 51, - kPFix_SPforSP = 53, - kPFix_SPaSP = 59, - kPFix_SPthatSP = 63, - kPFix_SQUOT = 70, - kPFix_SPwithSP = 72, - kPFix_SPfromSP = 79, - kPFix_SPbySP = 86, - kPFix_OPEN = 91, - kPFix_DOTSPTheSP = 93, - kPFix_SPonSP = 100, - kPFix_SPasSP = 105, - kPFix_SPisSP = 110, - kPFix_ingSP = 115, - kPFix_NEWLINETAB = 120, - kPFix_COLON = 123, - kPFix_edSP = 125, - kPFix_EQDQUOT = 129, - kPFix_SPatSP = 132, - kPFix_lySP = 137, - kPFix_COMMA = 141, - kPFix_EQSQUOT = 143, - kPFix_DOTcomSLASH = 146, - kPFix_DOTSPThisSP = 152, - kPFix_SPnotSP = 160, - kPFix_erSP = 166, - kPFix_alSP = 170, - kPFix_fulSP = 174, - kPFix_iveSP = 179, - kPFix_lessSP = 184, - kPFix_estSP = 190, - kPFix_izeSP = 195, - kPFix_NBSP = 200, - kPFix_ousSP = 203 -}; - -static const Transform kTransforms[] = { - { kPFix_EMPTY, kIdentity, kPFix_EMPTY }, - { kPFix_EMPTY, kIdentity, kPFix_SP }, - { kPFix_SP, kIdentity, kPFix_SP }, - { kPFix_EMPTY, kOmitFirst1, kPFix_EMPTY }, - { kPFix_EMPTY, kUppercaseFirst, kPFix_SP }, - { kPFix_EMPTY, kIdentity, kPFix_SPtheSP }, - { kPFix_SP, kIdentity, kPFix_EMPTY }, - { kPFix_sSP, kIdentity, kPFix_SP }, - { kPFix_EMPTY, kIdentity, kPFix_SPofSP }, - { kPFix_EMPTY, kUppercaseFirst, kPFix_EMPTY }, - { kPFix_EMPTY, kIdentity, kPFix_SPandSP }, - { kPFix_EMPTY, kOmitFirst2, kPFix_EMPTY }, - { kPFix_EMPTY, kOmitLast1, kPFix_EMPTY }, - { kPFix_COMMASP, kIdentity, kPFix_SP }, - { kPFix_EMPTY, kIdentity, kPFix_COMMASP }, - { 
kPFix_SP, kUppercaseFirst, kPFix_SP }, - { kPFix_EMPTY, kIdentity, kPFix_SPinSP }, - { kPFix_EMPTY, kIdentity, kPFix_SPtoSP }, - { kPFix_eSP, kIdentity, kPFix_SP }, - { kPFix_EMPTY, kIdentity, kPFix_DQUOT }, - { kPFix_EMPTY, kIdentity, kPFix_DOT }, - { kPFix_EMPTY, kIdentity, kPFix_DQUOTGT }, - { kPFix_EMPTY, kIdentity, kPFix_NEWLINE }, - { kPFix_EMPTY, kOmitLast3, kPFix_EMPTY }, - { kPFix_EMPTY, kIdentity, kPFix_CLOSEBR }, - { kPFix_EMPTY, kIdentity, kPFix_SPforSP }, - { kPFix_EMPTY, kOmitFirst3, kPFix_EMPTY }, - { kPFix_EMPTY, kOmitLast2, kPFix_EMPTY }, - { kPFix_EMPTY, kIdentity, kPFix_SPaSP }, - { kPFix_EMPTY, kIdentity, kPFix_SPthatSP }, - { kPFix_SP, kUppercaseFirst, kPFix_EMPTY }, - { kPFix_EMPTY, kIdentity, kPFix_DOTSP }, - { kPFix_DOT, kIdentity, kPFix_EMPTY }, - { kPFix_SP, kIdentity, kPFix_COMMASP }, - { kPFix_EMPTY, kOmitFirst4, kPFix_EMPTY }, - { kPFix_EMPTY, kIdentity, kPFix_SPwithSP }, - { kPFix_EMPTY, kIdentity, kPFix_SQUOT }, - { kPFix_EMPTY, kIdentity, kPFix_SPfromSP }, - { kPFix_EMPTY, kIdentity, kPFix_SPbySP }, - { kPFix_EMPTY, kOmitFirst5, kPFix_EMPTY }, - { kPFix_EMPTY, kOmitFirst6, kPFix_EMPTY }, - { kPFix_SPtheSP, kIdentity, kPFix_EMPTY }, - { kPFix_EMPTY, kOmitLast4, kPFix_EMPTY }, - { kPFix_EMPTY, kIdentity, kPFix_DOTSPTheSP }, - { kPFix_EMPTY, kUppercaseAll, kPFix_EMPTY }, - { kPFix_EMPTY, kIdentity, kPFix_SPonSP }, - { kPFix_EMPTY, kIdentity, kPFix_SPasSP }, - { kPFix_EMPTY, kIdentity, kPFix_SPisSP }, - { kPFix_EMPTY, kOmitLast7, kPFix_EMPTY }, - { kPFix_EMPTY, kOmitLast1, kPFix_ingSP }, - { kPFix_EMPTY, kIdentity, kPFix_NEWLINETAB }, - { kPFix_EMPTY, kIdentity, kPFix_COLON }, - { kPFix_SP, kIdentity, kPFix_DOTSP }, - { kPFix_EMPTY, kIdentity, kPFix_edSP }, - { kPFix_EMPTY, kOmitFirst9, kPFix_EMPTY }, - { kPFix_EMPTY, kOmitFirst7, kPFix_EMPTY }, - { kPFix_EMPTY, kOmitLast6, kPFix_EMPTY }, - { kPFix_EMPTY, kIdentity, kPFix_OPEN }, - { kPFix_EMPTY, kUppercaseFirst, kPFix_COMMASP }, - { kPFix_EMPTY, kOmitLast8, kPFix_EMPTY }, - { 
kPFix_EMPTY, kIdentity, kPFix_SPatSP }, - { kPFix_EMPTY, kIdentity, kPFix_lySP }, - { kPFix_SPtheSP, kIdentity, kPFix_SPofSP }, - { kPFix_EMPTY, kOmitLast5, kPFix_EMPTY }, - { kPFix_EMPTY, kOmitLast9, kPFix_EMPTY }, - { kPFix_SP, kUppercaseFirst, kPFix_COMMASP }, - { kPFix_EMPTY, kUppercaseFirst, kPFix_DQUOT }, - { kPFix_DOT, kIdentity, kPFix_OPEN }, - { kPFix_EMPTY, kUppercaseAll, kPFix_SP }, - { kPFix_EMPTY, kUppercaseFirst, kPFix_DQUOTGT }, - { kPFix_EMPTY, kIdentity, kPFix_EQDQUOT }, - { kPFix_SP, kIdentity, kPFix_DOT }, - { kPFix_DOTcomSLASH, kIdentity, kPFix_EMPTY }, - { kPFix_SPtheSP, kIdentity, kPFix_SPofSPtheSP }, - { kPFix_EMPTY, kUppercaseFirst, kPFix_SQUOT }, - { kPFix_EMPTY, kIdentity, kPFix_DOTSPThisSP }, - { kPFix_EMPTY, kIdentity, kPFix_COMMA }, - { kPFix_DOT, kIdentity, kPFix_SP }, - { kPFix_EMPTY, kUppercaseFirst, kPFix_OPEN }, - { kPFix_EMPTY, kUppercaseFirst, kPFix_DOT }, - { kPFix_EMPTY, kIdentity, kPFix_SPnotSP }, - { kPFix_SP, kIdentity, kPFix_EQDQUOT }, - { kPFix_EMPTY, kIdentity, kPFix_erSP }, - { kPFix_SP, kUppercaseAll, kPFix_SP }, - { kPFix_EMPTY, kIdentity, kPFix_alSP }, - { kPFix_SP, kUppercaseAll, kPFix_EMPTY }, - { kPFix_EMPTY, kIdentity, kPFix_EQSQUOT }, - { kPFix_EMPTY, kUppercaseAll, kPFix_DQUOT }, - { kPFix_EMPTY, kUppercaseFirst, kPFix_DOTSP }, - { kPFix_SP, kIdentity, kPFix_OPEN }, - { kPFix_EMPTY, kIdentity, kPFix_fulSP }, - { kPFix_SP, kUppercaseFirst, kPFix_DOTSP }, - { kPFix_EMPTY, kIdentity, kPFix_iveSP }, - { kPFix_EMPTY, kIdentity, kPFix_lessSP }, - { kPFix_EMPTY, kUppercaseAll, kPFix_SQUOT }, - { kPFix_EMPTY, kIdentity, kPFix_estSP }, - { kPFix_SP, kUppercaseFirst, kPFix_DOT }, - { kPFix_EMPTY, kUppercaseAll, kPFix_DQUOTGT }, - { kPFix_SP, kIdentity, kPFix_EQSQUOT }, - { kPFix_EMPTY, kUppercaseFirst, kPFix_COMMA }, - { kPFix_EMPTY, kIdentity, kPFix_izeSP }, - { kPFix_EMPTY, kUppercaseAll, kPFix_DOT }, - { kPFix_NBSP, kIdentity, kPFix_EMPTY }, - { kPFix_SP, kIdentity, kPFix_COMMA }, - { kPFix_EMPTY, kUppercaseFirst, 
kPFix_EQDQUOT }, - { kPFix_EMPTY, kUppercaseAll, kPFix_EQDQUOT }, - { kPFix_EMPTY, kIdentity, kPFix_ousSP }, - { kPFix_EMPTY, kUppercaseAll, kPFix_COMMASP }, - { kPFix_EMPTY, kUppercaseFirst, kPFix_EQSQUOT }, - { kPFix_SP, kUppercaseFirst, kPFix_COMMA }, - { kPFix_SP, kUppercaseAll, kPFix_EQDQUOT }, - { kPFix_SP, kUppercaseAll, kPFix_COMMASP }, - { kPFix_EMPTY, kUppercaseAll, kPFix_COMMA }, - { kPFix_EMPTY, kUppercaseAll, kPFix_OPEN }, - { kPFix_EMPTY, kUppercaseAll, kPFix_DOTSP }, - { kPFix_SP, kUppercaseAll, kPFix_DOT }, - { kPFix_EMPTY, kUppercaseAll, kPFix_EQSQUOT }, - { kPFix_SP, kUppercaseAll, kPFix_DOTSP }, - { kPFix_SP, kUppercaseFirst, kPFix_EQDQUOT }, - { kPFix_SP, kUppercaseAll, kPFix_EQSQUOT }, - { kPFix_SP, kUppercaseFirst, kPFix_EQSQUOT }, -}; - -static const int kNumTransforms = sizeof(kTransforms) / sizeof(kTransforms[0]); - -static int ToUpperCase(uint8_t* p) { - if (p[0] < 0xc0) { - if (p[0] >= 'a' && p[0] <= 'z') { - p[0] ^= 32; - } - return 1; - } - /* An overly simplified uppercasing model for UTF-8. */ - if (p[0] < 0xe0) { - p[1] ^= 32; - return 2; - } - /* An arbitrary transform for three byte characters. 
*/ - p[2] ^= 5; - return 3; -} - -static BROTLI_NOINLINE int BrotliTransformDictionaryWord( - uint8_t* dst, const uint8_t* word, int len, int transform) { - int idx = 0; - { - const char* prefix = &kPrefixSuffix[kTransforms[transform].prefix_id]; - while (*prefix) { dst[idx++] = (uint8_t)*prefix++; } - } - { - const int t = kTransforms[transform].transform; - int i = 0; - int skip = t - (kOmitFirst1 - 1); - if (skip > 0) { - word += skip; - len -= skip; - } else if (t <= kOmitLast9) { - len -= t; - } - while (i < len) { dst[idx++] = word[i++]; } - if (t == kUppercaseFirst) { - ToUpperCase(&dst[idx - len]); - } else if (t == kUppercaseAll) { - uint8_t* uppercase = &dst[idx - len]; - while (len > 0) { - int step = ToUpperCase(uppercase); - uppercase += step; - len -= step; - } - } - } - { - const char* suffix = &kPrefixSuffix[kTransforms[transform].suffix_id]; - while (*suffix) { dst[idx++] = (uint8_t)*suffix++; } - return idx; - } -} - -#if defined(__cplusplus) || defined(c_plusplus) -} /* extern "C" */ -#endif - -#endif /* BROTLI_DEC_TRANSFORM_H_ */ diff --git a/c/enc/backward_references.c b/c/enc/backward_references.c index cce0cd4..62ecea7 100644 --- a/c/enc/backward_references.c +++ b/c/enc/backward_references.c @@ -102,23 +102,16 @@ static BROTLI_INLINE size_t ComputeDistanceCode(size_t distance, #undef CAT #undef EXPAND_CAT -void BrotliCreateBackwardReferences(const BrotliDictionary* dictionary, - size_t num_bytes, - size_t position, - const uint8_t* ringbuffer, - size_t ringbuffer_mask, - const BrotliEncoderParams* params, - HasherHandle hasher, - int* dist_cache, - size_t* last_insert_len, - Command* commands, - size_t* num_commands, - size_t* num_literals) { +void BrotliCreateBackwardReferences( + size_t num_bytes, size_t position, const uint8_t* ringbuffer, + size_t ringbuffer_mask, const BrotliEncoderParams* params, + HasherHandle hasher, int* dist_cache, size_t* last_insert_len, + Command* commands, size_t* num_commands, size_t* num_literals) { switch 
(params->hasher.type) { #define CASE_(N) \ case N: \ - CreateBackwardReferencesNH ## N(dictionary, \ - kStaticDictionaryHash, num_bytes, position, ringbuffer, \ + CreateBackwardReferencesNH ## N( \ + num_bytes, position, ringbuffer, \ ringbuffer_mask, params, hasher, dist_cache, \ last_insert_len, commands, num_commands, num_literals); \ return; diff --git a/c/enc/backward_references.h b/c/enc/backward_references.h index 631c2f6..3a41466 100644 --- a/c/enc/backward_references.h +++ b/c/enc/backward_references.h @@ -26,7 +26,6 @@ extern "C" { CreateBackwardReferences calls, and must be incremented by the amount written by this call. */ BROTLI_INTERNAL void BrotliCreateBackwardReferences( - const BrotliDictionary* dictionary, size_t num_bytes, size_t position, const uint8_t* ringbuffer, size_t ringbuffer_mask, const BrotliEncoderParams* params, HasherHandle hasher, int* dist_cache, size_t* last_insert_len, diff --git a/c/enc/backward_references_hq.c b/c/enc/backward_references_hq.c index b05cb4f..f2f9918 100644 --- a/c/enc/backward_references_hq.c +++ b/c/enc/backward_references_hq.c @@ -25,6 +25,10 @@ extern "C" { #endif +#define BROTLI_SIMPLE_DISTANCE_ALPHABET_SIZE ( \ + BROTLI_NUM_DISTANCE_SHORT_CODES + (2 * BROTLI_LARGE_MAX_DISTANCE_BITS)) +/* BROTLI_SIMPLE_DISTANCE_ALPHABET_SIZE == 74 */ + static const float kInfinity = 1.7e38f; /* ~= 2 ^ 127 */ static const uint32_t kDistanceCacheIndex[] = { @@ -39,40 +43,40 @@ void BrotliInitZopfliNodes(ZopfliNode* array, size_t length) { size_t i; stub.length = 1; stub.distance = 0; - stub.insert_length = 0; + stub.dcode_insert_length = 0; stub.u.cost = kInfinity; for (i = 0; i < length; ++i) array[i] = stub; } static BROTLI_INLINE uint32_t ZopfliNodeCopyLength(const ZopfliNode* self) { - return self->length & 0xffffff; + return self->length & 0x1FFFFFF; } static BROTLI_INLINE uint32_t ZopfliNodeLengthCode(const ZopfliNode* self) { - const uint32_t modifier = self->length >> 24; + const uint32_t modifier = self->length >> 25; 
return ZopfliNodeCopyLength(self) + 9u - modifier; } static BROTLI_INLINE uint32_t ZopfliNodeCopyDistance(const ZopfliNode* self) { - return self->distance & 0x7ffffff; + return self->distance; } static BROTLI_INLINE uint32_t ZopfliNodeDistanceCode(const ZopfliNode* self) { - const uint32_t short_code = self->distance >> 27; + const uint32_t short_code = self->dcode_insert_length >> 27; return short_code == 0 ? ZopfliNodeCopyDistance(self) + BROTLI_NUM_DISTANCE_SHORT_CODES - 1 : short_code - 1; } static BROTLI_INLINE uint32_t ZopfliNodeCommandLength(const ZopfliNode* self) { - return ZopfliNodeCopyLength(self) + self->insert_length; + return ZopfliNodeCopyLength(self) + (self->dcode_insert_length & 0x7FFFFFF); } /* Histogram based cost model for zopflification. */ typedef struct ZopfliCostModel { /* The insert and copy length symbols. */ float cost_cmd_[BROTLI_NUM_COMMAND_SYMBOLS]; - float cost_dist_[BROTLI_NUM_DISTANCE_SYMBOLS]; + float cost_dist_[BROTLI_SIMPLE_DISTANCE_ALPHABET_SIZE]; /* Cumulative costs of literals per position in the stream. 
*/ float* literal_costs_; float min_cost_cmd_; @@ -91,17 +95,26 @@ static void CleanupZopfliCostModel(MemoryManager* m, ZopfliCostModel* self) { } static void SetCost(const uint32_t* histogram, size_t histogram_size, - float* cost) { + BROTLI_BOOL literal_histogram, float* cost) { size_t sum = 0; + size_t missing_symbol_sum; float log2sum; + float missing_symbol_cost; size_t i; for (i = 0; i < histogram_size; i++) { sum += histogram[i]; } log2sum = (float)FastLog2(sum); + missing_symbol_sum = sum; + if (!literal_histogram) { + for (i = 0; i < histogram_size; i++) { + if (histogram[i] == 0) missing_symbol_sum++; + } + } + missing_symbol_cost = (float)FastLog2(missing_symbol_sum) + 2; for (i = 0; i < histogram_size; i++) { if (histogram[i] == 0) { - cost[i] = log2sum + 2; + cost[i] = missing_symbol_cost; continue; } @@ -122,7 +135,7 @@ static void ZopfliCostModelSetFromCommands(ZopfliCostModel* self, size_t last_insert_len) { uint32_t histogram_literal[BROTLI_NUM_LITERAL_SYMBOLS]; uint32_t histogram_cmd[BROTLI_NUM_COMMAND_SYMBOLS]; - uint32_t histogram_dist[BROTLI_NUM_DISTANCE_SYMBOLS]; + uint32_t histogram_dist[BROTLI_SIMPLE_DISTANCE_ALPHABET_SIZE]; float cost_literal[BROTLI_NUM_LITERAL_SYMBOLS]; size_t pos = position - last_insert_len; float min_cost_cmd = kInfinity; @@ -136,7 +149,7 @@ static void ZopfliCostModelSetFromCommands(ZopfliCostModel* self, for (i = 0; i < num_commands; i++) { size_t inslength = commands[i].insert_len_; size_t copylength = CommandCopyLen(&commands[i]); - size_t distcode = commands[i].dist_prefix_; + size_t distcode = commands[i].dist_prefix_ & 0x3FF; size_t cmdcode = commands[i].cmd_prefix_; size_t j; @@ -150,9 +163,12 @@ static void ZopfliCostModelSetFromCommands(ZopfliCostModel* self, pos += inslength + copylength; } - SetCost(histogram_literal, BROTLI_NUM_LITERAL_SYMBOLS, cost_literal); - SetCost(histogram_cmd, BROTLI_NUM_COMMAND_SYMBOLS, cost_cmd); - SetCost(histogram_dist, BROTLI_NUM_DISTANCE_SYMBOLS, self->cost_dist_); + 
SetCost(histogram_literal, BROTLI_NUM_LITERAL_SYMBOLS, BROTLI_TRUE, + cost_literal); + SetCost(histogram_cmd, BROTLI_NUM_COMMAND_SYMBOLS, BROTLI_FALSE, + cost_cmd); + SetCost(histogram_dist, BROTLI_SIMPLE_DISTANCE_ALPHABET_SIZE, BROTLI_FALSE, + self->cost_dist_); for (i = 0; i < BROTLI_NUM_COMMAND_SYMBOLS; ++i) { min_cost_cmd = BROTLI_MIN(float, min_cost_cmd, cost_cmd[i]); @@ -161,11 +177,14 @@ static void ZopfliCostModelSetFromCommands(ZopfliCostModel* self, { float* literal_costs = self->literal_costs_; + float literal_carry = 0.0; size_t num_bytes = self->num_bytes_; literal_costs[0] = 0.0; for (i = 0; i < num_bytes; ++i) { - literal_costs[i + 1] = literal_costs[i] + + literal_carry += cost_literal[ringbuffer[(position + i) & ringbuffer_mask]]; + literal_costs[i + 1] = literal_costs[i] + literal_carry; + literal_carry -= literal_costs[i + 1] - literal_costs[i]; } } } @@ -175,6 +194,7 @@ static void ZopfliCostModelSetFromLiteralCosts(ZopfliCostModel* self, const uint8_t* ringbuffer, size_t ringbuffer_mask) { float* literal_costs = self->literal_costs_; + float literal_carry = 0.0; float* cost_dist = self->cost_dist_; float* cost_cmd = self->cost_cmd_; size_t num_bytes = self->num_bytes_; @@ -183,12 +203,14 @@ static void ZopfliCostModelSetFromLiteralCosts(ZopfliCostModel* self, ringbuffer, &literal_costs[1]); literal_costs[0] = 0.0; for (i = 0; i < num_bytes; ++i) { - literal_costs[i + 1] += literal_costs[i]; + literal_carry += literal_costs[i + 1]; + literal_costs[i + 1] = literal_costs[i] + literal_carry; + literal_carry -= literal_costs[i + 1] - literal_costs[i]; } for (i = 0; i < BROTLI_NUM_COMMAND_SYMBOLS; ++i) { cost_cmd[i] = (float)FastLog2(11 + (uint32_t)i); } - for (i = 0; i < BROTLI_NUM_DISTANCE_SYMBOLS; ++i) { + for (i = 0; i < BROTLI_SIMPLE_DISTANCE_ALPHABET_SIZE; ++i) { cost_dist[i] = (float)FastLog2(20 + (uint32_t)i); } self->min_cost_cmd_ = (float)FastLog2(11); @@ -221,9 +243,10 @@ static BROTLI_INLINE void UpdateZopfliNode(ZopfliNode* nodes, 
size_t pos, size_t start_pos, size_t len, size_t len_code, size_t dist, size_t short_code, float cost) { ZopfliNode* next = &nodes[pos + len]; - next->length = (uint32_t)(len | ((len + 9u - len_code) << 24)); - next->distance = (uint32_t)(dist | (short_code << 27)); - next->insert_length = (uint32_t)(pos - start_pos); + next->length = (uint32_t)(len | ((len + 9u - len_code) << 25)); + next->distance = (uint32_t)dist; + next->dcode_insert_length = (uint32_t)( + (short_code << 27) | (pos - start_pos)); next->u.cost = cost; } @@ -303,7 +326,7 @@ static uint32_t ComputeDistanceShortcut(const size_t block_start, const size_t gap, const ZopfliNode* nodes) { const size_t clen = ZopfliNodeCopyLength(&nodes[pos]); - const size_t ilen = nodes[pos].insert_length; + const size_t ilen = nodes[pos].dcode_insert_length & 0x7FFFFFF; const size_t dist = ZopfliNodeCopyDistance(&nodes[pos]); /* Since |block_start + pos| is the end position of the command, the copy part starts from |block_start + pos - clen|. Distances that are greater than @@ -335,7 +358,7 @@ static void ComputeDistanceCache(const size_t pos, int idx = 0; size_t p = nodes[pos].u.shortcut; while (idx < 4 && p > 0) { - const size_t ilen = nodes[p].insert_length; + const size_t ilen = nodes[p].dcode_insert_length & 0x7FFFFFF; const size_t clen = ZopfliNodeCopyLength(&nodes[p]); const size_t dist = ZopfliNodeCopyDistance(&nodes[p]); dist_cache[idx++] = (int)dist; @@ -483,9 +506,9 @@ static size_t UpdateNodes( float dist_cost; size_t max_match_len; PrefixEncodeCopyDistance(dist_code, 0, 0, &dist_symbol, &distextra); - distnumextra = distextra >> 24; + distnumextra = dist_symbol >> 10; dist_cost = base_cost + (float)distnumextra + - ZopfliCostModelGetDistanceCost(model, dist_symbol); + ZopfliCostModelGetDistanceCost(model, dist_symbol & 0x3FF); /* Try all copy lengths up until the maximum copy length corresponding to this distance. 
If the distance refers to the static dictionary, or @@ -517,7 +540,8 @@ static size_t ComputeShortestPathFromNodes(size_t num_bytes, ZopfliNode* nodes) { size_t index = num_bytes; size_t num_commands = 0; - while (nodes[index].insert_length == 0 && nodes[index].length == 1) --index; + while ((nodes[index].dcode_insert_length & 0x7FFFFFF) == 0 && + nodes[index].length == 1) --index; nodes[index].u.next = BROTLI_UINT32_MAX; while (index != 0) { size_t len = ZopfliNodeCommandLength(&nodes[index]); @@ -546,7 +570,7 @@ void BrotliZopfliCreateCommands(const size_t num_bytes, for (i = 0; offset != BROTLI_UINT32_MAX; i++) { const ZopfliNode* next = &nodes[pos + offset]; size_t copy_length = ZopfliNodeCopyLength(next); - size_t insert_length = next->insert_length; + size_t insert_length = next->dcode_insert_length & 0x7FFFFFF; pos += insert_length; offset = next->u.next; if (i == 0) { @@ -624,7 +648,6 @@ static size_t ZopfliIterate(size_t num_bytes, /* REQUIRES: nodes != NULL and len(nodes) >= num_bytes + 1 */ size_t BrotliZopfliComputeShortestPath(MemoryManager* m, - const BrotliDictionary* dictionary, size_t num_bytes, size_t position, const uint8_t* ringbuffer, size_t ringbuffer_mask, const BrotliEncoderParams* params, const size_t max_backward_limit, const int* dist_cache, HasherHandle hasher, @@ -649,9 +672,9 @@ size_t BrotliZopfliComputeShortestPath(MemoryManager* m, const size_t pos = position + i; const size_t max_distance = BROTLI_MIN(size_t, pos, max_backward_limit); size_t skip; - size_t num_matches = FindAllMatchesH10(hasher, dictionary, ringbuffer, - ringbuffer_mask, pos, num_bytes - i, max_distance, gap, params, - &matches[lz_matches_offset]); + size_t num_matches = FindAllMatchesH10(hasher, &params->dictionary, + ringbuffer, ringbuffer_mask, pos, num_bytes - i, max_distance, gap, + params, &matches[lz_matches_offset]); if (num_matches > 0 && BackwardMatchLength(&matches[num_matches - 1]) > max_zopfli_len) { matches[0] = matches[num_matches - 1]; @@ -683,7 +706,6
@@ size_t BrotliZopfliComputeShortestPath(MemoryManager* m, } void BrotliCreateZopfliBackwardReferences(MemoryManager* m, - const BrotliDictionary* dictionary, size_t num_bytes, size_t position, const uint8_t* ringbuffer, size_t ringbuffer_mask, const BrotliEncoderParams* params, HasherHandle hasher, int* dist_cache, size_t* last_insert_len, @@ -693,7 +715,7 @@ void BrotliCreateZopfliBackwardReferences(MemoryManager* m, nodes = BROTLI_ALLOC(m, ZopfliNode, num_bytes + 1); if (BROTLI_IS_OOM(m)) return; BrotliInitZopfliNodes(nodes, num_bytes + 1); - *num_commands += BrotliZopfliComputeShortestPath(m, dictionary, + *num_commands += BrotliZopfliComputeShortestPath(m, num_bytes, position, ringbuffer, ringbuffer_mask, params, max_backward_limit, dist_cache, hasher, nodes); if (BROTLI_IS_OOM(m)) return; @@ -703,7 +725,6 @@ void BrotliCreateZopfliBackwardReferences(MemoryManager* m, } void BrotliCreateHqZopfliBackwardReferences(MemoryManager* m, - const BrotliDictionary* dictionary, size_t num_bytes, size_t position, const uint8_t* ringbuffer, size_t ringbuffer_mask, const BrotliEncoderParams* params, HasherHandle hasher, int* dist_cache, size_t* last_insert_len, @@ -736,8 +757,8 @@ void BrotliCreateHqZopfliBackwardReferences(MemoryManager* m, BROTLI_ENSURE_CAPACITY(m, BackwardMatch, matches, matches_size, cur_match_pos + MAX_NUM_MATCHES_H10 + shadow_matches); if (BROTLI_IS_OOM(m)) return; - num_found_matches = FindAllMatchesH10(hasher, dictionary, - ringbuffer, ringbuffer_mask, pos, max_length, + num_found_matches = FindAllMatchesH10(hasher, + &params->dictionary, ringbuffer, ringbuffer_mask, pos, max_length, max_distance, gap, params, &matches[cur_match_pos + shadow_matches]); cur_match_end = cur_match_pos + num_found_matches; for (j = cur_match_pos; j + 1 < cur_match_end; ++j) { diff --git a/c/enc/backward_references_hq.h b/c/enc/backward_references_hq.h index cc19544..7c38bd6 100644 --- a/c/enc/backward_references_hq.h +++ b/c/enc/backward_references_hq.h @@ -23,29 +23,26
@@ extern "C" { #endif BROTLI_INTERNAL void BrotliCreateZopfliBackwardReferences(MemoryManager* m, - const BrotliDictionary* dictionary, size_t num_bytes, size_t position, const uint8_t* ringbuffer, size_t ringbuffer_mask, const BrotliEncoderParams* params, HasherHandle hasher, int* dist_cache, size_t* last_insert_len, Command* commands, size_t* num_commands, size_t* num_literals); BROTLI_INTERNAL void BrotliCreateHqZopfliBackwardReferences(MemoryManager* m, - const BrotliDictionary* dictionary, size_t num_bytes, size_t position, const uint8_t* ringbuffer, size_t ringbuffer_mask, const BrotliEncoderParams* params, HasherHandle hasher, int* dist_cache, size_t* last_insert_len, Command* commands, size_t* num_commands, size_t* num_literals); typedef struct ZopfliNode { - /* best length to get up to this byte (not including this byte itself) - highest 8 bit is used to reconstruct the length code */ + /* Best length to get up to this byte (not including this byte itself) + highest 7 bit is used to reconstruct the length code. */ uint32_t length; - /* distance associated with the length; highest 5 bits contain distance - short code + 1 (or zero if no short code); this way only distances shorter - than 128MiB are allowed here */ + /* Distance associated with the length. */ uint32_t distance; - /* number of literal inserts before this copy */ - uint32_t insert_length; + /* Number of literal inserts before this copy; highest 5 bits contain + distance short code + 1 (or zero if no short code). */ + uint32_t dcode_insert_length; /* This union holds information used by dynamic-programming. During forward pass |cost| it used to store the goal function. 
When node is processed its @@ -78,7 +75,6 @@ BROTLI_INTERNAL void BrotliInitZopfliNodes(ZopfliNode* array, size_t length); (2) nodes[i].command_length() <= i and (3) nodes[i - nodes[i].command_length()].cost < kInfinity */ BROTLI_INTERNAL size_t BrotliZopfliComputeShortestPath(MemoryManager* m, - const BrotliDictionary* dictionary, size_t num_bytes, size_t position, const uint8_t* ringbuffer, size_t ringbuffer_mask, const BrotliEncoderParams* params, const size_t max_backward_limit, const int* dist_cache, HasherHandle hasher, diff --git a/c/enc/backward_references_inc.h b/c/enc/backward_references_inc.h index 0a715b2..967545d 100644 --- a/c/enc/backward_references_inc.h +++ b/c/enc/backward_references_inc.h @@ -8,8 +8,6 @@ /* template parameters: EXPORT_FN, FN */ static BROTLI_NOINLINE void EXPORT_FN(CreateBackwardReferences)( - const BrotliDictionary* dictionary, - const uint16_t* dictionary_hash, size_t num_bytes, size_t position, const uint8_t* ringbuffer, size_t ringbuffer_mask, const BrotliEncoderParams* params, HasherHandle hasher, int* dist_cache, @@ -43,9 +41,10 @@ static BROTLI_NOINLINE void EXPORT_FN(CreateBackwardReferences)( sr.len_code_delta = 0; sr.distance = 0; sr.score = kMinScore; - FN(FindLongestMatch)(hasher, dictionary, dictionary_hash, ringbuffer, - ringbuffer_mask, dist_cache, position, - max_length, max_distance, gap, &sr); + FN(FindLongestMatch)(hasher, &params->dictionary, + ringbuffer, ringbuffer_mask, dist_cache, position, + max_length, max_distance, gap, + params->dist.max_distance, &sr); if (sr.score > kMinScore) { /* Found a match. Let's look for something even better ahead.
*/ int delayed_backward_references_in_row = 0; @@ -59,9 +58,9 @@ static BROTLI_NOINLINE void EXPORT_FN(CreateBackwardReferences)( sr2.distance = 0; sr2.score = kMinScore; max_distance = BROTLI_MIN(size_t, position + 1, max_backward_limit); - FN(FindLongestMatch)(hasher, dictionary, dictionary_hash, + FN(FindLongestMatch)(hasher, &params->dictionary, ringbuffer, ringbuffer_mask, dist_cache, position + 1, max_length, - max_distance, gap, &sr2); + max_distance, gap, params->dist.max_distance, &sr2); if (sr2.score >= sr.score + cost_diff_lazy) { /* Ok, let's just write one byte for now and start a match from the next byte. */ diff --git a/c/enc/bit_cost.h b/c/enc/bit_cost.h index e8b7013..6586469 100644 --- a/c/enc/bit_cost.h +++ b/c/enc/bit_cost.h @@ -18,11 +18,11 @@ extern "C" { #endif -static BROTLI_INLINE double ShannonEntropy(const uint32_t *population, - size_t size, size_t *total) { +static BROTLI_INLINE double ShannonEntropy( + const uint32_t* population, size_t size, size_t* total) { size_t sum = 0; double retval = 0; - const uint32_t *population_end = population + size; + const uint32_t* population_end = population + size; size_t p; if (size & 1) { goto odd_number_of_elements_left; @@ -42,7 +42,7 @@ static BROTLI_INLINE double ShannonEntropy(const uint32_t *population, } static BROTLI_INLINE double BitsEntropy( - const uint32_t *population, size_t size) { + const uint32_t* population, size_t size) { size_t sum; double retval = ShannonEntropy(population, size, &sum); if (retval < sum) { diff --git a/c/enc/block_encoder_inc.h b/c/enc/block_encoder_inc.h index 2a08f90..8cbd5ea 100644 --- a/c/enc/block_encoder_inc.h +++ b/c/enc/block_encoder_inc.h @@ -13,9 +13,9 @@ stream.
*/ static void FN(BuildAndStoreEntropyCodes)(MemoryManager* m, BlockEncoder* self, const HistogramType* histograms, const size_t histograms_size, - HuffmanTree* tree, size_t* storage_ix, uint8_t* storage) { - const size_t alphabet_size = self->alphabet_size_; - const size_t table_size = histograms_size * alphabet_size; + const size_t alphabet_size, HuffmanTree* tree, + size_t* storage_ix, uint8_t* storage) { + const size_t table_size = histograms_size * self->histogram_length_; self->depths_ = BROTLI_ALLOC(m, uint8_t, table_size); self->bits_ = BROTLI_ALLOC(m, uint16_t, table_size); if (BROTLI_IS_OOM(m)) return; @@ -23,9 +23,10 @@ static void FN(BuildAndStoreEntropyCodes)(MemoryManager* m, BlockEncoder* self, { size_t i; for (i = 0; i < histograms_size; ++i) { - size_t ix = i * alphabet_size; - BuildAndStoreHuffmanTree(&histograms[i].data_[0], alphabet_size, tree, - &self->depths_[ix], &self->bits_[ix], storage_ix, storage); + size_t ix = i * self->histogram_length_; + BuildAndStoreHuffmanTree(&histograms[i].data_[0], self->histogram_length_, + alphabet_size, tree, &self->depths_[ix], &self->bits_[ix], + storage_ix, storage); } } } diff --git a/c/enc/block_splitter.c b/c/enc/block_splitter.c index 6362211..d308eca 100644 --- a/c/enc/block_splitter.c +++ b/c/enc/block_splitter.c @@ -174,7 +174,7 @@ void BrotliSplitBlock(MemoryManager* m, for (i = 0; i < num_commands; ++i) { const Command* cmd = &cmds[i]; if (CommandCopyLen(cmd) && cmd->cmd_prefix_ >= 128) { - distance_prefixes[j++] = cmd->dist_prefix_; + distance_prefixes[j++] = cmd->dist_prefix_ & 0x3FF; } } /* Create the block split on the array of distance prefixes. 
*/ diff --git a/c/enc/block_splitter_inc.h b/c/enc/block_splitter_inc.h index 5712572..023712b 100644 --- a/c/enc/block_splitter_inc.h +++ b/c/enc/block_splitter_inc.h @@ -70,7 +70,7 @@ static size_t FN(FindBlocks)(const DataType* data, const size_t length, double* insert_cost, double* cost, uint8_t* switch_signal, - uint8_t *block_id) { + uint8_t* block_id) { const size_t data_size = FN(HistogramDataSize)(); const size_t bitmaplen = (num_histograms + 7) >> 3; size_t num_blocks = 1; diff --git a/c/enc/brotli_bit_stream.c b/c/enc/brotli_bit_stream.c index cd9c594..aaf2dad 100644 --- a/c/enc/brotli_bit_stream.c +++ b/c/enc/brotli_bit_stream.c @@ -13,12 +13,13 @@ #include <string.h> /* memcpy, memset */ #include "../common/constants.h" +#include "../common/context.h" #include "../common/platform.h" #include <brotli/types.h> -#include "./context.h" #include "./entropy_encode.h" #include "./entropy_encode_static.h" #include "./fast_log.h" +#include "./histogram.h" #include "./memory.h" #include "./write_bits.h" @@ -27,12 +28,11 @@ extern "C" { #endif #define MAX_HUFFMAN_TREE_SIZE (2 * BROTLI_NUM_COMMAND_SYMBOLS + 1) -/* The size of Huffman dictionary for distances assuming that NPOSTFIX = 0 and - NDIRECT = 0. */ -#define SIMPLE_DISTANCE_ALPHABET_SIZE (BROTLI_NUM_DISTANCE_SHORT_CODES + \ - (2 * BROTLI_MAX_DISTANCE_BITS)) -/* SIMPLE_DISTANCE_ALPHABET_SIZE == 64 */ -#define SIMPLE_DISTANCE_ALPHABET_BITS 6 +/* The maximum size of Huffman dictionary for distances assuming that + NPOSTFIX = 0 and NDIRECT = 0.
*/ +#define MAX_SIMPLE_DISTANCE_ALPHABET_SIZE \ + BROTLI_DISTANCE_ALPHABET_SIZE(0, 0, BROTLI_LARGE_MAX_DISTANCE_BITS) +/* MAX_SIMPLE_DISTANCE_ALPHABET_SIZE == 140 */ /* Represents the range of values belonging to a prefix code: [offset, offset + 2^nbits) */ @@ -258,7 +258,7 @@ static void StoreSimpleHuffmanTree(const uint8_t* depths, size_t symbols[4], size_t num_symbols, size_t max_bits, - size_t *storage_ix, uint8_t *storage) { + size_t* storage_ix, uint8_t* storage) { /* value of 1 indicates a simple Huffman code */ BrotliWriteBits(2, 1, storage_ix, storage); BrotliWriteBits(2, num_symbols - 1, storage_ix, storage); /* NSYM - 1 */ @@ -297,7 +297,7 @@ static void StoreSimpleHuffmanTree(const uint8_t* depths, depths = symbol depths */ void BrotliStoreHuffmanTree(const uint8_t* depths, size_t num, HuffmanTree* tree, - size_t *storage_ix, uint8_t *storage) { + size_t* storage_ix, uint8_t* storage) { /* Write the Huffman tree into the brotli-representation. The command alphabet is the largest, so this allocation will fit all alphabets. */ @@ -360,8 +360,9 @@ void BrotliStoreHuffmanTree(const uint8_t* depths, size_t num, /* Builds a Huffman tree from histogram[0:length] into depth[0:length] and bits[0:length] and stores the encoded tree to the bit stream. 
*/ -static void BuildAndStoreHuffmanTree(const uint32_t *histogram, - const size_t length, +static void BuildAndStoreHuffmanTree(const uint32_t* histogram, + const size_t histogram_length, + const size_t alphabet_size, HuffmanTree* tree, uint8_t* depth, uint16_t* bits, @@ -371,7 +372,7 @@ static void BuildAndStoreHuffmanTree(const uint32_t *histogram, size_t s4[4] = { 0 }; size_t i; size_t max_bits = 0; - for (i = 0; i < length; i++) { + for (i = 0; i < histogram_length; i++) { if (histogram[i]) { if (count < 4) { s4[count] = i; @@ -383,7 +384,7 @@ static void BuildAndStoreHuffmanTree(const uint32_t *histogram, } { - size_t max_bits_counter = length - 1; + size_t max_bits_counter = alphabet_size - 1; while (max_bits_counter) { max_bits_counter >>= 1; ++max_bits; @@ -398,14 +399,14 @@ static void BuildAndStoreHuffmanTree(const uint32_t *histogram, return; } - memset(depth, 0, length * sizeof(depth[0])); - BrotliCreateHuffmanTree(histogram, length, 15, tree, depth); - BrotliConvertBitDepthsToSymbols(depth, length, bits); + memset(depth, 0, histogram_length * sizeof(depth[0])); + BrotliCreateHuffmanTree(histogram, histogram_length, 15, tree, depth); + BrotliConvertBitDepthsToSymbols(depth, histogram_length, bits); if (count <= 4) { StoreSimpleHuffmanTree(depth, s4, count, max_bits, storage_ix, storage); } else { - BrotliStoreHuffmanTree(depth, length, tree, storage_ix, storage); + BrotliStoreHuffmanTree(depth, histogram_length, tree, storage_ix, storage); } } @@ -729,6 +730,7 @@ static void EncodeContextMap(MemoryManager* m, } } BuildAndStoreHuffmanTree(histogram, num_clusters + max_run_length_prefix, + num_clusters + max_run_length_prefix, tree, depths, bits, storage_ix, storage); for (i = 0; i < num_rle_symbols; ++i) { const uint32_t rle_symbol = rle_symbols[i] & kSymbolMask; @@ -788,10 +790,11 @@ static void BuildAndStoreBlockSplitCode(const uint8_t* types, } StoreVarLenUint8(num_types - 1, storage_ix, storage); if (num_types > 1) { /* TODO: else? 
could StoreBlockSwitch occur? */ - BuildAndStoreHuffmanTree(&type_histo[0], num_types + 2, tree, + BuildAndStoreHuffmanTree(&type_histo[0], num_types + 2, num_types + 2, tree, &code->type_depths[0], &code->type_bits[0], storage_ix, storage); BuildAndStoreHuffmanTree(&length_histo[0], BROTLI_NUM_BLOCK_LEN_SYMBOLS, + BROTLI_NUM_BLOCK_LEN_SYMBOLS, tree, &code->length_depths[0], &code->length_bits[0], storage_ix, storage); StoreBlockSwitch(code, lengths[0], types[0], 1, storage_ix, storage); @@ -822,8 +825,8 @@ static void StoreTrivialContextMap(size_t num_types, for (i = context_bits; i < alphabet_size; ++i) { histogram[i] = 1; } - BuildAndStoreHuffmanTree(histogram, alphabet_size, tree, - depths, bits, storage_ix, storage); + BuildAndStoreHuffmanTree(histogram, alphabet_size, alphabet_size, + tree, depths, bits, storage_ix, storage); for (i = 0; i < num_types; ++i) { size_t code = (i == 0 ? 0 : i + context_bits - 1); BrotliWriteBits(depths[code], bits[code], storage_ix, storage); @@ -838,7 +841,7 @@ static void StoreTrivialContextMap(size_t num_types, /* Manages the encoding of one block category (literal, command or distance). */ typedef struct BlockEncoder { - size_t alphabet_size_; + size_t histogram_length_; size_t num_block_types_; const uint8_t* block_types_; /* Not owned. */ const uint32_t* block_lengths_; /* Not owned. 
*/ @@ -851,10 +854,10 @@ typedef struct BlockEncoder { uint16_t* bits_; } BlockEncoder; -static void InitBlockEncoder(BlockEncoder* self, size_t alphabet_size, +static void InitBlockEncoder(BlockEncoder* self, size_t histogram_length, size_t num_block_types, const uint8_t* block_types, const uint32_t* block_lengths, const size_t num_blocks) { - self->alphabet_size_ = alphabet_size; + self->histogram_length_ = histogram_length; self->num_block_types_ = num_block_types; self->block_types_ = block_types; self->block_lengths_ = block_lengths; @@ -890,7 +893,7 @@ static void StoreSymbol(BlockEncoder* self, size_t symbol, size_t* storage_ix, uint32_t block_len = self->block_lengths_[block_ix]; uint8_t block_type = self->block_types_[block_ix]; self->block_len_ = block_len; - self->entropy_ix_ = block_type * self->alphabet_size_; + self->entropy_ix_ = block_type * self->histogram_length_; StoreBlockSwitch(&self->block_split_code_, block_len, block_type, 0, storage_ix, storage); } @@ -919,7 +922,7 @@ static void StoreSymbolWithContext(BlockEncoder* self, size_t symbol, --self->block_len_; { size_t histo_ix = context_map[self->entropy_ix_ + context]; - size_t ix = histo_ix * self->alphabet_size_ + symbol; + size_t ix = histo_ix * self->histogram_length_ + symbol; BrotliWriteBits(self->depths_[ix], self->bits_[ix], storage_ix, storage); } } @@ -945,42 +948,38 @@ static void JumpToByteBoundary(size_t* storage_ix, uint8_t* storage) { } void BrotliStoreMetaBlock(MemoryManager* m, - const uint8_t* input, - size_t start_pos, - size_t length, - size_t mask, - uint8_t prev_byte, - uint8_t prev_byte2, - BROTLI_BOOL is_last, - uint32_t num_direct_distance_codes, - uint32_t distance_postfix_bits, - ContextType literal_context_mode, - const Command *commands, - size_t n_commands, - const MetaBlockSplit* mb, - size_t *storage_ix, - uint8_t *storage) { + const uint8_t* input, size_t start_pos, size_t length, size_t mask, + uint8_t prev_byte, uint8_t prev_byte2, BROTLI_BOOL is_last, + 
const BrotliEncoderParams* params, ContextType literal_context_mode, + const Command* commands, size_t n_commands, const MetaBlockSplit* mb, + size_t* storage_ix, uint8_t* storage) { + size_t pos = start_pos; size_t i; - size_t num_distance_codes = - BROTLI_NUM_DISTANCE_SHORT_CODES + num_direct_distance_codes + - (48u << distance_postfix_bits); + uint32_t num_distance_symbols = params->dist.alphabet_size; + uint32_t num_effective_distance_symbols = num_distance_symbols; HuffmanTree* tree; + ContextLut literal_context_lut = BROTLI_CONTEXT_LUT(literal_context_mode); BlockEncoder literal_enc; BlockEncoder command_enc; BlockEncoder distance_enc; + const BrotliDistanceParams* dist = &params->dist; + if (params->large_window && + num_effective_distance_symbols > BROTLI_NUM_HISTOGRAM_DISTANCE_SYMBOLS) { + num_effective_distance_symbols = BROTLI_NUM_HISTOGRAM_DISTANCE_SYMBOLS; + } StoreCompressedMetaBlockHeader(is_last, length, storage_ix, storage); tree = BROTLI_ALLOC(m, HuffmanTree, MAX_HUFFMAN_TREE_SIZE); if (BROTLI_IS_OOM(m)) return; - InitBlockEncoder(&literal_enc, 256, mb->literal_split.num_types, - mb->literal_split.types, mb->literal_split.lengths, - mb->literal_split.num_blocks); + InitBlockEncoder(&literal_enc, BROTLI_NUM_LITERAL_SYMBOLS, + mb->literal_split.num_types, mb->literal_split.types, + mb->literal_split.lengths, mb->literal_split.num_blocks); InitBlockEncoder(&command_enc, BROTLI_NUM_COMMAND_SYMBOLS, mb->command_split.num_types, mb->command_split.types, mb->command_split.lengths, mb->command_split.num_blocks); - InitBlockEncoder(&distance_enc, num_distance_codes, + InitBlockEncoder(&distance_enc, num_effective_distance_symbols, mb->distance_split.num_types, mb->distance_split.types, mb->distance_split.lengths, mb->distance_split.num_blocks); @@ -989,9 +988,10 @@ void BrotliStoreMetaBlock(MemoryManager* m, BuildAndStoreBlockSwitchEntropyCodes( &distance_enc, tree, storage_ix, storage); - BrotliWriteBits(2, distance_postfix_bits, storage_ix, storage); -
BrotliWriteBits(4, num_direct_distance_codes >> distance_postfix_bits, - storage_ix, storage); + BrotliWriteBits(2, dist->distance_postfix_bits, storage_ix, storage); + BrotliWriteBits( + 4, dist->num_direct_distance_codes >> dist->distance_postfix_bits, + storage_ix, storage); for (i = 0; i < mb->literal_split.num_types; ++i) { BrotliWriteBits(2, literal_context_mode, storage_ix, storage); } @@ -1017,13 +1017,16 @@ void BrotliStoreMetaBlock(MemoryManager* m, } BuildAndStoreEntropyCodesLiteral(m, &literal_enc, mb->literal_histograms, - mb->literal_histograms_size, tree, storage_ix, storage); + mb->literal_histograms_size, BROTLI_NUM_LITERAL_SYMBOLS, tree, + storage_ix, storage); if (BROTLI_IS_OOM(m)) return; BuildAndStoreEntropyCodesCommand(m, &command_enc, mb->command_histograms, - mb->command_histograms_size, tree, storage_ix, storage); + mb->command_histograms_size, BROTLI_NUM_COMMAND_SYMBOLS, tree, + storage_ix, storage); if (BROTLI_IS_OOM(m)) return; BuildAndStoreEntropyCodesDistance(m, &distance_enc, mb->distance_histograms, - mb->distance_histograms_size, tree, storage_ix, storage); + mb->distance_histograms_size, num_distance_symbols, tree, + storage_ix, storage); if (BROTLI_IS_OOM(m)) return; BROTLI_FREE(m, tree); @@ -1041,7 +1044,8 @@ void BrotliStoreMetaBlock(MemoryManager* m, } else { size_t j; for (j = cmd.insert_len_; j != 0; --j) { - size_t context = Context(prev_byte, prev_byte2, literal_context_mode); + size_t context = + BROTLI_CONTEXT(prev_byte, prev_byte2, literal_context_lut); uint8_t literal = input[pos & mask]; StoreSymbolWithContext(&literal_enc, literal, context, mb->literal_context_map, storage_ix, storage, @@ -1056,9 +1060,9 @@ void BrotliStoreMetaBlock(MemoryManager* m, prev_byte2 = input[(pos - 2) & mask]; prev_byte = input[(pos - 1) & mask]; if (cmd.cmd_prefix_ >= 128) { - size_t dist_code = cmd.dist_prefix_; - uint32_t distnumextra = cmd.dist_extra_ >> 24; - uint64_t distextra = cmd.dist_extra_ & 0xffffff; + size_t dist_code = 
cmd.dist_prefix_ & 0x3FF; + uint32_t distnumextra = cmd.dist_prefix_ >> 10; + uint64_t distextra = cmd.dist_extra_; if (mb->distance_context_map_size == 0) { StoreSymbol(&distance_enc, dist_code, storage_ix, storage); } else { @@ -1082,7 +1086,7 @@ void BrotliStoreMetaBlock(MemoryManager* m, static void BuildHistograms(const uint8_t* input, size_t start_pos, size_t mask, - const Command *commands, + const Command* commands, size_t n_commands, HistogramLiteral* lit_histo, HistogramCommand* cmd_histo, @@ -1099,7 +1103,7 @@ static void BuildHistograms(const uint8_t* input, } pos += CommandCopyLen(&cmd); if (CommandCopyLen(&cmd) && cmd.cmd_prefix_ >= 128) { - HistogramAddDistance(dist_histo, cmd.dist_prefix_); + HistogramAddDistance(dist_histo, cmd.dist_prefix_ & 0x3FF); } } } @@ -1107,7 +1111,7 @@ static void BuildHistograms(const uint8_t* input, static void StoreDataWithHuffmanCodes(const uint8_t* input, size_t start_pos, size_t mask, - const Command *commands, + const Command* commands, size_t n_commands, const uint8_t* lit_depth, const uint16_t* lit_bits, @@ -1134,9 +1138,9 @@ static void StoreDataWithHuffmanCodes(const uint8_t* input, } pos += CommandCopyLen(&cmd); if (CommandCopyLen(&cmd) && cmd.cmd_prefix_ >= 128) { - const size_t dist_code = cmd.dist_prefix_; - const uint32_t distnumextra = cmd.dist_extra_ >> 24; - const uint32_t distextra = cmd.dist_extra_ & 0xffffff; + const size_t dist_code = cmd.dist_prefix_ & 0x3FF; + const uint32_t distnumextra = cmd.dist_prefix_ >> 10; + const uint32_t distextra = cmd.dist_extra_; BrotliWriteBits(dist_depth[dist_code], dist_bits[dist_code], storage_ix, storage); BrotliWriteBits(distnumextra, distextra, storage_ix, storage); @@ -1145,15 +1149,10 @@ static void StoreDataWithHuffmanCodes(const uint8_t* input, } void BrotliStoreMetaBlockTrivial(MemoryManager* m, - const uint8_t* input, - size_t start_pos, - size_t length, - size_t mask, - BROTLI_BOOL is_last, - const Command *commands, - size_t n_commands, - size_t 
*storage_ix, - uint8_t *storage) { + const uint8_t* input, size_t start_pos, size_t length, size_t mask, + BROTLI_BOOL is_last, const BrotliEncoderParams* params, + const Command* commands, size_t n_commands, + size_t* storage_ix, uint8_t* storage) { HistogramLiteral lit_histo; HistogramCommand cmd_histo; HistogramDistance dist_histo; @@ -1161,9 +1160,10 @@ void BrotliStoreMetaBlockTrivial(MemoryManager* m, uint16_t lit_bits[BROTLI_NUM_LITERAL_SYMBOLS]; uint8_t cmd_depth[BROTLI_NUM_COMMAND_SYMBOLS]; uint16_t cmd_bits[BROTLI_NUM_COMMAND_SYMBOLS]; - uint8_t dist_depth[SIMPLE_DISTANCE_ALPHABET_SIZE]; - uint16_t dist_bits[SIMPLE_DISTANCE_ALPHABET_SIZE]; + uint8_t dist_depth[MAX_SIMPLE_DISTANCE_ALPHABET_SIZE]; + uint16_t dist_bits[MAX_SIMPLE_DISTANCE_ALPHABET_SIZE]; HuffmanTree* tree; + uint32_t num_distance_symbols = params->dist.alphabet_size; StoreCompressedMetaBlockHeader(is_last, length, storage_ix, storage); @@ -1178,14 +1178,16 @@ void BrotliStoreMetaBlockTrivial(MemoryManager* m, tree = BROTLI_ALLOC(m, HuffmanTree, MAX_HUFFMAN_TREE_SIZE); if (BROTLI_IS_OOM(m)) return; - BuildAndStoreHuffmanTree(lit_histo.data_, BROTLI_NUM_LITERAL_SYMBOLS, tree, + BuildAndStoreHuffmanTree(lit_histo.data_, BROTLI_NUM_LITERAL_SYMBOLS, + BROTLI_NUM_LITERAL_SYMBOLS, tree, lit_depth, lit_bits, storage_ix, storage); - BuildAndStoreHuffmanTree(cmd_histo.data_, BROTLI_NUM_COMMAND_SYMBOLS, tree, + BuildAndStoreHuffmanTree(cmd_histo.data_, BROTLI_NUM_COMMAND_SYMBOLS, + BROTLI_NUM_COMMAND_SYMBOLS, tree, cmd_depth, cmd_bits, storage_ix, storage); - BuildAndStoreHuffmanTree(dist_histo.data_, SIMPLE_DISTANCE_ALPHABET_SIZE, - tree, + BuildAndStoreHuffmanTree(dist_histo.data_, MAX_SIMPLE_DISTANCE_ALPHABET_SIZE, + num_distance_symbols, tree, dist_depth, dist_bits, storage_ix, storage); BROTLI_FREE(m, tree); @@ -1200,15 +1202,14 @@ void BrotliStoreMetaBlockTrivial(MemoryManager* m, } void BrotliStoreMetaBlockFast(MemoryManager* m, - const uint8_t* input, - size_t start_pos, - size_t length, - 
size_t mask, - BROTLI_BOOL is_last, - const Command *commands, - size_t n_commands, - size_t *storage_ix, - uint8_t *storage) { + const uint8_t* input, size_t start_pos, size_t length, size_t mask, + BROTLI_BOOL is_last, const BrotliEncoderParams* params, + const Command* commands, size_t n_commands, + size_t* storage_ix, uint8_t* storage) { + uint32_t num_distance_symbols = params->dist.alphabet_size; + uint32_t distance_alphabet_bits = + Log2FloorNonZero(num_distance_symbols - 1) + 1; + StoreCompressedMetaBlockHeader(is_last, length, storage_ix, storage); BrotliWriteBits(13, 0, storage_ix, storage); @@ -1252,8 +1253,8 @@ void BrotliStoreMetaBlockFast(MemoryManager* m, uint16_t lit_bits[BROTLI_NUM_LITERAL_SYMBOLS]; uint8_t cmd_depth[BROTLI_NUM_COMMAND_SYMBOLS]; uint16_t cmd_bits[BROTLI_NUM_COMMAND_SYMBOLS]; - uint8_t dist_depth[SIMPLE_DISTANCE_ALPHABET_SIZE]; - uint16_t dist_bits[SIMPLE_DISTANCE_ALPHABET_SIZE]; + uint8_t dist_depth[MAX_SIMPLE_DISTANCE_ALPHABET_SIZE]; + uint16_t dist_bits[MAX_SIMPLE_DISTANCE_ALPHABET_SIZE]; HistogramClearLiteral(&lit_histo); HistogramClearCommand(&cmd_histo); HistogramClearDistance(&dist_histo); @@ -1274,7 +1275,7 @@ void BrotliStoreMetaBlockFast(MemoryManager* m, BrotliBuildAndStoreHuffmanTreeFast(m, dist_histo.data_, dist_histo.total_count_, /* max_bits = */ - SIMPLE_DISTANCE_ALPHABET_BITS, + distance_alphabet_bits, dist_depth, dist_bits, storage_ix, storage); if (BROTLI_IS_OOM(m)) return; @@ -1293,11 +1294,11 @@ void BrotliStoreMetaBlockFast(MemoryManager* m, /* This is for storing uncompressed blocks (simple raw storage of bytes-as-bytes). 
*/ void BrotliStoreUncompressedMetaBlock(BROTLI_BOOL is_final_block, - const uint8_t * BROTLI_RESTRICT input, + const uint8_t* BROTLI_RESTRICT input, size_t position, size_t mask, size_t len, - size_t * BROTLI_RESTRICT storage_ix, - uint8_t * BROTLI_RESTRICT storage) { + size_t* BROTLI_RESTRICT storage_ix, + uint8_t* BROTLI_RESTRICT storage) { size_t masked_pos = position & mask; BrotliStoreUncompressedMetaBlockHeader(len, storage_ix, storage); JumpToByteBoundary(storage_ix, storage); diff --git a/c/enc/brotli_bit_stream.h b/c/enc/brotli_bit_stream.h index 1324b18..9089b1d 100644 --- a/c/enc/brotli_bit_stream.h +++ b/c/enc/brotli_bit_stream.h @@ -16,10 +16,10 @@ #ifndef BROTLI_ENC_BROTLI_BIT_STREAM_H_ #define BROTLI_ENC_BROTLI_BIT_STREAM_H_ +#include "../common/context.h" #include "../common/platform.h" #include <brotli/types.h> #include "./command.h" -#include "./context.h" #include "./entropy_encode.h" #include "./memory.h" #include "./metablock.h" @@ -32,7 +32,7 @@ extern "C" { position for the current storage.
*/ BROTLI_INTERNAL void BrotliStoreHuffmanTree(const uint8_t* depths, size_t num, - HuffmanTree* tree, size_t *storage_ix, uint8_t *storage); + HuffmanTree* tree, size_t* storage_ix, uint8_t* storage); BROTLI_INTERNAL void BrotliBuildAndStoreHuffmanTreeFast( MemoryManager* m, const uint32_t* histogram, const size_t histogram_total, @@ -42,51 +42,31 @@ BROTLI_INTERNAL void BrotliBuildAndStoreHuffmanTreeFast( /* REQUIRES: length > 0 */ /* REQUIRES: length <= (1 << 24) */ BROTLI_INTERNAL void BrotliStoreMetaBlock(MemoryManager* m, - const uint8_t* input, - size_t start_pos, - size_t length, - size_t mask, - uint8_t prev_byte, - uint8_t prev_byte2, - BROTLI_BOOL is_final_block, - uint32_t num_direct_distance_codes, - uint32_t distance_postfix_bits, - ContextType literal_context_mode, - const Command* commands, - size_t n_commands, - const MetaBlockSplit* mb, - size_t* storage_ix, - uint8_t* storage); + const uint8_t* input, size_t start_pos, size_t length, size_t mask, + uint8_t prev_byte, uint8_t prev_byte2, BROTLI_BOOL is_last, + const BrotliEncoderParams* params, ContextType literal_context_mode, + const Command* commands, size_t n_commands, const MetaBlockSplit* mb, + size_t* storage_ix, uint8_t* storage); /* Stores the meta-block without doing any block splitting, just collects one histogram per block category and uses that for entropy coding. 
REQUIRES: length > 0 REQUIRES: length <= (1 << 24) */ BROTLI_INTERNAL void BrotliStoreMetaBlockTrivial(MemoryManager* m, - const uint8_t* input, - size_t start_pos, - size_t length, - size_t mask, - BROTLI_BOOL is_last, - const Command *commands, - size_t n_commands, - size_t* storage_ix, - uint8_t* storage); + const uint8_t* input, size_t start_pos, size_t length, size_t mask, + BROTLI_BOOL is_last, const BrotliEncoderParams* params, + const Command* commands, size_t n_commands, + size_t* storage_ix, uint8_t* storage); /* Same as above, but uses static prefix codes for histograms with a only a few symbols, and uses static code length prefix codes for all other histograms. REQUIRES: length > 0 REQUIRES: length <= (1 << 24) */ BROTLI_INTERNAL void BrotliStoreMetaBlockFast(MemoryManager* m, - const uint8_t* input, - size_t start_pos, - size_t length, - size_t mask, - BROTLI_BOOL is_last, - const Command *commands, - size_t n_commands, - size_t* storage_ix, - uint8_t* storage); + const uint8_t* input, size_t start_pos, size_t length, size_t mask, + BROTLI_BOOL is_last, const BrotliEncoderParams* params, + const Command* commands, size_t n_commands, + size_t* storage_ix, uint8_t* storage); /* This is for storing uncompressed blocks (simple raw storage of bytes-as-bytes). diff --git a/c/enc/command.h b/c/enc/command.h index 3bf0cf7..0526815 100644 --- a/c/enc/command.h +++ b/c/enc/command.h @@ -105,10 +105,13 @@ static BROTLI_INLINE uint32_t GetCopyExtra(uint16_t copycode) { typedef struct Command { uint32_t insert_len_; - /* Stores copy_len in low 24 bits and copy_len XOR copy_code in high 8 bit. */ + /* Stores copy_len in low 25 bits and copy_code - copy_len in high 7 bit. */ uint32_t copy_len_; + /* Stores distance extra bits. */ uint32_t dist_extra_; uint16_t cmd_prefix_; + /* Stores distance code in low 10 bits + and number of extra bits in high 6 bits. 
*/ uint16_t dist_prefix_; } Command; @@ -118,7 +121,7 @@ static BROTLI_INLINE void InitCommand(Command* self, size_t insertlen, /* Don't rely on signed int representation, use honest casts. */ uint32_t delta = (uint8_t)((int8_t)copylen_code_delta); self->insert_len_ = (uint32_t)insertlen; - self->copy_len_ = (uint32_t)(copylen | (delta << 24)); + self->copy_len_ = (uint32_t)(copylen | (delta << 25)); /* The distance prefix and extra bits are stored in this Command as if npostfix and ndirect were 0, they are only recomputed later after the clustering if needed. */ @@ -126,29 +129,29 @@ static BROTLI_INLINE void InitCommand(Command* self, size_t insertlen, distance_code, 0, 0, &self->dist_prefix_, &self->dist_extra_); GetLengthCode( insertlen, (size_t)((int)copylen + copylen_code_delta), - TO_BROTLI_BOOL(self->dist_prefix_ == 0), &self->cmd_prefix_); + TO_BROTLI_BOOL((self->dist_prefix_ & 0x3FF) == 0), &self->cmd_prefix_); } static BROTLI_INLINE void InitInsertCommand(Command* self, size_t insertlen) { self->insert_len_ = (uint32_t)insertlen; - self->copy_len_ = 4 << 24; + self->copy_len_ = 4 << 25; self->dist_extra_ = 0; self->dist_prefix_ = BROTLI_NUM_DISTANCE_SHORT_CODES; GetLengthCode(insertlen, 4, BROTLI_FALSE, &self->cmd_prefix_); } static BROTLI_INLINE uint32_t CommandRestoreDistanceCode(const Command* self) { - if (self->dist_prefix_ < BROTLI_NUM_DISTANCE_SHORT_CODES) { - return self->dist_prefix_; + if ((self->dist_prefix_ & 0x3FF) < BROTLI_NUM_DISTANCE_SHORT_CODES) { + return self->dist_prefix_ & 0x3FF; } else { - uint32_t nbits = self->dist_extra_ >> 24; - uint32_t extra = self->dist_extra_ & 0xffffff; + uint32_t nbits = self->dist_prefix_ >> 10; + uint32_t extra = self->dist_extra_; /* It is assumed that the distance was first encoded with NPOSTFIX = 0 and NDIRECT = 0, so the code itself is of this form: BROTLI_NUM_DISTANCE_SHORT_CODES + 2 * (nbits - 1) + prefix_bit Therefore, the following expression results in (2 + prefix_bit). 
*/ - uint32_t prefix = - self->dist_prefix_ + 4u - BROTLI_NUM_DISTANCE_SHORT_CODES - 2u * nbits; + uint32_t prefix = (self->dist_prefix_ & 0x3FF) + 4u - + BROTLI_NUM_DISTANCE_SHORT_CODES - 2u * nbits; /* Subtract 4 for offset (Chapter 4.) and increase by BROTLI_NUM_DISTANCE_SHORT_CODES - 1 */ return (prefix << nbits) + extra + BROTLI_NUM_DISTANCE_SHORT_CODES - 4u; @@ -165,12 +168,13 @@ static BROTLI_INLINE uint32_t CommandDistanceContext(const Command* self) { } static BROTLI_INLINE uint32_t CommandCopyLen(const Command* self) { - return self->copy_len_ & 0xFFFFFF; + return self->copy_len_ & 0x1FFFFFF; } static BROTLI_INLINE uint32_t CommandCopyLenCode(const Command* self) { - int32_t delta = (int8_t)((uint8_t)(self->copy_len_ >> 24)); - return (uint32_t)((int32_t)(self->copy_len_ & 0xFFFFFF) + delta); + uint32_t modifier = self->copy_len_ >> 25; + int32_t delta = (int8_t)((uint8_t)(modifier | ((modifier & 0x40) << 1))); + return (uint32_t)((int32_t)(self->copy_len_ & 0x1FFFFFF) + delta); } #if defined(__cplusplus) || defined(c_plusplus) diff --git a/c/enc/compress_fragment.c b/c/enc/compress_fragment.c index 40dce3e..f75069b 100644 --- a/c/enc/compress_fragment.c +++ b/c/enc/compress_fragment.c @@ -38,7 +38,7 @@ extern "C" { * There is no effort to ensure that it is a prime, the oddity is enough for this use. * The number has been tuned heuristically against compression benchmarks. 
*/ -static const uint32_t kHashMul32 = 0x1e35a7bd; +static const uint32_t kHashMul32 = 0x1E35A7BD; static BROTLI_INLINE uint32_t Hash(const uint8_t* p, size_t shift) { const uint64_t h = (BROTLI_UNALIGNED_LOAD64LE(p) << 24) * kHashMul32; @@ -343,7 +343,7 @@ static void BrotliStoreMetaBlockHeader( } static void UpdateBits(size_t n_bits, uint32_t bits, size_t pos, - uint8_t *array) { + uint8_t* array) { while (n_bits > 0) { size_t byte_pos = pos >> 3; size_t n_unchanged_bits = pos & 7; diff --git a/c/enc/compress_fragment_two_pass.c b/c/enc/compress_fragment_two_pass.c index 8259817..b8bd6e8 100644 --- a/c/enc/compress_fragment_two_pass.c +++ b/c/enc/compress_fragment_two_pass.c @@ -37,7 +37,7 @@ extern "C" { * There is no effort to ensure that it is a prime, the oddity is enough for this use. * The number has been tuned heuristically against compression benchmarks. */ -static const uint32_t kHashMul32 = 0x1e35a7bd; +static const uint32_t kHashMul32 = 0x1E35A7BD; static BROTLI_INLINE uint32_t Hash(const uint8_t* p, size_t shift) { const uint64_t h = (BROTLI_UNALIGNED_LOAD64LE(p) << 16) * kHashMul32; diff --git a/c/enc/context.h b/c/enc/context.h deleted file mode 100644 index caa4230..0000000 --- a/c/enc/context.h +++ /dev/null @@ -1,184 +0,0 @@ -/* Copyright 2013 Google Inc. All Rights Reserved. - - Distributed under MIT license. - See file LICENSE for detail or copy at https://opensource.org/licenses/MIT -*/ - -/* Functions to map previous bytes into a context id. */ - -#ifndef BROTLI_ENC_CONTEXT_H_ -#define BROTLI_ENC_CONTEXT_H_ - -#include "../common/platform.h" -#include - -#if defined(__cplusplus) || defined(c_plusplus) -extern "C" { -#endif - -/* Second-order context lookup table for UTF8 byte streams. - - If p1 and p2 are the previous two bytes, we calculate the context as - - context = kUTF8ContextLookup[p1] | kUTF8ContextLookup[p2 + 256]. - - If the previous two bytes are ASCII characters (i.e. 
< 128), this will be - equivalent to - - context = 4 * context1(p1) + context2(p2), - - where context1 is based on the previous byte in the following way: - - 0 : non-ASCII control - 1 : \t, \n, \r - 2 : space - 3 : other punctuation - 4 : " ' - 5 : % - 6 : ( < [ { - 7 : ) > ] } - 8 : , ; : - 9 : . - 10 : = - 11 : number - 12 : upper-case vowel - 13 : upper-case consonant - 14 : lower-case vowel - 15 : lower-case consonant - - and context2 is based on the second last byte: - - 0 : control, space - 1 : punctuation - 2 : upper-case letter, number - 3 : lower-case letter - - If the last byte is ASCII, and the second last byte is not (in a valid UTF8 - stream it will be a continuation byte, value between 128 and 191), the - context is the same as if the second last byte was an ASCII control or space. - - If the last byte is a UTF8 lead byte (value >= 192), then the next byte will - be a continuation byte and the context id is 2 or 3 depending on the LSB of - the last byte and to a lesser extent on the second last byte if it is ASCII. - - If the last byte is a UTF8 continuation byte, the second last byte can be: - - continuation byte: the next byte is probably ASCII or lead byte (assuming - 4-byte UTF8 characters are rare) and the context id is 0 or 1. - - lead byte (192 - 207): next byte is ASCII or lead byte, context is 0 or 1 - - lead byte (208 - 255): next byte is continuation byte, context is 2 or 3 - - The possible value combinations of the previous two bytes, the range of - context ids and the type of the next byte is summarized in the table below: - - |--------\-----------------------------------------------------------------| - | \ Last byte | - | Second \---------------------------------------------------------------| - | last byte \ ASCII | cont. byte | lead byte | - | \ (0-127) | (128-191) | (192-) | - |=============|===================|=====================|==================| - | ASCII | next: ASCII/lead | not valid | next: cont. 
| - | (0-127) | context: 4 - 63 | | context: 2 - 3 | - |-------------|-------------------|---------------------|------------------| - | cont. byte | next: ASCII/lead | next: ASCII/lead | next: cont. | - | (128-191) | context: 4 - 63 | context: 0 - 1 | context: 2 - 3 | - |-------------|-------------------|---------------------|------------------| - | lead byte | not valid | next: ASCII/lead | not valid | - | (192-207) | | context: 0 - 1 | | - |-------------|-------------------|---------------------|------------------| - | lead byte | not valid | next: cont. | not valid | - | (208-) | | context: 2 - 3 | | - |-------------|-------------------|---------------------|------------------| -*/ -static const uint8_t kUTF8ContextLookup[512] = { - /* Last byte. */ - /* */ - /* ASCII range. */ - 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 0, 0, 4, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 8, 12, 16, 12, 12, 20, 12, 16, 24, 28, 12, 12, 32, 12, 36, 12, - 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 32, 32, 24, 40, 28, 12, - 12, 48, 52, 52, 52, 48, 52, 52, 52, 48, 52, 52, 52, 52, 52, 48, - 52, 52, 52, 52, 52, 48, 52, 52, 52, 52, 52, 24, 12, 28, 12, 12, - 12, 56, 60, 60, 60, 56, 60, 60, 60, 56, 60, 60, 60, 60, 60, 56, - 60, 60, 60, 60, 60, 56, 60, 60, 60, 60, 60, 24, 12, 28, 12, 0, - /* UTF8 continuation byte range. */ - 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, - 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, - 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, - 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, - /* UTF8 lead byte range. */ - 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, - 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, - 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, - 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, - /* Second last byte. */ - /* */ - /* ASCII range. 
*/ - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, - 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, - 1, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, - 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 1, 1, 1, 1, 0, - /* UTF8 continuation byte range. */ - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - /* UTF8 lead byte range. */ - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, -}; - -/* Context lookup table for small signed integers. */ -static const uint8_t kSigned3BitContextLookup[] = { - 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, - 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, - 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, - 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7, -}; - -typedef enum ContextType { - CONTEXT_LSB6 = 0, - CONTEXT_MSB6 = 1, - CONTEXT_UTF8 = 2, - CONTEXT_SIGNED = 3 -} ContextType; - -static BROTLI_INLINE uint8_t Context(uint8_t p1, uint8_t p2, ContextType mode) { 
- switch (mode) { - case CONTEXT_LSB6: - return p1 & 0x3f; - case CONTEXT_MSB6: - return (uint8_t)(p1 >> 2); - case CONTEXT_UTF8: - return kUTF8ContextLookup[p1] | kUTF8ContextLookup[p2 + 256]; - case CONTEXT_SIGNED: - return (uint8_t)((kSigned3BitContextLookup[p1] << 3) + - kSigned3BitContextLookup[p2]); - default: - return 0; - } -} - -#if defined(__cplusplus) || defined(c_plusplus) -} /* extern "C" */ -#endif - -#endif /* BROTLI_ENC_CONTEXT_H_ */ diff --git a/c/enc/encode.c b/c/enc/encode.c index 794a409..4fd28d0 100644 --- a/c/enc/encode.c +++ b/c/enc/encode.c @@ -11,6 +11,8 @@ #include /* free, malloc */ #include /* memcpy, memset */ +#include "../common/constants.h" +#include "../common/context.h" #include "../common/platform.h" #include "../common/version.h" #include "./backward_references.h" @@ -19,7 +21,7 @@ #include "./brotli_bit_stream.h" #include "./compress_fragment.h" #include "./compress_fragment_two_pass.h" -#include "./context.h" +#include "./encoder_dict.h" #include "./entropy_encode.h" #include "./fast_log.h" #include "./hash.h" @@ -69,8 +71,8 @@ typedef struct BrotliEncoderStateStruct { uint64_t last_processed_pos_; int dist_cache_[BROTLI_NUM_DISTANCE_SHORT_CODES]; int saved_dist_cache_[4]; - uint8_t last_byte_; - uint8_t last_byte_bits_; + uint16_t last_bytes_; + uint8_t last_bytes_bits_; uint8_t prev_byte_; uint8_t prev_byte2_; size_t storage_size_; @@ -161,6 +163,10 @@ BROTLI_BOOL BrotliEncoderSetParameter( state->params.size_hint = value; return BROTLI_TRUE; + case BROTLI_PARAM_LARGE_WINDOW: + state->params.large_window = TO_BROTLI_BOOL(!!value); + return BROTLI_TRUE; + default: return BROTLI_FALSE; } } @@ -251,20 +257,25 @@ static int* GetHashTable(BrotliEncoderState* s, int quality, return table; } -static void EncodeWindowBits(int lgwin, uint8_t* last_byte, - uint8_t* last_byte_bits) { - if (lgwin == 16) { - *last_byte = 0; - *last_byte_bits = 1; - } else if (lgwin == 17) { - *last_byte = 1; - *last_byte_bits = 7; - } else if (lgwin > 17) 
{ - *last_byte = (uint8_t)(((lgwin - 17) << 1) | 1); - *last_byte_bits = 4; +static void EncodeWindowBits(int lgwin, BROTLI_BOOL large_window, + uint16_t* last_bytes, uint8_t* last_bytes_bits) { + if (large_window) { + *last_bytes = (uint16_t)(((lgwin & 0x3F) << 8) | 0x11); + *last_bytes_bits = 14; } else { - *last_byte = (uint8_t)(((lgwin - 8) << 4) | 1); - *last_byte_bits = 7; + if (lgwin == 16) { + *last_bytes = 0; + *last_bytes_bits = 1; + } else if (lgwin == 17) { + *last_bytes = 1; + *last_bytes_bits = 7; + } else if (lgwin > 17) { + *last_bytes = (uint16_t)(((lgwin - 17) << 1) | 0x01); + *last_bytes_bits = 4; + } else { + *last_bytes = (uint16_t)(((lgwin - 8) << 4) | 0x01); + *last_bytes_bits = 7; + } } } @@ -420,6 +431,7 @@ static BROTLI_BOOL ShouldUseComplexStaticContextMap(const uint8_t* input, double entropy[3]; size_t dummy; size_t i; + ContextLut utf8_lut = BROTLI_CONTEXT_LUT(CONTEXT_UTF8); for (; start_pos + 64 <= end_pos; start_pos += 4096) { const size_t stride_end_pos = start_pos + 64; uint8_t prev2 = input[start_pos & mask]; @@ -430,7 +442,7 @@ static BROTLI_BOOL ShouldUseComplexStaticContextMap(const uint8_t* input, for (pos = start_pos + 2; pos < stride_end_pos; ++pos) { const uint8_t literal = input[pos & mask]; const uint8_t context = (uint8_t)kStaticContextMapComplexUTF8[ - Context(prev1, prev2, CONTEXT_UTF8)]; + BROTLI_CONTEXT(prev1, prev2, utf8_lut)]; ++total; ++combined_histo[literal >> 3]; ++context_histo[context][literal >> 3]; @@ -519,12 +531,26 @@ static BROTLI_BOOL ShouldCompress( return BROTLI_TRUE; } +/* Chooses the literal context mode for a metablock */ +static ContextType ChooseContextMode(const BrotliEncoderParams* params, + const uint8_t* data, const size_t pos, const size_t mask, + const size_t length) { + /* We only do the computation for the option of something else than + CONTEXT_UTF8 for the highest qualities */ + if (params->quality >= MIN_QUALITY_FOR_HQ_BLOCK_SPLITTING && + !BrotliIsMostlyUTF8(data, pos, mask, length, 
kMinUTF8Ratio)) { + return CONTEXT_SIGNED; + } + return CONTEXT_UTF8; +} + static void WriteMetaBlockInternal(MemoryManager* m, const uint8_t* data, const size_t mask, const uint64_t last_flush_pos, const size_t bytes, const BROTLI_BOOL is_last, + ContextType literal_context_mode, const BrotliEncoderParams* params, const uint8_t prev_byte, const uint8_t prev_byte2, @@ -536,10 +562,9 @@ static void WriteMetaBlockInternal(MemoryManager* m, size_t* storage_ix, uint8_t* storage) { const uint32_t wrapped_last_flush_pos = WrapPosition(last_flush_pos); - uint8_t last_byte; - uint8_t last_byte_bits; - uint32_t num_direct_distance_codes = 0; - uint32_t distance_postfix_bits = 0; + uint16_t last_bytes; + uint8_t last_bytes_bits; + ContextLut literal_context_lut = BROTLI_CONTEXT_LUT(literal_context_mode); if (bytes == 0) { /* Write the ISLAST and ISEMPTY bits. */ @@ -559,31 +584,29 @@ static void WriteMetaBlockInternal(MemoryManager* m, return; } - last_byte = storage[0]; - last_byte_bits = (uint8_t)(*storage_ix & 0xff); - if (params->quality >= MIN_QUALITY_FOR_RECOMPUTE_DISTANCE_PREFIXES && - params->mode == BROTLI_MODE_FONT) { - num_direct_distance_codes = 12; - distance_postfix_bits = 1; + BROTLI_DCHECK(*storage_ix <= 14); + last_bytes = (uint16_t)((storage[1] << 8) | storage[0]); + last_bytes_bits = (uint8_t)(*storage_ix); + if (params->dist.num_direct_distance_codes != 0 || + params->dist.distance_postfix_bits != 0) { RecomputeDistancePrefixes(commands, num_commands, - num_direct_distance_codes, - distance_postfix_bits); + params->dist.num_direct_distance_codes, + params->dist.distance_postfix_bits); } if (params->quality <= MAX_QUALITY_FOR_STATIC_ENTROPY_CODES) { BrotliStoreMetaBlockFast(m, data, wrapped_last_flush_pos, - bytes, mask, is_last, + bytes, mask, is_last, params, commands, num_commands, storage_ix, storage); if (BROTLI_IS_OOM(m)) return; } else if (params->quality < MIN_QUALITY_FOR_BLOCK_SPLIT) { BrotliStoreMetaBlockTrivial(m, data, wrapped_last_flush_pos, - 
bytes, mask, is_last, + bytes, mask, is_last, params, commands, num_commands, storage_ix, storage); if (BROTLI_IS_OOM(m)) return; } else { - ContextType literal_context_mode = CONTEXT_UTF8; MetaBlockSplit mb; InitMetaBlockSplit(&mb); if (params->quality < MIN_QUALITY_FOR_HQ_BLOCK_SPLITTING) { @@ -596,14 +619,10 @@ static void WriteMetaBlockInternal(MemoryManager* m, &literal_context_map); } BrotliBuildMetaBlockGreedy(m, data, wrapped_last_flush_pos, mask, - prev_byte, prev_byte2, literal_context_mode, num_literal_contexts, + prev_byte, prev_byte2, literal_context_lut, num_literal_contexts, literal_context_map, commands, num_commands, &mb); if (BROTLI_IS_OOM(m)) return; } else { - if (!BrotliIsMostlyUTF8(data, wrapped_last_flush_pos, mask, bytes, - kMinUTF8Ratio)) { - literal_context_mode = CONTEXT_SIGNED; - } BrotliBuildMetaBlock(m, data, wrapped_last_flush_pos, mask, params, prev_byte, prev_byte2, commands, num_commands, @@ -612,15 +631,18 @@ static void WriteMetaBlockInternal(MemoryManager* m, if (BROTLI_IS_OOM(m)) return; } if (params->quality >= MIN_QUALITY_FOR_OPTIMIZE_HISTOGRAMS) { - BrotliOptimizeHistograms(num_direct_distance_codes, - distance_postfix_bits, - &mb); + /* The number of distance symbols effectively used by + "Large Window Brotli" (32-bit). */ + uint32_t num_effective_dist_codes = params->dist.alphabet_size; + if (num_effective_dist_codes > BROTLI_NUM_HISTOGRAM_DISTANCE_SYMBOLS) { + num_effective_dist_codes = BROTLI_NUM_HISTOGRAM_DISTANCE_SYMBOLS; + } + BrotliOptimizeHistograms(num_effective_dist_codes, &mb); } BrotliStoreMetaBlock(m, data, wrapped_last_flush_pos, bytes, mask, prev_byte, prev_byte2, is_last, - num_direct_distance_codes, - distance_postfix_bits, + params, literal_context_mode, commands, num_commands, &mb, @@ -631,20 +653,54 @@ static void WriteMetaBlockInternal(MemoryManager* m, if (bytes + 4 < (*storage_ix >> 3)) { /* Restore the distance cache and last byte. 
*/ memcpy(dist_cache, saved_dist_cache, 4 * sizeof(dist_cache[0])); - storage[0] = last_byte; - *storage_ix = last_byte_bits; + storage[0] = (uint8_t)last_bytes; + storage[1] = (uint8_t)(last_bytes >> 8); + *storage_ix = last_bytes_bits; BrotliStoreUncompressedMetaBlock(is_last, data, wrapped_last_flush_pos, mask, bytes, storage_ix, storage); } } +static void ChooseDistanceParams(BrotliEncoderParams* params) { + uint32_t num_direct_distance_codes = 0; + uint32_t distance_postfix_bits = 0; + uint32_t alphabet_size; + size_t max_distance = BROTLI_MAX_DISTANCE; + + if (params->quality >= MIN_QUALITY_FOR_RECOMPUTE_DISTANCE_PREFIXES && + params->mode == BROTLI_MODE_FONT) { + num_direct_distance_codes = 12; + distance_postfix_bits = 1; + max_distance = (1U << 27) + 4; + } + + alphabet_size = BROTLI_DISTANCE_ALPHABET_SIZE( + num_direct_distance_codes, distance_postfix_bits, + BROTLI_MAX_DISTANCE_BITS); + if (params->large_window) { + max_distance = BROTLI_MAX_ALLOWED_DISTANCE; + if (num_direct_distance_codes != 0 || distance_postfix_bits != 0) { + max_distance = (3U << 29) - 4; + } + alphabet_size = BROTLI_DISTANCE_ALPHABET_SIZE( + num_direct_distance_codes, distance_postfix_bits, + BROTLI_LARGE_MAX_DISTANCE_BITS); + } + + params->dist.num_direct_distance_codes = num_direct_distance_codes; + params->dist.distance_postfix_bits = distance_postfix_bits; + params->dist.alphabet_size = alphabet_size; + params->dist.max_distance = max_distance; +} + static BROTLI_BOOL EnsureInitialized(BrotliEncoderState* s) { if (BROTLI_IS_OOM(&s->memory_manager_)) return BROTLI_FALSE; if (s->is_initialized_) return BROTLI_TRUE; SanitizeParams(&s->params); s->params.lgblock = ComputeLgBlock(&s->params); + ChooseDistanceParams(&s->params); s->remaining_metadata_bytes_ = BROTLI_UINT32_MAX; @@ -657,7 +713,8 @@ static BROTLI_BOOL EnsureInitialized(BrotliEncoderState* s) { s->params.quality == FAST_TWO_PASS_COMPRESSION_QUALITY) { lgwin = BROTLI_MAX(int, lgwin, 18); } - EncodeWindowBits(lgwin, 
&s->last_byte_, &s->last_byte_bits_); + EncodeWindowBits(lgwin, s->params.large_window, + &s->last_bytes_, &s->last_bytes_bits_); } if (s->params.quality == FAST_ONE_PASS_COMPRESSION_QUALITY) { @@ -671,11 +728,18 @@ static BROTLI_BOOL EnsureInitialized(BrotliEncoderState* s) { static void BrotliEncoderInitParams(BrotliEncoderParams* params) { params->mode = BROTLI_DEFAULT_MODE; + params->large_window = BROTLI_FALSE; params->quality = BROTLI_DEFAULT_QUALITY; params->lgwin = BROTLI_DEFAULT_WINDOW; params->lgblock = 0; params->size_hint = 0; params->disable_literal_context_modeling = BROTLI_FALSE; + BrotliInitEncoderDictionary(¶ms->dictionary); + params->dist.num_direct_distance_codes = 0; + params->dist.distance_postfix_bits = 0; + params->dist.alphabet_size = + BROTLI_DISTANCE_ALPHABET_SIZE(0, 0, BROTLI_MAX_DISTANCE_BITS); + params->dist.max_distance = BROTLI_MAX_DISTANCE; } static void BrotliEncoderInitState(BrotliEncoderState* s) { @@ -837,6 +901,37 @@ static BROTLI_BOOL UpdateLastProcessedPos(BrotliEncoderState* s) { return TO_BROTLI_BOOL(wrapped_input_pos < wrapped_last_processed_pos); } +static void ExtendLastCommand(BrotliEncoderState* s, uint32_t* bytes, + uint32_t* wrapped_last_processed_pos) { + Command* last_command = &s->commands_[s->num_commands_ - 1]; + const uint8_t* data = s->ringbuffer_.buffer_; + const uint32_t mask = s->ringbuffer_.mask_; + uint64_t max_backward_distance = (1u << s->params.lgwin) - BROTLI_WINDOW_GAP; + uint64_t last_copy_len = last_command->copy_len_ & 0x1FFFFFF; + uint64_t last_processed_pos = s->last_processed_pos_ - last_copy_len; + uint64_t max_distance = last_processed_pos < max_backward_distance ? 
+ last_processed_pos : max_backward_distance; + uint64_t cmd_dist = (uint64_t)s->dist_cache_[0]; + uint32_t distance_code = CommandRestoreDistanceCode(last_command); + if (distance_code < BROTLI_NUM_DISTANCE_SHORT_CODES || + distance_code - (BROTLI_NUM_DISTANCE_SHORT_CODES - 1) == cmd_dist) { + if (cmd_dist <= max_distance) { + while (*bytes != 0 && data[*wrapped_last_processed_pos & mask] == + data[(*wrapped_last_processed_pos - cmd_dist) & mask]) { + last_command->copy_len_++; + (*bytes)--; + (*wrapped_last_processed_pos)++; + } + } + /* The copy length is at most the metablock size, and thus expressible. */ + GetLengthCode(last_command->insert_len_, + (size_t)((int)(last_command->copy_len_ & 0x1FFFFFF) + + (int)(last_command->copy_len_ >> 25)), + TO_BROTLI_BOOL((last_command->dist_prefix_ & 0x3FF) == 0), + &last_command->cmd_prefix_); + } +} + /* Processes the accumulated input data and sets |*out_size| to the length of the new output meta-block, or to zero if no new output meta-block has been @@ -853,13 +948,12 @@ static BROTLI_BOOL EncodeData( BrotliEncoderState* s, const BROTLI_BOOL is_last, const BROTLI_BOOL force_flush, size_t* out_size, uint8_t** output) { const uint64_t delta = UnprocessedInputSize(s); - const uint32_t bytes = (uint32_t)delta; - const uint32_t wrapped_last_processed_pos = - WrapPosition(s->last_processed_pos_); + uint32_t bytes = (uint32_t)delta; + uint32_t wrapped_last_processed_pos = WrapPosition(s->last_processed_pos_); uint8_t* data; uint32_t mask; MemoryManager* m = &s->memory_manager_; - const BrotliDictionary* dictionary = BrotliGetDictionary(); + ContextType literal_context_mode; if (!EnsureInitialized(s)) return BROTLI_FALSE; data = s->ringbuffer_.buffer_; @@ -884,7 +978,7 @@ static BROTLI_BOOL EncodeData( if (s->params.quality == FAST_ONE_PASS_COMPRESSION_QUALITY || s->params.quality == FAST_TWO_PASS_COMPRESSION_QUALITY) { uint8_t* storage; - size_t storage_ix = s->last_byte_bits_; + size_t storage_ix = s->last_bytes_bits_; 
size_t table_size; int* table; @@ -894,9 +988,10 @@ static BROTLI_BOOL EncodeData( *out_size = 0; return BROTLI_TRUE; } - storage = GetBrotliStorage(s, 2 * bytes + 502); + storage = GetBrotliStorage(s, 2 * bytes + 503); if (BROTLI_IS_OOM(m)) return BROTLI_FALSE; - storage[0] = s->last_byte_; + storage[0] = (uint8_t)s->last_bytes_; + storage[1] = (uint8_t)(s->last_bytes_ >> 8); table = GetHashTable(s, s->params.quality, bytes, &table_size); if (BROTLI_IS_OOM(m)) return BROTLI_FALSE; if (s->params.quality == FAST_ONE_PASS_COMPRESSION_QUALITY) { @@ -917,8 +1012,8 @@ static BROTLI_BOOL EncodeData( &storage_ix, storage); if (BROTLI_IS_OOM(m)) return BROTLI_FALSE; } - s->last_byte_ = storage[storage_ix >> 3]; - s->last_byte_bits_ = storage_ix & 7u; + s->last_bytes_ = (uint16_t)(storage[storage_ix >> 3]); + s->last_bytes_bits_ = storage_ix & 7u; UpdateLastProcessedPos(s); *output = &storage[0]; *out_size = storage_ix >> 3; @@ -946,27 +1041,36 @@ static BROTLI_BOOL EncodeData( InitOrStitchToPreviousBlock(m, &s->hasher_, data, mask, &s->params, wrapped_last_processed_pos, bytes, is_last); + + literal_context_mode = ChooseContextMode( + &s->params, data, WrapPosition(s->last_flush_pos_), + mask, (size_t)(s->input_pos_ - s->last_flush_pos_)); + if (BROTLI_IS_OOM(m)) return BROTLI_FALSE; + if (s->num_commands_ && s->last_insert_len_ == 0) { + ExtendLastCommand(s, &bytes, &wrapped_last_processed_pos); + } + if (s->params.quality == ZOPFLIFICATION_QUALITY) { BROTLI_DCHECK(s->params.hasher.type == 10); - BrotliCreateZopfliBackwardReferences( - m, dictionary, bytes, wrapped_last_processed_pos, + BrotliCreateZopfliBackwardReferences(m, + bytes, wrapped_last_processed_pos, data, mask, &s->params, s->hasher_, s->dist_cache_, &s->last_insert_len_, &s->commands_[s->num_commands_], &s->num_commands_, &s->num_literals_); if (BROTLI_IS_OOM(m)) return BROTLI_FALSE; } else if (s->params.quality == HQ_ZOPFLIFICATION_QUALITY) { BROTLI_DCHECK(s->params.hasher.type == 10); - 
BrotliCreateHqZopfliBackwardReferences( - m, dictionary, bytes, wrapped_last_processed_pos, + BrotliCreateHqZopfliBackwardReferences(m, + bytes, wrapped_last_processed_pos, data, mask, &s->params, s->hasher_, s->dist_cache_, &s->last_insert_len_, &s->commands_[s->num_commands_], &s->num_commands_, &s->num_literals_); if (BROTLI_IS_OOM(m)) return BROTLI_FALSE; } else { BrotliCreateBackwardReferences( - dictionary, bytes, wrapped_last_processed_pos, + bytes, wrapped_last_processed_pos, data, mask, &s->params, s->hasher_, s->dist_cache_, &s->last_insert_len_, &s->commands_[s->num_commands_], &s->num_commands_, &s->num_literals_); @@ -1018,18 +1122,19 @@ static BROTLI_BOOL EncodeData( { const uint32_t metablock_size = (uint32_t)(s->input_pos_ - s->last_flush_pos_); - uint8_t* storage = GetBrotliStorage(s, 2 * metablock_size + 502); - size_t storage_ix = s->last_byte_bits_; + uint8_t* storage = GetBrotliStorage(s, 2 * metablock_size + 503); + size_t storage_ix = s->last_bytes_bits_; if (BROTLI_IS_OOM(m)) return BROTLI_FALSE; - storage[0] = s->last_byte_; + storage[0] = (uint8_t)s->last_bytes_; + storage[1] = (uint8_t)(s->last_bytes_ >> 8); WriteMetaBlockInternal( m, data, mask, s->last_flush_pos_, metablock_size, is_last, - &s->params, s->prev_byte_, s->prev_byte2_, + literal_context_mode, &s->params, s->prev_byte_, s->prev_byte2_, s->num_literals_, s->num_commands_, s->commands_, s->saved_dist_cache_, s->dist_cache_, &storage_ix, storage); if (BROTLI_IS_OOM(m)) return BROTLI_FALSE; - s->last_byte_ = storage[storage_ix >> 3]; - s->last_byte_bits_ = storage_ix & 7u; + s->last_bytes_ = (uint16_t)(storage[storage_ix >> 3]); + s->last_bytes_bits_ = storage_ix & 7u; s->last_flush_pos_ = s->input_pos_; if (UpdateLastProcessedPos(s)) { HasherReset(s->hasher_); @@ -1058,10 +1163,11 @@ static BROTLI_BOOL EncodeData( static size_t WriteMetadataHeader( BrotliEncoderState* s, const size_t block_size, uint8_t* header) { size_t storage_ix; - storage_ix = s->last_byte_bits_; - 
header[0] = s->last_byte_; - s->last_byte_ = 0; - s->last_byte_bits_ = 0; + storage_ix = s->last_bytes_bits_; + header[0] = (uint8_t)s->last_bytes_; + header[1] = (uint8_t)(s->last_bytes_ >> 8); + s->last_bytes_ = 0; + s->last_bytes_bits_ = 0; BrotliWriteBits(1, 0, &storage_ix, header); BrotliWriteBits(2, 3, &storage_ix, header); @@ -1091,15 +1197,14 @@ static BROTLI_BOOL BrotliCompressBufferQuality10( BROTLI_BOOL ok = BROTLI_TRUE; const size_t max_out_size = *encoded_size; size_t total_out_size = 0; - uint8_t last_byte; - uint8_t last_byte_bits; + uint16_t last_bytes; + uint8_t last_bytes_bits; HasherHandle hasher = NULL; const size_t hasher_eff_size = BROTLI_MIN(size_t, input_size, max_backward_limit + BROTLI_WINDOW_GAP); BrotliEncoderParams params; - const BrotliDictionary* dictionary = BrotliGetDictionary(); const int lgmetablock = BROTLI_MIN(int, 24, lgwin + 1); size_t max_block_size; @@ -1113,14 +1218,18 @@ static BROTLI_BOOL BrotliCompressBufferQuality10( BrotliEncoderInitParams(¶ms); params.quality = 10; params.lgwin = lgwin; + if (lgwin > BROTLI_MAX_WINDOW_BITS) { + params.large_window = BROTLI_TRUE; + } SanitizeParams(¶ms); params.lgblock = ComputeLgBlock(¶ms); + ChooseDistanceParams(¶ms); max_block_size = (size_t)1 << params.lgblock; BrotliInitMemoryManager(m, 0, 0, 0); BROTLI_DCHECK(input_size <= mask + 1); - EncodeWindowBits(lgwin, &last_byte, &last_byte_bits); + EncodeWindowBits(lgwin, params.large_window, &last_bytes, &last_bytes_bits); InitOrStitchToPreviousBlock(m, &hasher, input_buffer, mask, ¶ms, 0, hasher_eff_size, BROTLI_TRUE); if (BROTLI_IS_OOM(m)) goto oom; @@ -1140,6 +1249,9 @@ static BROTLI_BOOL BrotliCompressBufferQuality10( uint8_t* storage; size_t storage_ix; + ContextType literal_context_mode = ChooseContextMode(¶ms, + input_buffer, metablock_start, mask, metablock_end - metablock_start); + size_t block_start; for (block_start = metablock_start; block_start < metablock_end; ) { size_t block_size = @@ -1151,10 +1263,9 @@ static 
BROTLI_BOOL BrotliCompressBufferQuality10( BrotliInitZopfliNodes(nodes, block_size + 1); StitchToPreviousBlockH10(hasher, block_size, block_start, input_buffer, mask); - path_size = BrotliZopfliComputeShortestPath( - m, dictionary, block_size, block_start, - input_buffer, mask, ¶ms, max_backward_limit, dist_cache, hasher, - nodes); + path_size = BrotliZopfliComputeShortestPath(m, + block_size, block_start, input_buffer, mask, ¶ms, + max_backward_limit, dist_cache, hasher, nodes); if (BROTLI_IS_OOM(m)) goto oom; /* We allocate a command buffer in the first iteration of this loop that will be likely big enough for the whole metablock, so that for most @@ -1197,13 +1308,14 @@ static BROTLI_BOOL BrotliCompressBufferQuality10( is_last = TO_BROTLI_BOOL(metablock_start + metablock_size == input_size); storage = NULL; - storage_ix = last_byte_bits; + storage_ix = last_bytes_bits; if (metablock_size == 0) { /* Write the ISLAST and ISEMPTY bits. */ storage = BROTLI_ALLOC(m, uint8_t, 16); if (BROTLI_IS_OOM(m)) goto oom; - storage[0] = last_byte; + storage[0] = (uint8_t)last_bytes; + storage[1] = (uint8_t)(last_bytes >> 8); BrotliWriteBits(2, 3, &storage_ix, storage); storage_ix = (storage_ix + 7u) & ~7u; } else if (!ShouldCompress(input_buffer, mask, metablock_start, @@ -1213,37 +1325,35 @@ static BROTLI_BOOL BrotliCompressBufferQuality10( memcpy(dist_cache, saved_dist_cache, 4 * sizeof(dist_cache[0])); storage = BROTLI_ALLOC(m, uint8_t, metablock_size + 16); if (BROTLI_IS_OOM(m)) goto oom; - storage[0] = last_byte; + storage[0] = (uint8_t)last_bytes; + storage[1] = (uint8_t)(last_bytes >> 8); BrotliStoreUncompressedMetaBlock(is_last, input_buffer, metablock_start, mask, metablock_size, &storage_ix, storage); } else { - uint32_t num_direct_distance_codes = 0; - uint32_t distance_postfix_bits = 0; - ContextType literal_context_mode = CONTEXT_UTF8; MetaBlockSplit mb; - InitMetaBlockSplit(&mb); - if (!BrotliIsMostlyUTF8(input_buffer, metablock_start, mask, - metablock_size, 
kMinUTF8Ratio)) { - literal_context_mode = CONTEXT_SIGNED; + /* The number of distance symbols effectively used by + "Large Window Brotli" (32-bit). */ + uint32_t num_effective_dist_codes = params.dist.alphabet_size; + if (num_effective_dist_codes > BROTLI_NUM_HISTOGRAM_DISTANCE_SYMBOLS) { + num_effective_dist_codes = BROTLI_NUM_HISTOGRAM_DISTANCE_SYMBOLS; } + InitMetaBlockSplit(&mb); BrotliBuildMetaBlock(m, input_buffer, metablock_start, mask, ¶ms, prev_byte, prev_byte2, commands, num_commands, literal_context_mode, &mb); if (BROTLI_IS_OOM(m)) goto oom; - BrotliOptimizeHistograms(num_direct_distance_codes, - distance_postfix_bits, - &mb); - storage = BROTLI_ALLOC(m, uint8_t, 2 * metablock_size + 502); + BrotliOptimizeHistograms(num_effective_dist_codes, &mb); + storage = BROTLI_ALLOC(m, uint8_t, 2 * metablock_size + 503); if (BROTLI_IS_OOM(m)) goto oom; - storage[0] = last_byte; + storage[0] = (uint8_t)last_bytes; + storage[1] = (uint8_t)(last_bytes >> 8); BrotliStoreMetaBlock(m, input_buffer, metablock_start, metablock_size, mask, prev_byte, prev_byte2, is_last, - num_direct_distance_codes, - distance_postfix_bits, + ¶ms, literal_context_mode, commands, num_commands, &mb, @@ -1252,16 +1362,17 @@ static BROTLI_BOOL BrotliCompressBufferQuality10( if (metablock_size + 4 < (storage_ix >> 3)) { /* Restore the distance cache and last byte. 
*/ memcpy(dist_cache, saved_dist_cache, 4 * sizeof(dist_cache[0])); - storage[0] = last_byte; - storage_ix = last_byte_bits; + storage[0] = (uint8_t)last_bytes; + storage[1] = (uint8_t)(last_bytes >> 8); + storage_ix = last_bytes_bits; BrotliStoreUncompressedMetaBlock(is_last, input_buffer, metablock_start, mask, metablock_size, &storage_ix, storage); } DestroyMetaBlockSplit(m, &mb); } - last_byte = storage[storage_ix >> 3]; - last_byte_bits = storage_ix & 7u; + last_bytes = (uint16_t)(storage[storage_ix >> 3]); + last_bytes_bits = storage_ix & 7u; metablock_start += metablock_size; if (metablock_start < input_size) { prev_byte = input_buffer[metablock_start - 1]; @@ -1296,8 +1407,8 @@ oom: size_t BrotliEncoderMaxCompressedSize(size_t input_size) { /* [window bits / empty metadata] + N * [uncompressed] + [last empty] */ - size_t num_small_blocks = input_size >> 14; - size_t overhead = 2 + (4 * num_small_blocks) + 3 + 1; + size_t num_large_blocks = input_size >> 14; + size_t overhead = 2 + (4 * num_large_blocks) + 3 + 1; size_t result = input_size + overhead; if (input_size == 0) return 2; return (result < input_size) ? 0 : result; @@ -1360,7 +1471,7 @@ BROTLI_BOOL BrotliEncoderCompress( } if (quality == 10) { /* TODO: Implement this direct path for all quality levels. 
*/ - const int lg_win = BROTLI_MIN(int, BROTLI_MAX_WINDOW_BITS, + const int lg_win = BROTLI_MIN(int, BROTLI_LARGE_MAX_WINDOW_BITS, BROTLI_MAX(int, 16, lgwin)); int ok = BrotliCompressBufferQuality10(lg_win, input_size, input_buffer, encoded_size, encoded_buffer); @@ -1384,6 +1495,9 @@ BROTLI_BOOL BrotliEncoderCompress( BrotliEncoderSetParameter(s, BROTLI_PARAM_LGWIN, (uint32_t)lgwin); BrotliEncoderSetParameter(s, BROTLI_PARAM_MODE, (uint32_t)mode); BrotliEncoderSetParameter(s, BROTLI_PARAM_SIZE_HINT, (uint32_t)input_size); + if (lgwin > BROTLI_MAX_WINDOW_BITS) { + BrotliEncoderSetParameter(s, BROTLI_PARAM_LARGE_WINDOW, BROTLI_TRUE); + } result = BrotliEncoderCompressStream(s, BROTLI_OPERATION_FINISH, &available_in, &next_in, &available_out, &next_out, &total_out); if (!BrotliEncoderIsFinished(s)) result = 0; @@ -1406,11 +1520,11 @@ fallback: } static void InjectBytePaddingBlock(BrotliEncoderState* s) { - uint32_t seal = s->last_byte_; - size_t seal_bits = s->last_byte_bits_; + uint32_t seal = s->last_bytes_; + size_t seal_bits = s->last_bytes_bits_; uint8_t* destination; - s->last_byte_ = 0; - s->last_byte_bits_ = 0; + s->last_bytes_ = 0; + s->last_bytes_bits_ = 0; /* is_last = 0, data_nibbles = 11, reserved = 0, meta_nibbles = 00 */ seal |= 0x6u << seal_bits; seal_bits += 6; @@ -1424,6 +1538,7 @@ static void InjectBytePaddingBlock(BrotliEncoderState* s) { } destination[0] = (uint8_t)seal; if (seal_bits > 8) destination[1] = (uint8_t)(seal >> 8); + if (seal_bits > 16) destination[2] = (uint8_t)(seal >> 16); s->available_out_ += (seal_bits + 7) >> 3; } @@ -1432,7 +1547,7 @@ static void InjectBytePaddingBlock(BrotliEncoderState* s) { static BROTLI_BOOL InjectFlushOrPushOutput(BrotliEncoderState* s, size_t* available_out, uint8_t** next_out, size_t* total_out) { if (s->stream_state_ == BROTLI_STREAM_FLUSH_REQUESTED && - s->last_byte_bits_ != 0) { + s->last_bytes_bits_ != 0) { InjectBytePaddingBlock(s); return BROTLI_TRUE; } @@ -1513,10 +1628,10 @@ static BROTLI_BOOL 
BrotliEncoderCompressStreamFast( (*available_in == block_size) && (op == BROTLI_OPERATION_FINISH); BROTLI_BOOL force_flush = (*available_in == block_size) && (op == BROTLI_OPERATION_FLUSH); - size_t max_out_size = 2 * block_size + 502; + size_t max_out_size = 2 * block_size + 503; BROTLI_BOOL inplace = BROTLI_TRUE; uint8_t* storage = NULL; - size_t storage_ix = s->last_byte_bits_; + size_t storage_ix = s->last_bytes_bits_; size_t table_size; int* table; @@ -1531,7 +1646,8 @@ static BROTLI_BOOL BrotliEncoderCompressStreamFast( storage = GetBrotliStorage(s, max_out_size); if (BROTLI_IS_OOM(m)) return BROTLI_FALSE; } - storage[0] = s->last_byte_; + storage[0] = (uint8_t)s->last_bytes_; + storage[1] = (uint8_t)(s->last_bytes_ >> 8); table = GetHashTable(s, s->params.quality, block_size, &table_size); if (BROTLI_IS_OOM(m)) return BROTLI_FALSE; @@ -1561,8 +1677,8 @@ static BROTLI_BOOL BrotliEncoderCompressStreamFast( s->next_out_ = storage; s->available_out_ = out_bytes; } - s->last_byte_ = storage[storage_ix >> 3]; - s->last_byte_bits_ = storage_ix & 7u; + s->last_bytes_ = (uint16_t)(storage[storage_ix >> 3]); + s->last_bytes_bits_ = storage_ix & 7u; if (force_flush) s->stream_state_ = BROTLI_STREAM_FLUSH_REQUESTED; if (is_last) s->stream_state_ = BROTLI_STREAM_FINISHED; diff --git a/c/enc/encoder_dict.c b/c/enc/encoder_dict.c new file mode 100755 index 0000000..8b2f6ad --- /dev/null +++ b/c/enc/encoder_dict.c @@ -0,0 +1,32 @@ +/* Copyright 2017 Google Inc. All Rights Reserved. + + Distributed under MIT license. 
+ See file LICENSE for detail or copy at https://opensource.org/licenses/MIT +*/ + +#include "./encoder_dict.h" + +#include "../common/dictionary.h" +#include "../common/transform.h" +#include "./dictionary_hash.h" +#include "./hash.h" + +#if defined(__cplusplus) || defined(c_plusplus) +extern "C" { +#endif + +void BrotliInitEncoderDictionary(BrotliEncoderDictionary* dict) { + dict->words = BrotliGetDictionary(); + + dict->hash_table = kStaticDictionaryHash; + dict->buckets = kStaticDictionaryBuckets; + dict->dict_words = kStaticDictionaryWords; + + dict->cutoffTransformsCount = kCutoffTransformsCount; + dict->cutoffTransforms = kCutoffTransforms; + +} + +#if defined(__cplusplus) || defined(c_plusplus) +} /* extern "C" */ +#endif diff --git a/c/enc/encoder_dict.h b/c/enc/encoder_dict.h new file mode 100755 index 0000000..9ac8b4a --- /dev/null +++ b/c/enc/encoder_dict.h @@ -0,0 +1,42 @@ +/* Copyright 2017 Google Inc. All Rights Reserved. + + Distributed under MIT license. + See file LICENSE for detail or copy at https://opensource.org/licenses/MIT +*/ + +#ifndef BROTLI_ENC_ENCODER_DICT_H_ +#define BROTLI_ENC_ENCODER_DICT_H_ + +#include "../common/dictionary.h" +#include "../common/platform.h" +#include +#include "./static_dict_lut.h" + +#if defined(__cplusplus) || defined(c_plusplus) +extern "C" { +#endif + +/* Dictionary data (words and transforms) for 1 possible context */ +typedef struct BrotliEncoderDictionary { + const BrotliDictionary* words; + + /* cut off for fast encoder */ + uint32_t cutoffTransformsCount; + uint64_t cutoffTransforms; + + /* from dictionary_hash.h, for fast encoder */ + const uint16_t* hash_table; + + /* from static_dict_lut.h, for slow encoder */ + const uint16_t* buckets; + const DictWord* dict_words; +} BrotliEncoderDictionary; + +BROTLI_INTERNAL void BrotliInitEncoderDictionary(BrotliEncoderDictionary* dict); + + +#if defined(__cplusplus) || defined(c_plusplus) +} /* extern "C" */ +#endif + +#endif /* BROTLI_ENC_ENCODER_DICT_H_ */ diff 
--git a/c/enc/entropy_encode.c b/c/enc/entropy_encode.c index 9e0ea11..97f9dfb 100644 --- a/c/enc/entropy_encode.c +++ b/c/enc/entropy_encode.c @@ -66,11 +66,11 @@ static BROTLI_INLINE BROTLI_BOOL SortHuffmanTree( we are not planning to use this with extremely long blocks. See http://en.wikipedia.org/wiki/Huffman_coding */ -void BrotliCreateHuffmanTree(const uint32_t *data, +void BrotliCreateHuffmanTree(const uint32_t* data, const size_t length, const int tree_limit, HuffmanTree* tree, - uint8_t *depth) { + uint8_t* depth) { uint32_t count_limit; HuffmanTree sentinel; InitHuffmanTree(&sentinel, BROTLI_UINT32_MAX, -1, -1); @@ -371,8 +371,8 @@ void BrotliOptimizeHuffmanCountsForRle(size_t length, uint32_t* counts, } static void DecideOverRleUse(const uint8_t* depth, const size_t length, - BROTLI_BOOL *use_rle_for_non_zero, - BROTLI_BOOL *use_rle_for_zero) { + BROTLI_BOOL* use_rle_for_non_zero, + BROTLI_BOOL* use_rle_for_zero) { size_t total_reps_zero = 0; size_t total_reps_non_zero = 0; size_t count_reps_zero = 1; @@ -454,26 +454,26 @@ void BrotliWriteHuffmanTree(const uint8_t* depth, static uint16_t BrotliReverseBits(size_t num_bits, uint16_t bits) { static const size_t kLut[16] = { /* Pre-reversed 4-bit values. 
*/ - 0x0, 0x8, 0x4, 0xc, 0x2, 0xa, 0x6, 0xe, - 0x1, 0x9, 0x5, 0xd, 0x3, 0xb, 0x7, 0xf + 0x00, 0x08, 0x04, 0x0C, 0x02, 0x0A, 0x06, 0x0E, + 0x01, 0x09, 0x05, 0x0D, 0x03, 0x0B, 0x07, 0x0F }; - size_t retval = kLut[bits & 0xf]; + size_t retval = kLut[bits & 0x0F]; size_t i; for (i = 4; i < num_bits; i += 4) { retval <<= 4; bits = (uint16_t)(bits >> 4); - retval |= kLut[bits & 0xf]; + retval |= kLut[bits & 0x0F]; } - retval >>= ((0 - num_bits) & 0x3); + retval >>= ((0 - num_bits) & 0x03); return (uint16_t)retval; } /* 0..15 are values for bits */ #define MAX_HUFFMAN_BITS 16 -void BrotliConvertBitDepthsToSymbols(const uint8_t *depth, +void BrotliConvertBitDepthsToSymbols(const uint8_t* depth, size_t len, - uint16_t *bits) { + uint16_t* bits) { /* In Brotli, all bit depths are [1..15] 0 bit depth means that the symbol does not exist. */ uint16_t bl_count[MAX_HUFFMAN_BITS] = { 0 }; diff --git a/c/enc/entropy_encode.h b/c/enc/entropy_encode.h index ef7c216..f23d9c3 100644 --- a/c/enc/entropy_encode.h +++ b/c/enc/entropy_encode.h @@ -46,11 +46,11 @@ BROTLI_INTERNAL BROTLI_BOOL BrotliSetDepth( be at least 2 * length + 1 long. See http://en.wikipedia.org/wiki/Huffman_coding */ -BROTLI_INTERNAL void BrotliCreateHuffmanTree(const uint32_t *data, +BROTLI_INTERNAL void BrotliCreateHuffmanTree(const uint32_t* data, const size_t length, const int tree_limit, HuffmanTree* tree, - uint8_t *depth); + uint8_t* depth); /* Change the population counts in a way that the consequent Huffman tree compression, especially its RLE-part will be more @@ -72,9 +72,9 @@ BROTLI_INTERNAL void BrotliWriteHuffmanTree(const uint8_t* depth, uint8_t* extra_bits_data); /* Get the actual bit values for a tree of bit depths. */ -BROTLI_INTERNAL void BrotliConvertBitDepthsToSymbols(const uint8_t *depth, +BROTLI_INTERNAL void BrotliConvertBitDepthsToSymbols(const uint8_t* depth, size_t len, - uint16_t *bits); + uint16_t* bits); /* Input size optimized Shell sort. 
*/ typedef BROTLI_BOOL (*HuffmanTreeComparator)( diff --git a/c/enc/entropy_encode_static.h b/c/enc/entropy_encode_static.h index b2c1fbb..62b99a9 100644 --- a/c/enc/entropy_encode_static.h +++ b/c/enc/entropy_encode_static.h @@ -83,7 +83,7 @@ static const uint32_t kCodeLengthBits[18] = { static BROTLI_INLINE void StoreStaticCodeLengthCode( size_t* storage_ix, uint8_t* storage) { BrotliWriteBits( - 40, BROTLI_MAKE_UINT64_T(0x0000ffU, 0x55555554U), storage_ix, storage); + 40, BROTLI_MAKE_UINT64_T(0x0000FFu, 0x55555554u), storage_ix, storage); } static const uint64_t kZeroRepsBits[BROTLI_NUM_COMMAND_SYMBOLS] = { @@ -529,7 +529,7 @@ static const uint16_t kStaticDistanceCodeBits[64] = { static BROTLI_INLINE void StoreStaticDistanceHuffmanTree( size_t* storage_ix, uint8_t* storage) { - BrotliWriteBits(28, 0x0369dc03U, storage_ix, storage); + BrotliWriteBits(28, 0x0369DC03u, storage_ix, storage); } #if defined(__cplusplus) || defined(c_plusplus) diff --git a/c/enc/hash.h b/c/enc/hash.h index 2a1634d..1827ce6 100644 --- a/c/enc/hash.h +++ b/c/enc/hash.h @@ -16,6 +16,7 @@ #include "../common/dictionary.h" #include "../common/platform.h" #include +#include "./encoder_dict.h" #include "./fast_log.h" #include "./find_match_length.h" #include "./memory.h" @@ -73,10 +74,10 @@ typedef struct HasherSearchResult { * There is no effort to ensure that it is a prime, the oddity is enough for this use. * The number has been tuned heuristically against compression benchmarks. 
*/ -static const uint32_t kHashMul32 = 0x1e35a7bd; -static const uint64_t kHashMul64 = BROTLI_MAKE_UINT64_T(0x1e35a7bd, 0x1e35a7bd); +static const uint32_t kHashMul32 = 0x1E35A7BD; +static const uint64_t kHashMul64 = BROTLI_MAKE_UINT64_T(0x1E35A7BD, 0x1E35A7BD); static const uint64_t kHashMul64Long = - BROTLI_MAKE_UINT64_T(0x1fe35a7bU, 0xd3579bd3U); + BROTLI_MAKE_UINT64_T(0x1FE35A7Bu, 0xD3579BD3u); static BROTLI_INLINE uint32_t Hash14(const uint8_t* data) { uint32_t h = BROTLI_UNALIGNED_LOAD32LE(data) * kHashMul32; @@ -146,8 +147,9 @@ static BROTLI_INLINE score_t BackwardReferencePenaltyUsingLastDistance( } static BROTLI_INLINE BROTLI_BOOL TestStaticDictionaryItem( - const BrotliDictionary* dictionary, size_t item, const uint8_t* data, - size_t max_length, size_t max_backward, HasherSearchResult* out) { + const BrotliEncoderDictionary* dictionary, size_t item, const uint8_t* data, + size_t max_length, size_t max_backward, size_t max_distance, + HasherSearchResult* out) { size_t len; size_t dist; size_t offset; @@ -156,24 +158,24 @@ static BROTLI_INLINE BROTLI_BOOL TestStaticDictionaryItem( score_t score; len = item & 0x1F; dist = item >> 5; - offset = dictionary->offsets_by_length[len] + len * dist; + offset = dictionary->words->offsets_by_length[len] + len * dist; if (len > max_length) { return BROTLI_FALSE; } matchlen = - FindMatchLengthWithLimit(data, &dictionary->data[offset], len); - if (matchlen + kCutoffTransformsCount <= len || matchlen == 0) { + FindMatchLengthWithLimit(data, &dictionary->words->data[offset], len); + if (matchlen + dictionary->cutoffTransformsCount <= len || matchlen == 0) { return BROTLI_FALSE; } { size_t cut = len - matchlen; - size_t transform_id = - (cut << 2) + (size_t)((kCutoffTransforms >> (cut * 6)) & 0x3F); + size_t transform_id = (cut << 2) + + (size_t)((dictionary->cutoffTransforms >> (cut * 6)) & 0x3F); backward = max_backward + dist + 1 + - (transform_id << dictionary->size_bits_by_length[len]); + (transform_id << 
dictionary->words->size_bits_by_length[len]); } - if (backward >= BROTLI_MAX_DISTANCE) { + if (backward > max_distance) { return BROTLI_FALSE; } score = BackwardReferenceScore(matchlen, backward); @@ -188,9 +190,10 @@ static BROTLI_INLINE BROTLI_BOOL TestStaticDictionaryItem( } static BROTLI_INLINE void SearchInStaticDictionary( - const BrotliDictionary* dictionary, const uint16_t* dictionary_hash, + const BrotliEncoderDictionary* dictionary, HasherHandle handle, const uint8_t* data, size_t max_length, - size_t max_backward, HasherSearchResult* out, BROTLI_BOOL shallow) { + size_t max_backward, size_t max_distance, + HasherSearchResult* out, BROTLI_BOOL shallow) { size_t key; size_t i; HasherCommon* self = GetHasherCommon(handle); @@ -199,11 +202,11 @@ static BROTLI_INLINE void SearchInStaticDictionary( } key = Hash14(data) << 1; for (i = 0; i < (shallow ? 1u : 2u); ++i, ++key) { - size_t item = dictionary_hash[key]; + size_t item = dictionary->hash_table[key]; self->dict_num_lookups++; if (item != 0) { BROTLI_BOOL item_matches = TestStaticDictionaryItem( - dictionary, item, data, max_length, max_backward, out); + dictionary, item, data, max_length, max_backward, max_distance, out); if (item_matches) { self->dict_num_matches++; } diff --git a/c/enc/hash_forgetful_chain_inc.h b/c/enc/hash_forgetful_chain_inc.h index 46d363c..41cb3ff 100644 --- a/c/enc/hash_forgetful_chain_inc.h +++ b/c/enc/hash_forgetful_chain_inc.h @@ -28,7 +28,7 @@ static BROTLI_INLINE size_t FN(HashTypeLength)(void) { return 4; } static BROTLI_INLINE size_t FN(StoreLookahead)(void) { return 4; } /* HashBytes is the function that chooses the bucket to place the address in.*/ -static BROTLI_INLINE size_t FN(HashBytes)(const uint8_t *data) { +static BROTLI_INLINE size_t FN(HashBytes)(const uint8_t* data) { const uint32_t h = BROTLI_UNALIGNED_LOAD32LE(data) * kHashMul32; /* The higher bits contain more mixture from the multiplication, so we take our results from there. 
*/ @@ -115,7 +115,7 @@ static BROTLI_INLINE void FN(Store)(HasherHandle BROTLI_RESTRICT handle, } static BROTLI_INLINE void FN(StoreRange)(HasherHandle handle, - const uint8_t *data, const size_t mask, const size_t ix_start, + const uint8_t* data, const size_t mask, const size_t ix_start, const size_t ix_end) { size_t i; for (i = ix_start; i < ix_end; ++i) { @@ -154,11 +154,12 @@ static BROTLI_INLINE void FN(PrepareDistanceCache)( Writes the best match into |out|. |out|->score is updated only if a better match is found. */ static BROTLI_INLINE void FN(FindLongestMatch)(HasherHandle handle, - const BrotliDictionary* dictionary, const uint16_t* dictionary_hash, + const BrotliEncoderDictionary* dictionary, const uint8_t* BROTLI_RESTRICT data, const size_t ring_buffer_mask, const int* BROTLI_RESTRICT distance_cache, const size_t cur_ix, const size_t max_length, const size_t max_backward, - const size_t gap, HasherSearchResult* BROTLI_RESTRICT out) { + const size_t gap, const size_t max_distance, + HasherSearchResult* BROTLI_RESTRICT out) { HashForgetfulChain* self = FN(Self)(handle); const size_t cur_ix_masked = cur_ix & ring_buffer_mask; /* Don't accept a short copy from far away. 
*/ @@ -240,9 +241,9 @@ static BROTLI_INLINE void FN(FindLongestMatch)(HasherHandle handle, FN(Store)(handle, data, ring_buffer_mask, cur_ix); } if (out->score == min_score) { - SearchInStaticDictionary(dictionary, dictionary_hash, - handle, &data[cur_ix_masked], max_length, max_backward + gap, out, - BROTLI_FALSE); + SearchInStaticDictionary(dictionary, + handle, &data[cur_ix_masked], max_length, max_backward + gap, + max_distance, out, BROTLI_FALSE); } } diff --git a/c/enc/hash_longest_match64_inc.h b/c/enc/hash_longest_match64_inc.h index 6b0697b..e099edf 100644 --- a/c/enc/hash_longest_match64_inc.h +++ b/c/enc/hash_longest_match64_inc.h @@ -20,7 +20,7 @@ static BROTLI_INLINE size_t FN(HashTypeLength)(void) { return 8; } static BROTLI_INLINE size_t FN(StoreLookahead)(void) { return 8; } /* HashBytes is the function that chooses the bucket to place the address in. */ -static BROTLI_INLINE uint32_t FN(HashBytes)(const uint8_t *data, +static BROTLI_INLINE uint32_t FN(HashBytes)(const uint8_t* data, const uint64_t mask, const int shift) { const uint64_t h = (BROTLI_UNALIGNED_LOAD64LE(data) & mask) * kHashMul64Long; @@ -105,7 +105,7 @@ static BROTLI_INLINE size_t FN(HashMemAllocInBytes)( /* Look at 4 bytes at &data[ix & mask]. Compute a hash from these, and store the value of ix at that position. 
*/ -static BROTLI_INLINE void FN(Store)(HasherHandle handle, const uint8_t *data, +static BROTLI_INLINE void FN(Store)(HasherHandle handle, const uint8_t* data, const size_t mask, const size_t ix) { HashLongestMatch* self = FN(Self)(handle); uint16_t* num = FN(Num)(self); @@ -119,7 +119,7 @@ static BROTLI_INLINE void FN(Store)(HasherHandle handle, const uint8_t *data, } static BROTLI_INLINE void FN(StoreRange)(HasherHandle handle, - const uint8_t *data, const size_t mask, const size_t ix_start, + const uint8_t* data, const size_t mask, const size_t ix_start, const size_t ix_end) { size_t i; for (i = ix_start; i < ix_end; ++i) { @@ -158,11 +158,11 @@ static BROTLI_INLINE void FN(PrepareDistanceCache)( Writes the best match into |out|. |out|->score is updated only if a better match is found. */ static BROTLI_INLINE void FN(FindLongestMatch)(HasherHandle handle, - const BrotliDictionary* dictionary, const uint16_t* dictionary_hash, + const BrotliEncoderDictionary* dictionary, const uint8_t* BROTLI_RESTRICT data, const size_t ring_buffer_mask, const int* BROTLI_RESTRICT distance_cache, const size_t cur_ix, const size_t max_length, const size_t max_backward, const size_t gap, - HasherSearchResult* BROTLI_RESTRICT out) { + const size_t max_distance, HasherSearchResult* BROTLI_RESTRICT out) { HasherCommon* common = GetHasherCommon(handle); HashLongestMatch* self = FN(Self)(handle); uint16_t* num = FN(Num)(self); @@ -257,9 +257,9 @@ static BROTLI_INLINE void FN(FindLongestMatch)(HasherHandle handle, ++num[key]; } if (min_score == out->score) { - SearchInStaticDictionary(dictionary, dictionary_hash, - handle, &data[cur_ix_masked], max_length, max_backward + gap, out, - BROTLI_FALSE); + SearchInStaticDictionary(dictionary, + handle, &data[cur_ix_masked], max_length, max_backward + gap, + max_distance, out, BROTLI_FALSE); } } diff --git a/c/enc/hash_longest_match_inc.h b/c/enc/hash_longest_match_inc.h index d24576d..951d7a4 100644 --- a/c/enc/hash_longest_match_inc.h +++ 
b/c/enc/hash_longest_match_inc.h @@ -20,7 +20,7 @@ static BROTLI_INLINE size_t FN(HashTypeLength)(void) { return 4; } static BROTLI_INLINE size_t FN(StoreLookahead)(void) { return 4; } /* HashBytes is the function that chooses the bucket to place the address in. */ -static uint32_t FN(HashBytes)(const uint8_t *data, const int shift) { +static uint32_t FN(HashBytes)(const uint8_t* data, const int shift) { uint32_t h = BROTLI_UNALIGNED_LOAD32LE(data) * kHashMul32; /* The higher bits contain more mixture from the multiplication, so we take our results from there. */ @@ -112,7 +112,7 @@ static BROTLI_INLINE void FN(Store)(HasherHandle handle, const uint8_t* data, } static BROTLI_INLINE void FN(StoreRange)(HasherHandle handle, - const uint8_t *data, const size_t mask, const size_t ix_start, + const uint8_t* data, const size_t mask, const size_t ix_start, const size_t ix_end) { size_t i; for (i = ix_start; i < ix_end; ++i) { @@ -151,11 +151,11 @@ static BROTLI_INLINE void FN(PrepareDistanceCache)( Writes the best match into |out|. |out|->score is updated only if a better match is found. 
*/ static BROTLI_INLINE void FN(FindLongestMatch)(HasherHandle handle, - const BrotliDictionary* dictionary, const uint16_t* dictionary_hash, + const BrotliEncoderDictionary* dictionary, const uint8_t* BROTLI_RESTRICT data, const size_t ring_buffer_mask, const int* BROTLI_RESTRICT distance_cache, const size_t cur_ix, const size_t max_length, const size_t max_backward, const size_t gap, - HasherSearchResult* BROTLI_RESTRICT out) { + const size_t max_distance, HasherSearchResult* BROTLI_RESTRICT out) { HasherCommon* common = GetHasherCommon(handle); HashLongestMatch* self = FN(Self)(handle); uint16_t* num = FN(Num)(self); @@ -249,9 +249,9 @@ static BROTLI_INLINE void FN(FindLongestMatch)(HasherHandle handle, ++num[key]; } if (min_score == out->score) { - SearchInStaticDictionary(dictionary, dictionary_hash, - handle, &data[cur_ix_masked], max_length, max_backward + gap, out, - BROTLI_FALSE); + SearchInStaticDictionary(dictionary, + handle, &data[cur_ix_masked], max_length, max_backward + gap, + max_distance, out, BROTLI_FALSE); } } diff --git a/c/enc/hash_longest_match_quickly_inc.h b/c/enc/hash_longest_match_quickly_inc.h index 2c78351..a7b9639 100644 --- a/c/enc/hash_longest_match_quickly_inc.h +++ b/c/enc/hash_longest_match_quickly_inc.h @@ -81,7 +81,7 @@ static BROTLI_INLINE size_t FN(HashMemAllocInBytes)( Compute a hash from these, and store the value somewhere within [ix .. ix+3]. */ static BROTLI_INLINE void FN(Store)(HasherHandle handle, - const uint8_t *data, const size_t mask, const size_t ix) { + const uint8_t* data, const size_t mask, const size_t ix) { const uint32_t key = FN(HashBytes)(&data[ix & mask]); /* Wiggle the value with the bucket sweep range. 
*/ const uint32_t off = (ix >> 3) % BUCKET_SWEEP; @@ -89,7 +89,7 @@ static BROTLI_INLINE void FN(Store)(HasherHandle handle, } static BROTLI_INLINE void FN(StoreRange)(HasherHandle handle, - const uint8_t *data, const size_t mask, const size_t ix_start, + const uint8_t* data, const size_t mask, const size_t ix_start, const size_t ix_end) { size_t i; for (i = ix_start; i < ix_end; ++i) { @@ -125,11 +125,12 @@ static BROTLI_INLINE void FN(PrepareDistanceCache)( Writes the best match into |out|. |out|->score is updated only if a better match is found. */ static BROTLI_INLINE void FN(FindLongestMatch)( - HasherHandle handle, const BrotliDictionary* dictionary, - const uint16_t* dictionary_hash, const uint8_t* BROTLI_RESTRICT data, + HasherHandle handle, const BrotliEncoderDictionary* dictionary, + const uint8_t* BROTLI_RESTRICT data, const size_t ring_buffer_mask, const int* BROTLI_RESTRICT distance_cache, const size_t cur_ix, const size_t max_length, const size_t max_backward, - const size_t gap, HasherSearchResult* BROTLI_RESTRICT out) { + const size_t gap, const size_t max_distance, + HasherSearchResult* BROTLI_RESTRICT out) { HashLongestMatchQuickly* self = FN(Self)(handle); const size_t best_len_in = out->len; const size_t cur_ix_masked = cur_ix & ring_buffer_mask; @@ -191,7 +192,7 @@ static BROTLI_INLINE void FN(FindLongestMatch)( } } } else { - uint32_t *bucket = self->buckets_ + key; + uint32_t* bucket = self->buckets_ + key; int i; prev_ix = *bucket++; for (i = 0; i < BUCKET_SWEEP; ++i, prev_ix = *bucket++) { @@ -221,9 +222,9 @@ static BROTLI_INLINE void FN(FindLongestMatch)( } } if (USE_DICTIONARY && min_score == out->score) { - SearchInStaticDictionary(dictionary, dictionary_hash, - handle, &data[cur_ix_masked], max_length, max_backward + gap, out, - BROTLI_TRUE); + SearchInStaticDictionary(dictionary, + handle, &data[cur_ix_masked], max_length, max_backward + gap, + max_distance, out, BROTLI_TRUE); } self->buckets_[key + ((cur_ix >> 3) % BUCKET_SWEEP)] = 
(uint32_t)cur_ix; } diff --git a/c/enc/hash_to_binary_tree_inc.h b/c/enc/hash_to_binary_tree_inc.h index 73774b2..48097b1 100644 --- a/c/enc/hash_to_binary_tree_inc.h +++ b/c/enc/hash_to_binary_tree_inc.h @@ -24,7 +24,7 @@ static BROTLI_INLINE size_t FN(StoreLookahead)(void) { return MAX_TREE_COMP_LENGTH; } -static uint32_t FN(HashBytes)(const uint8_t *data) { +static uint32_t FN(HashBytes)(const uint8_t* data) { uint32_t h = BROTLI_UNALIGNED_LOAD32LE(data) * kHashMul32; /* The higher bits contain more mixture from the multiplication, so we take our results from there. */ @@ -200,7 +200,7 @@ static BROTLI_INLINE BackwardMatch* FN(StoreAndFindMatches)( sorted by strictly increasing length and (non-strictly) increasing distance. */ static BROTLI_INLINE size_t FN(FindAllMatches)(HasherHandle handle, - const BrotliDictionary* dictionary, const uint8_t* data, + const BrotliEncoderDictionary* dictionary, const uint8_t* data, const size_t ring_buffer_mask, const size_t cur_ix, const size_t max_length, const size_t max_backward, const size_t gap, const BrotliEncoderParams* params, BackwardMatch* matches) { @@ -252,7 +252,7 @@ static BROTLI_INLINE size_t FN(FindAllMatches)(HasherHandle handle, uint32_t dict_id = dict_matches[l]; if (dict_id < kInvalidMatch) { size_t distance = max_backward + gap + (dict_id >> 5) + 1; - if (distance < BROTLI_MAX_DISTANCE) { + if (distance <= params->dist.max_distance) { InitDictionaryBackwardMatch(matches++, distance, l, dict_id & 31); } } @@ -265,7 +265,7 @@ static BROTLI_INLINE size_t FN(FindAllMatches)(HasherHandle handle, /* Stores the hash of the next 4 bytes and re-roots the binary tree at the current sequence, without returning any matches. 
REQUIRES: ix + MAX_TREE_COMP_LENGTH <= end-of-current-block */ -static BROTLI_INLINE void FN(Store)(HasherHandle handle, const uint8_t *data, +static BROTLI_INLINE void FN(Store)(HasherHandle handle, const uint8_t* data, const size_t mask, const size_t ix) { HashToBinaryTree* self = FN(Self)(handle); /* Maximum distance is window size - 16, see section 9.1. of the spec. */ @@ -275,7 +275,7 @@ static BROTLI_INLINE void FN(Store)(HasherHandle handle, const uint8_t *data, } static BROTLI_INLINE void FN(StoreRange)(HasherHandle handle, - const uint8_t *data, const size_t mask, const size_t ix_start, + const uint8_t* data, const size_t mask, const size_t ix_start, const size_t ix_end) { size_t i = ix_start; size_t j = ix_start; diff --git a/c/enc/histogram.c b/c/enc/histogram.c index bb7b4c5..6da2ff6 100644 --- a/c/enc/histogram.c +++ b/c/enc/histogram.c @@ -8,9 +8,9 @@ #include "./histogram.h" +#include "../common/context.h" #include "./block_splitter.h" #include "./command.h" -#include "./context.h" #if defined(__cplusplus) || defined(c_plusplus) extern "C" { @@ -63,13 +63,16 @@ void BrotliBuildHistogramsWithContext( BlockSplitIteratorNext(&insert_and_copy_it); HistogramAddCommand(&insert_and_copy_histograms[insert_and_copy_it.type_], cmd->cmd_prefix_); + /* TODO: unwrap iterator blocks. */ for (j = cmd->insert_len_; j != 0; --j) { size_t context; BlockSplitIteratorNext(&literal_it); - context = context_modes ? 
- ((literal_it.type_ << BROTLI_LITERAL_CONTEXT_BITS) + - Context(prev_byte, prev_byte2, context_modes[literal_it.type_])) : - literal_it.type_; + context = literal_it.type_; + if (context_modes) { + ContextLut lut = BROTLI_CONTEXT_LUT(context_modes[context]); + context = (context << BROTLI_LITERAL_CONTEXT_BITS) + + BROTLI_CONTEXT(prev_byte, prev_byte2, lut); + } HistogramAddLiteral(&literal_histograms[context], ringbuffer[pos & mask]); prev_byte2 = prev_byte; @@ -86,7 +89,7 @@ void BrotliBuildHistogramsWithContext( context = (dist_it.type_ << BROTLI_DISTANCE_CONTEXT_BITS) + CommandDistanceContext(cmd); HistogramAddDistance(©_dist_histograms[context], - cmd->dist_prefix_); + cmd->dist_prefix_ & 0x3FF); } } } diff --git a/c/enc/histogram.h b/c/enc/histogram.h index b1b8d11..42af3c3 100644 --- a/c/enc/histogram.h +++ b/c/enc/histogram.h @@ -12,16 +12,19 @@ #include /* memset */ #include "../common/constants.h" +#include "../common/context.h" #include "../common/platform.h" #include #include "./block_splitter.h" #include "./command.h" -#include "./context.h" #if defined(__cplusplus) || defined(c_plusplus) extern "C" { #endif +/* The distance symbols effectively used by "Large Window Brotli" (32-bit). 
*/ +#define BROTLI_NUM_HISTOGRAM_DISTANCE_SYMBOLS 544 + #define FN(X) X ## Literal #define DATA_SIZE BROTLI_NUM_LITERAL_SYMBOLS #define DataType uint8_t @@ -38,7 +41,7 @@ extern "C" { #undef FN #define FN(X) X ## Distance -#define DATA_SIZE BROTLI_NUM_DISTANCE_SYMBOLS +#define DATA_SIZE BROTLI_NUM_HISTOGRAM_DISTANCE_SYMBOLS #include "./histogram_inc.h" /* NOLINT(build/include) */ #undef DataType #undef DATA_SIZE diff --git a/c/enc/histogram_inc.h b/c/enc/histogram_inc.h index 7807036..50eaf74 100644 --- a/c/enc/histogram_inc.h +++ b/c/enc/histogram_inc.h @@ -33,7 +33,7 @@ static BROTLI_INLINE void FN(HistogramAdd)(FN(Histogram)* self, size_t val) { } static BROTLI_INLINE void FN(HistogramAddVector)(FN(Histogram)* self, - const DataType *p, size_t n) { + const DataType* p, size_t n) { self->total_count_ += n; n += 1; while (--n) ++self->data_[*p++]; diff --git a/c/enc/literal_cost.c b/c/enc/literal_cost.c index 9bcb680..c231100 100644 --- a/c/enc/literal_cost.c +++ b/c/enc/literal_cost.c @@ -25,7 +25,7 @@ static size_t UTF8Position(size_t last, size_t c, size_t clamp) { return BROTLI_MIN(size_t, 1, clamp); } else { /* Let's decide over the last byte if this ends the sequence. */ - if (last < 0xe0) { + if (last < 0xE0) { return 0; /* Completed two or three byte coding. */ } else { /* Next one is the 'Byte 3' of utf-8 encoding. */ return BROTLI_MIN(size_t, 2, clamp); @@ -34,7 +34,7 @@ static size_t UTF8Position(size_t last, size_t c, size_t clamp) { } static size_t DecideMultiByteStatsLevel(size_t pos, size_t len, size_t mask, - const uint8_t *data) { + const uint8_t* data) { size_t counts[3] = { 0 }; size_t max_utf8 = 1; /* should be 2, but 1 compresses better. 
*/ size_t last_c = 0; @@ -54,7 +54,7 @@ static size_t DecideMultiByteStatsLevel(size_t pos, size_t len, size_t mask, } static void EstimateBitCostsForLiteralsUTF8(size_t pos, size_t len, size_t mask, - const uint8_t *data, float *cost) { + const uint8_t* data, float* cost) { /* max_utf8 is 0 (normal ASCII single byte modeling), 1 (for 2-byte UTF-8 modeling), or 2 (for 3-byte UTF-8 modeling). */ const size_t max_utf8 = DecideMultiByteStatsLevel(pos, len, mask, data); @@ -125,7 +125,7 @@ static void EstimateBitCostsForLiteralsUTF8(size_t pos, size_t len, size_t mask, } void BrotliEstimateBitCostsForLiterals(size_t pos, size_t len, size_t mask, - const uint8_t *data, float *cost) { + const uint8_t* data, float* cost) { if (BrotliIsMostlyUTF8(data, pos, mask, len, kMinUTF8Ratio)) { EstimateBitCostsForLiteralsUTF8(pos, len, mask, data, cost); return; diff --git a/c/enc/literal_cost.h b/c/enc/literal_cost.h index d2f430c..8f53f39 100644 --- a/c/enc/literal_cost.h +++ b/c/enc/literal_cost.h @@ -21,7 +21,7 @@ extern "C" { ring-buffer (data, mask) will take entropy coded and writes these estimates to the cost[0..len) array. 
*/ BROTLI_INTERNAL void BrotliEstimateBitCostsForLiterals( - size_t pos, size_t len, size_t mask, const uint8_t *data, float *cost); + size_t pos, size_t len, size_t mask, const uint8_t* data, float* cost); #if defined(__cplusplus) || defined(c_plusplus) } /* extern "C" */ diff --git a/c/enc/memory.c b/c/enc/memory.c index 3716b98..f6ed7e3 100644 --- a/c/enc/memory.c +++ b/c/enc/memory.c @@ -27,22 +27,12 @@ extern "C" { #define NEW_ALLOCATED_OFFSET MAX_PERM_ALLOCATED #define NEW_FREED_OFFSET (MAX_PERM_ALLOCATED + MAX_NEW_ALLOCATED) -static void* DefaultAllocFunc(void* opaque, size_t size) { - BROTLI_UNUSED(opaque); - return malloc(size); -} - -static void DefaultFreeFunc(void* opaque, void* address) { - BROTLI_UNUSED(opaque); - free(address); -} - void BrotliInitMemoryManager( MemoryManager* m, brotli_alloc_func alloc_func, brotli_free_func free_func, void* opaque) { if (!alloc_func) { - m->alloc_func = DefaultAllocFunc; - m->free_func = DefaultFreeFunc; + m->alloc_func = BrotliDefaultAllocFunc; + m->free_func = BrotliDefaultFreeFunc; m->opaque = 0; } else { m->alloc_func = alloc_func; diff --git a/c/enc/metablock.c b/c/enc/metablock.c index 50f2ea2..6219292 100644 --- a/c/enc/metablock.c +++ b/c/enc/metablock.c @@ -10,12 +10,12 @@ #include "./metablock.h" #include "../common/constants.h" +#include "../common/context.h" #include "../common/platform.h" #include #include "./bit_cost.h" #include "./block_splitter.h" #include "./cluster.h" -#include "./context.h" #include "./entropy_encode.h" #include "./histogram.h" #include "./memory.h" @@ -398,9 +398,9 @@ static void MapStaticContexts(MemoryManager* m, static BROTLI_INLINE void BrotliBuildMetaBlockGreedyInternal( MemoryManager* m, const uint8_t* ringbuffer, size_t pos, size_t mask, - uint8_t prev_byte, uint8_t prev_byte2, ContextType literal_context_mode, + uint8_t prev_byte, uint8_t prev_byte2, ContextLut literal_context_lut, const size_t num_contexts, const uint32_t* static_context_map, - const Command *commands, 
size_t n_commands, MetaBlockSplit* mb) { + const Command* commands, size_t n_commands, MetaBlockSplit* mb) { union { BlockSplitterLiteral plain; ContextBlockSplitter ctx; @@ -441,7 +441,8 @@ static BROTLI_INLINE void BrotliBuildMetaBlockGreedyInternal( if (num_contexts == 1) { BlockSplitterAddSymbolLiteral(&lit_blocks.plain, literal); } else { - size_t context = Context(prev_byte, prev_byte2, literal_context_mode); + size_t context = + BROTLI_CONTEXT(prev_byte, prev_byte2, literal_context_lut); ContextBlockSplitterAddSymbol(&lit_blocks.ctx, m, literal, static_context_map[context]); if (BROTLI_IS_OOM(m)) return; @@ -455,7 +456,7 @@ static BROTLI_INLINE void BrotliBuildMetaBlockGreedyInternal( prev_byte2 = ringbuffer[(pos - 2) & mask]; prev_byte = ringbuffer[(pos - 1) & mask]; if (cmd.cmd_prefix_ >= 128) { - BlockSplitterAddSymbolDistance(&dist_blocks, cmd.dist_prefix_); + BlockSplitterAddSymbolDistance(&dist_blocks, cmd.dist_prefix_ & 0x3FF); } } } @@ -482,7 +483,7 @@ void BrotliBuildMetaBlockGreedy(MemoryManager* m, size_t mask, uint8_t prev_byte, uint8_t prev_byte2, - ContextType literal_context_mode, + ContextLut literal_context_lut, size_t num_contexts, const uint32_t* static_context_map, const Command* commands, @@ -490,19 +491,17 @@ void BrotliBuildMetaBlockGreedy(MemoryManager* m, MetaBlockSplit* mb) { if (num_contexts == 1) { BrotliBuildMetaBlockGreedyInternal(m, ringbuffer, pos, mask, prev_byte, - prev_byte2, literal_context_mode, 1, NULL, commands, n_commands, mb); + prev_byte2, literal_context_lut, 1, NULL, commands, n_commands, mb); } else { BrotliBuildMetaBlockGreedyInternal(m, ringbuffer, pos, mask, prev_byte, - prev_byte2, literal_context_mode, num_contexts, static_context_map, + prev_byte2, literal_context_lut, num_contexts, static_context_map, commands, n_commands, mb); } } -void BrotliOptimizeHistograms(size_t num_direct_distance_codes, - size_t distance_postfix_bits, +void BrotliOptimizeHistograms(uint32_t num_distance_codes, MetaBlockSplit* mb) { 
uint8_t good_for_rle[BROTLI_NUM_COMMAND_SYMBOLS]; - size_t num_distance_codes; size_t i; for (i = 0; i < mb->literal_histograms_size; ++i) { BrotliOptimizeHuffmanCountsForRle(256, mb->literal_histograms[i].data_, @@ -513,9 +512,6 @@ void BrotliOptimizeHistograms(size_t num_direct_distance_codes, mb->command_histograms[i].data_, good_for_rle); } - num_distance_codes = BROTLI_NUM_DISTANCE_SHORT_CODES + - num_direct_distance_codes + - ((2 * BROTLI_MAX_DISTANCE_BITS) << distance_postfix_bits); for (i = 0; i < mb->distance_histograms_size; ++i) { BrotliOptimizeHuffmanCountsForRle(num_distance_codes, mb->distance_histograms[i].data_, diff --git a/c/enc/metablock.h b/c/enc/metablock.h index 3fa6d65..76a6594 100644 --- a/c/enc/metablock.h +++ b/c/enc/metablock.h @@ -10,11 +10,11 @@ #ifndef BROTLI_ENC_METABLOCK_H_ #define BROTLI_ENC_METABLOCK_H_ +#include "../common/context.h" #include "../common/platform.h" #include #include "./block_splitter.h" #include "./command.h" -#include "./context.h" #include "./histogram.h" #include "./memory.h" #include "./quality.h" @@ -85,12 +85,11 @@ BROTLI_INTERNAL void BrotliBuildMetaBlock(MemoryManager* m, is the same for all block types. 
*/ BROTLI_INTERNAL void BrotliBuildMetaBlockGreedy( MemoryManager* m, const uint8_t* ringbuffer, size_t pos, size_t mask, - uint8_t prev_byte, uint8_t prev_byte2, ContextType literal_context_mode, + uint8_t prev_byte, uint8_t prev_byte2, ContextLut literal_context_lut, size_t num_contexts, const uint32_t* static_context_map, const Command* commands, size_t n_commands, MetaBlockSplit* mb); -BROTLI_INTERNAL void BrotliOptimizeHistograms(size_t num_direct_distance_codes, - size_t distance_postfix_bits, +BROTLI_INTERNAL void BrotliOptimizeHistograms(uint32_t num_distance_codes, MetaBlockSplit* mb); #if defined(__cplusplus) || defined(c_plusplus) diff --git a/c/enc/params.h b/c/enc/params.h index acb3668..9bcf236 100755 --- a/c/enc/params.h +++ b/c/enc/params.h @@ -10,6 +10,7 @@ #define BROTLI_ENC_PARAMS_H_ #include +#include "./encoder_dict.h" typedef struct BrotliHasherParams { int type; @@ -19,6 +20,13 @@ typedef struct BrotliHasherParams { int num_last_distances_to_check; } BrotliHasherParams; +typedef struct BrotliDistanceParams { + uint32_t num_direct_distance_codes; + uint32_t distance_postfix_bits; + uint32_t alphabet_size; + size_t max_distance; +} BrotliDistanceParams; + /* Encoding parameters */ typedef struct BrotliEncoderParams { BrotliEncoderMode mode; @@ -27,7 +35,10 @@ typedef struct BrotliEncoderParams { int lgblock; size_t size_hint; BROTLI_BOOL disable_literal_context_modeling; + BROTLI_BOOL large_window; BrotliHasherParams hasher; + BrotliDistanceParams dist; + BrotliEncoderDictionary dictionary; } BrotliEncoderParams; #endif /* BROTLI_ENC_PARAMS_H_ */ diff --git a/c/enc/prefix.h b/c/enc/prefix.h index 0168d4e..fd359a4 100644 --- a/c/enc/prefix.h +++ b/c/enc/prefix.h @@ -39,11 +39,10 @@ static BROTLI_INLINE void PrefixEncodeCopyDistance(size_t distance_code, size_t prefix = (dist >> bucket) & 1; size_t offset = (2 + prefix) << bucket; size_t nbits = bucket - postfix_bits; - *code = (uint16_t)( + *code = (uint16_t)((nbits << 10) | 
(BROTLI_NUM_DISTANCE_SHORT_CODES + num_direct_codes + ((2 * (nbits - 1) + prefix) << postfix_bits) + postfix)); - *extra_bits = (uint32_t)( - (nbits << 24) | ((dist - offset) >> postfix_bits)); + *extra_bits = (uint32_t)((dist - offset) >> postfix_bits); } } diff --git a/c/enc/quality.h b/c/enc/quality.h index 80b7051..f9b1111 100644 --- a/c/enc/quality.h +++ b/c/enc/quality.h @@ -31,7 +31,7 @@ /* For quality below MIN_QUALITY_FOR_BLOCK_SPLIT there is no block splitting, so we buffer at most this much literals and commands. */ -#define MAX_NUM_DELAYED_SYMBOLS 0x2fff +#define MAX_NUM_DELAYED_SYMBOLS 0x2FFF /* Returns hash-table size for quality levels 0 and 1. */ static BROTLI_INLINE size_t MaxHashTableSize(int quality) { @@ -60,10 +60,15 @@ static BROTLI_INLINE size_t MaxZopfliCandidates( static BROTLI_INLINE void SanitizeParams(BrotliEncoderParams* params) { params->quality = BROTLI_MIN(int, BROTLI_MAX_QUALITY, BROTLI_MAX(int, BROTLI_MIN_QUALITY, params->quality)); + if (params->quality <= MAX_QUALITY_FOR_STATIC_ENTROPY_CODES) { + params->large_window = BROTLI_FALSE; + } if (params->lgwin < BROTLI_MIN_WINDOW_BITS) { params->lgwin = BROTLI_MIN_WINDOW_BITS; - } else if (params->lgwin > BROTLI_MAX_WINDOW_BITS) { - params->lgwin = BROTLI_MAX_WINDOW_BITS; + } else { + int max_lgwin = params->large_window ? BROTLI_LARGE_MAX_WINDOW_BITS : + BROTLI_MAX_WINDOW_BITS; + if (params->lgwin > max_lgwin) params->lgwin = max_lgwin; } } diff --git a/c/enc/ringbuffer.h b/c/enc/ringbuffer.h index 4e58749..86079a8 100644 --- a/c/enc/ringbuffer.h +++ b/c/enc/ringbuffer.h @@ -41,9 +41,9 @@ typedef struct RingBuffer { uint32_t pos_; /* The actual ring buffer containing the copy of the last two bytes, the data, and the copy of the beginning as a tail. */ - uint8_t *data_; + uint8_t* data_; /* The start of the ring-buffer. 
*/ - uint8_t *buffer_; + uint8_t* buffer_; } RingBuffer; static BROTLI_INLINE void RingBufferInit(RingBuffer* rb) { @@ -91,7 +91,7 @@ static BROTLI_INLINE void RingBufferInitBuffer( } static BROTLI_INLINE void RingBufferWriteTail( - const uint8_t *bytes, size_t n, RingBuffer* rb) { + const uint8_t* bytes, size_t n, RingBuffer* rb) { const size_t masked_pos = rb->pos_ & rb->mask_; if (BROTLI_PREDICT_FALSE(masked_pos < rb->tail_size_)) { /* Just fill the tail buffer with the beginning data. */ @@ -103,7 +103,7 @@ static BROTLI_INLINE void RingBufferWriteTail( /* Push bytes into the ring buffer. */ static BROTLI_INLINE void RingBufferWrite( - MemoryManager* m, const uint8_t *bytes, size_t n, RingBuffer* rb) { + MemoryManager* m, const uint8_t* bytes, size_t n, RingBuffer* rb) { if (rb->pos_ == 0 && n < rb->tail_size_) { /* Special case for the first write: to process the first block, we don't need to allocate the whole ring-buffer and we don't need the tail @@ -144,12 +144,16 @@ static BROTLI_INLINE void RingBufferWrite( n - (rb->size_ - masked_pos)); } } - rb->buffer_[-2] = rb->buffer_[rb->size_ - 2]; - rb->buffer_[-1] = rb->buffer_[rb->size_ - 1]; - rb->pos_ += (uint32_t)n; - if (rb->pos_ > (1u << 30)) { - /* Wrap, but preserve not-a-first-lap feature. */ - rb->pos_ = (rb->pos_ & ((1u << 30) - 1)) | (1u << 30); + { + BROTLI_BOOL not_first_lap = (rb->pos_ & (1u << 31)) != 0; + uint32_t rb_pos_mask = (1u << 31) - 1; + rb->buffer_[-2] = rb->buffer_[rb->size_ - 2]; + rb->buffer_[-1] = rb->buffer_[rb->size_ - 1]; + rb->pos_ = (rb->pos_ & rb_pos_mask) + (uint32_t)(n & rb_pos_mask); + if (not_first_lap) { + /* Wrap, but preserve not-a-first-lap feature. 
*/ + rb->pos_ |= 1u << 31; + } } } diff --git a/c/enc/static_dict.c b/c/enc/static_dict.c index 36caa61..758ef80 100644 --- a/c/enc/static_dict.c +++ b/c/enc/static_dict.c @@ -8,19 +8,20 @@ #include "../common/dictionary.h" #include "../common/platform.h" +#include "../common/transform.h" +#include "./encoder_dict.h" #include "./find_match_length.h" -#include "./static_dict_lut.h" #if defined(__cplusplus) || defined(c_plusplus) extern "C" { #endif -static const uint8_t kUppercaseFirst = 10; +/* TODO: use BrotliTransforms.cutOffTransforms instead. */ static const uint8_t kOmitLastNTransforms[10] = { 0, 12, 27, 23, 42, 63, 56, 48, 59, 64, }; -static BROTLI_INLINE uint32_t Hash(const uint8_t *data) { +static BROTLI_INLINE uint32_t Hash(const uint8_t* data) { uint32_t h = BROTLI_UNALIGNED_LOAD32LE(data) * kDictHashMul32; /* The higher bits contain more mixture from the multiplication, so we take our results from there. */ @@ -79,32 +80,33 @@ static BROTLI_INLINE BROTLI_BOOL IsMatch(const BrotliDictionary* dictionary, } BROTLI_BOOL BrotliFindAllStaticDictionaryMatches( - const BrotliDictionary* dictionary, const uint8_t* data, + const BrotliEncoderDictionary* dictionary, const uint8_t* data, size_t min_length, size_t max_length, uint32_t* matches) { BROTLI_BOOL has_found_match = BROTLI_FALSE; { - size_t offset = kStaticDictionaryBuckets[Hash(data)]; + size_t offset = dictionary->buckets[Hash(data)]; BROTLI_BOOL end = !offset; while (!end) { - DictWord w = kStaticDictionaryWords[offset++]; + DictWord w = dictionary->dict_words[offset++]; const size_t l = w.len & 0x1F; - const size_t n = (size_t)1 << dictionary->size_bits_by_length[l]; + const size_t n = (size_t)1 << dictionary->words->size_bits_by_length[l]; const size_t id = w.idx; end = !!(w.len & 0x80); w.len = (uint8_t)l; if (w.transform == 0) { const size_t matchlen = - DictMatchLength(dictionary, data, id, l, max_length); + DictMatchLength(dictionary->words, data, id, l, max_length); const uint8_t* s; size_t 
minlen; size_t maxlen; size_t len; - /* Transform "" + kIdentity + "" */ + /* Transform "" + BROTLI_TRANSFORM_IDENTITY + "" */ if (matchlen == l) { AddMatch(id, l, l, matches); has_found_match = BROTLI_TRUE; } - /* Transforms "" + kOmitLast1 + "" and "" + kOmitLast1 + "ing " */ + /* Transforms "" + BROTLI_TRANSFORM_OMIT_LAST_1 + "" and + "" + BROTLI_TRANSFORM_OMIT_LAST_1 + "ing " */ if (matchlen >= l - 1) { AddMatch(id + 12 * n, l - 1, l, matches); if (l + 2 < max_length && @@ -114,7 +116,7 @@ BROTLI_BOOL BrotliFindAllStaticDictionaryMatches( } has_found_match = BROTLI_TRUE; } - /* Transform "" + kOmitLastN + "" (N = 2 .. 9) */ + /* Transform "" + BROTLI_TRANSFORM_OMIT_LAST_# + "" (# = 2 .. 9) */ minlen = min_length; if (l > 9) minlen = BROTLI_MAX(size_t, minlen, l - 9); maxlen = BROTLI_MIN(size_t, matchlen, l - 2); @@ -126,7 +128,7 @@ BROTLI_BOOL BrotliFindAllStaticDictionaryMatches( continue; } s = &data[l]; - /* Transforms "" + kIdentity + */ + /* Transforms "" + BROTLI_TRANSFORM_IDENTITY + */ if (s[0] == ' ') { AddMatch(id + n, l + 1, l, matches); if (s[1] == 'a') { @@ -273,12 +275,13 @@ BROTLI_BOOL BrotliFindAllStaticDictionaryMatches( } } } else { - /* Set is_all_caps=0 for kUppercaseFirst and - is_all_caps=1 otherwise (kUppercaseAll) transform. */ + /* Set is_all_caps=0 for BROTLI_TRANSFORM_UPPERCASE_FIRST and + is_all_caps=1 otherwise (BROTLI_TRANSFORM_UPPERCASE_ALL) + transform. */ const BROTLI_BOOL is_all_caps = - TO_BROTLI_BOOL(w.transform != kUppercaseFirst); + TO_BROTLI_BOOL(w.transform != BROTLI_TRANSFORM_UPPERCASE_FIRST); const uint8_t* s; - if (!IsMatch(dictionary, w, data, max_length)) { + if (!IsMatch(dictionary->words, w, data, max_length)) { continue; } /* Transform "" + kUppercase{First,All} + "" */ @@ -323,27 +326,29 @@ BROTLI_BOOL BrotliFindAllStaticDictionaryMatches( /* Transforms with prefixes " " and "." 
*/ if (max_length >= 5 && (data[0] == ' ' || data[0] == '.')) { BROTLI_BOOL is_space = TO_BROTLI_BOOL(data[0] == ' '); - size_t offset = kStaticDictionaryBuckets[Hash(&data[1])]; + size_t offset = dictionary->buckets[Hash(&data[1])]; BROTLI_BOOL end = !offset; while (!end) { - DictWord w = kStaticDictionaryWords[offset++]; + DictWord w = dictionary->dict_words[offset++]; const size_t l = w.len & 0x1F; - const size_t n = (size_t)1 << dictionary->size_bits_by_length[l]; + const size_t n = (size_t)1 << dictionary->words->size_bits_by_length[l]; const size_t id = w.idx; end = !!(w.len & 0x80); w.len = (uint8_t)l; if (w.transform == 0) { const uint8_t* s; - if (!IsMatch(dictionary, w, &data[1], max_length - 1)) { + if (!IsMatch(dictionary->words, w, &data[1], max_length - 1)) { continue; } - /* Transforms " " + kIdentity + "" and "." + kIdentity + "" */ + /* Transforms " " + BROTLI_TRANSFORM_IDENTITY + "" and + "." + BROTLI_TRANSFORM_IDENTITY + "" */ AddMatch(id + (is_space ? 6 : 32) * n, l + 1, l, matches); has_found_match = BROTLI_TRUE; if (l + 2 >= max_length) { continue; } - /* Transforms " " + kIdentity + and "." + kIdentity + + /* Transforms " " + BROTLI_TRANSFORM_IDENTITY + and + "." + BROTLI_TRANSFORM_IDENTITY + */ s = &data[l + 1]; if (s[0] == ' ') { @@ -370,12 +375,13 @@ BROTLI_BOOL BrotliFindAllStaticDictionaryMatches( } } } else if (is_space) { - /* Set is_all_caps=0 for kUppercaseFirst and - is_all_caps=1 otherwise (kUppercaseAll) transform. */ + /* Set is_all_caps=0 for BROTLI_TRANSFORM_UPPERCASE_FIRST and + is_all_caps=1 otherwise (BROTLI_TRANSFORM_UPPERCASE_ALL) + transform. 
*/ const BROTLI_BOOL is_all_caps = - TO_BROTLI_BOOL(w.transform != kUppercaseFirst); + TO_BROTLI_BOOL(w.transform != BROTLI_TRANSFORM_UPPERCASE_FIRST); const uint8_t* s; - if (!IsMatch(dictionary, w, &data[1], max_length - 1)) { + if (!IsMatch(dictionary->words, w, &data[1], max_length - 1)) { continue; } /* Transforms " " + kUppercase{First,All} + "" */ @@ -411,22 +417,22 @@ BROTLI_BOOL BrotliFindAllStaticDictionaryMatches( } } if (max_length >= 6) { - /* Transforms with prefixes "e ", "s ", ", " and "\xc2\xa0" */ + /* Transforms with prefixes "e ", "s ", ", " and "\xC2\xA0" */ if ((data[1] == ' ' && (data[0] == 'e' || data[0] == 's' || data[0] == ',')) || - (data[0] == 0xc2 && data[1] == 0xa0)) { - size_t offset = kStaticDictionaryBuckets[Hash(&data[2])]; + (data[0] == 0xC2 && data[1] == 0xA0)) { + size_t offset = dictionary->buckets[Hash(&data[2])]; BROTLI_BOOL end = !offset; while (!end) { - DictWord w = kStaticDictionaryWords[offset++]; + DictWord w = dictionary->dict_words[offset++]; const size_t l = w.len & 0x1F; - const size_t n = (size_t)1 << dictionary->size_bits_by_length[l]; + const size_t n = (size_t)1 << dictionary->words->size_bits_by_length[l]; const size_t id = w.idx; end = !!(w.len & 0x80); w.len = (uint8_t)l; if (w.transform == 0 && - IsMatch(dictionary, w, &data[2], max_length - 2)) { - if (data[0] == 0xc2) { + IsMatch(dictionary->words, w, &data[2], max_length - 2)) { + if (data[0] == 0xC2) { AddMatch(id + 102 * n, l + 2, l, matches); has_found_match = BROTLI_TRUE; } else if (l + 2 < max_length && data[l + 2] == ' ') { @@ -444,17 +450,17 @@ BROTLI_BOOL BrotliFindAllStaticDictionaryMatches( data[3] == 'e' && data[4] == ' ') || (data[0] == '.' 
&& data[1] == 'c' && data[2] == 'o' && data[3] == 'm' && data[4] == '/')) { - size_t offset = kStaticDictionaryBuckets[Hash(&data[5])]; + size_t offset = dictionary->buckets[Hash(&data[5])]; BROTLI_BOOL end = !offset; while (!end) { - DictWord w = kStaticDictionaryWords[offset++]; + DictWord w = dictionary->dict_words[offset++]; const size_t l = w.len & 0x1F; - const size_t n = (size_t)1 << dictionary->size_bits_by_length[l]; + const size_t n = (size_t)1 << dictionary->words->size_bits_by_length[l]; const size_t id = w.idx; end = !!(w.len & 0x80); w.len = (uint8_t)l; if (w.transform == 0 && - IsMatch(dictionary, w, &data[5], max_length - 5)) { + IsMatch(dictionary->words, w, &data[5], max_length - 5)) { AddMatch(id + (data[0] == ' ' ? 41 : 72) * n, l + 5, l, matches); has_found_match = BROTLI_TRUE; if (l + 5 < max_length) { diff --git a/c/enc/static_dict.h b/c/enc/static_dict.h index 0b3f6b3..6b5d4eb 100644 --- a/c/enc/static_dict.h +++ b/c/enc/static_dict.h @@ -12,13 +12,14 @@ #include "../common/dictionary.h" #include "../common/platform.h" #include +#include "./encoder_dict.h" #if defined(__cplusplus) || defined(c_plusplus) extern "C" { #endif #define BROTLI_MAX_STATIC_DICTIONARY_MATCH_LEN 37 -static const uint32_t kInvalidMatch = 0xfffffff; +static const uint32_t kInvalidMatch = 0xFFFFFFF; /* Matches data against static dictionary words, and for each length l, for which a match is found, updates matches[l] to be the minimum possible @@ -28,7 +29,7 @@ static const uint32_t kInvalidMatch = 0xfffffff; matches array is at least BROTLI_MAX_STATIC_DICTIONARY_MATCH_LEN + 1 long all elements are initialized to kInvalidMatch */ BROTLI_INTERNAL BROTLI_BOOL BrotliFindAllStaticDictionaryMatches( - const BrotliDictionary* dictionary, + const BrotliEncoderDictionary* dictionary, const uint8_t* data, size_t min_length, size_t max_length, uint32_t* matches); diff --git a/c/enc/static_dict_lut.h b/c/enc/static_dict_lut.h index ba94f76..e299cda 100644 --- 
a/c/enc/static_dict_lut.h +++ b/c/enc/static_dict_lut.h @@ -23,7 +23,7 @@ typedef struct DictWord { } DictWord; static const int kDictNumBits = 15; -static const uint32_t kDictHashMul32 = 0x1e35a7bd; +static const uint32_t kDictHashMul32 = 0x1E35A7BD; static const uint16_t kStaticDictionaryBuckets[32768] = { 1,0,0,0,0,0,0,0,0,3,6,0,0,0,0,0,20,0,0,0,21,0,22,0,0,0,0,0,0,0,0,23,0,0,25,0,29, diff --git a/c/enc/utf8_util.c b/c/enc/utf8_util.c index a334927..04a7805 100644 --- a/c/enc/utf8_util.c +++ b/c/enc/utf8_util.c @@ -25,37 +25,37 @@ static size_t BrotliParseAsUTF8( } /* 2-byte UTF8 */ if (size > 1u && - (input[0] & 0xe0) == 0xc0 && - (input[1] & 0xc0) == 0x80) { - *symbol = (((input[0] & 0x1f) << 6) | - (input[1] & 0x3f)); - if (*symbol > 0x7f) { + (input[0] & 0xE0) == 0xC0 && + (input[1] & 0xC0) == 0x80) { + *symbol = (((input[0] & 0x1F) << 6) | + (input[1] & 0x3F)); + if (*symbol > 0x7F) { return 2; } } /* 3-byte UFT8 */ if (size > 2u && - (input[0] & 0xf0) == 0xe0 && - (input[1] & 0xc0) == 0x80 && - (input[2] & 0xc0) == 0x80) { - *symbol = (((input[0] & 0x0f) << 12) | - ((input[1] & 0x3f) << 6) | - (input[2] & 0x3f)); - if (*symbol > 0x7ff) { + (input[0] & 0xF0) == 0xE0 && + (input[1] & 0xC0) == 0x80 && + (input[2] & 0xC0) == 0x80) { + *symbol = (((input[0] & 0x0F) << 12) | + ((input[1] & 0x3F) << 6) | + (input[2] & 0x3F)); + if (*symbol > 0x7FF) { return 3; } } /* 4-byte UFT8 */ if (size > 3u && - (input[0] & 0xf8) == 0xf0 && - (input[1] & 0xc0) == 0x80 && - (input[2] & 0xc0) == 0x80 && - (input[3] & 0xc0) == 0x80) { + (input[0] & 0xF8) == 0xF0 && + (input[1] & 0xC0) == 0x80 && + (input[2] & 0xC0) == 0x80 && + (input[3] & 0xC0) == 0x80) { *symbol = (((input[0] & 0x07) << 18) | - ((input[1] & 0x3f) << 12) | - ((input[2] & 0x3f) << 6) | - (input[3] & 0x3f)); - if (*symbol > 0xffff && *symbol <= 0x10ffff) { + ((input[1] & 0x3F) << 12) | + ((input[2] & 0x3F) << 6) | + (input[3] & 0x3F)); + if (*symbol > 0xFFFF && *symbol <= 0x10FFFF) { return 4; } } diff --git 
a/c/enc/write_bits.h b/c/enc/write_bits.h index efa66f8..7733d92 100644 --- a/c/enc/write_bits.h +++ b/c/enc/write_bits.h @@ -35,25 +35,27 @@ extern "C" { and locate the rest in BYTE+1, BYTE+2, etc. */ static BROTLI_INLINE void BrotliWriteBits(size_t n_bits, uint64_t bits, - size_t * BROTLI_RESTRICT pos, - uint8_t * BROTLI_RESTRICT array) { + size_t* BROTLI_RESTRICT pos, + uint8_t* BROTLI_RESTRICT array) { #ifdef BROTLI_LITTLE_ENDIAN /* This branch of the code can write up to 56 bits at a time, 7 bits are lost by being perhaps already in *p and at least 1 bit is needed to initialize the bit-stream ahead (i.e. if 7 bits are in *p and we write 57 bits, then the next write will access a byte that was never initialized). */ - uint8_t *p = &array[*pos >> 3]; + uint8_t* p = &array[*pos >> 3]; uint64_t v = *p; - BROTLI_LOG(("WriteBits %2d 0x%016llx %10d\n", n_bits, bits, *pos)); + BROTLI_LOG(("WriteBits %2d 0x%08x%08x %10d\n", (int)n_bits, + (uint32_t)(bits >> 32), (uint32_t)(bits & 0xFFFFFFFF), + (int)*pos)); BROTLI_DCHECK((bits >> n_bits) == 0); BROTLI_DCHECK(n_bits <= 56); v |= bits << (*pos & 7); BROTLI_UNALIGNED_STORE64LE(p, v); /* Set some bits. 
*/ *pos += n_bits; #else - /* implicit & 0xff is assumed for uint8_t arithmetics */ - uint8_t *array_pos = &array[*pos >> 3]; + /* implicit & 0xFF is assumed for uint8_t arithmetics */ + uint8_t* array_pos = &array[*pos >> 3]; const size_t bits_reserved_in_first_byte = (*pos & 7); size_t bits_left_to_write; bits <<= bits_reserved_in_first_byte; @@ -70,8 +72,8 @@ static BROTLI_INLINE void BrotliWriteBits(size_t n_bits, } static BROTLI_INLINE void BrotliWriteBitsPrepareStorage( - size_t pos, uint8_t *array) { - BROTLI_LOG(("WriteBitsPrepareStorage %10d\n", pos)); + size_t pos, uint8_t* array) { + BROTLI_LOG(("WriteBitsPrepareStorage %10d\n", (int)pos)); BROTLI_DCHECK((pos & 7) == 0); array[pos >> 3] = 0; } diff --git a/c/include/brotli/decode.h b/c/include/brotli/decode.h index 1acf605..61a4326 100644 --- a/c/include/brotli/decode.h +++ b/c/include/brotli/decode.h @@ -34,11 +34,11 @@ typedef struct BrotliDecoderStateStruct BrotliDecoderState; typedef enum { /** Decoding error, e.g. corrupted input or memory allocation problem. */ BROTLI_DECODER_RESULT_ERROR = 0, - /** Decoding successfully completed */ + /** Decoding successfully completed. */ BROTLI_DECODER_RESULT_SUCCESS = 1, - /** Partially done; should be called again with more input */ + /** Partially done; should be called again with more input. */ BROTLI_DECODER_RESULT_NEEDS_MORE_INPUT = 2, - /** Partially done; should be called again with more output */ + /** Partially done; should be called again with more output. 
*/ BROTLI_DECODER_RESULT_NEEDS_MORE_OUTPUT = 3 } BrotliDecoderResult; @@ -83,8 +83,9 @@ typedef enum { BROTLI_ERROR_CODE(_ERROR_FORMAT_, WINDOW_BITS, -13) SEPARATOR \ BROTLI_ERROR_CODE(_ERROR_FORMAT_, PADDING_1, -14) SEPARATOR \ BROTLI_ERROR_CODE(_ERROR_FORMAT_, PADDING_2, -15) SEPARATOR \ + BROTLI_ERROR_CODE(_ERROR_FORMAT_, DISTANCE, -16) SEPARATOR \ \ - /* -16..-17 codes are reserved */ \ + /* -17 code is reserved */ \ \ BROTLI_ERROR_CODE(_ERROR_, COMPOUND_DICTIONARY, -18) SEPARATOR \ BROTLI_ERROR_CODE(_ERROR_, DICTIONARY_NOT_SET, -19) SEPARATOR \ @@ -135,7 +136,11 @@ typedef enum BrotliDecoderParameter { * Ring buffer is allocated according to window size, despite the real size of * the content. */ - BROTLI_DECODER_PARAM_DISABLE_RING_BUFFER_REALLOCATION = 0 + BROTLI_DECODER_PARAM_DISABLE_RING_BUFFER_REALLOCATION = 0, + /** + * Flag that determines if "Large Window Brotli" is used. + */ + BROTLI_DECODER_PARAM_LARGE_WINDOW = 1 } BrotliDecoderParameter; /** diff --git a/c/include/brotli/encode.h b/c/include/brotli/encode.h index 1fa85cc..0b5c8c7 100644 --- a/c/include/brotli/encode.h +++ b/c/include/brotli/encode.h @@ -27,6 +27,11 @@ extern "C" { * @note equal to @c BROTLI_MAX_DISTANCE_BITS constant. */ #define BROTLI_MAX_WINDOW_BITS 24 +/** + * Maximal value for ::BROTLI_PARAM_LGWIN parameter + * in "Large Window Brotli" (32-bit). + */ +#define BROTLI_LARGE_MAX_WINDOW_BITS 30 /** Minimal value for ::BROTLI_PARAM_LGBLOCK parameter. */ #define BROTLI_MIN_INPUT_BLOCK_BITS 16 /** Maximal value for ::BROTLI_PARAM_LGBLOCK parameter. */ @@ -176,7 +181,11 @@ typedef enum BrotliEncoderParameter { * * The default value is 0, which means that the total input size is unknown. */ - BROTLI_PARAM_SIZE_HINT = 5 + BROTLI_PARAM_SIZE_HINT = 5, + /** + * Flag that determines if "Large Window Brotli" is used. 
+ */ + BROTLI_PARAM_LARGE_WINDOW = 6 } BrotliEncoderParameter; /** diff --git a/c/tools/brotli.c b/c/tools/brotli.c index 497ae65..2abfc27 100644 --- a/c/tools/brotli.c +++ b/c/tools/brotli.c @@ -111,8 +111,15 @@ typedef struct { uint8_t* output; const char* current_input_path; const char* current_output_path; + int64_t input_file_length; /* -1, if impossible to calculate */ FILE* fin; FILE* fout; + + /* I/O buffers */ + size_t available_in; + const uint8_t* next_in; + size_t available_out; + uint8_t* next_out; } Context; /* Parse up to 5 decimal digits. */ @@ -185,8 +192,8 @@ static Command ParseParams(Context* params) { /* Too many options. The expected longest option list is: "-q 0 -w 10 -o f -D d -S b -d -f -k -n -v --", i.e. 16 items in total. - This check is an additinal guard that is never triggered, but provides an - additional guard for future changes. */ + This check is an additional guard that is never triggered, but provides + a guard for future changes. */ if (next_option_index > (MAX_OPTIONS - 2)) { return COMMAND_INVALID; } @@ -414,8 +421,8 @@ static void PrintHelp(const char* name) { " -t, --test test compressed file integrity\n" " -v, --verbose verbose mode\n"); fprintf(stdout, -" -w NUM, --lgwin=NUM set LZ77 window size (0, %d-%d) (default:%d)\n", - BROTLI_MIN_WINDOW_BITS, BROTLI_MAX_WINDOW_BITS, DEFAULT_LGWIN); +" -w NUM, --lgwin=NUM set LZ77 window size (0, %d-%d)\n", + BROTLI_MIN_WINDOW_BITS, BROTLI_MAX_WINDOW_BITS); fprintf(stdout, " window size = 2**NUM - 16\n" " 0 lets compressor choose the optimal value\n"); @@ -473,6 +480,23 @@ static BROTLI_BOOL OpenOutputFile(const char* output_path, FILE** f, return BROTLI_TRUE; } +static int64_t FileSize(const char* path) { + FILE* f = fopen(path, "rb"); + int64_t retval; + if (f == NULL) { + return -1; + } + if (fseek(f, 0L, SEEK_END) != 0) { + fclose(f); + return -1; + } + retval = ftell(f); + if (fclose(f) != 0) { + return -1; + } + return retval; +} + /* Copy file times and permissions. 
TODO: this is a "best effort" implementation; honest cross-platform fully featured implementation is way too hacky; add more hacks by request. */ @@ -513,6 +537,8 @@ static BROTLI_BOOL NextFile(Context* context) { /* Iterator points to last used arg; increment to search for the next one. */ context->iterator++; + context->input_file_length = -1; + /* No input path; read from console. */ if (context->input_count == 0) { if (context->iterator > 1) return BROTLI_FALSE; @@ -542,6 +568,7 @@ static BROTLI_BOOL NextFile(Context* context) { } context->current_input_path = arg; + context->input_file_length = FileSize(arg); context->current_output_path = context->output_path; if (context->output_path) return BROTLI_TRUE; @@ -626,44 +653,73 @@ static BROTLI_BOOL CloseFiles(Context* context, BROTLI_BOOL success) { static const size_t kFileBufferSize = 1 << 16; +static void InitializeBuffers(Context* context) { + context->available_in = 0; + context->next_in = NULL; + context->available_out = kFileBufferSize; + context->next_out = context->output; +} + +static BROTLI_BOOL HasMoreInput(Context* context) { + return feof(context->fin) ? BROTLI_FALSE : BROTLI_TRUE; +} + +static BROTLI_BOOL ProvideInput(Context* context) { + context->available_in = + fread(context->input, 1, kFileBufferSize, context->fin); + context->next_in = context->input; + if (ferror(context->fin)) { + fprintf(stderr, "failed to read input [%s]: %s\n", + PrintablePath(context->current_input_path), strerror(errno)); + return BROTLI_FALSE; + } + return BROTLI_TRUE; +} + +/* Internal: should be used only in Provide-/Flush-Output. 
*/ +static BROTLI_BOOL WriteOutput(Context* context) { + size_t out_size = (size_t)(context->next_out - context->output); + if (out_size == 0) return BROTLI_TRUE; + if (context->test_integrity) return BROTLI_TRUE; + + fwrite(context->output, 1, out_size, context->fout); + if (ferror(context->fout)) { + fprintf(stderr, "failed to write output [%s]: %s\n", + PrintablePath(context->current_output_path), strerror(errno)); + return BROTLI_FALSE; + } + return BROTLI_TRUE; +} + +static BROTLI_BOOL ProvideOutput(Context* context) { + if (!WriteOutput(context)) return BROTLI_FALSE; + context->available_out = kFileBufferSize; + context->next_out = context->output; + return BROTLI_TRUE; +} + +static BROTLI_BOOL FlushOutput(Context* context) { + if (!WriteOutput(context)) return BROTLI_FALSE; + context->available_out = 0; + return BROTLI_TRUE; +} + static BROTLI_BOOL DecompressFile(Context* context, BrotliDecoderState* s) { - size_t available_in = 0; - const uint8_t* next_in = NULL; - size_t available_out = kFileBufferSize; - uint8_t* next_out = context->output; BrotliDecoderResult result = BROTLI_DECODER_RESULT_NEEDS_MORE_INPUT; + InitializeBuffers(context); for (;;) { - if (next_out != context->output) { - if (!context->test_integrity) { - size_t out_size = (size_t)(next_out - context->output); - fwrite(context->output, 1, out_size, context->fout); - if (ferror(context->fout)) { - fprintf(stderr, "failed to write output [%s]: %s\n", - PrintablePath(context->current_output_path), strerror(errno)); - return BROTLI_FALSE; - } - } - available_out = kFileBufferSize; - next_out = context->output; - } - if (result == BROTLI_DECODER_RESULT_NEEDS_MORE_INPUT) { - if (feof(context->fin)) { + if (!HasMoreInput(context)) { fprintf(stderr, "corrupt input [%s]\n", PrintablePath(context->current_input_path)); return BROTLI_FALSE; } - available_in = fread(context->input, 1, kFileBufferSize, context->fin); - next_in = context->input; - if (ferror(context->fin)) { - fprintf(stderr, "failed to 
read input [%s]: %s\n", - PrintablePath(context->current_input_path), strerror(errno)); - return BROTLI_FALSE; - } + if (!ProvideInput(context)) return BROTLI_FALSE; } else if (result == BROTLI_DECODER_RESULT_NEEDS_MORE_OUTPUT) { - /* Nothing to do - output is already written. */ + if (!ProvideOutput(context)) return BROTLI_FALSE; } else if (result == BROTLI_DECODER_RESULT_SUCCESS) { - if (available_in != 0 || !feof(context->fin)) { + if (!FlushOutput(context)) return BROTLI_FALSE; + if (context->available_in != 0 || HasMoreInput(context)) { fprintf(stderr, "corrupt input [%s]\n", PrintablePath(context->current_input_path)); return BROTLI_FALSE; @@ -675,8 +731,8 @@ static BROTLI_BOOL DecompressFile(Context* context, BrotliDecoderState* s) { return BROTLI_FALSE; } - result = BrotliDecoderDecompressStream( - s, &available_in, &next_in, &available_out, &next_out, 0); + result = BrotliDecoderDecompressStream(s, &context->available_in, + &context->next_in, &context->available_out, &context->next_out, 0); } } @@ -703,46 +759,31 @@ static BROTLI_BOOL DecompressFiles(Context* context) { } static BROTLI_BOOL CompressFile(Context* context, BrotliEncoderState* s) { - size_t available_in = 0; - const uint8_t* next_in = NULL; - size_t available_out = kFileBufferSize; - uint8_t* next_out = context->output; BROTLI_BOOL is_eof = BROTLI_FALSE; - + InitializeBuffers(context); for (;;) { - if (available_in == 0 && !is_eof) { - available_in = fread(context->input, 1, kFileBufferSize, context->fin); - next_in = context->input; - if (ferror(context->fin)) { - fprintf(stderr, "failed to read input [%s]: %s\n", - PrintablePath(context->current_input_path), strerror(errno)); - return BROTLI_FALSE; - } - is_eof = feof(context->fin) ? BROTLI_TRUE : BROTLI_FALSE; + if (context->available_in == 0 && !is_eof) { + if (!ProvideInput(context)) return BROTLI_FALSE; + is_eof = !HasMoreInput(context); } if (!BrotliEncoderCompressStream(s, is_eof ? 
BROTLI_OPERATION_FINISH : BROTLI_OPERATION_PROCESS, - &available_in, &next_in, &available_out, &next_out, NULL)) { + &context->available_in, &context->next_in, + &context->available_out, &context->next_out, NULL)) { /* Should detect OOM? */ fprintf(stderr, "failed to compress data [%s]\n", PrintablePath(context->current_input_path)); return BROTLI_FALSE; } - if (available_out != kFileBufferSize) { - size_t out_size = kFileBufferSize - available_out; - fwrite(context->output, 1, out_size, context->fout); - if (ferror(context->fout)) { - fprintf(stderr, "failed to write output [%s]: %s\n", - PrintablePath(context->current_output_path), strerror(errno)); - return BROTLI_FALSE; - } - available_out = kFileBufferSize; - next_out = context->output; + if (context->available_out == 0) { + if (!ProvideOutput(context)) return BROTLI_FALSE; } - if (BrotliEncoderIsFinished(s)) return BROTLI_TRUE; + if (BrotliEncoderIsFinished(s)) { + return FlushOutput(context); + } } } @@ -756,8 +797,30 @@ static BROTLI_BOOL CompressFiles(Context* context) { } BrotliEncoderSetParameter(s, BROTLI_PARAM_QUALITY, (uint32_t)context->quality); - BrotliEncoderSetParameter(s, - BROTLI_PARAM_LGWIN, (uint32_t)context->lgwin); + if (context->lgwin > 0) { + /* Specified by user. */ + BrotliEncoderSetParameter(s, + BROTLI_PARAM_LGWIN, (uint32_t)context->lgwin); + } else { + /* 0, or not specified by user; could be chosen by compressor. */ + uint32_t lgwin = DEFAULT_LGWIN; + /* Use file size to limit lgwin. */ + if (context->input_file_length >= 0) { + int32_t size = 1 << BROTLI_MIN_WINDOW_BITS; + lgwin = BROTLI_MIN_WINDOW_BITS; + while (size < context->input_file_length) { + size <<= 1; + lgwin++; + if (lgwin == BROTLI_MAX_WINDOW_BITS) break; + } + } + BrotliEncoderSetParameter(s, BROTLI_PARAM_LGWIN, lgwin); + } + if (context->input_file_length > 0) { + uint32_t size_hint = context->input_file_length < (1 << 30) ? 
+ (uint32_t)context->input_file_length : (1u << 30); + BrotliEncoderSetParameter(s, BROTLI_PARAM_SIZE_HINT, size_hint); + } is_ok = OpenFiles(context); if (is_ok && !context->current_output_path && !context->force_overwrite && isatty(STDOUT_FILENO)) { @@ -779,7 +842,7 @@ int main(int argc, char** argv) { int i; context.quality = 11; - context.lgwin = DEFAULT_LGWIN; + context.lgwin = -1; context.force_overwrite = BROTLI_FALSE; context.junk_source = BROTLI_FALSE; context.copy_stat = BROTLI_TRUE; diff --git a/docs/brotli.1 b/docs/brotli.1 index c55b906..7242a32 100644 --- a/docs/brotli.1 +++ b/docs/brotli.1 @@ -1,4 +1,4 @@ -.TH "BROTLI" "1" "August 2017" "brotli 1.0.0" "User commands" +.TH "BROTLI" "1" "February 2018" "brotli 1.0.0" "User commands" .SH "NAME" \fBbrotli\fR \- brotli, unbrotli \- compress or decompress files .SH SYNOPSIS diff --git a/docs/decode.h.3 b/docs/decode.h.3 index 0948bf7..7b8581c 100644 --- a/docs/decode.h.3 +++ b/docs/decode.h.3 @@ -1,4 +1,4 @@ -.TH "decode.h" 3 "Fri Dec 8 2017" "Brotli" \" -*- nroff -*- +.TH "decode.h" 3 "Thu Feb 22 2018" "Brotli" \" -*- nroff -*- .ad l .nh .SH NAME @@ -143,6 +143,9 @@ Options to be used with \fBBrotliDecoderSetParameter\fP\&. .TP \fB\fIBROTLI_DECODER_PARAM_DISABLE_RING_BUFFER_REALLOCATION \fP\fP Disable 'canny' ring buffer allocation strategy\&. Ring buffer is allocated according to window size, despite the real size of the content\&. +.TP +\fB\fIBROTLI_DECODER_PARAM_LARGE_WINDOW \fP\fP +Flag that determines if 'Large Window Brotli' is used\&. .SS "enum \fBBrotliDecoderResult\fP" .PP diff --git a/docs/encode.h.3 b/docs/encode.h.3 index 1e1193e..906ce07 100644 --- a/docs/encode.h.3 +++ b/docs/encode.h.3 @@ -1,4 +1,4 @@ -.TH "encode.h" 3 "Fri Dec 8 2017" "Brotli" \" -*- nroff -*- +.TH "encode.h" 3 "Thu Feb 22 2018" "Brotli" \" -*- nroff -*- .ad l .nh .SH NAME @@ -23,6 +23,10 @@ encode.h \- API for Brotli compression\&. .br .RI "\fIDefault value for \fBBROTLI_PARAM_LGWIN\fP parameter\&. 
\fP" .ti -1c +.RI "#define \fBBROTLI_LARGE_MAX_WINDOW_BITS\fP 30" +.br +.RI "\fIMaximal value for \fBBROTLI_PARAM_LGWIN\fP parameter in 'Large Window Brotli' (32-bit)\&. \fP" +.ti -1c .RI "#define \fBBROTLI_MAX_INPUT_BLOCK_BITS\fP 24" .br .RI "\fIMaximal value for \fBBROTLI_PARAM_LGBLOCK\fP parameter\&. \fP" @@ -287,6 +291,9 @@ Flag that affects usage of 'literal context modeling' format feature\&. This fla .TP \fB\fIBROTLI_PARAM_SIZE_HINT \fP\fP Estimated total input size for all \fBBrotliEncoderCompressStream\fP calls\&. The default value is 0, which means that the total input size is unknown\&. +.TP +\fB\fIBROTLI_PARAM_LARGE_WINDOW \fP\fP +Flag that determines if 'Large Window Brotli' is used\&. .SH "Function Documentation" .PP .SS "\fBBROTLI_BOOL\fP BrotliEncoderCompress (int quality, int lgwin, \fBBrotliEncoderMode\fP mode, size_t input_size, const uint8_t input_buffer[input_size], size_t * encoded_size, uint8_t encoded_buffer[*encoded_size])" diff --git a/docs/types.h.3 b/docs/types.h.3 index e72ae6e..bef9313 100644 --- a/docs/types.h.3 +++ b/docs/types.h.3 @@ -1,4 +1,4 @@ -.TH "types.h" 3 "Wed Aug 2 2017" "Brotli" \" -*- nroff -*- +.TH "types.h" 3 "Thu Feb 22 2018" "Brotli" \" -*- nroff -*- .ad l .nh .SH NAME diff --git a/java/org/brotli/dec/BUILD b/java/org/brotli/dec/BUILD index 8a2558c..e6d3a4d 100644 --- a/java/org/brotli/dec/BUILD +++ b/java/org/brotli/dec/BUILD @@ -43,6 +43,12 @@ java_test( ) java_test( + name = "EagerStreamTest", + test_class = "org.brotli.dec.EagerStreamTest", + runtime_deps = [":test_lib"], +) + +java_test( name = "SynthTest", test_class = "org.brotli.dec.SynthTest", runtime_deps = [":test_lib"], diff --git a/java/org/brotli/dec/BrotliInputStream.java b/java/org/brotli/dec/BrotliInputStream.java index a2bca95..a27e928 100644 --- a/java/org/brotli/dec/BrotliInputStream.java +++ b/java/org/brotli/dec/BrotliInputStream.java @@ -16,7 +16,7 @@ import java.io.InputStream; */ public class BrotliInputStream extends InputStream { - public 
static final int DEFAULT_INTERNAL_BUFFER_SIZE = 16384; + public static final int DEFAULT_INTERNAL_BUFFER_SIZE = 256; /** * Internal buffer used for efficient byte-by-byte reading. @@ -44,7 +44,8 @@ public class BrotliInputStream extends InputStream { *

For byte-by-byte reading ({@link #read()}) internal buffer with * {@link #DEFAULT_INTERNAL_BUFFER_SIZE} size is allocated and used. * - *

Will block the thread until first kilobyte of data of source is available. + *

Will block the thread until first {@link BitReader#CAPACITY} bytes of data of source + * are available. * * @param source underlying data source * @throws IOException in case of corrupted data or source stream problems @@ -59,7 +60,8 @@ public class BrotliInputStream extends InputStream { *

For byte-by-byte reading ({@link #read()}) internal buffer of specified size is * allocated and used. * - *

Will block the thread until first kilobyte of data of source is available. + *

Will block the thread until first {@link BitReader#CAPACITY} bytes of data of source + * are available. * * @param source compressed data source * @param byteReadBufferSize size of internal buffer used in case of @@ -82,6 +84,10 @@ public class BrotliInputStream extends InputStream { } } + public void setEager(boolean eager) { + state.isEager = eager ? 1 : 0; + } + /** * {@inheritDoc} */ diff --git a/java/org/brotli/dec/Decode.java b/java/org/brotli/dec/Decode.java index 4a1ded6..9e3d43b 100644 --- a/java/org/brotli/dec/Decode.java +++ b/java/org/brotli/dec/Decode.java @@ -25,10 +25,10 @@ final class Decode { private static final int COPY_UNCOMPRESSED = 5; private static final int INSERT_LOOP = 6; private static final int COPY_LOOP = 7; - private static final int COPY_WRAP_BUFFER = 8; - private static final int TRANSFORM = 9; - private static final int FINISHED = 10; - private static final int CLOSED = 11; + private static final int TRANSFORM = 8; + private static final int FINISHED = 9; + private static final int CLOSED = 10; + private static final int INIT_WRITE = 11; private static final int WRITE = 12; private static final int DEFAULT_CODE_LENGTH = 8; @@ -550,9 +550,7 @@ final class Decode { private static void readNextMetablockHeader(State s) { if (s.inputEnd != 0) { s.nextRunningState = FINISHED; - s.bytesToWrite = s.pos; - s.bytesWritten = 0; - s.runningState = WRITE; + s.runningState = INIT_WRITE; return; } // TODO: Reset? Do we need this? 
@@ -674,9 +672,7 @@ final class Decode { s.pos += chunkLength; if (s.pos == s.ringBufferSize) { s.nextRunningState = COPY_UNCOMPRESSED; - s.bytesToWrite = s.ringBufferSize; - s.bytesWritten = 0; - s.runningState = WRITE; + s.runningState = INIT_WRITE; return; } @@ -686,12 +682,12 @@ final class Decode { private static int writeRingBuffer(State s) { int toWrite = Math.min(s.outputLength - s.outputUsed, - s.bytesToWrite - s.bytesWritten); + s.ringBufferBytesReady - s.ringBufferBytesWritten); if (toWrite != 0) { - System.arraycopy(s.ringBuffer, s.bytesWritten, s.output, + System.arraycopy(s.ringBuffer, s.ringBufferBytesWritten, s.output, s.outputOffset + s.outputUsed, toWrite); s.outputUsed += toWrite; - s.bytesWritten += toWrite; + s.ringBufferBytesWritten += toWrite; } if (s.outputUsed < s.outputLength) { @@ -712,6 +708,15 @@ final class Decode { return group; } + // Returns offset in ringBuffer that should trigger WRITE when filled. + private static int calculateFence(State s) { + int result = s.ringBufferSize; + if (s.isEager != 0) { + result = Math.min(result, s.ringBufferBytesWritten + s.outputLength - s.outputUsed); + } + return result; + } + /** * Actual decompress implementation. */ @@ -722,6 +727,7 @@ final class Decode { if (s.runningState == CLOSED) { throw new IllegalStateException("Can't decompress after close"); } + int fence = calculateFence(s); int ringBufferMask = s.ringBufferSize - 1; byte[] ringBuffer = s.ringBuffer; @@ -734,6 +740,7 @@ final class Decode { } readNextMetablockHeader(s); /* Ring-buffer would be reallocated here. 
*/ + fence = calculateFence(s); ringBufferMask = s.ringBufferSize - 1; ringBuffer = s.ringBuffer; continue; @@ -787,12 +794,11 @@ final class Decode { BitReader.fillBitWindow(s); ringBuffer[s.pos] = (byte) readSymbol(s.hGroup0, s.literalTree, s); + s.pos++; s.j++; - if (s.pos++ == ringBufferMask) { + if (s.pos >= fence) { s.nextRunningState = INSERT_LOOP; - s.bytesToWrite = s.ringBufferSize; - s.bytesWritten = 0; - s.runningState = WRITE; + s.runningState = INIT_WRITE; break; } } @@ -813,12 +819,11 @@ final class Decode { prevByte1 = readSymbol( s.hGroup0, s.hGroup0[literalTreeIndex], s); ringBuffer[s.pos] = (byte) prevByte1; + s.pos++; s.j++; - if (s.pos++ == ringBufferMask) { + if (s.pos >= fence) { s.nextRunningState = INSERT_LOOP; - s.bytesToWrite = s.ringBufferSize; - s.bytesWritten = 0; - s.runningState = WRITE; + s.runningState = INIT_WRITE; break; } } @@ -868,7 +873,6 @@ final class Decode { s.maxDistance = s.maxBackwardDistance; } - s.copyDst = s.pos; if (s.distance > s.maxDistance) { s.runningState = TRANSFORM; continue; @@ -907,12 +911,11 @@ final class Decode { ringBuffer[s.pos] = ringBuffer[(s.pos - s.distance) & ringBufferMask]; s.metaBlockLength--; + s.pos++; s.j++; - if (s.pos++ == ringBufferMask) { + if (s.pos >= fence) { s.nextRunningState = COPY_LOOP; - s.bytesToWrite = s.ringBufferSize; - s.bytesWritten = 0; - s.runningState = WRITE; + s.runningState = INIT_WRITE; break; } } @@ -933,16 +936,13 @@ final class Decode { int transformIdx = wordId >>> shift; offset += wordIdx * s.copyLength; if (transformIdx < Transform.NUM_TRANSFORMS) { - int len = Transform.transformDictionaryWord(ringBuffer, s.copyDst, + int len = Transform.transformDictionaryWord(ringBuffer, s.pos, Dictionary.getData(), offset, s.copyLength, transformIdx); - s.copyDst += len; s.pos += len; s.metaBlockLength -= len; - if (s.copyDst >= s.ringBufferSize) { - s.nextRunningState = COPY_WRAP_BUFFER; - s.bytesToWrite = s.ringBufferSize; - s.bytesWritten = 0; - s.runningState = WRITE; + 
if (s.pos >= fence) { + s.nextRunningState = MAIN_LOOP; + s.runningState = INIT_WRITE; continue; } } else { @@ -954,11 +954,6 @@ final class Decode { s.runningState = MAIN_LOOP; continue; - case COPY_WRAP_BUFFER: - Utils.copyBytesWithin(ringBuffer, 0, s.ringBufferSize, s.copyDst); - s.runningState = MAIN_LOOP; - continue; - case READ_METADATA: while (s.metaBlockLength > 0) { BitReader.readMoreInput(s); @@ -975,6 +970,10 @@ final class Decode { copyUncompressedData(s); continue; + case INIT_WRITE: + s.ringBufferBytesReady = Math.min(s.pos, s.ringBufferSize); + s.runningState = WRITE; + // fall through case WRITE: if (writeRingBuffer(s) == 0) { // Output buffer is full. @@ -983,7 +982,14 @@ final class Decode { if (s.pos >= s.maxBackwardDistance) { s.maxDistance = s.maxBackwardDistance; } - s.pos &= ringBufferMask; + // Wrap the ringBuffer. + if (s.pos >= s.ringBufferSize) { + if (s.pos > s.ringBufferSize) { + Utils.copyBytesWithin(ringBuffer, 0, s.ringBufferSize, s.pos); + } + s.pos &= ringBufferMask; + s.ringBufferBytesWritten = 0; + } s.runningState = s.nextRunningState; continue; diff --git a/java/org/brotli/dec/DictionaryData.java b/java/org/brotli/dec/DictionaryData.java index 9ac6e55..2355b28 100644 --- a/java/org/brotli/dec/DictionaryData.java +++ b/java/org/brotli/dec/DictionaryData.java @@ -6,6 +6,7 @@ package org.brotli.dec; +import java.io.UnsupportedEncodingException; import java.nio.ByteBuffer; /** @@ -20,31 +21,33 @@ final class DictionaryData { private static void unpackDictionaryData( ByteBuffer dictionary, String data0, String data1, String skipFlip) { - int n0 = data0.length(); - int n1 = data1.length(); - if (n0 + n1 != dictionary.capacity()) { + // Initialize lower 7 bits of every byte in the dictionary. + byte[] dict; + try { + // NB: String#getBytes(String) is present in JDK 1.1, while other variants require JDK 1.6 and + // above. 
+ dict = (data0 + data1).getBytes("US-ASCII"); + } catch (UnsupportedEncodingException e) { + throw new RuntimeException(e); // cannot happen + } + if (dict.length != dictionary.capacity()) { throw new RuntimeException("Corrupted brotli dictionary"); } + + // Toggle high bit using run-length delta encoded "skipFlip". int offset = 0; - for (int i = 0; i < n0; ++i) { - dictionary.put(offset, (byte) data0.charAt(i)); - offset++; - } - for (int i = 0; i < n1; ++i) { - dictionary.put(offset, (byte) data1.charAt(i)); - offset++; - } - offset = 0; int n = skipFlip.length(); for (int i = 0; i < n; i += 2) { int skip = skipFlip.charAt(i) - 36; int flip = skipFlip.charAt(i + 1) - 36; offset += skip; for (int j = 0; j < flip; ++j) { - dictionary.put(offset, (byte) (dictionary.get(offset) | 0x80)); + dict[offset] |= 0x80; offset++; } } + + dictionary.put(dict); } static { diff --git a/java/org/brotli/dec/EagerStreamTest.java b/java/org/brotli/dec/EagerStreamTest.java new file mode 100755 index 0000000..069ae34 --- /dev/null +++ b/java/org/brotli/dec/EagerStreamTest.java @@ -0,0 +1,386 @@ +/* Copyright 2018 Google Inc. All Rights Reserved. + + Distributed under MIT license. + See file LICENSE for detail or copy at https://opensource.org/licenses/MIT +*/ + +package org.brotli.dec; + +import static org.junit.Assert.assertTrue; + +import java.io.ByteArrayInputStream; +import java.io.FilterInputStream; +import java.io.IOException; +import java.io.InputStream; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +/** + * Tests for {@link Decode}. 
+ */ +@RunWith(JUnit4.class) +public class EagerStreamTest { + + private static final byte[] DATA = { + 31, 118, -122, 17, -43, -92, 84, 0, -76, 42, -80, -101, 95, -74, -104, -120, -89, -127, 30, 58, + -4, 11, 91, -104, -99, -81, 44, 86, 61, 108, -74, -97, 68, 32, -120, -78, 97, -107, 88, -52, + -22, -55, -8, -56, -106, -117, 49, 113, -106, -82, -43, -12, -11, -91, -66, 55, 68, 118, -127, + -77, -104, -12, 103, -14, -94, -30, -112, 100, 79, -72, -42, 121, 62, -99, 76, -39, -89, 42, 58, + -110, 91, 65, 32, -102, -113, 49, 4, 73, 60, 122, -106, 107, 16, -123, 30, -97, 90, -102, -83, + -65, -90, 34, 26, -26, 52, 75, -118, 43, -47, -47, 52, 84, -10, -121, -68, -2, 20, 80, 101, 53, + 101, -119, -17, -111, -75, -21, -66, -96, -80, -114, 4, -65, 124, -89, -3, -25, -25, -21, -35, + -15, -114, 55, 14, -76, -68, 71, 9, 123, 46, 78, 67, -18, -127, 70, -93, -128, -44, -87, 3, 36, + -107, -3, 62, 83, 75, -123, -125, -11, 50, 46, -68, 80, 54, 9, -116, -29, 82, -14, -87, -94, 92, + -88, -86, -18, -1, 22, 3, 46, -98, -128, -27, 121, -56, 88, -37, 85, -43, 61, -60, 12, -122, + 107, -64, -27, -45, -110, 123, 60, 99, 108, 46, -29, -77, 76, 65, 100, 92, -104, 40, 63, 19, 36, + -89, 80, -39, 37, -95, -74, 97, 90, -109, 54, 105, -10, -38, 100, 95, 27, 36, 33, -60, 39, 100, + 32, -18, 93, -46, -99, 103, 127, -91, -62, 82, 76, 56, -66, -110, -16, 83, -116, -76, -9, -47, + -5, -32, -65, 111, 0, 55, 47, -60, -95, -56, -100, 65, 125, 38, 77, 38, -32, -62, 55, 119, 10, + 120, -69, 33, -111, -62, -87, 17, 102, -95, -106, 26, -50, -16, -109, 94, 83, -79, 90, 2, 42, + -47, 37, -124, 114, -68, 6, 104, -65, 38, -108, -114, -110, 73, -95, -83, -90, -86, -36, -48, + -63, -97, -120, -25, -53, 93, -77, -50, 59, -74, -9, 36, 85, 11, 76, 95, 74, -61, -9, 116, -14, + -38, 73, 78, 44, -92, 58, -27, -54, 38, 81, 50, -36, -46, -117, 126, 89, 53, -37, -58, -12, 61, + 77, -56, -85, -21, -128, 43, -111, 14, 54, 57, 116, 52, -85, 70, 88, -72, -26, 54, 109, -70, + -84, -13, -1, -54, 91, 81, 101, -65, 49, -48, 
-16, 26, -115, -39, 100, -21, 105, -121, 38, 72, + -115, 104, -100, 36, 120, 15, -109, 115, 64, 118, -68, -14, -26, -57, -71, 9, -118, -113, 15, + 94, 108, 114, 109, -14, -80, -31, -57, -6, 57, 111, -36, -92, -25, -23, -71, -61, 120, 93, -65, + 104, -123, -53, 35, -77, -8, -23, -31, 99, -3, 73, 75, 98, -2, -94, 73, 91, -109, -38, -78, + -106, -121, -17, -21, 55, 45, -26, -7, -93, 38, 59, -90, -116, 3, -68, -2, -110, 19, -96, 28, + -23, -39, 102, 99, 8, -82, -41, 63, 88, -70, 115, -123, -11, 111, 92, 47, -12, -16, -70, -2, + -29, 101, 61, -45, -57, 54, 24, -125, 20, -37, -75, 89, -56, 52, 125, 22, -68, -63, 105, -91, + -20, 91, 56, -99, -56, 35, -77, -78, -24, -79, 57, 5, -55, 101, -127, 75, -35, -113, -51, -103, + 79, 102, 16, -124, -79, -128, -45, -65, -84, -97, -91, -90, -105, 76, 90, -93, 90, -49, -41, + 104, 44, 81, -37, -84, 103, -120, -51, 79, -43, -114, -101, 38, -78, -94, -1, 15, 109, -62, 34, + -65, -127, 98, 32, 46, -72, 70, 58, -61, -55, 90, 30, -103, 5, 109, -105, -119, 81, 92, -40, + -75, -23, -77, 36, 18, 62, -33, -51, -38, -19, -12, 89, -101, 117, 94, 71, 127, -43, 54, 115, + -67, 34, -83, -115, 127, 127, 42, 126, -121, -121, -40, 56, -113, 60, -27, 30, 44, -21, 98, + -123, -14, 91, -69, 15, -81, 119, -101, 25, -73, 40, 105, 26, -86, -31, 86, -75, 74, 94, -74, + 19, -4, -20, 69, 24, 43, -5, -91, 6, -89, 52, 77, -65, -71, 82, -81, -52, 22, -61, -15, 51, 22, + 1, 70, -43, -3, -39, -27, 123, -13, -127, -86, 65, 51, 45, 127, -101, -27, -3, -44, -34, 75, 69, + 77, 71, -34, 7, -51, 93, -83, -84, -57, -38, -100, 59, -105, -1, 44, -47, 63, 96, -127, 32, -63, + 16, 80, -64, -127, 6, 54, 12, 44, 28, 48, -128, 4, 10, -104, 64, 3, 11, -47, 59, -79, -125, 52, + -16, -78, -66, 19, -6, -33, -107, -10, -4, -42, 102, -31, -32, 99, 115, -22, -96, -45, -112, 28, + 126, -44, -4, -47, -99, 127, -84, 37, -112, -34, 36, 1, -68, -14, -16, 55, 83, -99, 120, -69, + -30, 89, 48, 126, -80, -43, 15, 13, -18, 14, -4, -126, -120, -118, -11, 100, 16, 76, 17, 54, + -75, 114, 101, 
37, 121, -23, -65, 39, 94, -48, -78, 67, -61, 75, 48, 23, -127, 83, -124, 95, -5, + 67, 13, 87, 18, -2, 117, -36, -121, 115, -112, -107, -54, -36, 14, -4, -68, 35, 32, 79, -118, + 81, 94, -56, -110, 37, -84, -121, 72, -7, -52, -40, -44, -1, 73, 123, 12, 42, -67, -87, 63, -2, + -100, 29, -41, 112, 98, -125, 88, 97, -56, 90, 7, -40, -111, -126, 74, 121, -95, -45, -69, 48, + -98, 18, -20, -124, 3, 46, -5, 26, 24, -79, 109, 4, 43, 60, 97, 96, -76, -21, 95, -52, -40, -45, + 2, -103, -107, -9, 79, -79, -82, -73, -51, -74, -10, 81, -77, 111, -96, -41, -120, -38, 24, -87, + 93, -41, 64, 72, 57, -81, -32, 60, -79, 36, -84, -89, -7, -25, 81, -98, 36, -22, -69, 86, 123, + 120, -16, -113, -70, 47, -125, 2, 97, 78, -91, 102, 120, -91, 5, -71, 39, 116, -12, -79, -29, + -9, 87, -5, -37, 87, -73, 116, -15, -10, -106, -49, -3, -21, 5, 120, 47, 72, -40, 79, -3, 85, + -84, -87, 57, -83, -67, -64, 122, -39, 36, 70, -27, 71, -73, 42, -100, -99, 124, -90, 90, -29, + -54, -115, 7, 89, -51, 9, -43, 32, 79, -104, 127, -38, 7, 93, -80, -124, 27, 96, 54, -51, -7, + 57, 57, 63, 21, 110, 70, 122, 76, 51, 124, 78, -5, 126, -100, -98, 116, 59, 125, -106, -113, + -111, -128, 92, 43, -19, -2, 105, -90, 96, -116, -116, -30, 115, -20, -106, 64, -108, -111, 94, + -9, -123, 52, -71, -88, -84, 87, -25, -54, -117, -2, -29, 29, -85, -22, -20, -94, -25, 98, 101, + 114, 80, -55, -51, 97, 99, 117, -86, 2, 79, 48, 110, 44, -94, -127, -85, 61, -95, 30, -91, -125, + 83, 113, -93, -4, -126, -98, -93, -68, -99, -70, -37, -73, -90, 4, 53, -2, 78, -35, 101, 42, -6, + -3, 106, -117, -127, 48, 31, 88, 117, 116, 106, -98, -23, -117, -7, -57, -128, -118, -117, -118, + 115, 30, -61, 6, -38, -114, -103, 37, 53, -4, -100, -121, 98, -110, -113, 2, -20, 26, -88, -118, + 19, -71, 39, -54, -11, -28, 47, 28, 89, 35, -13, -20, -48, 14, -6, -91, -85, -119, -7, 116, 112, + 114, 41, 44, -1, -39, 60, -85, -54, 101, -119, 95, -77, -64, -121, 47, 75, -78, -30, -66, -38, + -15, 98, 14, 82, -60, 85, -90, -78, 112, -7, 64, 5, 28, 64, 
41, -64, 57, 85, 21, 122, -52, 90, + 70, -73, 17, 47, -125, 40, -45, -7, -91, 100, -21, -120, -51, 21, 65, 31, 110, -105, -79, -80, + 105, -43, 73, -61, 45, -30, -4, 83, 95, 3, 109, 55, -92, 120, 74, -36, -111, 54, -26, 76, -69, + 7, -20, 55, 4, 70, -124, -31, -32, 127, -63, -58, 73, 106, 109, -41, -45, 96, 30, 63, 14, 8, 16, + -88, 69, -115, -17, -14, -116, 115, -88, 119, -65, 16, -64, -112, -73, -10, -46, -7, 113, 5, 54, + -38, -47, -18, 106, 23, 12, -117, 120, -107, 121, -62, -35, -6, -56, 112, 81, 3, 5, 31, -11, + -92, -85, -29, 102, 43, 108, 88, -69, 55, -74, 110, 97, -128, 29, -63, -114, -19, 77, 123, 23, + 76, 81, 57, 51, 117, -74, -1, 74, 84, 70, 86, -109, -127, 122, 10, 9, 23, 71, 110, -116, -30, + -85, -104, 2, 40, -62, 20, 46, 8, 95, 46, 13, 113, -83, 124, 33, 38, 105, -99, 72, -62, -80, + -16, -118, 92, -66, -14, -124, 112, 79, 103, 53, -127, 61, -31, -92, 92, -42, -37, -37, -24, + -116, 2, -81, 40, 46, -44, 23, -68, -113, 88, 92, 95, 11, 118, 98, 19, -80, -102, 96, 73, -20, + 47, -105, -120, -74, -83, -77, -87, -59, -97, 112, 99, -52, 80, 116, -119, -44, 18, 62, 108, 73, + -34, 70, 28, 73, 81, -26, 87, -125, -55, 64, -53, -73, 114, -3, -45, -109, 19, -2, 68, 119, 14, + 26, 72, 19, 13, -121, -98, 26, -52, 85, 34, 17, -95, -7, 20, -12, 106, -11, 104, 20, -106, -42, + -26, 107, -106, 112, 103, -53, -62, 13, -58, -23, 23, 65, -104, -55, -90, 107, 55, -77, -25, + -125, 63, -61, -21, 117, -102, -70, -93, -67, -45, -61, 18, -63, 7, -127, 90, 16, -25, 116, 80, + 35, 105, 80, -93, 105, -44, 114, -126, -103, 88, -102, -76, -94, -66, 69, -35, 22, 36, 95, -55, + 22, 43, 78, -111, 109, 72, 104, -49, -9, -48, 59, 102, -54, -43, -128, 111, 127, -9, 35, 23, + -79, 40, -122, -52, 36, -81, -4, -102, -2, -62, 53, -111, -117, 40, 122, -95, 55, 32, -127, -9, + -91, 79, -109, -81, -3, 98, -78, 56, -119, 69, -41, 76, -102, 18, 90, -15, 12, -60, 86, -106, + 34, -118, -43, -13, 61, -106, -56, 48, 27, -15, -70, -41, 127, 61, -2, -80, -13, 86, 28, 91, + -10, -8, 98, -20, 54, 
122, -116, -55, -70, -94, 54, -64, 71, 102, -106, -1, 99, -73, -71, -18, + -11, 56, 11, -27, -5, 11, -86, 126, 8, 46, -21, 63, -66, -43, 88, 46, -113, -5, 113, 26, -9, + -32, 18, -3, -6, -38, 81, 38, -110, 111, 97, 34, 65, 114, -71, -118, 9, -110, -109, -61, -113, + 31, -82, -102, -127, 16, -7, -16, -11, -87, -76, -41, -52, 58, -116, 100, 102, -127, 6, 127, 64, + 14, 110, 112, 43, 44, 87, 42, -118, -119, -39, 64, -7, 57, 16, 2, -69, -12, -54, -94, 36, -48, + 123, -119, 82, 46, 26, -62, 30, 97, -17, 34, 80, 32, -15, 116, -96, -3, 33, 34, 51, 59, -63, + -100, -7, -79, -126, -21, -15, -18, -113, 30, -25, 107, -25, -125, 53, 82, -15, -80, 96, -24, + -47, 94, -25, -109, -94, 114, -62, 112, -104, 26, -107, -68, -14, -36, -9, -89, 27, -75, 62, 62, + -20, -125, -77, -57, -127, 80, 58, -118, 63, -27, -82, -126, 74, -23, -91, -28, -95, 8, -122, + -73, 28, -87, -74, 80, -15, -119, 14, 32, 124, 73, 15, 61, -32, -68, 81, 56, -119, 66, 105, 3, + -15, 20, -86, 124, -70, -113, 100, -72, -117, -97, 127, 103, 16, 105, 8, 39, -128, -64, -47, 66, + 123, -110, 13, 123, -124, -24, 42, 102, -4, 47, 107, 125, 63, -52, -35, 113, -74, 13, 8, 17, 16, + -106, -21, -69, 47, -3, 103, -2, 19, -100, 111, -11, 1, 112, 90, -38, -31, -45, -55, 25, 92, + -122, 66, -18, -98, -82, -49, 119, -35, -128, 26, 60, -79, -23, 127, 82, -52, 115, 77, -109, + -111, 17, -99, 31, 33, 41, 35, 87, -47, -126, -18, -25, 81, -71, 9, -72, -92, 64, -92, 23, 116, + 96, 40, 55, -87, 119, -105, 66, 49, 46, -10, 26, -25, -105, 127, -124, 86, -2, 39, 116, -108, 6, + 21, 15, 1, 75, -5, 101, 13, 57, 70, 126, -50, -97, 123, -73, -77, 53, -11, -73, 44, -99, 91, 85, + 21, -59, -1, 117, 64, -100, 47, 75, 93, 9, -4, 83, -55, 15, 99, 31, 43, -49, 15, -89, -115, + -114, -50, -35, -19, -65, 122, -39, 92, -21, -3, -66, 8, -70, 107, -55, -86, -36, -23, -21, 80, + -79, 48, 116, 57, -71, 33, -111, -68, -75, 37, 55, 39, 124, 96, -66, 10, 14, 118, 50, 85, -33, + 54, -101, -7, 21, 88, -122, 50, -92, 123, 37, 109, -60, -127, 26, 110, -20, -31, 
-66, -56, -24, + 47, -14, -60, -101, 69, -38, 78, 0, 44, -71, 108, 4, 25, -68, -106, 20, -40, -103, 108, -70, + -56, 78, 12, 82, 81, 46, -105, -123, -46, -20, -127, -67, -77, 76, -74, 40, 105, 2, 27, 112, + -107, -121, -53, 6, -88, -11, 26, 41, 64, -69, -44, 27, 47, 24, -31, -86, -4, 4, -46, 42, 50, + -55, 37, -11, -95, 108, 54, 37, 67, 37, -14, -40, 41, 124, 22, 108, 99, 16, 55, 88, 19, 49, -87, + 27, 17, -68, -107, 15, -62, 84, 109, 72, -26, 71, 63, -17, -72, -63, -101, -8, 62, 24, -112, + 126, -102, -64, 29, -19, -75, 74, 29, 90, -90, 83, -22, 106, -27, -114, 56, -111, -33, 11, 3, + -16, 94, 115, -97, 67, -78, 62, -93, -36, 60, -65, -54, 72, 70, 44, -77, 73, 29, -106, 38, 72, + -37, -110, 79, -98, -15, -58, 96, -85, -68, -15, 73, 57, -127, 14, -123, -40, 70, 63, -64, 115, + -63, 127, 94, 85, 52, 30, -62, 83, -30, -97, 82, 39, 2, 36, -50, 106, 116, 66, 104, -14, 73, 14, + -106, -127, 11, 41, -27, 56, -99, -74, 55, 123, 124, 9, 46, 12, -97, -37, -10, 122, 124, -27, + -64, 93, -70, 9, 119, 13, -9, -71, -118, 19, 50, -36, 114, 120, -24, -62, 40, 127, 9, -62, 84, + 57, 66, 91, -114, 120, -49, 63, 99, -73, -66, -64, 84, -31, 67, -52, 12, 38, -62, 37, -122, -50, + -95, 24, 19, 54, -80, 57, -118, -84, 124, 90, 53, 72, 29, -123, 67, -65, 99, -58, -28, 20, -110, + -103, 92, -91, -108, 23, -118, 44, 74, 76, -29, 94, -121, -37, -32, 107, -62, -67, -55, -45, + -50, -44, 25, -77, -102, 90, -128, -31, -5, -64, 110, 122, 88, -18, -53, -85, 122, -11, 100, + -106, 97, 59, -103, -110, 5, -16, 59, -126, -74, 9, -119, 115, 49, -73, -42, 32, 100, 59, -98, + 106, 55, -101, 87, 126, 59, -23, 106, -102, 100, -69, -46, 76, 53, -107, -119, -113, 104, 117, + -27, 75, -32, 8, -81, -10, 50, 108, -32, 51, -79, -53, -2, 66, -9, 113, 14, 99, -100, -34, -21, + 13, 2, 45, -33, 0, -16, -64, -126, 69, -25, -34, 28, 105, -48, -38, 82, 12, 27, -71, 35, 13, 11, + 21, 26, -19, -4, 44, -52, -126, -63, -32, -84, -22, -63, -29, 96, -97, -82, -12, -53, 98, 41, + -69, -38, 101, -31, 47, -9, 16, -10, 9, -36, 
-103, -91, -65, -36, -93, -45, 94, 110, 54, -94, + 68, -39, -116, -40, 61, -112, -91, -79, 98, -36, 87, 35, 88, -61, 125, 112, -84, 48, -38, 105, + -92, 69, -68, 92, 0, 27, -72, -65, 97, 98, 66, 97, -74, 29, 46, -21, 102, 61, 120, -62, 38, + -125, -60, -43, 4, 5, -27, 113, -43, 105, -22, -110, 68, 13, -14, -23, 18, 95, -79, -108, 87, + 19, -80, 16, 54, -121, 88, -64, -113, 73, 3, -20, 17, 0, 26, -88, -49, -2, 21, 120, -105, -85, + -113, 76, 106, 37, -13, -75, 29, -127, 10, -17, -53, -124, 24, 37, -31, 26, -1, 109, 88, -88, + -37, -51, -32, -125, 48, -40, 123, -108, 55, -120, -62, -91, 47, 62, -127, -25, 99, 68, 22, -40, + 58, 119, -31, -93, -122, 39, -92, 25, -127, -42, 97, 69, -6, 110, -61, -21, -94, 82, 123, -93, + -51, -90, 50, -96, 127, -32, 125, -76, 117, 75, -52, 79, 110, -51, -15, -81, 49, 62, 118, 120, + -27, 22, 84, -22, 77, -105, 87, -7, -23, 47, -8, 108, 82, -12, 84, -52, -85, 68, -89, -24, -32, + 6, -34, -83, 80, 44, 12, -51, 50, 74, -121, -106, 6, 85, 32, 42, 76, -59, -52, -99, 102, 108, + -127, -49, 0, 60, 62, 2, 13, -19, -92, -41, -69, 55, -70, 94, 23, 36, 89, 70, -115, -51, 26, + -95, 13, -69, 42, 62, 59, -24, -63, -50, -6, -86, -97, -115, -58, -107, 69, -12, -109, 73, 4, + 63, 12, 32, 13, -123, -72, -41, -7, -81, 37, -91, -128, 109, -79, -80, 88, -22, 108, 126, 103, + 27, -29, -81, 52, 55, -91, -13, -43, -75, -59, 80, -6, 6, 83, -103, -64, 8, 63, -34, -59, 21, + 55, -115, 62, 77, 30, -50, -71, -66, 87, 99, -47, 0, 124, 76, 120, 79, -12, 54, -16, -98, -72, + -41, -66, -14, 114, -27, 108, 57, -49, 107, -73, 90, 107, -103, 25, -107, 112, -119, -54, 106, + -54, -8, -13, -81, -62, 92, -84, 113, 77, 74, -63, 104, 92, -94, -128, -43, -54, -71, 117, 27, + 14, 98, 52, 119, -93, -77, -80, -46, 88, 35, 123, 86, 87, 122, 62, 108, 19, 27, 111, 2, 62, -67, + 89, 14, -82, 41, 123, -117, 74, 109, -124, -115, 15, 123, -65, 42, -81, -105, 19, -30, 86, -72, + 84, 63, -109, 34, -65, -127, 6, -104, 77, 103, -111, 90, 16, 31, -74, -33, 122, 58, 52, 10, 2, + 65, 72, 68, 
79, 52, 31, -19, 100, -86, 21, -49, 116, 101, 82, 111, -96, -76, 67, -40, -62, -15, + -79, 109, -58, 6, 11, -91, -29, 65, 21, 75, 74, -28, 21, 103, 46, 48, -42, 51, -110, 80, -95, + -102, -9, 8, -95, 102, 102, 16, 105, 103, 92, -106, -109, 77, 93, 32, -12, -25, 5, 17, -86, -34, + 58, -50, 55, 63, -8, -72, 3, 26, 91, 72, 71, -77, 94, 91, 39, 45, 7, 0, 30, -45, -100, 35, 43, + -41, -72, 16, -103, -115, -4, 51, 39, -23, -89, -84, 105, 94, -91, -88, 82, 123, -26, 51, -16, + 97, 47, -39, 35, 46, -89, 74, 7, -80, 116, -21, 82, -84, -13, -99, 31, -58, -93, 36, 99, 36, 44, + -65, 45, 94, -91, -41, 115, -10, 116, -67, 45, 19, -20, 113, -62, 111, 124, 108, 71, -121, -64, + 122, -121, -105, 114, 115, -126, -93, -108, -113, -1, -80, -86, 116, -111, -29, 53, -76, 87, 19, + 45, -30, 91, 91, -7, -49, 12, 112, -8, -26, 82, 58, -82, -76, 119, -50, 14, 85, 113, 20, 48, + -102, 37, 24, -120, -107, -52, 67, -44, -92, -79, -40, 28, 21, 55, 116, 88, 19, -49, -78, 86, + -89, 74, -4, 118, 75, 11, -103, -127, -47, -16, -77, -78, 8, 2, -88, 50, 23, -99, 102, -100, + -116, -99, -109, -112, -115, 78, 55, -39, -84, 100, -91, -101, 73, -9, 39, -23, 62, -125, -106, + -55, 119, -118, 114, -33, -99, 20, -53, 91, 115, 47, -93, 51, -99, -9, 92, -71, 120, 57, -44, + -87, -11, 108, 30, 43, -4, 118, 90, 126, -54, -99, -47, -2, -61, -3, -62, 45, 92, -70, -105, 30, + 98, 112, -94, 56, 35, -22, 32, -93, -6, -36, -5, -77, -78, 120, 45, 104, 69, -49, -30, 39, 75, + 38, -94, -12, 34, 34, -44, 48, -100, 74, 34, 69, 94, -12, 73, 27, -111, 90, 33, -38, 93, 40, + -16, 89, 26, -110, -116, -10, -65, 85, -57, 48, -86, 121, 118, -41, 63, 33, 109, -78, -26, 122, + 111, -115, -52, 95, 26, -70, -14, -86, -80, -27, -6, 12, -44, 123, 28, 93, -74, 14, -124, 87, + -28, -12, 111, -117, -83, 48, -41, -3, 60, -51, -91, 118, 54, 110, 18, -2, -120, -66, 46, -35, + -91, 106, 94, -91, -11, 41, -92, -22, 96, -113, -109, 105, 56, -80, 17, -118, 124, -16, 30, 30, + 117, 126, -99, -106, -69, -28, 85, 85, -41, 21, -95, -85, -112, 
-125, -45, 69, 10, -34, -120, + 33, -58, 120, 51, -22, -7, 31, -34, 4, 55, -102, -70, 118, -83, 49, 111, -45, -9, 69, -95, -66, + 116, -3, 104, -61, 17, 21, -20, 121, 117, 127, -70, 5, 89, -89, 51, 15, 64, 126, -73, 97, 90, + 119, -22, -37, -54, 52, -33, 26, -54, 75, 79, 73, 100, 44, 3, 53, -25, 49, -123, -101, -80, -54, + -81, -32, 88, 49, -14, -4, 18, 42, 52, -65, -33, 68, 83, -89, -11, 57, 102, 71, 122, 74, -92, + -44, -94, -108, 14, 104, -107, -124, -63, 8, 32, 85, -18, -16, -91, -63, -38, 27, -108, 24, 19, + -33, 53, 70, -32, 41, 38, -77, -30, 89, 28, -15, -89, -86, 32, 51, 28, 67, 124, -96, -103, -34, + -113, 22, 15, -8, 104, -38, -56, 65, -96, -111, 104, -9, -38, 107, 55, 112, 47, 99, 50, -18, 90, + -69, 116, 80, 95, 52, -27, -98, 6, 12, -11, 124, -120, -96, -91, 118, -51, -120, 90, -92, -104, + -83, -73, 84, 61, 78, -39, -99, 33, 58, -45, -14, 127, -20, -44, 125, 21, -26, -21, -36, 51, 73, + 71, 73, -17, 83, 11, 107, 91, 36, -65, -24, 56, 117, 114, -126, -34, 1, 120, 66, -50, 14, 91, + 97, -35, 75, 87, 123, -53, 63, -38, -74, -62, -117, -45, -40, 125, -5, 53, 50, 0, -110, 7, 7, + 45, 37, -71, -21, 70, -95, -60, 74, -55, -54, -96, 115, -62, -32, -3, -121, -18, 27, -107, 49, + -39, 58, -39, 91, 107, 65, -99, -64, -19, -10, -126, 38, -40, -112, 0, 16, 107, -59, 119, -70, + 79, 49, -18, -76, -22, -38, -98, 35, -99, 61, 67, -100, 29, -104, -17, 22, 108, 105, 88, -114, + -65, 84, 99, -69, -84, -87, -81, -28, 68, -66, 3, 69, -69, 83, 16, 61, -102, 50, 67, 46, -98, + -77, -40, -78, 48, 68, -85, 123, -92, 37, 14, 75, 13, -23, -110, 23, 26, 90, -81, -1, -109, 85, + 121, -68, -55, -7, 21, -81, -35, 41, 3, -72, -52, 36, 35, -83, -9, -81, -124, -104, 31, 54, 8, + -32, 80, 73, 89, -41, 116, 127, -110, 68, -82, 82, -79, 105, 113, -110, -70, 121, -24, -54, 37, + -12, -70, -77, 15, 14, 105, -19, 16, -6, 73, 102, 121, -116, -62, 54, 65, 119, 43, 60, -79, -66, + -17, 1, 97, -1, -11, -5, 104, 10, 59, -108, 21, -8, 64, -71, -86, 14, -98, -87, -49, 30, -45, + 109, 43, -67, 10, 
-122, 25, 98, -102, 127, -27, -52, -61, -66, -47, 114, -94, -126, 4, 0, -65, + -11, -51, -67, 84, -43, 44, 88, 53, -6, 124, 11, -123, 34, 12, 102, -13, -106, 47, 62, -71, 43, + -65, 28, 37, 32, 80, 23, 6, 75, -103, 73, 112, 33, 84, -89, 12, -81, 42, 65, 58, 14, -102, 90, + 29, -116, 104, 107, -99, -1, -43, 122, 118, 88, -2, 117, 84, 1, -123, -2, 2, -32, -18, -122, + -36, -58, 16, 76, 115, 27, -121, -2, -79, -44, -39, 33, -29, 33, -34, 55, 71, 61, 117, -22, + -126, 51, 29, 55, -34, -48, 17, -57, 74, 71, -33, -50, 60, 41, -119, -93, -45, -127, -30, 104, + 35, 60, -117, -113, 81, -59, -39, -84, -39, -46, -106, 57, 77, 62, -11, -44, -87, 71, 35, -117, + -87, -77, -98, 68, -29, -121, -16, -16, 39, 48, -74, 23, 82, -62, 32, 62, 27, 125, 84, 39, -91, + -91, -93, 76, -24, 98, 123, -58, -114, 17, 28, 93, -17, 74, 92, -17, 9, -86, -116, -72, 54, -74, + 71, 9, -97, -33, -20, -126, -50, 117, 102, 54, 123, 124, -70, 30, -102, 27, 23, 105, -40, -35, + -89, -33, 89, 3, 44, 18, -15, 10, 116, -111, 1, -81, -31, -125, -102, 103, -93, -15, 72, 84, 19, + -30, -17, -115, 99, 43, 5, -92, 52, 59, -55, -105, -128, 19, 8, -78, 43, 7, -55, -126, -106, 11, + 69, 118, 24, -128, -54, -86, 22, -121, -43, 69, -15, 96, 52, 52, 90, -118, -10, -58, 121, 63, + -48, -13, 22, -101, 17, 42, -28, -54, -63, 121, -96, 111, 113, 103, 126, 37, -52, -40, -106, + -104, 123, -48, -92, 83, 100, -70, -52, -59, -93, -116, -90, -93, 82, -117, 103, 52, -71, -42, + 57, 25, 57, -74, 71, 7, 32, 96, -60, 11, 121, 58, 71, 40, -92, 35, 88, -12, -109, -56, -122, + -30, -118, 103, 65, -5, -90, -97, 103, -117, 66, -20, -42, -46, 67, -29, -23, 72, -97, 26, -54, + -103, -76, -47, -71, 23, -83, -20, 95, 111, 101, -83, 106, -71, -70, -63, 55, -85, -41, 117, -9, + 37, 96, -71, -118, -44, -43, 2, 107, 113, -39, -107, 41, -13, 0, -87, 77, 83, 99, 68, -84, -6, + -1, 67, 124, -57, 115, 29, 24, 26, -42, 104, 58, -87, -38, 12, -98, 11, 109, 62, 59, -66, -48, + -20, 70, -111, 11, 120, 21, -58, -29, -76, 44, -7, 26, -119, -59, -87, 44, 
122, 8, 114, -58, + -109, -119, -63, -58, -51, 33, 35, -109, 81, 110, -90, 121, -21, 64, -60, 68, 18, 75, -82, -81, + -103, -76, -116, 23, 53, 58, -41, -23, 49, -102, 81, 101, 39, -59, -91, -98, 111, 2, 65, 110, + 121, 5, 13, 97, -119, 109, 40, 82, 47, -51, 47, -57, 35, -109, 53, -42, 10, 3, -15, 122, -25, + -67, -62, -121, -120, -31, 18, -20, 87, -88, 75, 95, -121, -93, 33, 61, -88, -96, 88, -69, -54, + -121, -99, 49, 122, -53, -49, -125, 53, -79, -46, -128, 109, 125, -93, -83, 44, -101, 69, 68, + -91, -17, 55, -13, -75, -80, 21, 32, -13, 40, 86, -65, 85, 80, -82, -38, -52, 110, -119, 100, 8, + 77, -23, 67, -41, 73, 27, 38, 9, -11, -32, -30, 75, -15, 67, -41, 46, 27, -89, 9, 117, -38, -14, + -81, -4, 71, 113, -79, 81, -36, 63, 15, -70, 104, 34, -56, -39, 93, -34, -127, 90, -36, 73, 47, + -76, 113, 55, 123, -92, 48, 116, 108, -123, 31, -67, -39, 3, -9, 6, 13, -17, -50, -125, 1, 105, + 121, 100, 79, 82, -85, 123, -33, -73, 54, -61, -113, 121, -110, 69, 119, 94, -112, -120, -34, + -35, -104, -116, 44, 85, 109, -104, 127, 120, 87, 75, -48, -115, 74, 85, -47, -53, 16, -5, 92, + 67, -32, 12, 79, 109, 105, 5, -92, 51, 46, 96, -96, 63, 106, 82, -54, -95, 20, -60, -23, 48, -5, + -128, 22, 23, -93, 93, -64, 35, 21, -121, -79, 59, -1, -50, 55, -7, -10, -85, 3, -7, 121, 98, 5, + -19, 76, -78, -128, -47, -42, 61, -59, -46, -24, -16, -51, -48, 122, -26, 74, -91, 54, 53, 46, + 74, 25, -30, -74, 52, -22, 118, -103, -53, -113, 44, -19, 70, -86, 106, 72, -68, -86, 110, 34, + -35, 57, -43, 32, -4, 14, 102, 25, -76, -84, -86, -83, -2, -107, -4, 49, -97, -83, -95, 6, 100, + -73, 6, 34, 49, 59, 50, 30, -8, 6, -55, 24, -6, 67, -121, 115, 40, -50, -75, -46, -26, 111, -20, + -75, -83, -16, -48, 65, -64, 119, 62, -59, 3, -12, 109, 0, -118, -94, 17, -51, 124, 63, 42, -3, + 44, 53, -81, -35, -33, -83, 115, -114, -4, -104, 44, 7, -81, -97, -102, 104, 29, -97, 70, 91, 3, + 88, 67, -127, 78, -92, -16, -34, -18, -81, -125, -38, 117, -78, -36, 9, 76, -85, 121, 2, 10, + 114, 65, -5, -29, -34, 101, 
20, -108, 46, -90, -98, 85, -62, -51, 108, -72, -51, 44, 22, 112, + 121, 58, -58, 109, -96, 58, 103, 27, -88, -81, 99, -7, -33, -113, 64, -122, 115, 19, -93, 37, + -19, 93, -98, 78, 115, 91, -88, -82, -36, 61, 90, 77, 27, 26, -116, 80, 90, 85, 6, -87, 59, 110, + 63, 20, -81, -127, -53, 18, -73, 39, 75, 79, -106, 29, -50, -13, 43, -99, -92, 109, 80, -83, 69, + -102, 38, 90, -41, 48, -47, -93, 18, 116, 32, 90, -73, -96, 90, 49, 19, 73, -35, 60, 53, -72, + -52, 84, 52, 27, -67, -114, 82, 79, -89, -80, -111, 124, -51, 80, 110, -76, 125, 18, -73, 44, + -100, 118, -16, -64, -35, 22, -86, -116, -19, -101, -35, 42, 85, -83, 69, -65, 37, -104, -88, + -108, -25, -9, 15, 91, -100, -86, 8, -75, -37, 103, 3, -69, -9, 114, -25, 25, -87, 118, -75, + -115, -8, 74, 53, 73, 46, -22, -108, 30, 71, -96, 40, -76, 121, 71, -63, 95, 96, 113, -54, 87, + 1, -79, 2, -40, 11, 22, -118, -117, 94, -44, -112, -27, -86, 96, -4, -58, 121, -71, 54, -58, + -71, -125, -65, 126, -116, -107, 125, -28, -74, 97, 15, -76, 59, -26, 58, -38, -39, 122, 55, 85, + -109, -114, 75, 25, -74, 57, -78, -10, -76, -115, -12, 29, 84, 86, 97, 5, 116, -114, 62, -98, + -36, 105, -119, -19, 12, 11, 49, 76, 21, 56, 1, 115, 115, 42, -67, 60, -40, 19, 38, 50, 33, 112, + 98, 123, -76, -74, 50, 66, 18, -61, -114, 36, -95, 92, 124, 20, -56, 29, -41, 28, -4, -106, 115, + -83, 98, -47, 96, 87, -72, 96, -83, -93, 1, 112, -43, 59, -80, -24, 46, -45, 87, 92, -108, -78, + 101, -112, 111, -119, -67, 26, 97, 1, 36, -128, 120, 8, -20, 84, 107, -9, -104, 25, 0, -36, 58, + 111, 81, -83, 65, 42, 51, 61, -71, 118, 111, 29, -93, 39, -56, -72, -18, -53, 0, 34, -77, -59, + 112, -79, 51, 86, 82, -24, 64, -120, -1, -102, -3, 42, -93, 16, 38, 100, 39, -124, 92, -89, 31, + 94, -32, 40, 19, -8, 48, -83, -66, -68, 110, -72, 36, -38, -91, -63, 33, 35, -96, -121, -119, + -59, 56, 89, -117, -123, -79, -68, 42, -4, -116, -108, -104, -84, -111, -26, 94, -38, 61, 94, + -72, -85, -18, -30, 118, -14, -94, -74, -24, -21, -90, -83, -116, -38, -8, 9, -17, 72, 
-62, -78, + -75, 47, -117, 109, 127, -87, -36, 53, 90, 16, -72, -50, 40, 87, 97, -51, -96, -55, -120, -32, + -58, -21, 102, 117, -121, -98, 74, -67, 104, -122, 108, -3, -96, 64, -114, -3, 30, 48, -14, 44, + -41, 91, 54, 58, 80, -13, -88, 121, 32, 122, 25, 24, 9, 72, 17, -1, -93, -66, 96, -84, 4, 37, + 69, 91, 64, 32, 46, 89, 7, -32, -120, 10, -38, -3, -59, -75, 14, 116, 115, 121, 99, 122, -95, + 107, 1, 65, 70, -45, 35, -52, -87, -56, 43, 121, 12, -93, -8, 83, -118, 15, -33, -67, 45, 74, + -66, -31, -28, 5, 104, -13, 113, 19, -89, 105, 66, -82, 74, 54, -104, 69, 103, 86, 118, -44, + -75, -47, 81, -75, 8, -32, -95, 121, 48, -121, -106, -88, -15, -52, -99, -78, 58, 113, 16, 71, + -48, 76, 80, 81, 59, 43, -106, 27, -49, 2, -11, -71, -30, -80, -44, 62, -113, -20, 12, -60, -87, + 22, -30, 64, -120, 127, 121, 47, 127, 58, -98, -4, 79, -72, -117, 115, 52, 95, 40, -59, -125, + -33, 125, -96, -93, -92, 17, -99, -85, 10, -119, 91, -115, -63, -32, -11, -102, -105, -93, 90, + 37, 94, -104, -47, -63, -94, 15, -34, 20, 73, -59, 85, -31, 6, 106, -67, 14, -125, 28, -63, 40, + 86, -68, 104, -22, 124, -27, -84, -13, 43, -45, -30, -95, 95, 16, 79, 23, -66, -78, -74, 43, 86, + 70, -95, 90, -65, -1, -58, 54, 12, 47, -47, 28, 91, -54, -19, -75, -43, 12, -108, 12, 71, 38, + 118, -8, 1, 42, -113, -6, 1, -93, 118, 67, -79, 25, -80, 118, 34, -29, 0, -23, 86, 53, -118, 89, + 112, 0, -61, -88, 76, -24, 59, -75, 23, -1, 64, -80, -52, -40, 34, -50, -19, -127, 57, 79, 43, + 92, -113, -96, 73, 0, 33, 122, 42, 104, -62, -66, -108, -104, 45, -120, 69, -3, -20, -113, -40, + -70, -96, 72, -21, -95, 1, -16, -124, -87, 125, 56, -108, 7, -112, -104, 105, 80, -34, -93, 24, + -6, 35, -38, 42, -4, 23, -112, 40, 45, 106, -72, 29, 44, -36, -61, -8, -93, -34, 3, -41, -26, + 121, 6, 100, -14, -112, -117, -15, -120, -92, 44, -43, 94, -13, 121, -59, -82, -68, 7, -19, + -110, -121, -58, -118, -121, 92, -8, 33, -120, -28, -95, -31, -120, -62, 49, 51, 3, 68, 4, -56, + 51, -13, -90, 47, -16, -24, 63, 125, -11, -94, 
99, 69, -84, -54, 127, 81, -120, 42, -47, -128, + -13, 38, 115, 59, -112, -30, -9, -116, 121, 63, 111, 32, -116, -2, 0, -33, 79, -67, 90, -65, + -108, -107, -5, -107, 11, -102, 91, 106, -42, 74, 45, -80, -65, 54, 36, 121, -125, -118, -34, + -51, 36, -85, -78, 86, 121, -103, -39, 35, -76, 17, 59, 68, -40, -43, -27, 63, -76, 126, -94, + 18, 87, 20, 92, 38, -6, -54, 9, 45, 93, -57, 53, -11, -44, -38, -24, -126, -40, -24, -35, -121, + -55, -87, -63, 70, -88, 13, -78, -89, 2, 50, 59, 4, -14, 81, 25, 34, -20, 87, 116, -76, -31, + -93, 15, 112, 61, -43, -11, -86, -25, 10, 41, 1, 60, 105, -42, -90, -44, 38, 98, 126, -128, 28, + 99, 20, -97, 105, -101, 27, -106, 13, -108, -18, 23, -79, 121, 57, 93, -16, -37, -82, -1, -128, + -67, 99, 117, 79, 85, 83, 12, 53, -101, -52, -75, 72, -128, -62, 45, -54, 11, 0, -58, -88, 11, + 121, 33, 86, -87, 31, -54, 109, -37, 10, 119, -9, 55, -7, 77, -52, 93, 64, -62, 115, -88, -4, + 67, -1, -37, 31, 107, 90, -109, -121, 71, 105, -123, 61, 75, 89, 108, -91, -6, 115, 45, 109, 10, + -35, -84, 41, 127, 104, -84, -70, -6, -118, 6, 110, 99, -7, -112, 15, -79, -20, 51, -41, 78, 25, + -97, -2, -121, -117, 7, -87, -76, 60, -7, -7, 0, 51, 91, 34, 85, 21, -1, 108, 41, 8, 126, -25, + -30, 68, 109, -52, -51, 1, -111, 11, -22, -70, -33, 95, 40, 6, 63, 52, -66, -20, -6, -104, 81, + 57, 22, 82, 119, 126, 76, -10, -108, -63, -123, 19, 23, -106, -1, 117, 26, 112, -85, -78, 81, + -116, 53, 86, -126, -80, 122, 36, 67, 18, 19, -114, 73, 125, -3, -69, 99, 10, -30, 19, 112, + -103, 0, -61, 47, -106, -45, -105, -107, -56, 23, 14, 51, -70, 30, -32, 30, 7, 22, -31, -41, 19, + -47, -64, -52, 119, -66, 54, -109, -87, 3, 95, -124, 94, -48, 36, -40, 13, 19, 91, -14, -115, + 103, 66, 20, 44, 47, 8, -40, 4, -114, -110, -47, -28, -108, 89, 0, -7, -71, -91, -43, 98, 8, + -85, -98, -113, 103, -71, 69, 14, -95, -36, 92, -17, -66, -95, 123, -15, 52, 88, -60, -23, 123, + -61, -4, -33, -45, 77, 57, -121, 119, 116, -40, -31, -15, 96, 54, -49, -44, 36, -37, 111, -45, + -17, 12, 14, 
21, 105, 48, 51, 42, -89, 55, 61, -5, -2, -36, -88, 36, -35, -29, -7, -68, -28, + -76, 5, -38, -66, -72, 24, -120, 8, -86, -28, 0, 71, -89, 20, -40, -100, 61, -57, 52, 23, 66, + -2, -24, -7, 86, -100, 111, -114, -47, -25, -40, -61, -67, -104, 33, 49, 16, -115, 9, -64, 27, + 122, 34, -33, -89, -113, -50, 42, 111, -14, 110, 43, 32, -112, 101, -59, 28, 76, -2, -117, 47, + 5, -73, -75, 21, -91, 99, 81, 93, -17, -119, 68, -21, -84, -51, -64, -98, 58, -33, 77, 4, 18, + 116, 62, 111, -105, -13, 91, -92, 81, -34, 40, 17, -128, 85, -19, 20, 8, 92, 83, 10, 3, 40, 89, + 60, 109, -23, 59, -66, -22, 43, 124, 25, -105, 77, 14, 75, -111, 13, 45, -90, -108, -79, 78, + -45, -55, -44, -86, -20, -41, -11, 65, 76, -79, 91, -23, 77, -84, 114, -109, 2, -71, 68, 8, -31, + 99, 97, -104, -94, 69, 64, -16, -48, -78, 99, -58, -17, 95, 96, -64, 47, 96, -69, 60, 28, 114, + 64, -128, -128, 114, 28, -124, 72, -41, -48, 82, -6, 63, -27, -126, -86, -121, 0, 4, 4, 35, + -111, 66, 64, -61, 117, -92, 48, 88, -128, 116, 7, -24, -111, -55, 96, -59, -96, 49, -70, -41, + -47, 85, 86, -37, -32, 53, -49, 62, 68, 80, -37, 95, 29, -114, 11, -65, 90, -99, -97, 101, 96, + -88, 5, 34, 3, 23, -22, 42, 4, -4, 17, -121, 106, -60, 33, -38, -32, -8, 41, -87, -4, -35, -102, + 7, 18, 35, -7, 85, -18, 60, 15, 34, 82, 46, 68, 63, 80, -38, 4, 51, -74, -34, 83, -33, -8, 44, + 87, -18, -8, 46, -53, -109, -121, -114, 10, 63, -36, -1, -123, 69, 107, -58, 33, -11, 63, -117, + 60, 22, 73, -36, 22, -76, -92, -74, -37, -35, 87, 40, -97, -6, 95, -25, -2, -99, -101, 102, -48, + 45, -55, 85, 94, -48, 57, -100, 34, 16, -63, -16, 106, -75, -7, -109, 71, -74, 20, -16, 37, 90, + -61, 69, 19, -111, 95, -104, 116, 75, -68, 85, -80, 66, 127, 127, 67, -98, 121, 53, 23, -3, 56, + -89, 99, 57, 9, 122, 76, 119, 1, -117, 47, -105, -42, -7, 51, -8, -81, 48, -60, -69, -29, 24, + 19, -81, 43, 31, -36, 62, 96, 20, -58, 39, -122, -115, 7, -114, 118, 27, 27, 78, -101, 75, -93, + -104, -8, 119, 121, -97, -84, 58, 33, 18, -35, -29, 20, 20, 7, 112, 60, 
31, -12, 7, -128, -55, + -68, -7, -12, -115, 97, 115, 44, -46, -68, 108, 36, 121, -1, 84, -4, -26, -126, 85, -32, 36, 26, + -19, 71, -121, -92, -51, -116, 81, -71, -83, -50, 21, -119, -60, -78, -84, 102, 19, -26, 118, + -53, -13, 16, 36, -64, -83, -66, 32, -99, 54, 83, 104, 61, -19, 107, 95, -66, -42, -6, 25, 86, + -13, -53, -49, -9, 74, -13, 58, 125, -96, -32, -22, -21, -12, -38, -114, -88, -100, 35, -87, + -108, -2, -103, 87, -119, -109, 50, -28, -101, -4, -43, 105, 119, -118, 103, -104, 41, 47, 71, + 53, 11, -53, 59, -13, -11, 83, -33, 28, 11, 78, -59, 73, -33, -60, 119, -73, -127, 98, 39, 77, + 21, -8, -103, 103, 44, -87, -52, -74, 56, -63, -70, -121, 40, 103, 7, -100, 113, 53, -46, 44, + 16, 31, 102, -31, 104, -38, -120, 118, -122, -55, 25, 1, 92, 22, -14, 24, 108, 92, -90, -93, + -16, -99, -13, -127, 75, 101, -42, -86, -29, -51, -49, -105, -118, 91, -56, -51, -73, 117, 53, + -39, -73, 121, 83, -49, -10, -86, 11, -97, 40, -33, 6, -40, -9, -32, 92, -101, -83, 116, -5, + -57, -93, -121, 2, 38, -65, -6, 45, 100, 92, 92, 74, 115, 45, -33, 92, -11, 70, 33, 76, 85, 94, + 1, -111, -103, 6, -4, -31, 44, -53, -77, -45, 100, -83, 92, -11, 10, -7, 126, 23, 36, 61, -18, + -28, 67, 126, 53, -45, -77, 95, 43, -73, 30, -37, 122, -53, -79, -77, -42, 71, -124, 43, -89, + 60, -80, -89, -68, 96, 29, 103, -50, -93, 105, 7 + }; + + static class ProxyStream extends FilterInputStream { + int readBytes; + + ProxyStream(InputStream is) { + super(is); + } + + @Override + public int read(byte[] b, int off, int len) throws IOException { + int result = super.read(b, off, len); + if (result > 0) { + readBytes += result; + } + return result; + } + } + + @Test + public void testEagerStream() throws IOException { + ProxyStream ps = new ProxyStream(new ByteArrayInputStream(DATA)); + BrotliInputStream reader = new BrotliInputStream(ps, 1); + byte[] buffer = new byte[1]; + reader.read(buffer); + reader.close(); + int normalReadBytes = ps.readBytes; + + ps = new ProxyStream(new 
ByteArrayInputStream(DATA)); + reader = new BrotliInputStream(ps, 1); + reader.setEager(true); + reader.read(buffer); + reader.close(); + int eagerReadBytes = ps.readBytes; + + // Did not continue decoding - suspended as soon as enough data was decoded. + assertTrue(eagerReadBytes < normalReadBytes); + } +} diff --git a/java/org/brotli/dec/State.java b/java/org/brotli/dec/State.java index 183df44..16d1072 100644 --- a/java/org/brotli/dec/State.java +++ b/java/org/brotli/dec/State.java @@ -63,16 +63,17 @@ final class State { int distancePostfixBits; int distance; int copyLength; - int copyDst; int maxBackwardDistance; int maxRingBufferSize; int ringBufferSize; + int ringBufferFence; int expectedTotalSize; int outputOffset; int outputLength; int outputUsed; - int bytesWritten; - int bytesToWrite; + int ringBufferBytesWritten; + int ringBufferBytesReady; + int isEager; InputStream input; // BitReader diff --git a/java/org/brotli/wrapper/dec/BrotliInputStream.java b/java/org/brotli/wrapper/dec/BrotliInputStream.java index d1aa76b..76bcf1d 100644 --- a/java/org/brotli/wrapper/dec/BrotliInputStream.java +++ b/java/org/brotli/wrapper/dec/BrotliInputStream.java @@ -34,6 +34,10 @@ public class BrotliInputStream extends InputStream { this(source, DEFAULT_BUFFER_SIZE); } + public void setEager(boolean eager) { + decoder.setEager(eager); + } + @Override public void close() throws IOException { decoder.close(); diff --git a/java/org/brotli/wrapper/dec/Decoder.java b/java/org/brotli/wrapper/dec/Decoder.java index 95060ae..0326403 100644 --- a/java/org/brotli/wrapper/dec/Decoder.java +++ b/java/org/brotli/wrapper/dec/Decoder.java @@ -19,6 +19,7 @@ public class Decoder { private final DecoderJNI.Wrapper decoder; ByteBuffer buffer; boolean closed; + boolean eager; /** * Creates a Decoder wrapper. @@ -47,6 +48,10 @@ public class Decoder { throw new IOException(message); } + public void setEager(boolean eager) { + this.eager = eager; + } + /** * Continue decoding. 
* @@ -71,6 +76,11 @@ public class Decoder { break; case NEEDS_MORE_INPUT: + // In "eager" mode pulling preempts pushing. + if (eager && decoder.hasOutput()) { + buffer = decoder.pull(); + break; + } ByteBuffer inputBuffer = decoder.getInputBuffer(); inputBuffer.clear(); int bytesRead = source.read(inputBuffer); diff --git a/java/org/brotli/wrapper/dec/DecoderJNI.java b/java/org/brotli/wrapper/dec/DecoderJNI.java index 3a59053..320705c 100644 --- a/java/org/brotli/wrapper/dec/DecoderJNI.java +++ b/java/org/brotli/wrapper/dec/DecoderJNI.java @@ -12,13 +12,13 @@ import java.nio.ByteBuffer; /** * JNI wrapper for brotli decoder. */ -class DecoderJNI { +public class DecoderJNI { private static native ByteBuffer nativeCreate(long[] context); private static native void nativePush(long[] context, int length); private static native ByteBuffer nativePull(long[] context); private static native void nativeDestroy(long[] context); - enum Status { + public enum Status { ERROR, DONE, NEEDS_MORE_INPUT, @@ -26,12 +26,12 @@ class DecoderJNI { OK }; - static class Wrapper { - private final long[] context = new long[2]; + public static class Wrapper { + private final long[] context = new long[3]; private final ByteBuffer inputBuffer; private Status lastStatus = Status.NEEDS_MORE_INPUT; - Wrapper(int inputBufferSize) throws IOException { + public Wrapper(int inputBufferSize) throws IOException { this.context[1] = inputBufferSize; this.inputBuffer = nativeCreate(this.context); if (this.context[0] == 0) { @@ -39,7 +39,7 @@ class DecoderJNI { } } - void push(int length) { + public void push(int length) { if (length < 0) { throw new IllegalArgumentException("negative block length"); } @@ -71,19 +71,23 @@ class DecoderJNI { } } - Status getStatus() { + public Status getStatus() { return lastStatus; } - ByteBuffer getInputBuffer() { + public ByteBuffer getInputBuffer() { return inputBuffer; } - ByteBuffer pull() { + public boolean hasOutput() { + return context[2] != 0; + } + + public 
ByteBuffer pull() { if (context[0] == 0) { throw new IllegalStateException("brotli decoder is already destroyed"); } - if (lastStatus != Status.NEEDS_MORE_OUTPUT) { + if (lastStatus != Status.NEEDS_MORE_OUTPUT && !hasOutput()) { throw new IllegalStateException("pulling output from decoder in " + lastStatus + " state"); } ByteBuffer result = nativePull(context); @@ -94,7 +98,7 @@ class DecoderJNI { /** * Releases native resources. */ - void destroy() { + public void destroy() { if (context[0] == 0) { throw new IllegalStateException("brotli decoder is already destroyed"); } diff --git a/java/org/brotli/wrapper/dec/EagerStreamTest.java b/java/org/brotli/wrapper/dec/EagerStreamTest.java new file mode 100755 index 0000000..9166092 --- /dev/null +++ b/java/org/brotli/wrapper/dec/EagerStreamTest.java @@ -0,0 +1,75 @@ +/* Copyright 2017 Google Inc. All Rights Reserved. + + Distributed under MIT license. + See file LICENSE for detail or copy at https://opensource.org/licenses/MIT +*/ + +package org.brotli.wrapper.dec; + +import static org.junit.Assert.assertEquals; + +import org.brotli.integration.BrotliJniTestBase; +import java.io.IOException; +import java.io.InputStream; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +/** Tests for {@link org.brotli.wrapper.dec.BrotliInputStream}. 
*/ +@RunWith(JUnit4.class) +public class EagerStreamTest extends BrotliJniTestBase { + + @Test + public void testEagerReading() throws IOException { + final StringBuilder log = new StringBuilder(); + final byte[] data = {0, 0, 16, 42, 3}; + InputStream source = new InputStream() { + int index; + + @Override + public int read() { + if (index < data.length) { + log.append("<").append(index); + return data[index++]; + } else { + log.append("<#"); + return -1; + } + } + + @Override + public int read(byte[] b) throws IOException { + return read(b, 0, b.length); + } + + @Override + public int read(byte[] b, int off, int len) throws IOException { + if (len < 1) { + return 0; + } + int d = read(); + if (d == -1) { + return 0; + } + b[off] = (byte) d; + return 1; + } + }; + BrotliInputStream reader = new BrotliInputStream(source); + reader.setEager(true); + int count = 0; + while (true) { + log.append("^").append(count); + int b = reader.read(); + if (b == -1) { + log.append(">#"); + break; + } else { + log.append(">").append(count++); + } + } + // Lazy log: ^0<0<1<2<3<4>0^1># + assertEquals("^0<0<1<2<3>0^1<4>#", log.toString()); + } + +} diff --git a/java/org/brotli/wrapper/dec/decoder_jni.cc b/java/org/brotli/wrapper/dec/decoder_jni.cc index 19c022b..268a10b 100644 --- a/java/org/brotli/wrapper/dec/decoder_jni.cc +++ b/java/org/brotli/wrapper/dec/decoder_jni.cc @@ -45,10 +45,11 @@ Java_org_brotli_wrapper_dec_DecoderJNI_nativeCreate( JNIEnv* env, jobject /*jobj*/, jlongArray ctx) { bool ok = true; DecoderHandle* handle = nullptr; - jlong context[2]; - env->GetLongArrayRegion(ctx, 0, 2, context); + jlong context[3]; + env->GetLongArrayRegion(ctx, 0, 3, context); size_t input_size = context[1]; context[0] = 0; + context[2] = 0; handle = new (std::nothrow) DecoderHandle(); ok = !!handle; @@ -79,7 +80,7 @@ Java_org_brotli_wrapper_dec_DecoderJNI_nativeCreate( delete handle; } - env->SetLongArrayRegion(ctx, 0, 2, context); + env->SetLongArrayRegion(ctx, 0, 3, context); if (!ok) 
{ return nullptr; @@ -105,11 +106,12 @@ Java_org_brotli_wrapper_dec_DecoderJNI_nativeCreate( JNIEXPORT void JNICALL Java_org_brotli_wrapper_dec_DecoderJNI_nativePush( JNIEnv* env, jobject /*jobj*/, jlongArray ctx, jint input_length) { - jlong context[2]; - env->GetLongArrayRegion(ctx, 0, 2, context); + jlong context[3]; + env->GetLongArrayRegion(ctx, 0, 3, context); DecoderHandle* handle = getHandle(reinterpret_cast(context[0])); context[1] = 0; /* ERROR */ - env->SetLongArrayRegion(ctx, 0, 2, context); + context[2] = 0; + env->SetLongArrayRegion(ctx, 0, 3, context); if (input_length != 0) { /* Still have unconsumed data. Workflow is broken. */ @@ -145,7 +147,8 @@ Java_org_brotli_wrapper_dec_DecoderJNI_nativePush( context[1] = 0; break; } - env->SetLongArrayRegion(ctx, 0, 2, context); + context[2] = BrotliDecoderHasMoreOutput(handle->state) ? 1 : 0; + env->SetLongArrayRegion(ctx, 0, 3, context); } /** @@ -158,12 +161,13 @@ Java_org_brotli_wrapper_dec_DecoderJNI_nativePush( JNIEXPORT jobject JNICALL Java_org_brotli_wrapper_dec_DecoderJNI_nativePull( JNIEnv* env, jobject /*jobj*/, jlongArray ctx) { - jlong context[2]; - env->GetLongArrayRegion(ctx, 0, 2, context); + jlong context[3]; + env->GetLongArrayRegion(ctx, 0, 3, context); DecoderHandle* handle = getHandle(reinterpret_cast(context[0])); size_t data_length = 0; const uint8_t* data = BrotliDecoderTakeOutput(handle->state, &data_length); - if (BrotliDecoderHasMoreOutput(handle->state)) { + bool hasMoreOutput = !!BrotliDecoderHasMoreOutput(handle->state); + if (hasMoreOutput) { context[1] = 3; } else if (BrotliDecoderIsFinished(handle->state)) { /* Bytes after stream end are not allowed. */ @@ -172,7 +176,8 @@ Java_org_brotli_wrapper_dec_DecoderJNI_nativePull( /* Can proceed, or more data is required? */ context[1] = (handle->input_offset == handle->input_length) ? 2 : 4; } - env->SetLongArrayRegion(ctx, 0, 2, context); + context[2] = hasMoreOutput ? 
1 : 0; + env->SetLongArrayRegion(ctx, 0, 3, context); return env->NewDirectByteBuffer(const_cast(data), data_length); } @@ -184,8 +189,8 @@ Java_org_brotli_wrapper_dec_DecoderJNI_nativePull( JNIEXPORT void JNICALL Java_org_brotli_wrapper_dec_DecoderJNI_nativeDestroy( JNIEnv* env, jobject /*jobj*/, jlongArray ctx) { - jlong context[2]; - env->GetLongArrayRegion(ctx, 0, 2, context); + jlong context[3]; + env->GetLongArrayRegion(ctx, 0, 3, context); DecoderHandle* handle = getHandle(reinterpret_cast(context[0])); BrotliDecoderDestroyInstance(handle->state); delete[] handle->input_start; diff --git a/research/BUILD b/research/BUILD index 6ff5ac2..211b3e7 100755 --- a/research/BUILD +++ b/research/BUILD @@ -14,6 +14,13 @@ cc_library( ) cc_library( + name = "durchschlag", + srcs = ["durchschlag.cc"], + hdrs = ["durchschlag.h"], + deps = ["@divsufsort//:libdivsufsort"], +) + +cc_library( name = "sieve", srcs = ["sieve.cc"], hdrs = ["sieve.h"], @@ -24,6 +31,7 @@ cc_binary( srcs = ["dictionary_generator.cc"], deps = [ ":dm", + ":durchschlag", ":sieve", ], ) diff --git a/research/BUILD.libdivsufsort b/research/BUILD.libdivsufsort new file mode 100644 index 0000000..ce60e9c --- /dev/null +++ b/research/BUILD.libdivsufsort @@ -0,0 +1,55 @@ +package( + default_visibility = ["//visibility:public"], +) + +cc_library( + name = "libdivsufsort", + srcs = [ + "lib/divsufsort.c", + "lib/sssort.c", + "lib/trsort.c", + "lib/utils.c", + ], + hdrs = [ + "include/config.h", + "include/divsufsort.h", + "include/divsufsort_private.h", + ], + copts = [ + "-DHAVE_CONFIG_H=1", + ], + includes = ["include"], +) + +commom_awk_replaces = ( + "gsub(/#cmakedefine/, \"#define\"); " + + "gsub(/@DIVSUFSORT_EXPORT@/, \"\"); " + + "gsub(/@DIVSUFSORT_IMPORT@/, \"\"); " + + "gsub(/@INLINE@/, \"inline\"); " + + "gsub(/@INCFILE@/, \"#include \"); " + + "gsub(/@SAUCHAR_TYPE@/, \"uint8_t\"); " + + "gsub(/@SAINT32_TYPE@/, \"int32_t\"); " + + "gsub(/@SAINT_PRId@/, \"PRId32\"); " +) + +genrule( + name = 
"config_h", + srcs = ["include/config.h.cmake"], + outs = ["include/config.h"], + cmd = ("awk '{ " + + "gsub(/@HAVE_IO_H 1@/, \"HAVE_IO_H 0\"); " + + commom_awk_replaces + + "print; }' $(<) > $(@)"), +) + +genrule( + name = "divsufsort_h", + srcs = ["include/divsufsort.h.cmake"], + outs = ["include/divsufsort.h"], + cmd = ("awk '{ " + + "gsub(/@W64BIT@/, \"\"); " + + "gsub(/@SAINDEX_TYPE@/, \"int32_t\"); " + + "gsub(/@SAINDEX_PRId@/, \"PRId32\"); " + + commom_awk_replaces + + "print; }' $(<) > $(@)"), +) diff --git a/research/deorummolae.cc b/research/deorummolae.cc index c53a53c..d15b7ee 100644 --- a/research/deorummolae.cc +++ b/research/deorummolae.cc @@ -15,20 +15,31 @@ /* Non tunable definitions. */ #define CHUNK_MASK (CHUNK_SIZE - 1) -#define COVERAGE_SIZE (1 << (LOG_MAX_FILES - 6)) +#define COVERAGE_SIZE (1 << (DM_LOG_MAX_FILES - 6)) /* File coverage: every bit set to 1 denotes a file covered by an isle. */ typedef std::array Coverage; -static int popcount(uint64_t u) { return __builtin_popcountll(u); } +/* Symbol of text alphabet. */ +typedef int32_t TextChar; + +/* Pointer to position in text. */ +typedef uint32_t TextIdx; + +/* SAIS sarray_type; unfortunately, must be a signed type. */ +typedef int32_t TextSaIdx; + +static size_t popcount(uint64_t u) { + return static_cast(__builtin_popcountll(u)); +} /* Condense terminators and pad file entries. 
*/ -static void rewriteText(std::vector* text) { - int terminator = text->back(); - int prev = terminator; - size_t to = 0; - for (size_t from = 0; from < text->size(); ++from) { - int next = text->at(from); +static void rewriteText(std::vector* text) { + TextChar terminator = text->back(); + TextChar prev = terminator; + TextIdx to = 0; + for (TextIdx from = 0; from < text->size(); ++from) { + TextChar next = text->at(from); if (next < 256 || prev < 256) { text->at(to++) = next; if (next >= 256) terminator = next; @@ -41,11 +52,12 @@ static void rewriteText(std::vector* text) { } /* Reenumerate terminators for smaller alphabet. */ -static void remapTerminators(std::vector* text, int* next_terminator) { - int prev = -1; - int x = 256; - for (size_t i = 0; i < text->size(); ++i) { - int next = text->at(i); +static void remapTerminators(std::vector* text, + TextChar* next_terminator) { + TextChar prev = -1; + TextChar x = 256; + for (TextIdx i = 0; i < text->size(); ++i) { + TextChar next = text->at(i); if (next < 256) { // Char. // Do nothing. } else if (prev < 256) { // Terminator after char. @@ -60,15 +72,15 @@ static void remapTerminators(std::vector* text, int* next_terminator) { } /* Combine all file entries; create mapping position->file. 
*/ -static void buildFullText(std::vector>* data, - std::vector* full_text, std::vector* file_map, - std::vector* file_offset, int* next_terminator) { +static void buildFullText(std::vector>* data, + std::vector* full_text, std::vector* file_map, + std::vector* file_offset, TextChar* next_terminator) { file_map->resize(0); file_offset->resize(0); full_text->resize(0); - for (size_t i = 0; i < data->size(); ++i) { + for (TextIdx i = 0; i < data->size(); ++i) { file_offset->push_back(full_text->size()); - std::vector& file = data->at(i); + std::vector& file = data->at(i); rewriteText(&file); full_text->insert(full_text->end(), file.begin(), file.end()); file_map->insert(file_map->end(), file.size() / CHUNK_SIZE, i); @@ -78,18 +90,19 @@ static void buildFullText(std::vector>* data, /* Build longest-common-prefix based on suffix array and text. TODO: borrowed -> unknown efficiency. */ -static void buildLcp(std::vector* text, std::vector* sa, - std::vector* lcp, std::vector* invese_sa) { - int size = static_cast(text->size()); +static void buildLcp(std::vector* text, std::vector* sa, + std::vector* lcp, std::vector* invese_sa) { + TextIdx size = static_cast(text->size()); lcp->resize(size); - int k = 0; + TextIdx k = 0; lcp->at(size - 1) = 0; - for (int i = 0; i < size; ++i) { + for (TextIdx i = 0; i < size; ++i) { if (invese_sa->at(i) == size - 1) { k = 0; continue; } - int j = sa->at(invese_sa->at(i) + 1); // Suffix which follow i-th suffix. + // Suffix which follow i-th suffix. + TextIdx j = sa->at(invese_sa->at(i) + 1); while (i + k < size && j + k < size && text->at(i + k) == text->at(j + k)) { ++k; } @@ -102,21 +115,21 @@ static void buildLcp(std::vector* text, std::vector* sa, When we raise the LCP requirement, the isle sunks and smaller isles appear instead. */ typedef struct { - int lcp; - int l; - int r; + TextIdx lcp; + TextIdx l; + TextIdx r; Coverage coverage; } Isle; /* Helper routine for `cutMatch`. 
*/ -static void poisonData(int pos, int length, std::vector>* data, - std::vector* file_map, std::vector* file_offset, - int* next_terminator) { - size_t f = file_map->at(pos / CHUNK_SIZE); +static void poisonData(TextIdx pos, TextIdx length, + std::vector>* data, std::vector* file_map, + std::vector* file_offset, TextChar* next_terminator) { + TextIdx f = file_map->at(pos / CHUNK_SIZE); pos -= file_offset->at(f); - std::vector& file = data->at(f); - int l = (length == CUT_MATCH) ? CUT_MATCH : 1; - for (int j = 0; j < l; j++, pos++) { + std::vector& file = data->at(f); + TextIdx l = (length == CUT_MATCH) ? CUT_MATCH : 1; + for (TextIdx j = 0; j < l; j++, pos++) { if (file[pos] >= 256) continue; if (file[pos + 1] >= 256) { file[pos] = file[pos + 1]; @@ -131,12 +144,12 @@ static void poisonData(int pos, int length, std::vector>* data, /* Remove substrings of a given match from files. Substrings are replaced with unique terminators, so next iteration SA would not allow to cross removed areas. 
*/ -static void cutMatch(std::vector>* data, int index, int length, - std::vector* sa, std::vector* lcp, std::vector* invese_sa, - int* next_terminator, std::vector* file_map, - std::vector* file_offset) { +static void cutMatch(std::vector>* data, TextIdx index, + TextIdx length, std::vector* sa, std::vector* lcp, + std::vector* invese_sa, TextChar* next_terminator, + std::vector* file_map, std::vector* file_offset) { while (length >= CUT_MATCH) { - int i = index; + TextIdx i = index; while (lcp->at(i) >= length) { i++; poisonData( @@ -156,54 +169,70 @@ static void cutMatch(std::vector>* data, int index, int length, std::string DM_generate(size_t dictionary_size_limit, const std::vector& sample_sizes, const uint8_t* sample_data) { { - uint64_t tmp = 0; - if (popcount(tmp - 1u) != 64) { - fprintf(stderr, "64-bit platform is required\n"); - return 0; + TextIdx tmp = static_cast(dictionary_size_limit); + if ((tmp != dictionary_size_limit) || (tmp > 1u << 30)) { + fprintf(stderr, "dictionary_size_limit is too large\n"); + return ""; } } /* Could use 256 + '0' for easier debugging. 
*/ - int next_terminator = 256; + TextChar next_terminator = 256; std::string output; - std::vector> data; + std::vector> data; - size_t offset = 0; + TextIdx offset = 0; size_t num_samples = sample_sizes.size(); - if (num_samples > MAX_FILES) num_samples = MAX_FILES; + if (num_samples > DM_MAX_FILES) num_samples = DM_MAX_FILES; for (size_t n = 0; n < num_samples; ++n) { - size_t next_offset = offset + sample_sizes[n]; + TextIdx delta = static_cast(sample_sizes[n]); + if (delta != sample_sizes[n]) { + fprintf(stderr, "sample is too large\n"); + return ""; + } + if (delta == 0) { + fprintf(stderr, "0-length samples are prohibited\n"); + return ""; + } + TextIdx next_offset = offset + delta; + if (next_offset <= offset) { + fprintf(stderr, "corpus is too large\n"); + return ""; + } data.push_back( - std::vector(sample_data + offset, sample_data + next_offset)); + std::vector(sample_data + offset, sample_data + next_offset)); offset = next_offset; data.back().push_back(next_terminator++); } /* Most arrays are allocated once, and then just resized to smaller and smaller sizes. 
*/ - std::vector full_text; - std::vector file_map; - std::vector file_offset; - std::vector sa; - std::vector invese_sa; - std::vector lcp; + std::vector full_text; + std::vector file_map; + std::vector file_offset; + std::vector sa; + std::vector invese_sa; + std::vector lcp; std::vector isles; std::vector output_data; - size_t total = 0; - size_t total_cost = 0; - size_t best_cost; + TextIdx total = 0; + TextIdx total_cost = 0; + TextIdx best_cost; Isle best_isle; - int min_count = num_samples; + size_t min_count = num_samples; while (true) { - size_t max_match = dictionary_size_limit - total; + TextIdx max_match = static_cast(dictionary_size_limit) - total; buildFullText(&data, &full_text, &file_map, &file_offset, &next_terminator); sa.resize(full_text.size()); - saisxx(full_text.data(), sa.data(), static_cast(full_text.size()), - next_terminator); + /* Hopefully, non-negative TextSaIdx is the same as its TextIdx counterpart. */ + saisxx(full_text.data(), reinterpret_cast(sa.data()), + static_cast(full_text.size()), next_terminator); invese_sa.resize(full_text.size()); - for (int i = 0; i < full_text.size(); ++i) invese_sa[sa[i]] = i; + for (TextIdx i = 0; i < full_text.size(); ++i) { + invese_sa[sa[i]] = i; + } buildLcp(&full_text, &sa, &lcp, &invese_sa); /* Do not rebuild SA/LCP, just use different selection. 
*/ @@ -213,22 +242,22 @@ std::string DM_generate(size_t dictionary_size_limit, isles.resize(0); isles.push_back(best_isle); - for (int i = 0; i < static_cast(lcp.size()); ++i) { - int l = i; + for (TextIdx i = 0; i < lcp.size(); ++i) { + TextIdx l = i; Coverage cov = {{0}}; - int f = file_map[sa[i] / CHUNK_SIZE]; - cov[f >> 6] = ((uint64_t)1) << (f & 63); + size_t f = file_map[sa[i] / CHUNK_SIZE]; + cov[f >> 6] = (static_cast(1)) << (f & 63); while (lcp[i] < isles.back().lcp) { Isle& top = isles.back(); top.r = i; l = top.l; for (size_t x = 0; x < cov.size(); ++x) cov[x] |= top.coverage[x]; - int count = 0; + size_t count = 0; for (size_t x = 0; x < cov.size(); ++x) count += popcount(cov[x]); - int effective_lcp = top.lcp; + TextIdx effective_lcp = top.lcp; /* Restrict (last) dictionary entry length. */ if (effective_lcp > max_match) effective_lcp = max_match; - int cost = count * effective_lcp; + TextIdx cost = count * effective_lcp; if (cost > best_cost && count >= min_count && effective_lcp >= MIN_MATCH) { best_cost = cost; @@ -251,14 +280,14 @@ std::string DM_generate(size_t dictionary_size_limit, if (best_cost == 0 || best_isle.lcp < MIN_MATCH) { if (min_count >= 8) { min_count = (min_count * 7) / 8; - fprintf(stderr, "Retry: min_count=%d\n", min_count); + fprintf(stderr, "Retry: min_count=%zu\n", min_count); goto retry; } break; } /* Save the entry. */ - fprintf(stderr, "Savings: %zu+%zu, dictionary: %zu+%d\n", + fprintf(stderr, "Savings: %d+%d, dictionary: %d+%d\n", total_cost, best_cost, total, best_isle.lcp); int* piece = &full_text[sa[best_isle.l]]; output.insert(output.end(), piece, piece + best_isle.lcp); diff --git a/research/deorummolae.h b/research/deorummolae.h index 7f24add..5815097 100644 --- a/research/deorummolae.h +++ b/research/deorummolae.h @@ -1,17 +1,16 @@ #ifndef BROTLI_RESEARCH_DEORUMMOLAE_H_ #define BROTLI_RESEARCH_DEORUMMOLAE_H_ -#include -#include - +#include +#include #include #include /* log2(maximal number of files). 
Value 6 provides some speedups. */ -#define LOG_MAX_FILES 6 +#define DM_LOG_MAX_FILES 6 /* Non tunable definitions. */ -#define MAX_FILES (1 << LOG_MAX_FILES) +#define DM_MAX_FILES (1 << DM_LOG_MAX_FILES) /** * Generate a dictionary for given samples. diff --git a/research/dictionary_generator.cc b/research/dictionary_generator.cc index b3ee89c..00cfaba 100755 --- a/research/dictionary_generator.cc +++ b/research/dictionary_generator.cc @@ -1,15 +1,20 @@ +#include #include #include #include #include #include "./deorummolae.h" +#include "./durchschlag.h" #include "./sieve.h" #define METHOD_DM 0 #define METHOD_SIEVE 1 +#define METHOD_DURCHSCHLAG 2 +#define METHOD_DISTILL 3 +#define METHOD_PURIFY 4 -size_t readInt(const char* str) { +static size_t readInt(const char* str) { size_t result = 0; if (str[0] == 0 || str[0] == '0') { return 0; @@ -51,10 +56,25 @@ static std::string readFile(const std::string& path) { static void writeFile(const char* file, const std::string& content) { std::ofstream outfile(file, std::ofstream::binary); - outfile.write(content.c_str(), content.size()); + outfile.write(content.c_str(), static_cast(content.size())); outfile.close(); } +static void writeSamples(char const* argv[], const std::vector& pathArgs, + const std::vector& sizes, const uint8_t* data) { + size_t offset = 0; + for (size_t i = 0; i < pathArgs.size(); ++i) { + int j = pathArgs[i]; + const char* file = argv[j]; + size_t sampleSize = sizes[i]; + std::ofstream outfile(file, std::ofstream::binary); + outfile.write(reinterpret_cast(data + offset), + static_cast(sampleSize)); + outfile.close(); + offset += sampleSize; + } +} + /* Returns "base file name" or its tail, if it contains '/' or '\'. */ static const char* fileName(const char* path) { const char* separator_position = strrchr(path, '/'); @@ -68,21 +88,32 @@ static void printHelp(const char* name) { fprintf(stderr, "Usage: %s [OPTION]... 
DICTIONARY [SAMPLE]...\n", name); fprintf(stderr, "Options:\n" - " --dm use 'deorummolae' engine\n" - " --sieve use 'sieve' engine (default)\n" - " -t# set target dictionary size (limit); default: 16K\n" - " -s# set slize length for 'sieve'; default: 33\n" - "# is a decimal number with optional k/K/m/M suffix.\n\n"); + " --dm use 'deorummolae' engine\n" + " --distill rewrite samples; unique text parts are removed\n" + " --dsh use 'durchschlag' engine (default)\n" + " --purify rewrite samples; unique text parts are zeroed out\n" + " --sieve use 'sieve' engine\n" + " -b# set block length for 'durchschlag'; default: 1024\n" + " -s# set slice length for 'distill', 'durchschlag', 'purify'\n" + " and 'sieve'; default: 16\n" + " -t# set target dictionary size (limit); default: 16K\n" + " -u# set minimum slice population (for rewrites); default: 2\n" + "# is a decimal number with optional k/K/m/M suffix.\n" + "WARNING: 'distill' and 'purify' will overwrite original samples!\n" + " Completely unique samples might become empty files.\n\n"); } int main(int argc, char const* argv[]) { int dictionaryArg = -1; - int method = METHOD_SIEVE; - int sieveSliceLen = 33; - int targetSize = 16 << 10; + int method = METHOD_DURCHSCHLAG; + size_t sliceLen = 16; + size_t targetSize = 16 << 10; + size_t blockSize = 1024; + size_t minimumPopulation = 2; std::vector data; std::vector sizes; + std::vector pathArgs; size_t total = 0; for (int i = 1; i < argc; ++i) { if (argv[i] == nullptr) { @@ -90,6 +121,12 @@ int main(int argc, char const* argv[]) { } if (argv[i][0] == '-') { if (argv[i][1] == '-') { + if (dictionaryArg != -1) { + fprintf(stderr, + "Method should be specified before dictionary / sample '%s'\n", + argv[i]); + exit(1); + } if (std::strcmp("--sieve", argv[i]) == 0) { method = METHOD_SIEVE; continue; @@ -98,13 +135,32 @@ int main(int argc, char const* argv[]) { method = METHOD_DM; continue; } + if (std::strcmp("--dsh", argv[i]) == 0) { + method = METHOD_DURCHSCHLAG; + continue; + 
} + if (std::strcmp("--distill", argv[i]) == 0) { + method = METHOD_DISTILL; + continue; + } + if (std::strcmp("--purify", argv[i]) == 0) { + method = METHOD_PURIFY; + continue; + } printHelp(fileName(argv[0])); fprintf(stderr, "Invalid option '%s'\n", argv[i]); exit(1); } - if (argv[i][1] == 's') { - sieveSliceLen = readInt(&argv[i][2]); - if (sieveSliceLen < 4 || sieveSliceLen > 256) { + if (argv[i][1] == 'b') { + blockSize = readInt(&argv[i][2]); + if (blockSize < 16 || blockSize > 65536) { + printHelp(fileName(argv[0])); + fprintf(stderr, "Invalid option '%s'\n", argv[i]); + exit(1); + } + } else if (argv[i][1] == 's') { + sliceLen = readInt(&argv[i][2]); + if (sliceLen < 4 || sliceLen > 256) { printHelp(fileName(argv[0])); fprintf(stderr, "Invalid option '%s'\n", argv[i]); exit(1); @@ -116,6 +172,13 @@ int main(int argc, char const* argv[]) { fprintf(stderr, "Invalid option '%s'\n", argv[i]); exit(1); } + } else if (argv[i][1] == 'u') { + minimumPopulation = readInt(&argv[i][2]); + if (minimumPopulation < 256 || minimumPopulation > 65536) { + printHelp(fileName(argv[0])); + fprintf(stderr, "Invalid option '%s'\n", argv[i]); + exit(1); + } } else { printHelp(fileName(argv[0])); fprintf(stderr, "Unrecognized option '%s'\n", argv[i]); @@ -124,26 +187,42 @@ int main(int argc, char const* argv[]) { continue; } if (dictionaryArg == -1) { - dictionaryArg = i; - continue; + if (method != METHOD_DISTILL && method != METHOD_PURIFY) { + dictionaryArg = i; + continue; + } } std::string content = readFile(argv[i]); data.insert(data.end(), content.begin(), content.end()); total += content.size(); + pathArgs.push_back(i); sizes.push_back(content.size()); } - if (dictionaryArg == -1 || total == 0) { + bool wantDictionary = (dictionaryArg == -1); + if (method == METHOD_DISTILL || method == METHOD_PURIFY) { + wantDictionary = false; + } + if (wantDictionary || total == 0) { printHelp(fileName(argv[0])); fprintf(stderr, "Not enough arguments\n"); exit(1); } if (method == 
METHOD_SIEVE) { - writeFile(argv[dictionaryArg], - sieve_generate(targetSize, sieveSliceLen, sizes, data.data())); + writeFile(argv[dictionaryArg], sieve_generate( + targetSize, sliceLen, sizes, data.data())); } else if (method == METHOD_DM) { - writeFile(argv[dictionaryArg], - DM_generate(targetSize, sizes, data.data())); + writeFile(argv[dictionaryArg], DM_generate( + targetSize, sizes, data.data())); + } else if (method == METHOD_DURCHSCHLAG) { + writeFile(argv[dictionaryArg], durchschlag_generate( + targetSize, sliceLen, blockSize, sizes, data.data())); + } else if (method == METHOD_DISTILL) { + durchschlag_distill(sliceLen, minimumPopulation, &sizes, data.data()); + writeSamples(argv, pathArgs, sizes, data.data()); + } else if (method == METHOD_PURIFY) { + durchschlag_purify(sliceLen, minimumPopulation, sizes, data.data()); + writeSamples(argv, pathArgs, sizes, data.data()); } else { printHelp(fileName(argv[0])); fprintf(stderr, "Unknown generator\n"); diff --git a/research/draw_diff.cc b/research/draw_diff.cc index 01b6716..6541dac 100644 --- a/research/draw_diff.cc +++ b/research/draw_diff.cc @@ -20,18 +20,23 @@ #define CHECK(X) if (!(X)) exit(EXIT_FAILURE); #endif -void ReadPGM(FILE* f, uint8_t*** image, size_t* height, size_t* width) { +typedef uint8_t* ScanLine; +typedef ScanLine* Image; + +void ReadPGM(FILE* f, Image* image, size_t* height, size_t* width) { int colors; CHECK(fscanf(f, "P5\n%lu %lu\n%d\n", width, height, &colors) == 3); assert(colors == 255); - *image = new uint8_t*[*height]; + ScanLine* lines = new ScanLine[*height]; + *image = lines; for (int i = *height - 1; i >= 0; --i) { - (*image)[i] = new uint8_t[*width]; - CHECK(fread((*image)[i], 1, *width, f) == *width); + ScanLine line = new uint8_t[*width]; + lines[i] = line; + CHECK(fread(line, 1, *width, f) == *width); } } -void CalculateDiff(int** diff, uint8_t** image1, uint8_t** image2, +void CalculateDiff(int** diff, Image image1, Image image2, size_t height, size_t width) { for (size_t 
i = 0; i < height; ++i) { for (size_t j = 0; j < width; ++j) { @@ -40,7 +45,7 @@ void CalculateDiff(int** diff, uint8_t** image1, uint8_t** image2, } } -void DrawDiff(int** diff, uint8_t** image1, uint8_t** image2, +void DrawDiff(int** diff, Image image1, Image image2, size_t height, size_t width, FILE* f) { int max = -1234; int min = +1234; @@ -78,13 +83,13 @@ void DrawDiff(int** diff, uint8_t** image1, uint8_t** image2, delete[] row; } -int main(int argc, char* argv[]) { +int main(int argc, char** argv) { if (argc != 4) { printf("usage: %s pgm1 pgm2 diff_ppm_path\n", argv[0]); return 1; } - uint8_t **image1, **image2; + Image image1, image2; size_t h1, w1, h2, w2; FILE* fimage1 = fopen(argv[1], "rb"); diff --git a/research/durchschlag.cc b/research/durchschlag.cc new file mode 100755 index 0000000..cc4ed68 --- /dev/null +++ b/research/durchschlag.cc @@ -0,0 +1,714 @@ +#include "./durchschlag.h" + +#include +#include /* terminate */ + +#include "divsufsort.h" + +/* Pointer to position in text. */ +typedef DurchschlagTextIdx TextIdx; + +/* (Sum of) value(s) of slice(s). 
*/ +typedef uint32_t Score; + +typedef struct HashSlot { + TextIdx next; + TextIdx offset; +} HashSlot; + +typedef struct MetaSlot { + TextIdx mark; + Score score; +} MetaSlot; + +typedef struct Range { + TextIdx start; + TextIdx end; +} Range; + +typedef struct Candidate { + Score score; + TextIdx position; +} Candidate; + +struct greaterScore { + bool operator()(const Candidate& a, const Candidate& b) const { + return (a.score > b.score) || + ((a.score == b.score) && (a.position < b.position)); + } +}; + +struct lessScore { + bool operator()(const Candidate& a, const Candidate& b) const { + return (a.score < b.score) || + ((a.score == b.score) && (a.position > b.position)); + } +}; + +#define CANDIDATE_BUNDLE_SIZE (1 << 18) + +static void fatal(const char* error) { + fprintf(stderr, "%s\n", error); + std::terminate(); +} + +static TextIdx calculateDictionarySize(const std::vector& ranges) { + TextIdx result = 0; + for (size_t i = 0; i < ranges.size(); ++i) { + const Range& r = ranges[i]; + result += r.end - r.start; + } + return result; +} + +static std::string createDictionary( + const uint8_t* data, const std::vector& ranges, size_t limit) { + std::string output; + output.reserve(calculateDictionarySize(ranges)); + for (size_t i = 0; i < ranges.size(); ++i) { + const Range& r = ranges[i]; + output.insert(output.end(), &data[r.start], &data[r.end]); + } + if (output.size() > limit) { + output.resize(limit); + } + return output; +} + +static Score buildCandidatesList(std::vector* candidates, + std::vector* map, TextIdx span, const TextIdx* shortcut, + TextIdx end) { + candidates->resize(0); + + size_t n = map->size(); + MetaSlot* slots = map->data(); + for (size_t j = 0; j < n; ++j) { + slots[j].mark = 0; + } + + Score score = 0; + for (size_t j = 0; j < span; ++j) { + MetaSlot& item = slots[shortcut[j]]; + if (item.mark == 0) { + score += item.score; + } + item.mark++; + } + + TextIdx i = 0; + TextIdx limit = std::min(end, CANDIDATE_BUNDLE_SIZE); + Score 
maxScore = 0; + for (; i < limit; ++i) { + MetaSlot& pick = slots[shortcut[i + span]]; + if (pick.mark == 0) { + score += pick.score; + } + pick.mark++; + + if (score > maxScore) { + maxScore = score; + } + candidates->push_back({score, i}); + + MetaSlot& drop = slots[shortcut[i]]; + drop.mark--; + if (drop.mark == 0) { + score -= drop.score; + } + } + + std::make_heap(candidates->begin(), candidates->end(), greaterScore()); + Score minScore = candidates->at(0).score; + for (; i < end; ++i) { + MetaSlot& pick = slots[shortcut[i + span]]; + if (pick.mark == 0) { + score += pick.score; + } + pick.mark++; + + if (score > maxScore) { + maxScore = score; + } + if (score >= minScore) { + candidates->push_back({score, i}); + std::push_heap(candidates->begin(), candidates->end(), greaterScore()); + if (candidates->size() > CANDIDATE_BUNDLE_SIZE && maxScore != minScore) { + while (candidates->at(0).score == minScore) { + std::pop_heap(candidates->begin(), candidates->end(), greaterScore()); + candidates->pop_back(); + } + minScore = candidates->at(0).score; + } + } + + MetaSlot& drop = slots[shortcut[i]]; + drop.mark--; + if (drop.mark == 0) { + score -= drop.score; + } + } + + for (size_t j = 0; j < n; ++j) { + slots[j].mark = 0; + } + + std::make_heap(candidates->begin(), candidates->end(), lessScore()); + return minScore; +} + +static Score rebuildCandidatesList(std::vector* candidates, + std::vector* map, TextIdx span, const TextIdx* shortcut, + TextIdx end, TextIdx* next) { + size_t n = candidates->size(); + TextIdx* data = candidates->data(); + for (size_t i = 0; i < n; ++i) { + data[i] = 0; + } + + n = map->size(); + MetaSlot* slots = map->data(); + for (size_t i = 0; i < n; ++i) { + slots[i].mark = 0; + } + + Score score = 0; + for (TextIdx i = 0; i < span; ++i) { + MetaSlot& item = slots[shortcut[i]]; + if (item.mark == 0) { + score += item.score; + } + item.mark++; + } + + Score maxScore = 0; + for (TextIdx i = 0; i < end; ++i) { + MetaSlot& pick = 
slots[shortcut[i + span]]; + if (pick.mark == 0) { + score += pick.score; + } + pick.mark++; + + if (candidates->size() <= score) { + candidates->resize(score + 1); + } + if (score > maxScore) { + maxScore = score; + } + next[i] = candidates->at(score); + candidates->at(score) = i; + + MetaSlot& drop = slots[shortcut[i]]; + drop.mark--; + if (drop.mark == 0) { + score -= drop.score; + } + } + + for (size_t i = 0; i < n; ++i) { + slots[i].mark = 0; + } + + candidates->resize(maxScore + 1); + return maxScore; +} + +static void addRange(std::vector* ranges, TextIdx start, TextIdx end) { + for (auto it = ranges->begin(); it != ranges->end();) { + if (end < it->start) { + ranges->insert(it, {start, end}); + return; + } + if (it->end < start) { + it++; + continue; + } + // Combine with existing. + start = std::min(start, it->start); + end = std::max(end, it->end); + // Remove consumed vector and continue. + it = ranges->erase(it); + } + ranges->push_back({start, end}); +} + +std::string durchschlag_generate( + size_t dictionary_size_limit, size_t slice_len, size_t block_len, + const std::vector& sample_sizes, const uint8_t* sample_data) { + DurchschlagContext ctx = durchschlag_prepare( + slice_len, sample_sizes, sample_data); + return durchschlag_generate(DURCHSCHLAG_COLLABORATIVE, + dictionary_size_limit, block_len, ctx, sample_data); +} + +DurchschlagContext durchschlag_prepare(size_t slice_len, + const std::vector& sample_sizes, const uint8_t* sample_data) { + /* Parameters aliasing */ + TextIdx sliceLen = static_cast(slice_len); + if (sliceLen != slice_len) fatal("slice_len is too large"); + if (sliceLen < 1) fatal("slice_len is too small"); + const uint8_t* data = sample_data; + + TextIdx total = 0; + std::vector offsets; + offsets.reserve(sample_sizes.size()); + for (size_t i = 0; i < sample_sizes.size(); ++i) { + TextIdx delta = static_cast(sample_sizes[i]); + if (delta != sample_sizes[i]) fatal("sample is too large"); + if (delta == 0) fatal("0-length samples are 
prohibited"); + TextIdx next_total = total + delta; + if (next_total <= total) fatal("corpus is too large"); + total = next_total; + offsets.push_back(total); + } + + if (total < sliceLen) fatal("slice_len is larger than corpus size"); + TextIdx end = total - static_cast(sliceLen) + 1; + TextIdx hashLen = 11; + while (hashLen < 29 && ((1u << hashLen) < end)) { + hashLen += 3; + } + hashLen -= 3; + TextIdx hashMask = (1u << hashLen) - 1u; + std::vector hashHead(1 << hashLen); + TextIdx hash = 0; + TextIdx lShift = 3; + TextIdx rShift = hashLen - lShift; + for (TextIdx i = 0; i < sliceLen - 1; ++i) { + TextIdx v = data[i]; + hash = (((hash << lShift) | (hash >> rShift)) & hashMask) ^ v; + } + TextIdx lShiftX = (lShift * (sliceLen - 1)) % hashLen; + TextIdx rShiftX = hashLen - lShiftX; + + std::vector map; + map.push_back({0, 0}); + TextIdx hashSlot = 1; + std::vector sliceMap; + sliceMap.reserve(end); + for (TextIdx i = 0; i < end; ++i) { + TextIdx v = data[i + sliceLen - 1]; + TextIdx bucket = (((hash << lShift) | (hash >> rShift)) & hashMask) ^ v; + v = data[i]; + hash = bucket ^ (((v << lShiftX) | (v >> rShiftX)) & hashMask); + TextIdx slot = hashHead[bucket]; + while (slot != 0) { + HashSlot& item = map[slot]; + TextIdx start = item.offset; + bool miss = false; + for (TextIdx j = 0; j < sliceLen; ++j) { + if (data[i + j] != data[start + j]) { + miss = true; + break; + } + } + if (!miss) { + sliceMap.push_back(slot); + break; + } + slot = item.next; + } + if (slot == 0) { + map.push_back({hashHead[bucket], i}); + hashHead[bucket] = hashSlot; + sliceMap.push_back(hashSlot); + hashSlot++; + } + } + + return {total, sliceLen, static_cast(map.size()), + std::move(offsets), std::move(sliceMap)}; +} + +DurchschlagContext durchschlag_prepare(size_t slice_len, + const std::vector& sample_sizes, const DurchschlagIndex& index) { + /* Parameters aliasing */ + TextIdx sliceLen = static_cast(slice_len); + if (sliceLen != slice_len) fatal("slice_len is too large"); + if 
(sliceLen < 1) fatal("slice_len is too small"); + const TextIdx* lcp = index.lcp.data(); + const TextIdx* sa = index.sa.data(); + + TextIdx total = 0; + std::vector offsets; + offsets.reserve(sample_sizes.size()); + for (size_t i = 0; i < sample_sizes.size(); ++i) { + TextIdx delta = static_cast(sample_sizes[i]); + if (delta != sample_sizes[i]) fatal("sample is too large"); + if (delta == 0) fatal("0-length samples are prohibited"); + TextIdx next_total = total + delta; + if (next_total <= total) fatal("corpus is too large"); + total = next_total; + offsets.push_back(total); + } + + if (total < sliceLen) fatal("slice_len is larger than corpus size"); + TextIdx counter = 1; + TextIdx end = total - sliceLen + 1; + std::vector sliceMap(total); + TextIdx last = 0; + TextIdx current = 1; + while (current <= total) { + if (lcp[current - 1] < sliceLen) { + for (TextIdx i = last; i < current; ++i) { + sliceMap[sa[i]] = counter; + } + counter++; + last = current; + } + current++; + } + sliceMap.resize(end); + + // Reorder items for the better locality. + std::vector reorder(counter); + counter = 1; + for (TextIdx i = 0; i < end; ++i) { + if (reorder[sliceMap[i]] == 0) { + reorder[sliceMap[i]] = counter++; + } + } + for (TextIdx i = 0; i < end; ++i) { + sliceMap[i] = reorder[sliceMap[i]]; + } + + return {total, sliceLen, counter, std::move(offsets), std::move(sliceMap)}; +} + +DurchschlagIndex durchschlag_index(const std::vector& data) { + TextIdx total = static_cast(data.size()); + if (total != data.size()) fatal("corpus is too large"); + saidx_t saTotal = static_cast(total); + if (saTotal < 0) fatal("corpus is too large"); + if (static_cast(saTotal) != total) fatal("corpus is too large"); + std::vector sa(total); + /* Hopefully, non-negative int32_t values match TextIdx ones. 
*/ + if (sizeof(TextIdx) != sizeof(int32_t)) fatal("type length mismatch"); + int32_t* saData = reinterpret_cast(sa.data()); + divsufsort(data.data(), saData, saTotal); + + std::vector isa(total); + for (TextIdx i = 0; i < total; ++i) isa[sa[i]] = i; + + // TODO: borrowed -> unknown efficiency. + std::vector lcp(total); + TextIdx k = 0; + lcp[total - 1] = 0; + for (TextIdx i = 0; i < total; ++i) { + TextIdx current = isa[i]; + if (current == total - 1) { + k = 0; + continue; + } + TextIdx j = sa[current + 1]; // Suffix which follow i-th suffix. + while ((i + k < total) && (j + k < total) && (data[i + k] == data[j + k])) { + ++k; + } + lcp[current] = k; + if (k > 0) --k; + } + + return {std::move(lcp), std::move(sa)}; +} + +static void ScoreSlices(const std::vector& offsets, + std::vector& map, const TextIdx* shortcut, TextIdx end) { + TextIdx piece = 0; + /* Fresh map contains all zeroes -> initial mark should be different. */ + TextIdx mark = 1; + for (TextIdx i = 0; i < end; ++i) { + if (offsets[piece] == i) { + piece++; + mark++; + } + MetaSlot& item = map[shortcut[i]]; + if (item.mark != mark) { + item.mark = mark; + item.score++; + } + } +} + +static std::string durchschlagGenerateExclusive( + size_t dictionary_size_limit, size_t block_len, + const DurchschlagContext& context, const uint8_t* sample_data) { + /* Parameters aliasing */ + TextIdx targetSize = static_cast(dictionary_size_limit); + if (targetSize != dictionary_size_limit) { + fprintf(stderr, "dictionary_size_limit is too large\n"); + return ""; + } + TextIdx sliceLen = context.sliceLen; + TextIdx total = context.dataSize; + TextIdx blockLen = static_cast(block_len); + if (blockLen != block_len) { + fprintf(stderr, "block_len is too large\n"); + return ""; + } + const uint8_t* data = sample_data; + const std::vector& offsets = context.offsets; + std::vector map(context.numUniqueSlices); + const TextIdx* shortcut = context.sliceMap.data(); + + /* Initialization */ + if (blockLen < sliceLen) { + 
fprintf(stderr, "sliceLen is larger than block_len\n"); + return ""; + } + if (targetSize < blockLen || total < blockLen) { + fprintf(stderr, "block_len is too large\n"); + return ""; + } + TextIdx end = total - sliceLen + 1; + ScoreSlices(offsets, map, shortcut, end); + end = total - blockLen + 1; + std::vector candidates; + std::vector next(end); + TextIdx span = blockLen - sliceLen + 1; + Score maxScore = rebuildCandidatesList( + &candidates, &map, span, shortcut, end, next.data()); + + /* Block selection */ + const size_t triesLimit = (600 * 1000000) / span; + const size_t candidatesLimit = (150 * 1000000) / span; + std::vector ranges; + TextIdx mark = 0; + size_t numTries = 0; + while (true) { + TextIdx dictSize = calculateDictionarySize(ranges); + size_t numCandidates = 0; + if (dictSize > targetSize - blockLen) { + break; + } + if (maxScore == 0) { + break; + } + while (true) { + TextIdx candidate = 0; + while (maxScore > 0) { + if (candidates[maxScore] != 0) { + candidate = candidates[maxScore]; + candidates[maxScore] = next[candidate]; + break; + } + maxScore--; + } + if (maxScore == 0) { + break; + } + mark++; + numTries++; + numCandidates++; + Score score = 0; + for (size_t j = candidate; j <= candidate + span; ++j) { + MetaSlot& item = map[shortcut[j]]; + if (item.mark != mark) { + score += item.score; + item.mark = mark; + } + } + if (score < maxScore) { + if (numTries < triesLimit && numCandidates < candidatesLimit) { + next[candidate] = candidates[score]; + candidates[score] = candidate; + } else { + maxScore = rebuildCandidatesList( + &candidates, &map, span, shortcut, end, next.data()); + mark = 0; + numTries = 0; + numCandidates = 0; + } + continue; + } else if (score > maxScore) { + fprintf(stderr, "Broken invariant\n"); + return ""; + } + for (TextIdx j = candidate; j <= candidate + span; ++j) { + MetaSlot& item = map[shortcut[j]]; + item.score = 0; + } + addRange(&ranges, candidate, candidate + blockLen); + break; + } + } + + return 
createDictionary(data, ranges, targetSize); +} + +static std::string durchschlagGenerateCollaborative( + size_t dictionary_size_limit, size_t block_len, + const DurchschlagContext& context, const uint8_t* sample_data) { + /* Parameters aliasing */ + TextIdx targetSize = static_cast(dictionary_size_limit); + if (targetSize != dictionary_size_limit) { + fprintf(stderr, "dictionary_size_limit is too large\n"); + return ""; + } + TextIdx sliceLen = context.sliceLen; + TextIdx total = context.dataSize; + TextIdx blockLen = static_cast(block_len); + if (blockLen != block_len) { + fprintf(stderr, "block_len is too large\n"); + return ""; + } + const uint8_t* data = sample_data; + const std::vector& offsets = context.offsets; + std::vector map(context.numUniqueSlices); + const TextIdx* shortcut = context.sliceMap.data(); + + /* Initialization */ + if (blockLen < sliceLen) { + fprintf(stderr, "sliceLen is larger than block_len\n"); + return ""; + } + if (targetSize < blockLen || total < blockLen) { + fprintf(stderr, "block_len is too large\n"); + return ""; + } + TextIdx end = total - sliceLen + 1; + ScoreSlices(offsets, map, shortcut, end); + end = total - blockLen + 1; + std::vector candidates; + candidates.reserve(CANDIDATE_BUNDLE_SIZE + 1024); + TextIdx span = blockLen - sliceLen + 1; + Score minScore = buildCandidatesList(&candidates, &map, span, shortcut, end); + + /* Block selection */ + std::vector ranges; + TextIdx mark = 0; + while (true) { + TextIdx dictSize = calculateDictionarySize(ranges); + if (dictSize > targetSize - blockLen) { + break; + } + if (minScore == 0 && candidates.empty()) { + break; + } + while (true) { + if (candidates.empty()) { + minScore = buildCandidatesList(&candidates, &map, span, shortcut, end); + mark = 0; + } + TextIdx candidate = candidates[0].position; + Score expectedScore = candidates[0].score; + if (expectedScore == 0) { + candidates.resize(0); + break; + } + std::pop_heap(candidates.begin(), candidates.end(), lessScore()); + 
candidates.pop_back(); + mark++; + Score score = 0; + for (TextIdx j = candidate; j <= candidate + span; ++j) { + MetaSlot& item = map[shortcut[j]]; + if (item.mark != mark) { + score += item.score; + item.mark = mark; + } + } + if (score < expectedScore) { + if (score >= minScore) { + candidates.push_back({score, candidate}); + std::push_heap(candidates.begin(), candidates.end(), lessScore()); + } + continue; + } else if (score > expectedScore) { + fatal("Broken invariant"); + } + for (TextIdx j = candidate; j <= candidate + span; ++j) { + MetaSlot& item = map[shortcut[j]]; + item.score = 0; + } + addRange(&ranges, candidate, candidate + blockLen); + break; + } + } + + return createDictionary(data, ranges, targetSize); +} + +std::string durchschlag_generate(DurchschalgResourceStrategy strategy, + size_t dictionary_size_limit, size_t block_len, + const DurchschlagContext& context, const uint8_t* sample_data) { + if (strategy == DURCHSCHLAG_COLLABORATIVE) { + return durchschlagGenerateCollaborative( + dictionary_size_limit, block_len, context, sample_data); + } else { + return durchschlagGenerateExclusive( + dictionary_size_limit, block_len, context, sample_data); + } +} + +void durchschlag_distill(size_t slice_len, size_t minimum_population, + std::vector* sample_sizes, uint8_t* sample_data) { + /* Parameters aliasing */ + uint8_t* data = sample_data; + + /* Build slice map. */ + DurchschlagContext context = durchschlag_prepare( + slice_len, *sample_sizes, data); + + /* Calculate slice population. */ + const std::vector& offsets = context.offsets; + std::vector map(context.numUniqueSlices); + const TextIdx* shortcut = context.sliceMap.data(); + TextIdx sliceLen = context.sliceLen; + TextIdx total = context.dataSize; + TextIdx end = total - sliceLen + 1; + ScoreSlices(offsets, map, shortcut, end); + + /* Condense samples, omitting unique slices. 
*/ + TextIdx readPos = 0; + TextIdx writePos = 0; + TextIdx lastNonUniquePos = 0; + for (TextIdx i = 0; i < sample_sizes->size(); ++i) { + TextIdx sampleStart = writePos; + TextIdx oldSampleEnd = + readPos + static_cast(sample_sizes->at(i)); + while (readPos < oldSampleEnd) { + if (readPos < end) { + MetaSlot& item = map[shortcut[readPos]]; + if (item.score >= minimum_population) { + lastNonUniquePos = readPos + sliceLen; + } + } + if (readPos < lastNonUniquePos) { + data[writePos++] = data[readPos]; + } + readPos++; + } + sample_sizes->at(i) = writePos - sampleStart; + } +} + +void durchschlag_purify(size_t slice_len, size_t minimum_population, + const std::vector& sample_sizes, uint8_t* sample_data) { + /* Parameters aliasing */ + uint8_t* data = sample_data; + + /* Build slice map. */ + DurchschlagContext context = durchschlag_prepare( + slice_len, sample_sizes, data); + + /* Calculate slice population. */ + const std::vector& offsets = context.offsets; + std::vector map(context.numUniqueSlices); + const TextIdx* shortcut = context.sliceMap.data(); + TextIdx sliceLen = context.sliceLen; + TextIdx total = context.dataSize; + TextIdx end = total - sliceLen + 1; + ScoreSlices(offsets, map, shortcut, end); + + /* Rewrite samples, zeroing out unique slices. */ + TextIdx lastNonUniquePos = 0; + for (TextIdx readPos = 0; readPos < total; ++readPos) { + if (readPos < end) { + MetaSlot& item = map[shortcut[readPos]]; + if (item.score >= minimum_population) { + lastNonUniquePos = readPos + sliceLen; + } + } + if (readPos >= lastNonUniquePos) { + data[readPos] = 0; + } + } +} diff --git a/research/durchschlag.h b/research/durchschlag.h new file mode 100755 index 0000000..adbc531 --- /dev/null +++ b/research/durchschlag.h @@ -0,0 +1,99 @@ +#ifndef BROTLI_RESEARCH_DURCHSCHLAG_H_ +#define BROTLI_RESEARCH_DURCHSCHLAG_H_ + +#include +#include +#include +#include + +/** + * Generate a dictionary for given samples. 
+ * + * @param dictionary_size_limit maximal dictionary size + * @param slice_len text slice size + * @param block_len score block length + * @param sample_sizes vector with sample sizes + * @param sample_data concatenated samples + * @return generated dictionary + */ +std::string durchschlag_generate( + size_t dictionary_size_limit, size_t slice_len, size_t block_len, + const std::vector& sample_sizes, const uint8_t* sample_data); + +//------------------------------------------------------------------------------ +// Lower level API for repetitive dictionary generation. +//------------------------------------------------------------------------------ + +/* Pointer to position in text. */ +typedef uint32_t DurchschlagTextIdx; + +/* Context is made public for flexible serialization / deserialization. */ +typedef struct DurchschlagContext { + DurchschlagTextIdx dataSize; + DurchschlagTextIdx sliceLen; + DurchschlagTextIdx numUniqueSlices; + std::vector offsets; + std::vector sliceMap; +} DurchschlagContext; + +DurchschlagContext durchschlag_prepare(size_t slice_len, + const std::vector& sample_sizes, const uint8_t* sample_data); + +typedef enum DurchschalgResourceStrategy { + // Faster + DURCHSCHLAG_EXCLUSIVE = 0, + // Uses much less memory + DURCHSCHLAG_COLLABORATIVE = 1 +} DurchschalgResourceStrategy; + +std::string durchschlag_generate(DurchschalgResourceStrategy strategy, + size_t dictionary_size_limit, size_t block_len, + const DurchschlagContext& context, const uint8_t* sample_data); + +//------------------------------------------------------------------------------ +// Suffix Array based preparation. 
+//------------------------------------------------------------------------------ + +typedef struct DurchschlagIndex { + std::vector lcp; + std::vector sa; +} DurchschlagIndex; + +DurchschlagIndex durchschlag_index(const std::vector& data); + +DurchschlagContext durchschlag_prepare(size_t slice_len, + const std::vector& sample_sizes, const DurchschlagIndex& index); + +//------------------------------------------------------------------------------ +// Data preparation. +//------------------------------------------------------------------------------ + +/** + * Cut out unique slices. + * + * Both @p sample_sizes and @p sample_data are modified in-place. Number of + * samples remains unchanged, but some samples become shorter. + * + * @param slice_len (unique) slice size + * @param minimum_population minimum non-unique slice occurrence + * @param sample_sizes [in / out] vector with sample sizes + * @param sample_data [in / out] concatenated samples + */ +void durchschlag_distill(size_t slice_len, size_t minimum_population, + std::vector* sample_sizes, uint8_t* sample_data); + +/** + * Replace unique slices with zeroes. + * + * @p sample_data is modified in-place. Number of samples and their length + * remain unchanged. 
+ * + * @param slice_len (unique) slice size + * @param minimum_population minimum non-unique slice occurrence + * @param sample_sizes vector with sample sizes + * @param sample_data [in / out] concatenated samples + */ +void durchschlag_purify(size_t slice_len, size_t minimum_population, + const std::vector& sample_sizes, uint8_t* sample_data); + +#endif // BROTLI_RESEARCH_DURCHSCHLAG_H_ diff --git a/research/libdivsufsort b/research/libdivsufsort new file mode 160000 index 0000000..5f60d6f --- /dev/null +++ b/research/libdivsufsort @@ -0,0 +1 @@ +Subproject commit 5f60d6f026c30fb4ac296f696b3c8b0eb71bd428 diff --git a/research/sieve.cc b/research/sieve.cc index fbc1dbf..4d147e1 100755 --- a/research/sieve.cc +++ b/research/sieve.cc @@ -1,19 +1,27 @@ #include "./sieve.h" +/* Pointer to position in (combined corpus) text. */ +typedef uint32_t TextIdx; + +/* Index of sample / generation. */ +typedef uint16_t SampleIdx; + typedef struct Slot { - uint32_t next; - uint32_t offset; - uint16_t presence; - uint16_t mark; + TextIdx next; + TextIdx offset; + SampleIdx presence; + SampleIdx mark; } Slot; -static size_t dryRun(size_t sliceLen, Slot* map, uint32_t* shortcut, size_t end, - size_t middle, uint16_t minPresence, uint16_t iteration) { - int from = -2; - int to = -1; - size_t result = 0; - uint16_t targetPresence = minPresence; - for (uint32_t i = 0; i < end; ++i) { +static const TextIdx kNowhere = static_cast(-1); + +static TextIdx dryRun(TextIdx sliceLen, Slot* map, TextIdx* shortcut, + TextIdx end, TextIdx middle, SampleIdx minPresence, SampleIdx iteration) { + TextIdx from = kNowhere; + TextIdx to = kNowhere; + TextIdx result = 0; + SampleIdx targetPresence = minPresence; + for (TextIdx i = 0; i < end; ++i) { if (i == middle) { targetPresence++; } @@ -21,8 +29,8 @@ static size_t dryRun(size_t sliceLen, Slot* map, uint32_t* shortcut, size_t end, if (item.mark != iteration) { item.mark = iteration; if (item.presence >= targetPresence) { - if (to < i) { - if (from > 
0) { + if ((to == kNowhere) || (to < i)) { + if (from != kNowhere) { result += to - from; } from = i; @@ -31,20 +39,20 @@ static size_t dryRun(size_t sliceLen, Slot* map, uint32_t* shortcut, size_t end, } } } - if (from > 0) { + if (from != kNowhere) { result += to - from; } return result; } -static std::string createDictionary(const uint8_t* data, size_t sliceLen, - Slot* map, uint32_t* shortcut, size_t end, size_t middle, - uint16_t minPresence, uint16_t iteration) { +static std::string createDictionary(const uint8_t* data, TextIdx sliceLen, + Slot* map, TextIdx* shortcut, TextIdx end, TextIdx middle, + SampleIdx minPresence, SampleIdx iteration) { std::string output; - int from = -2; - int to = -1; - uint16_t targetPresence = minPresence; - for (uint32_t i = 0; i < end; ++i) { + TextIdx from = kNowhere; + TextIdx to = kNowhere; + SampleIdx targetPresence = minPresence; + for (TextIdx i = 0; i < end; ++i) { if (i == middle) { targetPresence++; } @@ -52,8 +60,8 @@ static std::string createDictionary(const uint8_t* data, size_t sliceLen, if (item.mark != iteration) { item.mark = iteration; if (item.presence >= targetPresence) { - if (to < i) { - if (from > 0) { + if ((to == kNowhere) || (to < i)) { + if (from != kNowhere) { output.insert(output.end(), &data[from], &data[to]); } from = i; @@ -62,7 +70,7 @@ static std::string createDictionary(const uint8_t* data, size_t sliceLen, } } } - if (from > 0) { + if (from != kNowhere) { output.insert(output.end(), &data[from], &data[to]); } return output; @@ -71,55 +79,95 @@ static std::string createDictionary(const uint8_t* data, size_t sliceLen, std::string sieve_generate(size_t dictionary_size_limit, size_t slice_len, const std::vector& sample_sizes, const uint8_t* sample_data) { /* Parameters aliasing */ - size_t targetSize = dictionary_size_limit; - size_t sliceLen = slice_len; + TextIdx targetSize = static_cast(dictionary_size_limit); + if (targetSize != dictionary_size_limit) { + fprintf(stderr, "dictionary_size_limit 
is too large\n"); + return ""; + } + TextIdx sliceLen = static_cast(slice_len); + if (sliceLen != slice_len) { + fprintf(stderr, "slice_len is too large\n"); + return ""; + } + if (sliceLen < 1) { + fprintf(stderr, "slice_len is too small\n"); + return ""; + } + SampleIdx numSamples = static_cast(sample_sizes.size()); + if ((numSamples != sample_sizes.size()) || (numSamples * 2 < numSamples)) { + fprintf(stderr, "too many samples\n"); + return ""; + } const uint8_t* data = sample_data; - size_t total = 0; - std::vector offsets; - for (size_t i = 0; i < sample_sizes.size(); ++i) { - total += sample_sizes[i]; + TextIdx total = 0; + std::vector offsets; + for (SampleIdx i = 0; i < numSamples; ++i) { + TextIdx delta = static_cast(sample_sizes[i]); + if (delta != sample_sizes[i]) { + fprintf(stderr, "sample is too large\n"); + return ""; + } + if (delta == 0) { + fprintf(stderr, "empty samples are prohibited\n"); + return ""; + } + if (total + delta <= total) { + fprintf(stderr, "corpus is too large\n"); + return ""; + } + total += delta; offsets.push_back(total); } + if (total * 2 < total) { + fprintf(stderr, "corpus is too large\n"); + return ""; + } + + if (total < sliceLen) { + fprintf(stderr, "slice_len is larger than corpus size\n"); + return ""; + } + /***************************************************************************** * Build coverage map. 
****************************************************************************/ std::vector map; - std::vector shortcut; + std::vector shortcut; map.push_back({0, 0, 0, 0}); - size_t end = total - sliceLen; - int hashLen = 8; - while ((1 << hashLen) < end) { + TextIdx end = total - sliceLen; + TextIdx hashLen = 11; + while (hashLen < 29 && ((1u << hashLen) < end)) { hashLen += 3; } hashLen -= 3; - uint32_t hashMask = (1u << hashLen) - 1u; - std::vector hashHead(1 << hashLen); - uint32_t hashSlot = 1; - uint16_t piece = 0; - uint32_t hash = 0; - int lShift = 3; - int rShift = hashLen - lShift; - for (int i = 0; i < sliceLen - 1; ++i) { - uint32_t v = data[i]; + TextIdx hashMask = (1u << hashLen) - 1u; + std::vector hashHead(1 << hashLen); + TextIdx hashSlot = 1; + SampleIdx piece = 0; + TextIdx hash = 0; + TextIdx lShift = 3; + TextIdx rShift = hashLen - lShift; + for (TextIdx i = 0; i < sliceLen - 1; ++i) { + TextIdx v = data[i]; hash = (((hash << lShift) | (hash >> rShift)) & hashMask) ^ v; } - int lShiftX = (lShift * (sliceLen - 1)) % hashLen; - int rShiftX = hashLen - lShiftX; - for (uint32_t i = 0; i < end; ++i) { - uint32_t v = data[i + sliceLen - 1]; + TextIdx lShiftX = (lShift * (sliceLen - 1)) % hashLen; + TextIdx rShiftX = hashLen - lShiftX; + for (TextIdx i = 0; i < end; ++i) { + TextIdx v = data[i + sliceLen - 1]; hash = (((hash << lShift) | (hash >> rShift)) & hashMask) ^ v; if (offsets[piece] == i) { piece++; } - uint32_t slot = hashHead[hash]; + TextIdx slot = hashHead[hash]; while (slot != 0) { Slot& item = map[slot]; - int start = item.offset; + TextIdx start = item.offset; bool miss = false; - for (size_t j = 0; j < sliceLen; ++j) { + for (TextIdx j = 0; j < sliceLen; ++j) { if (data[i + j] != data[start + j]) { miss = true; break; @@ -148,8 +196,8 @@ std::string sieve_generate(size_t dictionary_size_limit, size_t slice_len, /***************************************************************************** * Build dictionary of specified size. 
****************************************************************************/ - size_t a = 1; - size_t size = dryRun( + SampleIdx a = 1; + TextIdx size = dryRun( sliceLen, map.data(), shortcut.data(), end, end, a, ++piece); /* Maximal output is smaller than target. */ if (size <= targetSize) { @@ -157,7 +205,7 @@ std::string sieve_generate(size_t dictionary_size_limit, size_t slice_len, data, sliceLen, map.data(), shortcut.data(), end, end, a, ++piece); } - size_t b = offsets.size(); + SampleIdx b = numSamples; size = dryRun(sliceLen, map.data(), shortcut.data(), end, end, b, ++piece); if (size == targetSize) { return createDictionary( @@ -167,7 +215,7 @@ std::string sieve_generate(size_t dictionary_size_limit, size_t slice_len, if (size < targetSize) { /* size(a) > targetSize > size(b) && a < m < b */ while (a + 1 < b) { - size_t m = (a + b) / 2; + SampleIdx m = static_cast((a + b) / 2); size = dryRun( sliceLen, map.data(), shortcut.data(), end, end, m, ++piece); if (size < targetSize) { @@ -183,18 +231,18 @@ std::string sieve_generate(size_t dictionary_size_limit, size_t slice_len, a = b; } /* size(minPresence) > targetSize > size(minPresence + 1) */ - size_t minPresence = a; - a = 0; - b = end; + SampleIdx minPresence = a; + TextIdx c = 0; + TextIdx d = end; /* size(a) < targetSize < size(b) && a < m < b */ - while (a + 1 < b) { - size_t m = (a + b) / 2; + while (c + 1 < d) { + TextIdx m = (c + d) / 2; size = dryRun( sliceLen, map.data(), shortcut.data(), end, m, minPresence, ++piece); if (size < targetSize) { - a = m; + c = m; } else if (size > targetSize) { - b = m; + d = m; } else { return createDictionary(data, sliceLen, map.data(), shortcut.data(), end, m, minPresence, ++piece); @@ -204,8 +252,8 @@ std::string sieve_generate(size_t dictionary_size_limit, size_t slice_len, bool unrestricted = false; if (minPresence <= 2 && !unrestricted) { minPresence = 2; - a = end; + c = end; } - return createDictionary(data, sliceLen, map.data(), shortcut.data(), end, a, 
+ return createDictionary(data, sliceLen, map.data(), shortcut.data(), end, c, minPresence, ++piece); } diff --git a/research/sieve.h b/research/sieve.h index 2aae669..6c65dc8 100755 --- a/research/sieve.h +++ b/research/sieve.h @@ -1,9 +1,8 @@ #ifndef BROTLI_RESEARCH_SIEVE_H_ #define BROTLI_RESEARCH_SIEVE_H_ -#include -#include - +#include +#include #include #include diff --git a/scripts/sources.lst b/scripts/sources.lst index cd61a7f..cdddb37 100644 --- a/scripts/sources.lst +++ b/scripts/sources.lst @@ -5,11 +5,15 @@ BROTLI_CLI_C = \ c/tools/brotli.c BROTLI_COMMON_C = \ - c/common/dictionary.c + c/common/dictionary.c \ + c/common/transform.c BROTLI_COMMON_H = \ c/common/constants.h \ + c/common/context.h \ c/common/dictionary.h \ + c/common/platform.h \ + c/common/transform.h \ c/common/version.h BROTLI_DEC_C = \ @@ -20,12 +24,9 @@ BROTLI_DEC_C = \ BROTLI_DEC_H = \ c/dec/bit_reader.h \ - c/dec/context.h \ c/dec/huffman.h \ - c/dec/port.h \ c/dec/prefix.h \ - c/dec/state.h \ - c/dec/transform.h + c/dec/state.h BROTLI_ENC_C = \ c/enc/backward_references.c \ @@ -38,6 +39,7 @@ BROTLI_ENC_C = \ c/enc/compress_fragment_two_pass.c \ c/enc/dictionary_hash.c \ c/enc/encode.c \ + c/enc/encoder_dict.c \ c/enc/entropy_encode.c \ c/enc/histogram.c \ c/enc/literal_cost.c \ @@ -61,14 +63,14 @@ BROTLI_ENC_H = \ c/enc/command.h \ c/enc/compress_fragment.h \ c/enc/compress_fragment_two_pass.h \ - c/enc/context.h \ c/enc/dictionary_hash.h \ + c/enc/encoder_dict.h \ c/enc/entropy_encode.h \ c/enc/entropy_encode_static.h \ c/enc/fast_log.h \ c/enc/find_match_length.h \ - c/enc/hash_forgetful_chain_inc.h \ c/enc/hash.h \ + c/enc/hash_forgetful_chain_inc.h \ c/enc/hash_longest_match64_inc.h \ c/enc/hash_longest_match_inc.h \ c/enc/hash_longest_match_quickly_inc.h \ @@ -79,7 +81,6 @@ BROTLI_ENC_H = \ c/enc/memory.h \ c/enc/metablock.h \ c/enc/metablock_inc.h \ - c/enc/port.h \ c/enc/prefix.h \ c/enc/quality.h \ c/enc/ringbuffer.h \ diff --git a/setup.py b/setup.py index 
a8a2ebe..d8478b3 100644 --- a/setup.py +++ b/setup.py @@ -182,6 +182,7 @@ EXT_MODULES = [ sources=[ 'python/_brotli.cc', 'c/common/dictionary.c', + 'c/common/transform.c', 'c/dec/bit_reader.c', 'c/dec/decode.c', 'c/dec/huffman.c', @@ -196,6 +197,7 @@ EXT_MODULES = [ 'c/enc/compress_fragment_two_pass.c', 'c/enc/dictionary_hash.c', 'c/enc/encode.c', + 'c/enc/encoder_dict.c', 'c/enc/entropy_encode.c', 'c/enc/histogram.c', 'c/enc/literal_cost.c', @@ -206,15 +208,15 @@ EXT_MODULES = [ ], depends=[ 'c/common/constants.h', + 'c/common/context.h', 'c/common/dictionary.h', + 'c/common/platform.h', + 'c/common/transform.h', 'c/common/version.h', 'c/dec/bit_reader.h', - 'c/dec/context.h', 'c/dec/huffman.h', - 'c/dec/port.h', 'c/dec/prefix.h', 'c/dec/state.h', - 'c/dec/transform.h', 'c/enc/backward_references.h', 'c/enc/backward_references_hq.h', 'c/enc/backward_references_inc.h', @@ -229,8 +231,8 @@ EXT_MODULES = [ 'c/enc/command.h', 'c/enc/compress_fragment.h', 'c/enc/compress_fragment_two_pass.h', - 'c/enc/context.h', 'c/enc/dictionary_hash.h', + 'c/enc/encoder_dict.h', 'c/enc/entropy_encode.h', 'c/enc/entropy_encode_static.h', 'c/enc/fast_log.h', @@ -247,7 +249,6 @@ EXT_MODULES = [ 'c/enc/memory.h', 'c/enc/metablock.h', 'c/enc/metablock_inc.h', - 'c/enc/port.h', 'c/enc/prefix.h', 'c/enc/quality.h', 'c/enc/ringbuffer.h', -- cgit v1.1