about summary refs log tree commit diff
diff options
context:
space:
mode:
author Eugene Kliuchnikov <eustas@google.com> 2018-02-26 09:04:36 -0500
committer GitHub <noreply@github.com> 2018-02-26 09:04:36 -0500
commit 35e69fc7cf9421ab04ffc9d52cb36d07fa12984a (patch)
tree a1ed614391936d455da2b0610ef8e8caf88b4289
parent 3af18990f50d8f040038aaa08c41f5d27d62efb5 (diff)
download brotli-35e69fc7cf9421ab04ffc9d52cb36d07fa12984a.zip
brotli-35e69fc7cf9421ab04ffc9d52cb36d07fa12984a.tar.gz
brotli-35e69fc7cf9421ab04ffc9d52cb36d07fa12984a.tar.bz2
New feature: "Large Window Brotli" (#640)
* New feature: "Large Window Brotli"
  By setting a special encoder/decoder flag it is now possible to extend the LZ-window up to 30 bits; however, the produced stream will not be RFC 7932 compliant.
  Added a new dictionary generator - "DSH". It combines the speed of "Sieve" and the quality of "DM". Plus utilities to prepare training corpora (remove unique strings).
  Improved compression ratio: two sub-blocks can now be stitched: the last copy command can be extended to span the next sub-block.
  Fixed compression inefficiency caused by floating-point rounding and a wrong cost heuristic.
  Other C changes:
  - combined / moved `context.h` to `common`
  - moved transforms to `common`
  - unified some aspects of code formatting
  - added an abstraction for encoder (static) dictionary
  - moved default allocator/deallocator functions to `common`
  brotli CLI:
  - window size is auto-adjusted if not specified explicitly
  Java:
  - added "eager" decoding both to JNI wrapper and pure decoder
  - huge speed-up of `DictionaryData` initialization
* Add dictionaryless compressed dictionary
* Fix `sources.lst`
* Fix `sources.lst` and add a note that `libtool` is also required.
* Update setup.py
* Fix `EagerStreamTest`
* Fix BUILD file
* Add missing `libdivsufsort` dependency
* Fix "unused parameter" warning.
-rw-r--r-- .gitmodules 3
-rw-r--r-- WORKSPACE 6
-rwxr-xr-x bootstrap 2
-rw-r--r-- c/common/constants.h 19
-rwxr-xr-x [-rw-r--r--] c/common/context.h (renamed from c/dec/context.h) 354
-rwxr-xr-x c/common/dictionary.bin.br bin 0 -> 51687 bytes
-rwxr-xr-x c/common/platform.h 22
-rwxr-xr-x c/common/transform.c 236
-rwxr-xr-x c/common/transform.h 80
-rw-r--r-- c/dec/bit_reader.h 33
-rw-r--r-- c/dec/decode.c 455
-rw-r--r-- c/dec/huffman.c 22
-rw-r--r-- c/dec/huffman.h 20
-rw-r--r-- c/dec/prefix.h 7
-rw-r--r-- c/dec/state.c 37
-rw-r--r-- c/dec/state.h 47
-rw-r--r-- c/dec/transform.h 300
-rw-r--r-- c/enc/backward_references.c 21
-rw-r--r-- c/enc/backward_references.h 1
-rw-r--r-- c/enc/backward_references_hq.c 91
-rw-r--r-- c/enc/backward_references_hq.h 16
-rw-r--r-- c/enc/backward_references_inc.h 13
-rw-r--r-- c/enc/bit_cost.h 8
-rw-r--r-- c/enc/block_encoder_inc.h 13
-rw-r--r-- c/enc/block_splitter.c 2
-rw-r--r-- c/enc/block_splitter_inc.h 2
-rw-r--r-- c/enc/brotli_bit_stream.c 187
-rw-r--r-- c/enc/brotli_bit_stream.h 50
-rw-r--r-- c/enc/command.h 30
-rw-r--r-- c/enc/compress_fragment.c 4
-rw-r--r-- c/enc/compress_fragment_two_pass.c 2
-rw-r--r-- c/enc/context.h 184
-rw-r--r-- c/enc/encode.c 338
-rwxr-xr-x c/enc/encoder_dict.c 32
-rwxr-xr-x c/enc/encoder_dict.h 42
-rw-r--r-- c/enc/entropy_encode.c 22
-rw-r--r-- c/enc/entropy_encode.h 8
-rw-r--r-- c/enc/entropy_encode_static.h 4
-rw-r--r-- c/enc/hash.h 35
-rw-r--r-- c/enc/hash_forgetful_chain_inc.h 15
-rw-r--r-- c/enc/hash_longest_match64_inc.h 16
-rw-r--r-- c/enc/hash_longest_match_inc.h 14
-rw-r--r-- c/enc/hash_longest_match_quickly_inc.h 19
-rw-r--r-- c/enc/hash_to_binary_tree_inc.h 10
-rw-r--r-- c/enc/histogram.c 15
-rw-r--r-- c/enc/histogram.h 7
-rw-r--r-- c/enc/histogram_inc.h 2
-rw-r--r-- c/enc/literal_cost.c 8
-rw-r--r-- c/enc/literal_cost.h 2
-rw-r--r-- c/enc/memory.c 14
-rw-r--r-- c/enc/metablock.c 24
-rw-r--r-- c/enc/metablock.h 7
-rwxr-xr-x c/enc/params.h 11
-rw-r--r-- c/enc/prefix.h 5
-rw-r--r-- c/enc/quality.h 11
-rw-r--r-- c/enc/ringbuffer.h 24
-rw-r--r-- c/enc/static_dict.c 80
-rw-r--r-- c/enc/static_dict.h 5
-rw-r--r-- c/enc/static_dict_lut.h 2
-rw-r--r-- c/enc/utf8_util.c 40
-rw-r--r-- c/enc/write_bits.h 18
-rw-r--r-- c/include/brotli/decode.h 15
-rw-r--r-- c/include/brotli/encode.h 11
-rw-r--r-- c/tools/brotli.c 189
-rw-r--r-- docs/brotli.1 2
-rw-r--r-- docs/decode.h.3 5
-rw-r--r-- docs/encode.h.3 9
-rw-r--r-- docs/types.h.3 2
-rw-r--r-- java/org/brotli/dec/BUILD 6
-rw-r--r-- java/org/brotli/dec/BrotliInputStream.java 12
-rw-r--r-- java/org/brotli/dec/Decode.java 84
-rw-r--r-- java/org/brotli/dec/DictionaryData.java 29
-rwxr-xr-x java/org/brotli/dec/EagerStreamTest.java 386
-rw-r--r-- java/org/brotli/dec/State.java 7
-rw-r--r-- java/org/brotli/wrapper/dec/BrotliInputStream.java 4
-rw-r--r-- java/org/brotli/wrapper/dec/Decoder.java 10
-rw-r--r-- java/org/brotli/wrapper/dec/DecoderJNI.java 26
-rwxr-xr-x java/org/brotli/wrapper/dec/EagerStreamTest.java 75
-rw-r--r-- java/org/brotli/wrapper/dec/decoder_jni.cc 31
-rwxr-xr-x research/BUILD 8
-rw-r--r-- research/BUILD.libdivsufsort 55
-rw-r--r-- research/deorummolae.cc 173
-rw-r--r-- research/deorummolae.h 9
-rwxr-xr-x research/dictionary_generator.cc 119
-rw-r--r-- research/draw_diff.cc 21
-rwxr-xr-x research/durchschlag.cc 714
-rwxr-xr-x research/durchschlag.h 99
m--------- research/libdivsufsort 0
-rwxr-xr-x research/sieve.cc 174
-rwxr-xr-x research/sieve.h 5
-rw-r--r-- scripts/sources.lst 17
-rw-r--r-- setup.py 11
92 files changed, 3612 insertions, 1793 deletions
diff --git a/.gitmodules b/.gitmodules
index af7df38..3ec8760 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -1,3 +1,6 @@
[submodule "research/esaxx"]
path = research/esaxx
url = https://github.com/hillbig/esaxx
+[submodule "research/libdivsufsort"]
+ path = research/libdivsufsort
+ url = https://github.com/y-256/libdivsufsort.git
diff --git a/WORKSPACE b/WORKSPACE
index b239745..59c1c4f 100644
--- a/WORKSPACE
+++ b/WORKSPACE
@@ -82,6 +82,12 @@ filegroup(
)""",
)
+new_local_repository(
+ name = "divsufsort",
+ build_file = "//research:BUILD.libdivsufsort",
+ path = "research/libdivsufsort",
+)
+
load("@io_bazel_rules_closure//closure:defs.bzl", "closure_repositories")
closure_repositories()
diff --git a/bootstrap b/bootstrap
index dbaea15..64aca2c 100755
--- a/bootstrap
+++ b/bootstrap
@@ -7,6 +7,8 @@ sed --version >/dev/null 2>&1 || { echo >&2 "'sed' $REQUIRED"; exit 1; }
fi
autoreconf --version >/dev/null 2>&1 || { echo >&2 "'autoconf' $REQUIRED"; exit 1; }
+# libtool is required as well; if it is not installed -> "error: Libtool library used but 'LIBTOOL' is undefined"
+
mkdir m4 2>/dev/null
BROTLI_ABI_HEX=`sed -n 's/#define BROTLI_ABI_VERSION 0x//p' c/common/version.h`
diff --git a/c/common/constants.h b/c/common/constants.h
index 416ec55..26edcd5 100644
--- a/c/common/constants.h
+++ b/c/common/constants.h
@@ -28,18 +28,25 @@
/* "code length of 8 is repeated" */
#define BROTLI_INITIAL_REPEATED_CODE_LENGTH 8
+/* "Large Window Brotli" */
+#define BROTLI_LARGE_MAX_DISTANCE_BITS 62U
+#define BROTLI_LARGE_MIN_WBITS 10
+#define BROTLI_LARGE_MAX_WBITS 30
+
/* Specification: 4. Encoding of distances */
#define BROTLI_NUM_DISTANCE_SHORT_CODES 16
#define BROTLI_MAX_NPOSTFIX 3
#define BROTLI_MAX_NDIRECT 120
#define BROTLI_MAX_DISTANCE_BITS 24U
-/* BROTLI_NUM_DISTANCE_SYMBOLS == 520 */
-#define BROTLI_NUM_DISTANCE_SYMBOLS (BROTLI_NUM_DISTANCE_SHORT_CODES + \
- BROTLI_MAX_NDIRECT + \
- (BROTLI_MAX_DISTANCE_BITS << \
- (BROTLI_MAX_NPOSTFIX + 1)))
-/* Distance that is guaranteed to be representable in any stream. */
+#define BROTLI_DISTANCE_ALPHABET_SIZE(NDIRECT, NPOSTFIX, MAXNBITS) ( \
+ BROTLI_NUM_DISTANCE_SHORT_CODES + (NDIRECT) + \
+ ((MAXNBITS) << ((NPOSTFIX) + 1)))
+/* BROTLI_NUM_DISTANCE_SYMBOLS == 1128 */
+#define BROTLI_NUM_DISTANCE_SYMBOLS \
+ BROTLI_DISTANCE_ALPHABET_SIZE( \
+ BROTLI_MAX_NDIRECT, BROTLI_MAX_NPOSTFIX, BROTLI_LARGE_MAX_DISTANCE_BITS)
#define BROTLI_MAX_DISTANCE 0x3FFFFFC
+#define BROTLI_MAX_ALLOWED_DISTANCE 0x7FFFFFFC
/* 7.1. Context modes and context ID lookup for literals */
/* "context IDs for literals are in the range of 0..63" */
diff --git a/c/dec/context.h b/c/common/context.h
index 9402cbe..24b3eb4 100644..100755
--- a/c/dec/context.h
+++ b/c/common/context.h
@@ -6,110 +6,171 @@
/* Lookup table to map the previous two bytes to a context id.
- There are four different context modeling modes defined here:
- CONTEXT_LSB6: context id is the least significant 6 bits of the last byte,
- CONTEXT_MSB6: context id is the most significant 6 bits of the last byte,
- CONTEXT_UTF8: second-order context model tuned for UTF8-encoded text,
- CONTEXT_SIGNED: second-order context model tuned for signed integers.
-
- The context id for the UTF8 context model is calculated as follows. If p1
- and p2 are the previous two bytes, we calculate the context as
-
- context = kContextLookup[p1] | kContextLookup[p2 + 256].
-
- If the previous two bytes are ASCII characters (i.e. < 128), this will be
- equivalent to
-
- context = 4 * context1(p1) + context2(p2),
-
- where context1 is based on the previous byte in the following way:
-
- 0 : non-ASCII control
- 1 : \t, \n, \r
- 2 : space
- 3 : other punctuation
- 4 : " '
- 5 : %
- 6 : ( < [ {
- 7 : ) > ] }
- 8 : , ; :
- 9 : .
- 10 : =
- 11 : number
- 12 : upper-case vowel
- 13 : upper-case consonant
- 14 : lower-case vowel
- 15 : lower-case consonant
-
- and context2 is based on the second last byte:
-
- 0 : control, space
- 1 : punctuation
- 2 : upper-case letter, number
- 3 : lower-case letter
-
- If the last byte is ASCII, and the second last byte is not (in a valid UTF8
- stream it will be a continuation byte, value between 128 and 191), the
- context is the same as if the second last byte was an ASCII control or space.
-
- If the last byte is a UTF8 lead byte (value >= 192), then the next byte will
- be a continuation byte and the context id is 2 or 3 depending on the LSB of
- the last byte and to a lesser extent on the second last byte if it is ASCII.
-
- If the last byte is a UTF8 continuation byte, the second last byte can be:
- - continuation byte: the next byte is probably ASCII or lead byte (assuming
- 4-byte UTF8 characters are rare) and the context id is 0 or 1.
- - lead byte (192 - 207): next byte is ASCII or lead byte, context is 0 or 1
- - lead byte (208 - 255): next byte is continuation byte, context is 2 or 3
-
- The possible value combinations of the previous two bytes, the range of
- context ids and the type of the next byte is summarized in the table below:
-
- |--------\-----------------------------------------------------------------|
- | \ Last byte |
- | Second \---------------------------------------------------------------|
- | last byte \ ASCII | cont. byte | lead byte |
- | \ (0-127) | (128-191) | (192-) |
- |=============|===================|=====================|==================|
- | ASCII | next: ASCII/lead | not valid | next: cont. |
- | (0-127) | context: 4 - 63 | | context: 2 - 3 |
- |-------------|-------------------|---------------------|------------------|
- | cont. byte | next: ASCII/lead | next: ASCII/lead | next: cont. |
- | (128-191) | context: 4 - 63 | context: 0 - 1 | context: 2 - 3 |
- |-------------|-------------------|---------------------|------------------|
- | lead byte | not valid | next: ASCII/lead | not valid |
- | (192-207) | | context: 0 - 1 | |
- |-------------|-------------------|---------------------|------------------|
- | lead byte | not valid | next: cont. | not valid |
- | (208-) | | context: 2 - 3 | |
- |-------------|-------------------|---------------------|------------------|
-
- The context id for the signed context mode is calculated as:
-
- context = (kContextLookup[512 + p1] << 3) | kContextLookup[512 + p2].
-
- For any context modeling modes, the context ids can be calculated by |-ing
- together two lookups from one table using context model dependent offsets:
-
- context = kContextLookup[offset1 + p1] | kContextLookup[offset2 + p2].
-
- where offset1 and offset2 are dependent on the context mode.
+ There are four different context modeling modes defined here:
+ CONTEXT_LSB6: context id is the least significant 6 bits of the last byte,
+ CONTEXT_MSB6: context id is the most significant 6 bits of the last byte,
+ CONTEXT_UTF8: second-order context model tuned for UTF8-encoded text,
+ CONTEXT_SIGNED: second-order context model tuned for signed integers.
+
+ If |p1| and |p2| are the previous two bytes, and |mode| is the current
+ context mode, we calculate the context as:
+
+ context = BROTLI_CONTEXT_LUT(mode)[p1] | BROTLI_CONTEXT_LUT(mode)[p2 + 256].
+
+ For CONTEXT_UTF8 mode, if the previous two bytes are ASCII characters
+ (i.e. < 128), this will be equivalent to
+
+ context = 4 * context1(p1) + context2(p2),
+
+ where context1 is based on the previous byte in the following way:
+
+ 0 : non-ASCII control
+ 1 : \t, \n, \r
+ 2 : space
+ 3 : other punctuation
+ 4 : " '
+ 5 : %
+ 6 : ( < [ {
+ 7 : ) > ] }
+ 8 : , ; :
+ 9 : .
+ 10 : =
+ 11 : number
+ 12 : upper-case vowel
+ 13 : upper-case consonant
+ 14 : lower-case vowel
+ 15 : lower-case consonant
+
+ and context2 is based on the second last byte:
+
+ 0 : control, space
+ 1 : punctuation
+ 2 : upper-case letter, number
+ 3 : lower-case letter
+
+ If the last byte is ASCII, and the second last byte is not (in a valid UTF8
+ stream it will be a continuation byte, value between 128 and 191), the
+ context is the same as if the second last byte was an ASCII control or space.
+
+ If the last byte is a UTF8 lead byte (value >= 192), then the next byte will
+ be a continuation byte and the context id is 2 or 3 depending on the LSB of
+ the last byte and to a lesser extent on the second last byte if it is ASCII.
+
+ If the last byte is a UTF8 continuation byte, the second last byte can be:
+ - continuation byte: the next byte is probably ASCII or lead byte (assuming
+ 4-byte UTF8 characters are rare) and the context id is 0 or 1.
+ - lead byte (192 - 207): next byte is ASCII or lead byte, context is 0 or 1
+ - lead byte (208 - 255): next byte is continuation byte, context is 2 or 3
+
+ The possible value combinations of the previous two bytes, the range of
+ context ids and the type of the next byte are summarized in the table below:
+
+ |--------\-----------------------------------------------------------------|
+ | \ Last byte |
+ | Second \---------------------------------------------------------------|
+ | last byte \ ASCII | cont. byte | lead byte |
+ | \ (0-127) | (128-191) | (192-) |
+ |=============|===================|=====================|==================|
+ | ASCII | next: ASCII/lead | not valid | next: cont. |
+ | (0-127) | context: 4 - 63 | | context: 2 - 3 |
+ |-------------|-------------------|---------------------|------------------|
+ | cont. byte | next: ASCII/lead | next: ASCII/lead | next: cont. |
+ | (128-191) | context: 4 - 63 | context: 0 - 1 | context: 2 - 3 |
+ |-------------|-------------------|---------------------|------------------|
+ | lead byte | not valid | next: ASCII/lead | not valid |
+ | (192-207) | | context: 0 - 1 | |
+ |-------------|-------------------|---------------------|------------------|
+ | lead byte | not valid | next: cont. | not valid |
+ | (208-) | | context: 2 - 3 | |
+ |-------------|-------------------|---------------------|------------------|
*/
-#ifndef BROTLI_DEC_CONTEXT_H_
-#define BROTLI_DEC_CONTEXT_H_
+#ifndef BROTLI_COMMON_CONTEXT_H_
+#define BROTLI_COMMON_CONTEXT_H_
#include <brotli/types.h>
-enum ContextType {
+typedef enum ContextType {
CONTEXT_LSB6 = 0,
CONTEXT_MSB6 = 1,
CONTEXT_UTF8 = 2,
CONTEXT_SIGNED = 3
-};
+} ContextType;
/* Common context lookup table for all context modes. */
-static const uint8_t kContextLookup[1792] = {
+static const uint8_t kContextLookup[2048] = {
+ /* CONTEXT_LSB6, last byte. */
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+ 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
+ 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
+ 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+ 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
+ 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
+ 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+ 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
+ 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
+ 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+ 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
+ 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
+ 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
+
+ /* CONTEXT_LSB6, second last byte. */
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+
+ /* CONTEXT_MSB6, last byte. */
+ 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3,
+ 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7,
+ 8, 8, 8, 8, 9, 9, 9, 9, 10, 10, 10, 10, 11, 11, 11, 11,
+ 12, 12, 12, 12, 13, 13, 13, 13, 14, 14, 14, 14, 15, 15, 15, 15,
+ 16, 16, 16, 16, 17, 17, 17, 17, 18, 18, 18, 18, 19, 19, 19, 19,
+ 20, 20, 20, 20, 21, 21, 21, 21, 22, 22, 22, 22, 23, 23, 23, 23,
+ 24, 24, 24, 24, 25, 25, 25, 25, 26, 26, 26, 26, 27, 27, 27, 27,
+ 28, 28, 28, 28, 29, 29, 29, 29, 30, 30, 30, 30, 31, 31, 31, 31,
+ 32, 32, 32, 32, 33, 33, 33, 33, 34, 34, 34, 34, 35, 35, 35, 35,
+ 36, 36, 36, 36, 37, 37, 37, 37, 38, 38, 38, 38, 39, 39, 39, 39,
+ 40, 40, 40, 40, 41, 41, 41, 41, 42, 42, 42, 42, 43, 43, 43, 43,
+ 44, 44, 44, 44, 45, 45, 45, 45, 46, 46, 46, 46, 47, 47, 47, 47,
+ 48, 48, 48, 48, 49, 49, 49, 49, 50, 50, 50, 50, 51, 51, 51, 51,
+ 52, 52, 52, 52, 53, 53, 53, 53, 54, 54, 54, 54, 55, 55, 55, 55,
+ 56, 56, 56, 56, 57, 57, 57, 57, 58, 58, 58, 58, 59, 59, 59, 59,
+ 60, 60, 60, 60, 61, 61, 61, 61, 62, 62, 62, 62, 63, 63, 63, 63,
+
+ /* CONTEXT_MSB6, second last byte. */
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+
/* CONTEXT_UTF8, last byte. */
/* ASCII range. */
0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 0, 0, 4, 0, 0,
@@ -130,6 +191,7 @@ static const uint8_t kContextLookup[1792] = {
2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3,
2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3,
2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3,
+
/* CONTEXT_UTF8 second last byte. */
/* ASCII range. */
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
@@ -150,23 +212,7 @@ static const uint8_t kContextLookup[1792] = {
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- /* CONTEXT_SIGNED, second last byte. */
- 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
- 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
- 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
- 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
- 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
- 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
- 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
- 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
- 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
- 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
- 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
- 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7,
+
/* CONTEXT_SIGNED, last byte, same as the above values shifted by 3 bits. */
0, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
@@ -184,68 +230,32 @@ static const uint8_t kContextLookup[1792] = {
40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 56,
- /* CONTEXT_LSB6, last byte. */
- 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
- 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
- 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
- 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
- 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
- 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
- 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
- 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
- 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
- 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
- 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
- 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
- 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
- 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
- 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
- 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
- /* CONTEXT_MSB6, last byte. */
- 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3,
- 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7,
- 8, 8, 8, 8, 9, 9, 9, 9, 10, 10, 10, 10, 11, 11, 11, 11,
- 12, 12, 12, 12, 13, 13, 13, 13, 14, 14, 14, 14, 15, 15, 15, 15,
- 16, 16, 16, 16, 17, 17, 17, 17, 18, 18, 18, 18, 19, 19, 19, 19,
- 20, 20, 20, 20, 21, 21, 21, 21, 22, 22, 22, 22, 23, 23, 23, 23,
- 24, 24, 24, 24, 25, 25, 25, 25, 26, 26, 26, 26, 27, 27, 27, 27,
- 28, 28, 28, 28, 29, 29, 29, 29, 30, 30, 30, 30, 31, 31, 31, 31,
- 32, 32, 32, 32, 33, 33, 33, 33, 34, 34, 34, 34, 35, 35, 35, 35,
- 36, 36, 36, 36, 37, 37, 37, 37, 38, 38, 38, 38, 39, 39, 39, 39,
- 40, 40, 40, 40, 41, 41, 41, 41, 42, 42, 42, 42, 43, 43, 43, 43,
- 44, 44, 44, 44, 45, 45, 45, 45, 46, 46, 46, 46, 47, 47, 47, 47,
- 48, 48, 48, 48, 49, 49, 49, 49, 50, 50, 50, 50, 51, 51, 51, 51,
- 52, 52, 52, 52, 53, 53, 53, 53, 54, 54, 54, 54, 55, 55, 55, 55,
- 56, 56, 56, 56, 57, 57, 57, 57, 58, 58, 58, 58, 59, 59, 59, 59,
- 60, 60, 60, 60, 61, 61, 61, 61, 62, 62, 62, 62, 63, 63, 63, 63,
- /* CONTEXT_{M,L}SB6, second last byte, */
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-};
-static const int kContextLookupOffsets[8] = {
- /* CONTEXT_LSB6 */
- 1024, 1536,
- /* CONTEXT_MSB6 */
- 1280, 1536,
- /* CONTEXT_UTF8 */
- 0, 256,
- /* CONTEXT_SIGNED */
- 768, 512,
+ /* CONTEXT_SIGNED, second last byte. */
+ 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
+ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
+ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
+ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
+ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+ 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7,
};
-#endif /* BROTLI_DEC_CONTEXT_H_ */
+typedef const uint8_t* ContextLut;
+
+/* typeof(MODE) == ContextType; returns ContextLut */
+#define BROTLI_CONTEXT_LUT(MODE) (&kContextLookup[(MODE) << 9])
+
+/* typeof(LUT) == ContextLut */
+#define BROTLI_CONTEXT(P1, P2, LUT) ((LUT)[P1] | ((LUT) + 256)[P2])
+
+#endif /* BROTLI_COMMON_CONTEXT_H_ */
diff --git a/c/common/dictionary.bin.br b/c/common/dictionary.bin.br
new file mode 100755
index 0000000..6a55d42
--- /dev/null
+++ b/c/common/dictionary.bin.br
Binary files differ
diff --git a/c/common/platform.h b/c/common/platform.h
index 804fd25..d6fd3ee 100755
--- a/c/common/platform.h
+++ b/c/common/platform.h
@@ -10,6 +10,7 @@
#define BROTLI_COMMON_PLATFORM_H_
#include <string.h> /* memcpy */
+#include <stdlib.h> /* malloc, free */
#include <brotli/port.h>
#include <brotli/types.h>
@@ -204,7 +205,7 @@ static BROTLI_INLINE uint64_t BrotliUnalignedRead64(const void* p) {
static BROTLI_INLINE void BrotliUnalignedWrite64(void* p, uint64_t v) {
memcpy(p, &v, sizeof v);
}
-#else /* BROTLI_ALIGNED_READ */
+#else /* BROTLI_ALIGNED_READ */
/* Unaligned memory access is allowed: just cast pointer to requested type. */
static BROTLI_INLINE uint16_t BrotliUnalignedRead16(const void* p) {
return *(const uint16_t*)p;
@@ -218,7 +219,7 @@ static BROTLI_INLINE uint64_t BrotliUnalignedRead64(const void* p) {
static BROTLI_INLINE void BrotliUnalignedWrite64(void* p, uint64_t v) {
*(uint64_t*)p = v;
}
-#endif /* BROTLI_ALIGNED_READ */
+#endif /* BROTLI_ALIGNED_READ */
#if BROTLI_LITTLE_ENDIAN
/* Straight endianness. Just read / write values. */
@@ -390,6 +391,18 @@ BROTLI_MIN_MAX(size_t) BROTLI_MIN_MAX(uint32_t) BROTLI_MIN_MAX(uint8_t)
(A)[(J)] = __brotli_swap_tmp; \
}
+/* Default brotli_alloc_func */
+static void* BrotliDefaultAllocFunc(void* opaque, size_t size) {
+ BROTLI_UNUSED(opaque);
+ return malloc(size);
+}
+
+/* Default brotli_free_func */
+static void BrotliDefaultFreeFunc(void* opaque, void* address) {
+ BROTLI_UNUSED(opaque);
+ free(address);
+}
+
BROTLI_UNUSED_FUNCTION void BrotliSuppressUnusedFunctions(void) {
BROTLI_UNUSED(BrotliSuppressUnusedFunctions);
BROTLI_UNUSED(BrotliUnalignedRead16);
@@ -413,6 +426,11 @@ BROTLI_UNUSED_FUNCTION void BrotliSuppressUnusedFunctions(void) {
BROTLI_UNUSED(brotli_max_uint32_t);
BROTLI_UNUSED(brotli_min_uint8_t);
BROTLI_UNUSED(brotli_max_uint8_t);
+ BROTLI_UNUSED(BrotliDefaultAllocFunc);
+ BROTLI_UNUSED(BrotliDefaultFreeFunc);
+#if defined(BROTLI_DEBUG) || defined(BROTLI_ENABLE_LOG)
+ BROTLI_UNUSED(BrotliDump);
+#endif
}
#endif /* BROTLI_COMMON_PLATFORM_H_ */
diff --git a/c/common/transform.c b/c/common/transform.c
new file mode 100755
index 0000000..53fe4f6
--- /dev/null
+++ b/c/common/transform.c
@@ -0,0 +1,236 @@
+/* Copyright 2013 Google Inc. All Rights Reserved.
+
+ Distributed under MIT license.
+ See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+#include "./platform.h"
+#include "./transform.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+/* RFC 7932 transforms string data */
+static const char kPrefixSuffix[217] =
+ "\1 \2, \10 of the \4 of \2s \1.\5 and \4 "
+/* 0x _0 _2 __5 _E _3 _6 _8 _E */
+ "in \1\"\4 to \2\">\1\n\2. \1]\5 for \3 a \6 "
+/* 2x _3_ _5 _A_ _D_ _F _2 _4 _A _E */
+ "that \1\'\6 with \6 from \4 by \1(\6. T"
+/* 4x _5_ _7 _E _5 _A _C */
+ "he \4 on \4 as \4 is \4ing \2\n\t\1:\3ed "
+/* 6x _3 _8 _D _2 _7_ _ _A _C */
+ "\2=\"\4 at \3ly \1,\2=\'\5.com/\7. This \5"
+/* 8x _0 _ _3 _8 _C _E _ _1 _7 _F */
+ " not \3er \3al \4ful \4ive \5less \4es"
+/* Ax _5 _9 _D _2 _7 _D */
+ "t \4ize \2\xc2\xa0\4ous \5 the \2e \0";
+/* Cx _2 _7___ ___ _A _F _5 _8 */
+
+static const uint16_t kPrefixSuffixMap[50] = {
+ 0x00, 0x02, 0x05, 0x0E, 0x13, 0x16, 0x18, 0x1E, 0x23, 0x25,
+ 0x2A, 0x2D, 0x2F, 0x32, 0x34, 0x3A, 0x3E, 0x45, 0x47, 0x4E,
+ 0x55, 0x5A, 0x5C, 0x63, 0x68, 0x6D, 0x72, 0x77, 0x7A, 0x7C,
+ 0x80, 0x83, 0x88, 0x8C, 0x8E, 0x91, 0x97, 0x9F, 0xA5, 0xA9,
+ 0xAD, 0xB2, 0xB7, 0xBD, 0xC2, 0xC7, 0xCA, 0xCF, 0xD5, 0xD8
+};
+
+/* RFC 7932 transforms */
+static const uint8_t kTransformsData[] = {
+ 49, BROTLI_TRANSFORM_IDENTITY, 49,
+ 49, BROTLI_TRANSFORM_IDENTITY, 0,
+ 0, BROTLI_TRANSFORM_IDENTITY, 0,
+ 49, BROTLI_TRANSFORM_OMIT_FIRST_1, 49,
+ 49, BROTLI_TRANSFORM_UPPERCASE_FIRST, 0,
+ 49, BROTLI_TRANSFORM_IDENTITY, 47,
+ 0, BROTLI_TRANSFORM_IDENTITY, 49,
+ 4, BROTLI_TRANSFORM_IDENTITY, 0,
+ 49, BROTLI_TRANSFORM_IDENTITY, 3,
+ 49, BROTLI_TRANSFORM_UPPERCASE_FIRST, 49,
+ 49, BROTLI_TRANSFORM_IDENTITY, 6,
+ 49, BROTLI_TRANSFORM_OMIT_FIRST_2, 49,
+ 49, BROTLI_TRANSFORM_OMIT_LAST_1, 49,
+ 1, BROTLI_TRANSFORM_IDENTITY, 0,
+ 49, BROTLI_TRANSFORM_IDENTITY, 1,
+ 0, BROTLI_TRANSFORM_UPPERCASE_FIRST, 0,
+ 49, BROTLI_TRANSFORM_IDENTITY, 7,
+ 49, BROTLI_TRANSFORM_IDENTITY, 9,
+ 48, BROTLI_TRANSFORM_IDENTITY, 0,
+ 49, BROTLI_TRANSFORM_IDENTITY, 8,
+ 49, BROTLI_TRANSFORM_IDENTITY, 5,
+ 49, BROTLI_TRANSFORM_IDENTITY, 10,
+ 49, BROTLI_TRANSFORM_IDENTITY, 11,
+ 49, BROTLI_TRANSFORM_OMIT_LAST_3, 49,
+ 49, BROTLI_TRANSFORM_IDENTITY, 13,
+ 49, BROTLI_TRANSFORM_IDENTITY, 14,
+ 49, BROTLI_TRANSFORM_OMIT_FIRST_3, 49,
+ 49, BROTLI_TRANSFORM_OMIT_LAST_2, 49,
+ 49, BROTLI_TRANSFORM_IDENTITY, 15,
+ 49, BROTLI_TRANSFORM_IDENTITY, 16,
+ 0, BROTLI_TRANSFORM_UPPERCASE_FIRST, 49,
+ 49, BROTLI_TRANSFORM_IDENTITY, 12,
+ 5, BROTLI_TRANSFORM_IDENTITY, 49,
+ 0, BROTLI_TRANSFORM_IDENTITY, 1,
+ 49, BROTLI_TRANSFORM_OMIT_FIRST_4, 49,
+ 49, BROTLI_TRANSFORM_IDENTITY, 18,
+ 49, BROTLI_TRANSFORM_IDENTITY, 17,
+ 49, BROTLI_TRANSFORM_IDENTITY, 19,
+ 49, BROTLI_TRANSFORM_IDENTITY, 20,
+ 49, BROTLI_TRANSFORM_OMIT_FIRST_5, 49,
+ 49, BROTLI_TRANSFORM_OMIT_FIRST_6, 49,
+ 47, BROTLI_TRANSFORM_IDENTITY, 49,
+ 49, BROTLI_TRANSFORM_OMIT_LAST_4, 49,
+ 49, BROTLI_TRANSFORM_IDENTITY, 22,
+ 49, BROTLI_TRANSFORM_UPPERCASE_ALL, 49,
+ 49, BROTLI_TRANSFORM_IDENTITY, 23,
+ 49, BROTLI_TRANSFORM_IDENTITY, 24,
+ 49, BROTLI_TRANSFORM_IDENTITY, 25,
+ 49, BROTLI_TRANSFORM_OMIT_LAST_7, 49,
+ 49, BROTLI_TRANSFORM_OMIT_LAST_1, 26,
+ 49, BROTLI_TRANSFORM_IDENTITY, 27,
+ 49, BROTLI_TRANSFORM_IDENTITY, 28,
+ 0, BROTLI_TRANSFORM_IDENTITY, 12,
+ 49, BROTLI_TRANSFORM_IDENTITY, 29,
+ 49, BROTLI_TRANSFORM_OMIT_FIRST_9, 49,
+ 49, BROTLI_TRANSFORM_OMIT_FIRST_7, 49,
+ 49, BROTLI_TRANSFORM_OMIT_LAST_6, 49,
+ 49, BROTLI_TRANSFORM_IDENTITY, 21,
+ 49, BROTLI_TRANSFORM_UPPERCASE_FIRST, 1,
+ 49, BROTLI_TRANSFORM_OMIT_LAST_8, 49,
+ 49, BROTLI_TRANSFORM_IDENTITY, 31,
+ 49, BROTLI_TRANSFORM_IDENTITY, 32,
+ 47, BROTLI_TRANSFORM_IDENTITY, 3,
+ 49, BROTLI_TRANSFORM_OMIT_LAST_5, 49,
+ 49, BROTLI_TRANSFORM_OMIT_LAST_9, 49,
+ 0, BROTLI_TRANSFORM_UPPERCASE_FIRST, 1,
+ 49, BROTLI_TRANSFORM_UPPERCASE_FIRST, 8,
+ 5, BROTLI_TRANSFORM_IDENTITY, 21,
+ 49, BROTLI_TRANSFORM_UPPERCASE_ALL, 0,
+ 49, BROTLI_TRANSFORM_UPPERCASE_FIRST, 10,
+ 49, BROTLI_TRANSFORM_IDENTITY, 30,
+ 0, BROTLI_TRANSFORM_IDENTITY, 5,
+ 35, BROTLI_TRANSFORM_IDENTITY, 49,
+ 47, BROTLI_TRANSFORM_IDENTITY, 2,
+ 49, BROTLI_TRANSFORM_UPPERCASE_FIRST, 17,
+ 49, BROTLI_TRANSFORM_IDENTITY, 36,
+ 49, BROTLI_TRANSFORM_IDENTITY, 33,
+ 5, BROTLI_TRANSFORM_IDENTITY, 0,
+ 49, BROTLI_TRANSFORM_UPPERCASE_FIRST, 21,
+ 49, BROTLI_TRANSFORM_UPPERCASE_FIRST, 5,
+ 49, BROTLI_TRANSFORM_IDENTITY, 37,
+ 0, BROTLI_TRANSFORM_IDENTITY, 30,
+ 49, BROTLI_TRANSFORM_IDENTITY, 38,
+ 0, BROTLI_TRANSFORM_UPPERCASE_ALL, 0,
+ 49, BROTLI_TRANSFORM_IDENTITY, 39,
+ 0, BROTLI_TRANSFORM_UPPERCASE_ALL, 49,
+ 49, BROTLI_TRANSFORM_IDENTITY, 34,
+ 49, BROTLI_TRANSFORM_UPPERCASE_ALL, 8,
+ 49, BROTLI_TRANSFORM_UPPERCASE_FIRST, 12,
+ 0, BROTLI_TRANSFORM_IDENTITY, 21,
+ 49, BROTLI_TRANSFORM_IDENTITY, 40,
+ 0, BROTLI_TRANSFORM_UPPERCASE_FIRST, 12,
+ 49, BROTLI_TRANSFORM_IDENTITY, 41,
+ 49, BROTLI_TRANSFORM_IDENTITY, 42,
+ 49, BROTLI_TRANSFORM_UPPERCASE_ALL, 17,
+ 49, BROTLI_TRANSFORM_IDENTITY, 43,
+ 0, BROTLI_TRANSFORM_UPPERCASE_FIRST, 5,
+ 49, BROTLI_TRANSFORM_UPPERCASE_ALL, 10,
+ 0, BROTLI_TRANSFORM_IDENTITY, 34,
+ 49, BROTLI_TRANSFORM_UPPERCASE_FIRST, 33,
+ 49, BROTLI_TRANSFORM_IDENTITY, 44,
+ 49, BROTLI_TRANSFORM_UPPERCASE_ALL, 5,
+ 45, BROTLI_TRANSFORM_IDENTITY, 49,
+ 0, BROTLI_TRANSFORM_IDENTITY, 33,
+ 49, BROTLI_TRANSFORM_UPPERCASE_FIRST, 30,
+ 49, BROTLI_TRANSFORM_UPPERCASE_ALL, 30,
+ 49, BROTLI_TRANSFORM_IDENTITY, 46,
+ 49, BROTLI_TRANSFORM_UPPERCASE_ALL, 1,
+ 49, BROTLI_TRANSFORM_UPPERCASE_FIRST, 34,
+ 0, BROTLI_TRANSFORM_UPPERCASE_FIRST, 33,
+ 0, BROTLI_TRANSFORM_UPPERCASE_ALL, 30,
+ 0, BROTLI_TRANSFORM_UPPERCASE_ALL, 1,
+ 49, BROTLI_TRANSFORM_UPPERCASE_ALL, 33,
+ 49, BROTLI_TRANSFORM_UPPERCASE_ALL, 21,
+ 49, BROTLI_TRANSFORM_UPPERCASE_ALL, 12,
+ 0, BROTLI_TRANSFORM_UPPERCASE_ALL, 5,
+ 49, BROTLI_TRANSFORM_UPPERCASE_ALL, 34,
+ 0, BROTLI_TRANSFORM_UPPERCASE_ALL, 12,
+ 0, BROTLI_TRANSFORM_UPPERCASE_FIRST, 30,
+ 0, BROTLI_TRANSFORM_UPPERCASE_ALL, 34,
+ 0, BROTLI_TRANSFORM_UPPERCASE_FIRST, 34,
+};
+
+static BrotliTransforms kBrotliTransforms = {
+ sizeof(kPrefixSuffix),
+ (const uint8_t*)kPrefixSuffix,
+ kPrefixSuffixMap,
+ sizeof(kTransformsData) / (3 * sizeof(kTransformsData[0])),
+ kTransformsData,
+ {0, 12, 27, 23, 42, 63, 56, 48, 59, 64}
+};
+
+const BrotliTransforms* BrotliGetTransforms(void) {
+ return &kBrotliTransforms;
+}
+
+static int ToUpperCase(uint8_t* p) {
+ if (p[0] < 0xC0) {
+ if (p[0] >= 'a' && p[0] <= 'z') {
+ p[0] ^= 32;
+ }
+ return 1;
+ }
+ /* An overly simplified uppercasing model for UTF-8. */
+ if (p[0] < 0xE0) {
+ p[1] ^= 32;
+ return 2;
+ }
+ /* An arbitrary transform for three byte characters. */
+ p[2] ^= 5;
+ return 3;
+}
+
+int BrotliTransformDictionaryWord(uint8_t* dst, const uint8_t* word, int len,
+ const BrotliTransforms* BROTLI_RESTRICT transforms, int transfom_idx) {
+ int idx = 0;
+ const uint8_t* prefix = BROTLI_TRANSFORM_PREFIX(transforms, transfom_idx);
+ uint8_t type = BROTLI_TRANSFORM_TYPE(transforms, transfom_idx);
+ const uint8_t* suffix = BROTLI_TRANSFORM_SUFFIX(transforms, transfom_idx);
+ {
+ int prefix_len = *prefix++;
+ while (prefix_len--) { dst[idx++] = *prefix++; }
+ }
+ {
+ const int t = type;
+ int i = 0;
+ if (t <= BROTLI_TRANSFORM_OMIT_LAST_9) {
+ len -= t;
+ } else if (t >= BROTLI_TRANSFORM_OMIT_FIRST_1
+ && t <= BROTLI_TRANSFORM_OMIT_FIRST_9) {
+ int skip = t - (BROTLI_TRANSFORM_OMIT_FIRST_1 - 1);
+ word += skip;
+ len -= skip;
+ }
+ while (i < len) { dst[idx++] = word[i++]; }
+ if (t == BROTLI_TRANSFORM_UPPERCASE_FIRST) {
+ ToUpperCase(&dst[idx - len]);
+ } else if (t == BROTLI_TRANSFORM_UPPERCASE_ALL) {
+ uint8_t* uppercase = &dst[idx - len];
+ while (len > 0) {
+ int step = ToUpperCase(uppercase);
+ uppercase += step;
+ len -= step;
+ }
+ }
+ }
+ {
+ int suffix_len = *suffix++;
+ while (suffix_len--) { dst[idx++] = *suffix++; }
+ return idx;
+ }
+}
+
+#if defined(__cplusplus) || defined(c_plusplus)
+} /* extern "C" */
+#endif
diff --git a/c/common/transform.h b/c/common/transform.h
new file mode 100755
index 0000000..b279c04
--- /dev/null
+++ b/c/common/transform.h
@@ -0,0 +1,80 @@
+/* transforms is a part of ABI, but not API.
+
+ It means that there are some functions that are supposed to be in "common"
+ library, but header itself is not placed into include/brotli. This way,
+ aforementioned functions will be available only to brotli internals.
+ */
+
+#ifndef BROTLI_COMMON_TRANSFORM_H_
+#define BROTLI_COMMON_TRANSFORM_H_
+
+#include <brotli/port.h>
+#include <brotli/types.h>
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+enum BrotliWordTransformType {
+ BROTLI_TRANSFORM_IDENTITY = 0,
+ BROTLI_TRANSFORM_OMIT_LAST_1 = 1,
+ BROTLI_TRANSFORM_OMIT_LAST_2 = 2,
+ BROTLI_TRANSFORM_OMIT_LAST_3 = 3,
+ BROTLI_TRANSFORM_OMIT_LAST_4 = 4,
+ BROTLI_TRANSFORM_OMIT_LAST_5 = 5,
+ BROTLI_TRANSFORM_OMIT_LAST_6 = 6,
+ BROTLI_TRANSFORM_OMIT_LAST_7 = 7,
+ BROTLI_TRANSFORM_OMIT_LAST_8 = 8,
+ BROTLI_TRANSFORM_OMIT_LAST_9 = 9,
+ BROTLI_TRANSFORM_UPPERCASE_FIRST = 10,
+ BROTLI_TRANSFORM_UPPERCASE_ALL = 11,
+ BROTLI_TRANSFORM_OMIT_FIRST_1 = 12,
+ BROTLI_TRANSFORM_OMIT_FIRST_2 = 13,
+ BROTLI_TRANSFORM_OMIT_FIRST_3 = 14,
+ BROTLI_TRANSFORM_OMIT_FIRST_4 = 15,
+ BROTLI_TRANSFORM_OMIT_FIRST_5 = 16,
+ BROTLI_TRANSFORM_OMIT_FIRST_6 = 17,
+ BROTLI_TRANSFORM_OMIT_FIRST_7 = 18,
+ BROTLI_TRANSFORM_OMIT_FIRST_8 = 19,
+ BROTLI_TRANSFORM_OMIT_FIRST_9 = 20,
+ BROTLI_NUM_TRANSFORM_TYPES /* Counts transforms, not a transform itself. */
+};
+
+#define BROTLI_TRANSFORMS_MAX_CUT_OFF BROTLI_TRANSFORM_OMIT_LAST_9
+
+typedef struct BrotliTransforms {
+ uint16_t prefix_suffix_size;
+ /* Last character must be null, so prefix_suffix_size must be at least 1. */
+ const uint8_t* prefix_suffix;
+ const uint16_t* prefix_suffix_map;
+ uint32_t num_transforms;
+ /* Each entry is a [prefix_id, transform, suffix_id] triplet. */
+ const uint8_t* transforms;
+ /* Indices of transforms like ["", BROTLI_TRANSFORM_OMIT_LAST_#, ""].
+ 0-th element corresponds to ["", BROTLI_TRANSFORM_IDENTITY, ""].
+ -1, if cut-off transform does not exist. */
+ int16_t cutOffTransforms[BROTLI_TRANSFORMS_MAX_CUT_OFF + 1];
+} BrotliTransforms;
+
+/* T is BrotliTransforms*; result is uint8_t. */
+#define BROTLI_TRANSFORM_PREFIX_ID(T, I) ((T)->transforms[((I) * 3) + 0])
+#define BROTLI_TRANSFORM_TYPE(T, I) ((T)->transforms[((I) * 3) + 1])
+#define BROTLI_TRANSFORM_SUFFIX_ID(T, I) ((T)->transforms[((I) * 3) + 2])
+
+/* T is BrotliTransforms*; result is const uint8_t*. */
+#define BROTLI_TRANSFORM_PREFIX(T, I) (&(T)->prefix_suffix[ \
+ (T)->prefix_suffix_map[BROTLI_TRANSFORM_PREFIX_ID(T, I)]])
+#define BROTLI_TRANSFORM_SUFFIX(T, I) (&(T)->prefix_suffix[ \
+ (T)->prefix_suffix_map[BROTLI_TRANSFORM_SUFFIX_ID(T, I)]])
+
+BROTLI_COMMON_API const BrotliTransforms* BrotliGetTransforms(void);
+
+BROTLI_COMMON_API int BrotliTransformDictionaryWord(
+ uint8_t* dst, const uint8_t* word, int len,
+ const BrotliTransforms* transforms, int transform_idx);
+
+#if defined(__cplusplus) || defined(c_plusplus)
+} /* extern "C" */
+#endif
+
+#endif /* BROTLI_COMMON_TRANSFORM_H_ */
diff --git a/c/dec/bit_reader.h b/c/dec/bit_reader.h
index 21c59d7..39e4873 100644
--- a/c/dec/bit_reader.h
+++ b/c/dec/bit_reader.h
@@ -20,7 +20,7 @@ extern "C" {
#define BROTLI_SHORT_FILL_BIT_WINDOW_READ (sizeof(brotli_reg_t) >> 1)
-static const uint32_t kBitMask[33] = { 0x0000,
+static const uint32_t kBitMask[33] = { 0x00000000,
0x00000001, 0x00000003, 0x00000007, 0x0000000F,
0x0000001F, 0x0000003F, 0x0000007F, 0x000000FF,
0x000001FF, 0x000003FF, 0x000007FF, 0x00000FFF,
@@ -35,7 +35,7 @@ static BROTLI_INLINE uint32_t BitMask(uint32_t n) {
if (BROTLI_IS_CONSTANT(n) || BROTLI_HAS_UBFX) {
/* Masking with this expression turns to a single
"Unsigned Bit Field Extract" UBFX instruction on ARM. */
- return ~((0xffffffffU) << n);
+ return ~((0xFFFFFFFFu) << n);
} else {
return kBitMask[n];
}
@@ -58,8 +58,9 @@ typedef struct {
/* Initializes the BrotliBitReader fields. */
BROTLI_INTERNAL void BrotliInitBitReader(BrotliBitReader* const br);
-/* Ensures that accumulator is not empty. May consume one byte of input.
- Returns 0 if data is required but there is no input available.
+/* Ensures that accumulator is not empty.
+ May consume up to sizeof(brotli_reg_t) - 1 bytes of input.
+ Returns BROTLI_FALSE if data is required but there is no input available.
For BROTLI_ALIGNED_READ this function also prepares bit reader for aligned
reading. */
BROTLI_INTERNAL BROTLI_BOOL BrotliWarmupBitReader(BrotliBitReader* const br);
@@ -98,9 +99,9 @@ static BROTLI_INLINE BROTLI_BOOL BrotliCheckInputAmount(
return TO_BROTLI_BOOL(br->avail_in >= num);
}
-/* Guarantees that there are at least n_bits + 1 bits in accumulator.
+/* Guarantees that there are at least |n_bits| + 1 bits in accumulator.
Precondition: accumulator contains at least 1 bit.
- n_bits should be in the range [1..24] for regular build. For portable
+ |n_bits| should be in the range [1..24] for regular build. For portable
non-64-bit little-endian build only 16 bits are safe to request. */
static BROTLI_INLINE void BrotliFillBitWindow(
BrotliBitReader* const br, uint32_t n_bits) {
@@ -158,7 +159,8 @@ static BROTLI_INLINE void BrotliFillBitWindow16(BrotliBitReader* const br) {
BrotliFillBitWindow(br, 17);
}
-/* Pulls one byte of input to accumulator. */
+/* Tries to pull one byte of input to accumulator.
+ Returns BROTLI_FALSE if there is no input available. */
static BROTLI_INLINE BROTLI_BOOL BrotliPullByte(BrotliBitReader* const br) {
if (br->avail_in == 0) {
return BROTLI_FALSE;
@@ -190,15 +192,16 @@ static BROTLI_INLINE uint32_t BrotliGet16BitsUnmasked(
return (uint32_t)BrotliGetBitsUnmasked(br);
}
-/* Returns the specified number of bits from |br| without advancing bit pos. */
+/* Returns the specified number of bits from |br| without advancing bit
+ position. */
static BROTLI_INLINE uint32_t BrotliGetBits(
BrotliBitReader* const br, uint32_t n_bits) {
BrotliFillBitWindow(br, n_bits);
return (uint32_t)BrotliGetBitsUnmasked(br) & BitMask(n_bits);
}
-/* Tries to peek the specified amount of bits. Returns 0, if there is not
- enough input. */
+/* Tries to peek the specified amount of bits. Returns BROTLI_FALSE, if there
+ is not enough input. */
static BROTLI_INLINE BROTLI_BOOL BrotliSafeGetBits(
BrotliBitReader* const br, uint32_t n_bits, uint32_t* val) {
while (BrotliGetAvailableBits(br) < n_bits) {
@@ -210,7 +213,7 @@ static BROTLI_INLINE BROTLI_BOOL BrotliSafeGetBits(
return BROTLI_TRUE;
}
-/* Advances the bit pos by n_bits. */
+/* Advances the bit pos by |n_bits|. */
static BROTLI_INLINE void BrotliDropBits(
BrotliBitReader* const br, uint32_t n_bits) {
br->bit_pos_ += n_bits;
@@ -230,7 +233,7 @@ static BROTLI_INLINE void BrotliBitReaderUnload(BrotliBitReader* br) {
}
/* Reads the specified number of bits from |br| and advances the bit pos.
- Precondition: accumulator MUST contain at least n_bits. */
+ Precondition: accumulator MUST contain at least |n_bits|. */
static BROTLI_INLINE void BrotliTakeBits(
BrotliBitReader* const br, uint32_t n_bits, uint32_t* val) {
*val = (uint32_t)BrotliGetBitsUnmasked(br) & BitMask(n_bits);
@@ -259,8 +262,8 @@ static BROTLI_INLINE uint32_t BrotliReadBits(
}
}
-/* Tries to read the specified amount of bits. Returns 0, if there is not
- enough input. n_bits MUST be positive. */
+/* Tries to read the specified amount of bits. Returns BROTLI_FALSE, if there
+ is not enough input. |n_bits| MUST be positive. */
static BROTLI_INLINE BROTLI_BOOL BrotliSafeReadBits(
BrotliBitReader* const br, uint32_t n_bits, uint32_t* val) {
while (BrotliGetAvailableBits(br) < n_bits) {
@@ -284,7 +287,7 @@ static BROTLI_INLINE BROTLI_BOOL BrotliJumpToByteBoundary(BrotliBitReader* br) {
}
/* Copies remaining input bytes stored in the bit reader to the output. Value
- num may not be larger than BrotliGetRemainingBytes. The bit reader must be
+ |num| may not be larger than BrotliGetRemainingBytes. The bit reader must be
warmed up again after this. */
static BROTLI_INLINE void BrotliCopyBytes(uint8_t* dest,
BrotliBitReader* br, size_t num) {
diff --git a/c/dec/decode.c b/c/dec/decode.c
index 846557a..630edeb 100644
--- a/c/dec/decode.c
+++ b/c/dec/decode.c
@@ -14,15 +14,15 @@
#include <string.h> /* memcpy, memset */
#include "../common/constants.h"
+#include "../common/context.h"
#include "../common/dictionary.h"
#include "../common/platform.h"
+#include "../common/transform.h"
#include "../common/version.h"
#include "./bit_reader.h"
-#include "./context.h"
#include "./huffman.h"
#include "./prefix.h"
#include "./state.h"
-#include "./transform.h"
#if defined(__cplusplus) || defined(c_plusplus)
extern "C" {
@@ -37,7 +37,7 @@ extern "C" {
(unsigned long)(idx), (unsigned long)array_name[idx]))
#define HUFFMAN_TABLE_BITS 8U
-#define HUFFMAN_TABLE_MASK 0xff
+#define HUFFMAN_TABLE_MASK 0xFF
/* We need the slack region for the following reasons:
- doing up to two 16-byte copies for fast backward copying
@@ -59,11 +59,16 @@ static const uint8_t kCodeLengthPrefixValue[16] = {
BROTLI_BOOL BrotliDecoderSetParameter(
BrotliDecoderState* state, BrotliDecoderParameter p, uint32_t value) {
+ if (state->state != BROTLI_STATE_UNINITED) return BROTLI_FALSE;
switch (p) {
case BROTLI_DECODER_PARAM_DISABLE_RING_BUFFER_REALLOCATION:
state->canny_ringbuffer_allocation = !!value ? 0 : 1;
return BROTLI_TRUE;
+ case BROTLI_DECODER_PARAM_LARGE_WINDOW:
+ state->large_window = TO_BROTLI_BOOL(!!value);
+ return BROTLI_TRUE;
+
default: return BROTLI_FALSE;
}
}
@@ -80,8 +85,15 @@ BrotliDecoderState* BrotliDecoderCreateInstance(
BROTLI_DUMP();
return 0;
}
- BrotliDecoderStateInitWithCustomAllocators(
- state, alloc_func, free_func, opaque);
+ if (!BrotliDecoderStateInit(state, alloc_func, free_func, opaque)) {
+ BROTLI_DUMP();
+ if (!alloc_func && !free_func) {
+ free(state);
+ } else if (alloc_func && free_func) {
+ free_func(opaque, state);
+ }
+ return 0;
+ }
return state;
}
@@ -97,39 +109,61 @@ void BrotliDecoderDestroyInstance(BrotliDecoderState* state) {
}
}
-/* Saves error code and converts it to BrotliDecoderResult */
+/* Saves error code and converts it to BrotliDecoderResult. */
static BROTLI_NOINLINE BrotliDecoderResult SaveErrorCode(
BrotliDecoderState* s, BrotliDecoderErrorCode e) {
s->error_code = (int)e;
switch (e) {
case BROTLI_DECODER_SUCCESS:
return BROTLI_DECODER_RESULT_SUCCESS;
+
case BROTLI_DECODER_NEEDS_MORE_INPUT:
return BROTLI_DECODER_RESULT_NEEDS_MORE_INPUT;
+
case BROTLI_DECODER_NEEDS_MORE_OUTPUT:
return BROTLI_DECODER_RESULT_NEEDS_MORE_OUTPUT;
+
default:
return BROTLI_DECODER_RESULT_ERROR;
}
}
-/* Decodes a number in the range [9..24], by reading 1 - 7 bits.
- Precondition: bit-reader accumulator has at least 7 bits. */
-static uint32_t DecodeWindowBits(BrotliBitReader* br) {
+/* Decodes WBITS by reading 1 - 7 bits, or 0x11 for "Large Window Brotli".
+ Precondition: bit-reader accumulator has at least 8 bits. */
+static BrotliDecoderErrorCode DecodeWindowBits(BrotliDecoderState* s,
+ BrotliBitReader* br) {
uint32_t n;
+ BROTLI_BOOL large_window = s->large_window;
+ s->large_window = BROTLI_FALSE;
BrotliTakeBits(br, 1, &n);
if (n == 0) {
- return 16;
+ s->window_bits = 16;
+ return BROTLI_DECODER_SUCCESS;
}
BrotliTakeBits(br, 3, &n);
if (n != 0) {
- return 17 + n;
+ s->window_bits = 17 + n;
+ return BROTLI_DECODER_SUCCESS;
}
BrotliTakeBits(br, 3, &n);
+ if (n == 1) {
+ if (large_window) {
+ BrotliTakeBits(br, 1, &n);
+ if (n == 1) {
+ return BROTLI_FAILURE(BROTLI_DECODER_ERROR_FORMAT_WINDOW_BITS);
+ }
+ s->large_window = BROTLI_TRUE;
+ return BROTLI_DECODER_SUCCESS;
+ } else {
+ return BROTLI_FAILURE(BROTLI_DECODER_ERROR_FORMAT_WINDOW_BITS);
+ }
+ }
if (n != 0) {
- return 8 + n;
+ s->window_bits = 8 + n;
+ return BROTLI_DECODER_SUCCESS;
}
- return 17;
+ s->window_bits = 17;
+ return BROTLI_DECODER_SUCCESS;
}
static BROTLI_INLINE void memmove16(uint8_t* dst, uint8_t* src) {
@@ -342,7 +376,7 @@ static BROTLI_NOINLINE BROTLI_BOOL SafeDecodeSymbol(
*result = table->value;
return BROTLI_TRUE;
}
- return BROTLI_FALSE; /* No valid bits at all. */
+ return BROTLI_FALSE; /* No valid bits at all. */
}
val = (uint32_t)BrotliGetBitsUnmasked(br);
table += val & HUFFMAN_TABLE_MASK;
@@ -352,11 +386,11 @@ static BROTLI_NOINLINE BROTLI_BOOL SafeDecodeSymbol(
*result = table->value;
return BROTLI_TRUE;
} else {
- return BROTLI_FALSE; /* Not enough bits for the first level. */
+ return BROTLI_FALSE; /* Not enough bits for the first level. */
}
}
if (available_bits <= HUFFMAN_TABLE_BITS) {
- return BROTLI_FALSE; /* Not enough bits to move to the second level. */
+ return BROTLI_FALSE; /* Not enough bits to move to the second level. */
}
/* Speculatively drop HUFFMAN_TABLE_BITS. */
@@ -364,7 +398,7 @@ static BROTLI_NOINLINE BROTLI_BOOL SafeDecodeSymbol(
available_bits -= HUFFMAN_TABLE_BITS;
table += table->value + val;
if (available_bits < table->bits) {
- return BROTLI_FALSE; /* Not enough bits for the second level. */
+ return BROTLI_FALSE; /* Not enough bits for the second level. */
}
BrotliDropBits(br, HUFFMAN_TABLE_BITS + table->bits);
@@ -428,12 +462,11 @@ static BROTLI_INLINE uint32_t Log2Floor(uint32_t x) {
}
/* Reads (s->symbol + 1) symbols.
- Totally 1..4 symbols are read, 1..10 bits each.
- The list of symbols MUST NOT contain duplicates.
- */
+ In total, 1..4 symbols are read, 1..11 bits each.
+ The list of symbols MUST NOT contain duplicates. */
static BrotliDecoderErrorCode ReadSimpleHuffmanSymbols(
- uint32_t alphabet_size, BrotliDecoderState* s) {
- /* max_bits == 1..10; symbol == 0..3; 1..40 bits will be read. */
+ uint32_t alphabet_size, uint32_t max_symbol, BrotliDecoderState* s) {
+ /* max_bits == 1..11; symbol == 0..3; 1..44 bits will be read. */
BrotliBitReader* br = &s->br;
uint32_t max_bits = Log2Floor(alphabet_size - 1);
uint32_t i = s->sub_loop_counter;
@@ -445,7 +478,7 @@ static BrotliDecoderErrorCode ReadSimpleHuffmanSymbols(
s->substate_huffman = BROTLI_STATE_HUFFMAN_SIMPLE_READ;
return BROTLI_DECODER_NEEDS_MORE_INPUT;
}
- if (v >= alphabet_size) {
+ if (v >= max_symbol) {
return
BROTLI_FAILURE(BROTLI_DECODER_ERROR_FORMAT_SIMPLE_HUFFMAN_ALPHABET);
}
@@ -471,14 +504,13 @@ static BrotliDecoderErrorCode ReadSimpleHuffmanSymbols(
B) remember code length (if it is not 0)
C) extend corresponding index-chain
D) reduce the Huffman space
- E) update the histogram
- */
+ E) update the histogram */
static BROTLI_INLINE void ProcessSingleCodeLength(uint32_t code_len,
uint32_t* symbol, uint32_t* repeat, uint32_t* space,
uint32_t* prev_code_len, uint16_t* symbol_lists,
uint16_t* code_length_histo, int* next_symbol) {
*repeat = 0;
- if (code_len != 0) { /* code_len == 1..15 */
+ if (code_len != 0) { /* code_len == 1..15 */
symbol_lists[next_symbol[code_len]] = (uint16_t)(*symbol);
next_symbol[code_len] = (int)(*symbol);
*prev_code_len = code_len;
@@ -498,8 +530,7 @@ static BROTLI_INLINE void ProcessSingleCodeLength(uint32_t code_len,
D) For each symbol do the same operations as in ProcessSingleCodeLength
PRECONDITION: code_len == BROTLI_REPEAT_PREVIOUS_CODE_LENGTH or
- code_len == BROTLI_REPEAT_ZERO_CODE_LENGTH
- */
+ code_len == BROTLI_REPEAT_ZERO_CODE_LENGTH */
static BROTLI_INLINE void ProcessRepeatedCodeLength(uint32_t code_len,
uint32_t repeat_delta, uint32_t alphabet_size, uint32_t* symbol,
uint32_t* repeat, uint32_t* space, uint32_t* prev_code_len,
@@ -576,12 +607,12 @@ static BrotliDecoderErrorCode ReadSymbolCodeLengths(
BrotliFillBitWindow16(br);
p += BrotliGetBitsUnmasked(br) &
BitMask(BROTLI_HUFFMAN_MAX_CODE_LENGTH_CODE_LENGTH);
- BrotliDropBits(br, p->bits); /* Use 1..5 bits */
+ BrotliDropBits(br, p->bits); /* Use 1..5 bits. */
code_len = p->value; /* code_len == 0..17 */
if (code_len < BROTLI_REPEAT_PREVIOUS_CODE_LENGTH) {
ProcessSingleCodeLength(code_len, &symbol, &repeat, &space,
&prev_code_len, symbol_lists, code_length_histo, next_symbol);
- } else { /* code_len == 16..17, extra_bits == 2..3 */
+ } else { /* code_len == 16..17, extra_bits == 2..3 */
uint32_t extra_bits =
(code_len == BROTLI_REPEAT_PREVIOUS_CODE_LENGTH) ? 2 : 3;
uint32_t repeat_delta =
@@ -616,13 +647,13 @@ static BrotliDecoderErrorCode SafeReadSymbolCodeLengths(
get_byte = BROTLI_TRUE;
continue;
}
- code_len = p->value; /* code_len == 0..17 */
+ code_len = p->value; /* code_len == 0..17 */
if (code_len < BROTLI_REPEAT_PREVIOUS_CODE_LENGTH) {
BrotliDropBits(br, p->bits);
ProcessSingleCodeLength(code_len, &s->symbol, &s->repeat, &s->space,
&s->prev_code_len, s->symbol_lists, s->code_length_histo,
s->next_symbol);
- } else { /* code_len == 16..17, extra_bits == 2..3 */
+ } else { /* code_len == 16..17, extra_bits == 2..3 */
uint32_t extra_bits = code_len - 14U;
uint32_t repeat_delta = (bits >> p->bits) & BitMask(extra_bits);
if (available_bits < p->bits + extra_bits) {
@@ -674,7 +705,7 @@ static BrotliDecoderErrorCode ReadCodeLengthCodeLengths(BrotliDecoderState* s) {
++num_codes;
++s->code_length_histo[v];
if (space - 1U >= 32U) {
- /* space is 0 or wrapped around */
+ /* space is 0 or wrapped around. */
break;
}
}
@@ -689,22 +720,22 @@ static BrotliDecoderErrorCode ReadCodeLengthCodeLengths(BrotliDecoderState* s) {
There are 2 scenarios:
A) Huffman code contains only few symbols (1..4). Those symbols are read
directly; their code lengths are defined by the number of symbols.
- For this scenario 4 - 45 bits will be read.
+ For this scenario 4 - 49 bits will be read.
B) 2-phase decoding:
B.1) Small Huffman table is decoded; it is specified with code lengths
encoded with predefined entropy code. 32 - 74 bits are used.
B.2) Decoded table is used to decode code lengths of symbols in resulting
- Huffman table. In worst case 3520 bits are read.
-*/
+ Huffman table. In worst case 3520 bits are read. */
static BrotliDecoderErrorCode ReadHuffmanCode(uint32_t alphabet_size,
+ uint32_t max_symbol,
HuffmanCode* table,
uint32_t* opt_table_size,
BrotliDecoderState* s) {
BrotliBitReader* br = &s->br;
/* Unnecessary masking, but might be good for safety. */
- alphabet_size &= 0x3ff;
- /* State machine */
+ alphabet_size &= 0x7FF;
+ /* State machine. */
for (;;) {
switch (s->substate_huffman) {
case BROTLI_STATE_HUFFMAN_NONE:
@@ -717,7 +748,7 @@ static BrotliDecoderErrorCode ReadHuffmanCode(uint32_t alphabet_size,
0 for no skipping, 2 skips 2 code lengths, 3 skips 3 code lengths */
if (s->sub_loop_counter != 1) {
s->space = 32;
- s->repeat = 0; /* num_codes */
+ s->repeat = 0; /* num_codes */
memset(&s->code_length_histo[0], 0, sizeof(s->code_length_histo[0]) *
(BROTLI_HUFFMAN_MAX_CODE_LENGTH_CODE_LENGTH + 1));
memset(&s->code_length_code_lengths[0], 0,
@@ -729,20 +760,22 @@ static BrotliDecoderErrorCode ReadHuffmanCode(uint32_t alphabet_size,
case BROTLI_STATE_HUFFMAN_SIMPLE_SIZE:
/* Read symbols, codes & code lengths directly. */
- if (!BrotliSafeReadBits(br, 2, &s->symbol)) { /* num_symbols */
+ if (!BrotliSafeReadBits(br, 2, &s->symbol)) { /* num_symbols */
s->substate_huffman = BROTLI_STATE_HUFFMAN_SIMPLE_SIZE;
return BROTLI_DECODER_NEEDS_MORE_INPUT;
}
s->sub_loop_counter = 0;
/* No break, transit to the next state. */
+
case BROTLI_STATE_HUFFMAN_SIMPLE_READ: {
BrotliDecoderErrorCode result =
- ReadSimpleHuffmanSymbols(alphabet_size, s);
+ ReadSimpleHuffmanSymbols(alphabet_size, max_symbol, s);
if (result != BROTLI_DECODER_SUCCESS) {
return result;
}
/* No break, transit to the next state. */
}
+
case BROTLI_STATE_HUFFMAN_SIMPLE_BUILD: {
uint32_t table_size;
if (s->symbol == 3) {
@@ -787,11 +820,12 @@ static BrotliDecoderErrorCode ReadHuffmanCode(uint32_t alphabet_size,
s->substate_huffman = BROTLI_STATE_HUFFMAN_LENGTH_SYMBOLS;
/* No break, transit to the next state. */
}
+
case BROTLI_STATE_HUFFMAN_LENGTH_SYMBOLS: {
uint32_t table_size;
- BrotliDecoderErrorCode result = ReadSymbolCodeLengths(alphabet_size, s);
+ BrotliDecoderErrorCode result = ReadSymbolCodeLengths(max_symbol, s);
if (result == BROTLI_DECODER_NEEDS_MORE_INPUT) {
- result = SafeReadSymbolCodeLengths(alphabet_size, s);
+ result = SafeReadSymbolCodeLengths(max_symbol, s);
}
if (result != BROTLI_DECODER_SUCCESS) {
return result;
@@ -823,7 +857,7 @@ static BROTLI_INLINE uint32_t ReadBlockLength(const HuffmanCode* table,
uint32_t code;
uint32_t nbits;
code = ReadSymbol(table, br);
- nbits = kBlockLengthPrefixCode[code].nbits; /* nbits == 2..24 */
+ nbits = kBlockLengthPrefixCode[code].nbits; /* nbits == 2..24 */
return kBlockLengthPrefixCode[code].offset + BrotliReadBits(br, nbits);
}
@@ -842,7 +876,7 @@ static BROTLI_INLINE BROTLI_BOOL SafeReadBlockLength(
}
{
uint32_t bits;
- uint32_t nbits = kBlockLengthPrefixCode[index].nbits; /* nbits == 2..24 */
+ uint32_t nbits = kBlockLengthPrefixCode[index].nbits; /* nbits == 2..24 */
if (!BrotliSafeReadBits(br, nbits, &bits)) {
s->block_length_index = index;
s->substate_read_block_length = BROTLI_STATE_READ_BLOCK_LENGTH_SUFFIX;
@@ -867,8 +901,7 @@ static BROTLI_INLINE BROTLI_BOOL SafeReadBlockLength(
of Y values, and reinitialize only first elements in L.
Most of input values are 0 and 1. To reduce number of branches, we replace
- inner for loop with do-while.
- */
+ inner for loop with do-while. */
static BROTLI_NOINLINE void InverseMoveToFrontTransform(
uint8_t* v, uint32_t v_len, BrotliDecoderState* state) {
/* Reinitialize elements that could have been changed. */
@@ -884,7 +917,7 @@ static BROTLI_NOINLINE void InverseMoveToFrontTransform(
/* Initialize list using 4 consequent values pattern. */
mtf[0] = pattern;
do {
- pattern += 0x04040404; /* Advance all 4 values by 4. */
+ pattern += 0x04040404; /* Advance all 4 values by 4. */
mtf[i] = pattern;
i++;
} while (i <= upper_bound);
@@ -917,7 +950,8 @@ static BrotliDecoderErrorCode HuffmanTreeGroupDecode(
while (s->htree_index < group->num_htrees) {
uint32_t table_size;
BrotliDecoderErrorCode result =
- ReadHuffmanCode(group->alphabet_size, s->next, &table_size, s);
+ ReadHuffmanCode(group->alphabet_size, group->max_symbol,
+ s->next, &table_size, s);
if (result != BROTLI_DECODER_SUCCESS) return result;
group->htrees[s->htree_index] = s->next;
s->next += table_size;
@@ -934,8 +968,7 @@ static BrotliDecoderErrorCode HuffmanTreeGroupDecode(
2) Decode Huffman table using ReadHuffmanCode function.
This table will be used for reading context map items.
3) Read context map items; "0" values could be run-length encoded.
- 4) Optionally, apply InverseMoveToFront transform to the resulting map.
- */
+ 4) Optionally, apply InverseMoveToFront transform to the resulting map. */
static BrotliDecoderErrorCode DecodeContextMap(uint32_t context_map_size,
uint32_t* num_htrees,
uint8_t** context_map_arg,
@@ -964,6 +997,7 @@ static BrotliDecoderErrorCode DecodeContextMap(uint32_t context_map_size,
}
s->substate_context_map = BROTLI_STATE_CONTEXT_MAP_READ_PREFIX;
/* No break, continue to next state. */
+
case BROTLI_STATE_CONTEXT_MAP_READ_PREFIX: {
uint32_t bits;
/* In next stage ReadHuffmanCode uses at least 4 bits, so it is safe
@@ -982,13 +1016,17 @@ static BrotliDecoderErrorCode DecodeContextMap(uint32_t context_map_size,
s->substate_context_map = BROTLI_STATE_CONTEXT_MAP_HUFFMAN;
/* No break, continue to next state. */
}
- case BROTLI_STATE_CONTEXT_MAP_HUFFMAN:
- result = ReadHuffmanCode(*num_htrees + s->max_run_length_prefix,
+
+ case BROTLI_STATE_CONTEXT_MAP_HUFFMAN: {
+ uint32_t alphabet_size = *num_htrees + s->max_run_length_prefix;
+ result = ReadHuffmanCode(alphabet_size, alphabet_size,
s->context_map_table, NULL, s);
if (result != BROTLI_DECODER_SUCCESS) return result;
s->code = 0xFFFF;
s->substate_context_map = BROTLI_STATE_CONTEXT_MAP_DECODE;
/* No break, continue to next state. */
+ }
+
case BROTLI_STATE_CONTEXT_MAP_DECODE: {
uint32_t context_index = s->context_index;
uint32_t max_run_length_prefix = s->max_run_length_prefix;
@@ -1037,6 +1075,7 @@ static BrotliDecoderErrorCode DecodeContextMap(uint32_t context_map_size,
}
/* No break, continue to next state. */
}
+
case BROTLI_STATE_CONTEXT_MAP_TRANSFORM: {
uint32_t bits;
if (!BrotliSafeReadBits(br, 1, &bits)) {
@@ -1049,6 +1088,7 @@ static BrotliDecoderErrorCode DecodeContextMap(uint32_t context_map_size,
s->substate_context_map = BROTLI_STATE_CONTEXT_MAP_NONE;
return BROTLI_DECODER_SUCCESS;
}
+
default:
return
BROTLI_FAILURE(BROTLI_DECODER_ERROR_UNREACHABLE);
@@ -1067,8 +1107,11 @@ static BROTLI_INLINE BROTLI_BOOL DecodeBlockTypeAndLength(
BrotliBitReader* br = &s->br;
uint32_t* ringbuffer = &s->block_type_rb[tree_type * 2];
uint32_t block_type;
+ if (max_block_type <= 1) {
+ return BROTLI_FALSE;
+ }
- /* Read 0..15 + 3..39 bits */
+ /* Read 0..15 + 3..39 bits. */
if (!safe) {
block_type = ReadSymbol(type_tree, br);
s->block_length[tree_type] = ReadBlockLength(len_tree, br);
@@ -1125,9 +1168,8 @@ static BROTLI_INLINE void PrepareLiteralDecoding(BrotliDecoderState* s) {
trivial = s->trivial_literal_contexts[block_type >> 5];
s->trivial_literal_context = (trivial >> (block_type & 31)) & 1;
s->literal_htree = s->literal_hgroup.htrees[s->context_map_slice[0]];
- context_mode = s->context_modes[block_type];
- s->context_lookup1 = &kContextLookup[kContextLookupOffsets[context_mode]];
- s->context_lookup2 = &kContextLookup[kContextLookupOffsets[context_mode + 1]];
+ context_mode = s->context_modes[block_type] & 3;
+ s->context_lookup = BROTLI_CONTEXT_LUT(context_mode);
}
/* Decodes the block type and updates the state for literal context.
@@ -1164,6 +1206,7 @@ static BROTLI_INLINE BROTLI_BOOL DecodeCommandBlockSwitchInternal(
static void BROTLI_NOINLINE DecodeCommandBlockSwitch(BrotliDecoderState* s) {
DecodeCommandBlockSwitchInternal(0, s);
}
+
static BROTLI_BOOL BROTLI_NOINLINE SafeDecodeCommandBlockSwitch(
BrotliDecoderState* s) {
return DecodeCommandBlockSwitchInternal(1, s);
@@ -1200,8 +1243,7 @@ static size_t UnwrittenBytes(const BrotliDecoderState* s, BROTLI_BOOL wrap) {
/* Dumps output.
Returns BROTLI_DECODER_NEEDS_MORE_OUTPUT only if there is more output to push
- and either ring-buffer is as big as window size, or |force| is true.
- */
+ and either ring-buffer is as big as window size, or |force| is true. */
static BrotliDecoderErrorCode BROTLI_NOINLINE WriteRingBuffer(
BrotliDecoderState* s, size_t* available_out, uint8_t** next_out,
size_t* total_out, BROTLI_BOOL force) {
@@ -1260,8 +1302,7 @@ static void BROTLI_NOINLINE WrapRingBuffer(BrotliDecoderState* s) {
this function is called.
Last two bytes of ring-buffer are initialized to 0, so context calculation
- could be done uniformly for the first two and all other positions.
-*/
+ could be done uniformly for the first two and all other positions. */
static BROTLI_BOOL BROTLI_NOINLINE BrotliEnsureRingBuffer(
BrotliDecoderState* s) {
uint8_t* old_ringbuffer = s->ringbuffer;
@@ -1321,8 +1362,9 @@ static BrotliDecoderErrorCode BROTLI_NOINLINE CopyUncompressedBlockToOutput(
return BROTLI_DECODER_NEEDS_MORE_INPUT;
}
s->substate_uncompressed = BROTLI_STATE_UNCOMPRESSED_WRITE;
- /* No break, continue to next state */
+ /* No break, continue to next state. */
}
+
case BROTLI_STATE_UNCOMPRESSED_WRITE: {
BrotliDecoderErrorCode result;
result = WriteRingBuffer(
@@ -1346,8 +1388,7 @@ static BrotliDecoderErrorCode BROTLI_NOINLINE CopyUncompressedBlockToOutput(
If we know the data size is small, do not allocate more ring buffer
size than needed to reduce memory usage.
- When this method is called, metablock size and flags MUST be decoded.
-*/
+ When this method is called, metablock size and flags MUST be decoded. */
static void BROTLI_NOINLINE BrotliCalculateRingBufferSize(
BrotliDecoderState* s) {
int window_size = 1 << s->window_bits;
@@ -1378,7 +1419,7 @@ static void BROTLI_NOINLINE BrotliCalculateRingBufferSize(
if (!!s->canny_ringbuffer_allocation) {
/* Reduce ring buffer size to save memory when server is unscrupulous.
In worst case memory usage might be 1.5x bigger for a short period of
- ring buffer reallocation.*/
+ ring buffer reallocation. */
while ((new_ringbuffer_size >> 1) >= min_size) {
new_ringbuffer_size >>= 1;
}
@@ -1398,7 +1439,7 @@ static BrotliDecoderErrorCode ReadContextModes(BrotliDecoderState* s) {
s->loop_counter = i;
return BROTLI_DECODER_NEEDS_MORE_INPUT;
}
- s->context_modes[i] = (uint8_t)(bits << 1);
+ s->context_modes[i] = (uint8_t)bits;
BROTLI_LOG_ARRAY_INDEX(s->context_modes, i);
i++;
}
@@ -1413,12 +1454,12 @@ static BROTLI_INLINE void TakeDistanceFromRingBuffer(BrotliDecoderState* s) {
s->distance_context = 1;
} else {
int distance_code = s->distance_code << 1;
- /* kDistanceShortCodeIndexOffset has 2-bit values from LSB: */
- /* 3, 2, 1, 0, 3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2 */
- const uint32_t kDistanceShortCodeIndexOffset = 0xaaafff1b;
- /* kDistanceShortCodeValueOffset has 2-bit values from LSB: */
- /*-0, 0,-0, 0,-1, 1,-2, 2,-3, 3,-1, 1,-2, 2,-3, 3 */
- const uint32_t kDistanceShortCodeValueOffset = 0xfa5fa500;
+ /* kDistanceShortCodeIndexOffset has 2-bit values from LSB:
+ 3, 2, 1, 0, 3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2 */
+ const uint32_t kDistanceShortCodeIndexOffset = 0xAAAFFF1B;
+ /* kDistanceShortCodeValueOffset has 2-bit values from LSB:
+ -0, 0,-0, 0,-1, 1,-2, 2,-3, 3,-1, 1,-2, 2,-3, 3 */
+ const uint32_t kDistanceShortCodeValueOffset = 0xFA5FA500;
int v = (s->dist_rb_idx +
(int)(kDistanceShortCodeIndexOffset >> distance_code)) & 0x3;
s->distance_code = s->dist_rb[v];
@@ -1428,9 +1469,9 @@ static BROTLI_INLINE void TakeDistanceFromRingBuffer(BrotliDecoderState* s) {
} else {
s->distance_code -= v;
if (s->distance_code <= 0) {
- /* A huge distance will cause a BROTLI_FAILURE() soon. */
- /* This is a little faster than failing here. */
- s->distance_code = 0x0fffffff;
+ /* A huge distance will cause a BROTLI_FAILURE() soon.
+ This is a little faster than failing here. */
+ s->distance_code = 0x7FFFFFFF;
}
}
}
@@ -1446,7 +1487,7 @@ static BROTLI_INLINE BROTLI_BOOL SafeReadBits(
}
}
-/* Precondition: s->distance_code < 0 */
+/* Precondition: s->distance_code < 0. */
static BROTLI_INLINE BROTLI_BOOL ReadDistanceInternal(
int safe, BrotliDecoderState* s, BrotliBitReader* br) {
int distval;
@@ -1462,10 +1503,10 @@ static BROTLI_INLINE BROTLI_BOOL ReadDistanceInternal(
}
s->distance_code = (int)code;
}
- /* Convert the distance code to the actual distance by possibly */
- /* looking up past distances from the s->ringbuffer. */
+ /* Convert the distance code to the actual distance by possibly
+ looking up past distances from the s->ringbuffer. */
s->distance_context = 0;
- if ((s->distance_code & ~0xf) == 0) {
+ if ((s->distance_code & ~0xF) == 0) {
TakeDistanceFromRingBuffer(s);
--s->block_length[2];
return BROTLI_TRUE;
@@ -1481,14 +1522,14 @@ static BROTLI_INLINE BROTLI_BOOL ReadDistanceInternal(
s->distance_code = (int)s->num_direct_distance_codes + offset +
(int)BrotliReadBits(br, nbits);
} else {
- /* This branch also works well when s->distance_postfix_bits == 0 */
+ /* This branch also works well when s->distance_postfix_bits == 0. */
uint32_t bits;
postfix = distval & s->distance_postfix_mask;
distval >>= s->distance_postfix_bits;
nbits = ((uint32_t)distval >> 1) + 1;
if (safe) {
if (!SafeReadBits(br, nbits, &bits)) {
- s->distance_code = -1; /* Restore precondition. */
+ s->distance_code = -1; /* Restore precondition. */
BrotliBitReaderRestoreState(br, &memento);
return BROTLI_FALSE;
}
@@ -1615,7 +1656,7 @@ CommandBegin:
if (safe) {
s->state = BROTLI_STATE_COMMAND_BEGIN;
}
- if (!CheckInputAmount(safe, br, 28)) { /* 156 bits + 7 bytes */
+ if (!CheckInputAmount(safe, br, 28)) { /* 156 bits + 7 bytes */
s->state = BROTLI_STATE_COMMAND_BEGIN;
result = BROTLI_DECODER_NEEDS_MORE_INPUT;
goto saveStateAndReturn;
@@ -1624,7 +1665,7 @@ CommandBegin:
BROTLI_SAFE(DecodeCommandBlockSwitch(s));
goto CommandBegin;
}
- /* Read the insert/copy length in the command */
+ /* Read the insert/copy length in the command. */
BROTLI_SAFE(ReadCommand(s, br, &i));
BROTLI_LOG(("[ProcessCommandsInternal] pos = %d insert = %d copy = %d\n",
pos, i, s->copy_length));
@@ -1637,13 +1678,13 @@ CommandInner:
if (safe) {
s->state = BROTLI_STATE_COMMAND_INNER;
}
- /* Read the literals in the command */
+ /* Read the literals in the command. */
if (s->trivial_literal_context) {
uint32_t bits;
uint32_t value;
PreloadSymbol(safe, s->literal_htree, br, &bits, &value);
do {
- if (!CheckInputAmount(safe, br, 28)) { /* 162 bits + 7 bytes */
+ if (!CheckInputAmount(safe, br, 28)) { /* 162 bits + 7 bytes */
s->state = BROTLI_STATE_COMMAND_INNER;
result = BROTLI_DECODER_NEEDS_MORE_INPUT;
goto saveStateAndReturn;
@@ -1679,7 +1720,7 @@ CommandInner:
do {
const HuffmanCode* hc;
uint8_t context;
- if (!CheckInputAmount(safe, br, 28)) { /* 162 bits + 7 bytes */
+ if (!CheckInputAmount(safe, br, 28)) { /* 162 bits + 7 bytes */
s->state = BROTLI_STATE_COMMAND_INNER;
result = BROTLI_DECODER_NEEDS_MORE_INPUT;
goto saveStateAndReturn;
@@ -1688,7 +1729,7 @@ CommandInner:
BROTLI_SAFE(DecodeLiteralBlockSwitch(s));
if (s->trivial_literal_context) goto CommandInner;
}
- context = s->context_lookup1[p1] | s->context_lookup2[p2];
+ context = BROTLI_CONTEXT(p1, p2, s->context_lookup);
BROTLI_LOG_UINT(context);
hc = s->literal_hgroup.htrees[s->context_map_slice[context]];
p2 = p1;
@@ -1744,14 +1785,25 @@ CommandPostDecodeLiterals:
}
i = s->copy_length;
/* Apply copy of LZ77 back-reference, or static dictionary reference if
- the distance is larger than the max LZ77 distance */
+ the distance is larger than the max LZ77 distance */
if (s->distance_code > s->max_distance) {
- int address = s->distance_code - s->max_distance - 1;
+ /* The maximum allowed distance is BROTLI_MAX_ALLOWED_DISTANCE = 0x7FFFFFFC.
+ With this choice, no signed overflow can occur after decoding
+ a special distance code (e.g., after adding 3 to the last distance). */
+ if (s->distance_code > BROTLI_MAX_ALLOWED_DISTANCE) {
+ BROTLI_LOG(("Invalid backward reference. pos: %d distance: %d "
+ "len: %d bytes left: %d\n",
+ pos, s->distance_code, i, s->meta_block_remaining_len));
+ return BROTLI_FAILURE(BROTLI_DECODER_ERROR_FORMAT_DISTANCE);
+ }
if (i >= BROTLI_MIN_DICTIONARY_WORD_LENGTH &&
i <= BROTLI_MAX_DICTIONARY_WORD_LENGTH) {
+ int address = s->distance_code - s->max_distance - 1;
const BrotliDictionary* words = s->dictionary;
+ const BrotliTransforms* transforms = s->transforms;
int offset = (int)s->dictionary->offsets_by_length[i];
uint32_t shift = s->dictionary->size_bits_by_length[i];
+
int mask = (int)BitMask(shift);
int word_idx = address & mask;
int transform_idx = address >> shift;
@@ -1761,16 +1813,16 @@ CommandPostDecodeLiterals:
if (BROTLI_PREDICT_FALSE(!words->data)) {
return BROTLI_FAILURE(BROTLI_DECODER_ERROR_DICTIONARY_NOT_SET);
}
- if (transform_idx < kNumTransforms) {
+ if (transform_idx < (int)transforms->num_transforms) {
const uint8_t* word = &words->data[offset];
int len = i;
- if (transform_idx == 0) {
+ if (transform_idx == transforms->cutOffTransforms[0]) {
memcpy(&s->ringbuffer[pos], word, (size_t)len);
BROTLI_LOG(("[ProcessCommandsInternal] dictionary word: [%.*s]\n",
len, word));
} else {
len = BrotliTransformDictionaryWord(&s->ringbuffer[pos], word, len,
- transform_idx);
+ transforms, transform_idx);
BROTLI_LOG(("[ProcessCommandsInternal] dictionary word: [%.*s],"
" transform_idx = %d, transformed: [%.*s]\n",
i, word, transform_idx, len, &s->ringbuffer[pos]));
@@ -1778,7 +1830,6 @@ CommandPostDecodeLiterals:
pos += len;
s->meta_block_remaining_len -= len;
if (pos >= s->ringbuffer_size) {
- /*s->partial_pos_rb += (size_t)s->ringbuffer_size;*/
s->state = BROTLI_STATE_COMMAND_POST_WRITE_1;
goto saveStateAndReturn;
}
@@ -1800,14 +1851,13 @@ CommandPostDecodeLiterals:
uint8_t* copy_src = &s->ringbuffer[src_start];
int dst_end = pos + i;
int src_end = src_start + i;
- /* update the recent distances cache */
+ /* Update the recent distances cache. */
s->dist_rb[s->dist_rb_idx & 3] = s->distance_code;
++s->dist_rb_idx;
s->meta_block_remaining_len -= i;
/* There are 32+ bytes of slack in the ring-buffer allocation.
Also, we have 16 short codes, that make these 16 bytes irrelevant
- in the ring-buffer. Let's copy over them as a first guess.
- */
+ in the ring-buffer. Let's copy over them as a first guess. */
memmove16(copy_dst, copy_src);
if (src_end > pos && dst_end > src_start) {
/* Regions intersect. */
@@ -1830,7 +1880,7 @@ CommandPostDecodeLiterals:
}
BROTLI_LOG_UINT(s->meta_block_remaining_len);
if (s->meta_block_remaining_len <= 0) {
- /* Next metablock, if any */
+ /* Next metablock, if any. */
s->state = BROTLI_STATE_METABLOCK_DONE;
goto saveStateAndReturn;
} else {
@@ -1850,7 +1900,7 @@ CommandPostWrapCopy:
}
}
if (s->meta_block_remaining_len <= 0) {
- /* Next metablock, if any */
+ /* Next metablock, if any. */
s->state = BROTLI_STATE_METABLOCK_DONE;
goto saveStateAndReturn;
} else {
@@ -1875,6 +1925,21 @@ static BROTLI_NOINLINE BrotliDecoderErrorCode SafeProcessCommands(
return ProcessCommandsInternal(1, s);
}
+/* Returns the maximum number of distance symbols which can only represent
+ distances not exceeding BROTLI_MAX_ALLOWED_DISTANCE. */
+static uint32_t BrotliMaxDistanceSymbol(uint32_t ndirect, uint32_t npostfix) {
+ static const uint32_t bound[BROTLI_MAX_NPOSTFIX + 1] = {0, 4, 12, 28};
+ static const uint32_t diff[BROTLI_MAX_NPOSTFIX + 1] = {73, 126, 228, 424};
+ uint32_t postfix = 1U << npostfix;
+ if (ndirect < bound[npostfix]) {
+ return ndirect + diff[npostfix] + postfix;
+ } else if (ndirect > bound[npostfix] + postfix) {
+ return ndirect + diff[npostfix];
+ } else {
+ return bound[npostfix] + diff[npostfix] + postfix;
+ }
+}
+
BrotliDecoderResult BrotliDecoderDecompress(
size_t encoded_size, const uint8_t* encoded_buffer, size_t* decoded_size,
uint8_t* decoded_buffer) {
@@ -1885,7 +1950,9 @@ BrotliDecoderResult BrotliDecoderDecompress(
const uint8_t* next_in = encoded_buffer;
size_t available_out = *decoded_size;
uint8_t* next_out = decoded_buffer;
- BrotliDecoderStateInit(&s);
+ if (!BrotliDecoderStateInit(&s, 0, 0, 0)) {
+ return BROTLI_DECODER_RESULT_ERROR;
+ }
result = BrotliDecoderDecompressStream(
&s, &available_in, &next_in, &available_out, &next_out, &total_out);
*decoded_size = total_out;
@@ -1897,23 +1964,22 @@ BrotliDecoderResult BrotliDecoderDecompress(
}
/* Invariant: input stream is never overconsumed:
- * invalid input implies that the whole stream is invalid -> any amount of
+ - invalid input implies that the whole stream is invalid -> any amount of
input could be read and discarded
- * when result is "needs more input", then at least one more byte is REQUIRED
+ - when result is "needs more input", then at least one more byte is REQUIRED
to complete decoding; all input data MUST be consumed by decoder, so
client could swap the input buffer
- * when result is "needs more output" decoder MUST ensure that it doesn't
+ - when result is "needs more output" decoder MUST ensure that it doesn't
hold more than 7 bits in bit reader; this saves client from swapping input
buffer ahead of time
- * when result is "success" decoder MUST return all unused data back to input
- buffer; this is possible because the invariant is hold on enter
-*/
+ - when result is "success" decoder MUST return all unused data back to input
+ buffer; this is possible because the invariant holds on entry */
BrotliDecoderResult BrotliDecoderDecompressStream(
BrotliDecoderState* s, size_t* available_in, const uint8_t** next_in,
size_t* available_out, uint8_t** next_out, size_t* total_out) {
BrotliDecoderErrorCode result = BROTLI_DECODER_SUCCESS;
BrotliBitReader* br = &s->br;
- /* Ensure that *total_out is set, even if no data will ever be pushed out. */
+ /* Ensure that |total_out| is set, even if no data will ever be pushed out. */
if (total_out) {
*total_out = s->partial_pos_out;
}
@@ -1926,7 +1992,7 @@ BrotliDecoderResult BrotliDecoderDecompressStream(
s, BROTLI_FAILURE(BROTLI_DECODER_ERROR_INVALID_ARGUMENTS));
}
if (!*available_out) next_out = 0;
- if (s->buffer_length == 0) { /* Just connect bit reader to input stream. */
+ if (s->buffer_length == 0) { /* Just connect bit reader to input stream. */
br->avail_in = *available_in;
br->next_in = *next_in;
} else {
@@ -1938,9 +2004,10 @@ BrotliDecoderResult BrotliDecoderDecompressStream(
}
/* State machine */
for (;;) {
- if (result != BROTLI_DECODER_SUCCESS) { /* Error, needs more input/output */
+ if (result != BROTLI_DECODER_SUCCESS) {
+ /* Error, needs more input/output. */
if (result == BROTLI_DECODER_NEEDS_MORE_INPUT) {
- if (s->ringbuffer != 0) { /* Pro-actively push output. */
+ if (s->ringbuffer != 0) { /* Pro-actively push output. */
BrotliDecoderErrorCode intermediate_result = WriteRingBuffer(s,
available_out, next_out, total_out, BROTLI_TRUE);
/* WriteRingBuffer checks s->meta_block_remaining_len validity. */
@@ -1949,9 +2016,10 @@ BrotliDecoderResult BrotliDecoderDecompressStream(
break;
}
}
- if (s->buffer_length != 0) { /* Used with internal buffer. */
- if (br->avail_in == 0) { /* Successfully finished read transaction. */
- /* Accumulator contains less than 8 bits, because internal buffer
+ if (s->buffer_length != 0) { /* Used with internal buffer. */
+ if (br->avail_in == 0) {
+ /* Successfully finished read transaction.
+ Accumulator contains less than 8 bits, because internal buffer
is expanded byte-by-byte until it is enough to complete read. */
s->buffer_length = 0;
/* Switch to input stream and restart. */
@@ -1971,9 +2039,9 @@ BrotliDecoderResult BrotliDecoderDecompressStream(
/* Retry with more data in buffer. */
continue;
}
- /* Can't finish reading and no more input.*/
+ /* Can't finish reading and no more input. */
break;
- } else { /* Input stream doesn't contain enough input. */
+ } else { /* Input stream doesn't contain enough input. */
/* Copy tail to internal buffer and return. */
*next_in = br->next_in;
*available_in = br->avail_in;
@@ -1992,7 +2060,7 @@ BrotliDecoderResult BrotliDecoderDecompressStream(
if (s->buffer_length != 0) {
/* Just consumed the buffered input and produced some output. Otherwise
- it would result in "needs more input". Reset internal buffer.*/
+ it would result in "needs more input". Reset internal buffer. */
s->buffer_length = 0;
} else {
/* Using input stream in last iteration. When decoder switches to input
@@ -2012,13 +2080,32 @@ BrotliDecoderResult BrotliDecoderDecompressStream(
break;
}
/* Decode window size. */
- s->window_bits = DecodeWindowBits(br); /* Reads 1..7 bits. */
- BROTLI_LOG_UINT(s->window_bits);
- if (s->window_bits == 9) {
- /* Value 9 is reserved for future use. */
+ result = DecodeWindowBits(s, br); /* Reads 1..8 bits. */
+ if (result != BROTLI_DECODER_SUCCESS) {
+ break;
+ }
+ if (s->large_window) {
+ s->state = BROTLI_STATE_LARGE_WINDOW_BITS;
+ break;
+ }
+ s->state = BROTLI_STATE_INITIALIZE;
+ break;
+
+ case BROTLI_STATE_LARGE_WINDOW_BITS:
+ if (!BrotliSafeReadBits(br, 6, &s->window_bits)) {
+ result = BROTLI_DECODER_NEEDS_MORE_INPUT;
+ break;
+ }
+ if (s->window_bits < BROTLI_LARGE_MIN_WBITS ||
+ s->window_bits > BROTLI_LARGE_MAX_WBITS) {
result = BROTLI_FAILURE(BROTLI_DECODER_ERROR_FORMAT_WINDOW_BITS);
break;
}
+ s->state = BROTLI_STATE_INITIALIZE;
+ /* No break, continue to next state. */
+
+ case BROTLI_STATE_INITIALIZE:
+ BROTLI_LOG_UINT(s->window_bits);
/* Maximum distance, see section 9.1. of the spec. */
s->max_backward_distance = (1 << s->window_bits) - BROTLI_WINDOW_GAP;
@@ -2034,14 +2121,16 @@ BrotliDecoderResult BrotliDecoderDecompressStream(
s->block_type_trees + 3 * BROTLI_HUFFMAN_MAX_SIZE_258;
s->state = BROTLI_STATE_METABLOCK_BEGIN;
- /* No break, continue to next state */
+ /* No break, continue to next state. */
+
case BROTLI_STATE_METABLOCK_BEGIN:
BrotliDecoderStateMetablockBegin(s);
BROTLI_LOG_UINT(s->pos);
s->state = BROTLI_STATE_METABLOCK_HEADER;
- /* No break, continue to next state */
+ /* No break, continue to next state. */
+
case BROTLI_STATE_METABLOCK_HEADER:
- result = DecodeMetaBlockLength(s, br); /* Reads 2 - 31 bits. */
+ result = DecodeMetaBlockLength(s, br); /* Reads 2 - 31 bits. */
if (result != BROTLI_DECODER_SUCCESS) {
break;
}
@@ -2071,6 +2160,7 @@ BrotliDecoderResult BrotliDecoderDecompressStream(
s->loop_counter = 0;
s->state = BROTLI_STATE_HUFFMAN_CODE_0;
break;
+
case BROTLI_STATE_UNCOMPRESSED: {
result = CopyUncompressedBlockToOutput(
available_out, next_out, total_out, s);
@@ -2080,6 +2170,7 @@ BrotliDecoderResult BrotliDecoderDecompressStream(
s->state = BROTLI_STATE_METABLOCK_DONE;
break;
}
+
case BROTLI_STATE_METADATA:
for (; s->meta_block_remaining_len > 0; --s->meta_block_remaining_len) {
uint32_t bits;
@@ -2093,6 +2184,7 @@ BrotliDecoderResult BrotliDecoderDecompressStream(
s->state = BROTLI_STATE_METABLOCK_DONE;
}
break;
+
case BROTLI_STATE_HUFFMAN_CODE_0:
if (s->loop_counter >= 3) {
s->state = BROTLI_STATE_METABLOCK_HEADER_2;
@@ -2110,23 +2202,28 @@ BrotliDecoderResult BrotliDecoderDecompressStream(
break;
}
s->state = BROTLI_STATE_HUFFMAN_CODE_1;
- /* No break, continue to next state */
+ /* No break, continue to next state. */
+
case BROTLI_STATE_HUFFMAN_CODE_1: {
+ uint32_t alphabet_size = s->num_block_types[s->loop_counter] + 2;
int tree_offset = s->loop_counter * BROTLI_HUFFMAN_MAX_SIZE_258;
- result = ReadHuffmanCode(s->num_block_types[s->loop_counter] + 2,
+ result = ReadHuffmanCode(alphabet_size, alphabet_size,
&s->block_type_trees[tree_offset], NULL, s);
if (result != BROTLI_DECODER_SUCCESS) break;
s->state = BROTLI_STATE_HUFFMAN_CODE_2;
- /* No break, continue to next state */
+ /* No break, continue to next state. */
}
+
case BROTLI_STATE_HUFFMAN_CODE_2: {
+ uint32_t alphabet_size = BROTLI_NUM_BLOCK_LEN_SYMBOLS;
int tree_offset = s->loop_counter * BROTLI_HUFFMAN_MAX_SIZE_26;
- result = ReadHuffmanCode(BROTLI_NUM_BLOCK_LEN_SYMBOLS,
+ result = ReadHuffmanCode(alphabet_size, alphabet_size,
&s->block_len_trees[tree_offset], NULL, s);
if (result != BROTLI_DECODER_SUCCESS) break;
s->state = BROTLI_STATE_HUFFMAN_CODE_3;
- /* No break, continue to next state */
+ /* No break, continue to next state. */
}
+
case BROTLI_STATE_HUFFMAN_CODE_3: {
int tree_offset = s->loop_counter * BROTLI_HUFFMAN_MAX_SIZE_26;
if (!SafeReadBlockLength(s, &s->block_length[s->loop_counter],
@@ -2139,6 +2236,7 @@ BrotliDecoderResult BrotliDecoderDecompressStream(
s->state = BROTLI_STATE_HUFFMAN_CODE_0;
break;
}
+
case BROTLI_STATE_METABLOCK_HEADER_2: {
uint32_t bits;
if (!BrotliSafeReadBits(br, 6, &bits)) {
@@ -2160,15 +2258,17 @@ BrotliDecoderResult BrotliDecoderDecompressStream(
}
s->loop_counter = 0;
s->state = BROTLI_STATE_CONTEXT_MODES;
- /* No break, continue to next state */
+ /* No break, continue to next state. */
}
+
case BROTLI_STATE_CONTEXT_MODES:
result = ReadContextModes(s);
if (result != BROTLI_DECODER_SUCCESS) {
break;
}
s->state = BROTLI_STATE_CONTEXT_MAP_1;
- /* No break, continue to next state */
+ /* No break, continue to next state. */
+
case BROTLI_STATE_CONTEXT_MAP_1:
result = DecodeContextMap(
s->num_block_types[0] << BROTLI_LITERAL_CONTEXT_BITS,
@@ -2178,54 +2278,54 @@ BrotliDecoderResult BrotliDecoderDecompressStream(
}
DetectTrivialLiteralBlockTypes(s);
s->state = BROTLI_STATE_CONTEXT_MAP_2;
- /* No break, continue to next state */
- case BROTLI_STATE_CONTEXT_MAP_2:
- {
- uint32_t num_distance_codes = s->num_direct_distance_codes +
- ((2 * BROTLI_MAX_DISTANCE_BITS) << s->distance_postfix_bits);
- BROTLI_BOOL allocation_success = BROTLI_TRUE;
- result = DecodeContextMap(
- s->num_block_types[2] << BROTLI_DISTANCE_CONTEXT_BITS,
- &s->num_dist_htrees, &s->dist_context_map, s);
- if (result != BROTLI_DECODER_SUCCESS) {
- break;
- }
- allocation_success &= BrotliDecoderHuffmanTreeGroupInit(
- s, &s->literal_hgroup, BROTLI_NUM_LITERAL_SYMBOLS,
- s->num_literal_htrees);
- allocation_success &= BrotliDecoderHuffmanTreeGroupInit(
- s, &s->insert_copy_hgroup, BROTLI_NUM_COMMAND_SYMBOLS,
- s->num_block_types[1]);
- allocation_success &= BrotliDecoderHuffmanTreeGroupInit(
- s, &s->distance_hgroup, num_distance_codes,
- s->num_dist_htrees);
- if (!allocation_success) {
- return SaveErrorCode(s,
- BROTLI_FAILURE(BROTLI_DECODER_ERROR_ALLOC_TREE_GROUPS));
- }
+ /* No break, continue to next state. */
+
+ case BROTLI_STATE_CONTEXT_MAP_2: {
+ uint32_t num_direct_codes =
+ s->num_direct_distance_codes - BROTLI_NUM_DISTANCE_SHORT_CODES;
+ uint32_t num_distance_codes = BROTLI_DISTANCE_ALPHABET_SIZE(
+ num_direct_codes, s->distance_postfix_bits,
+ (s->large_window ? BROTLI_LARGE_MAX_DISTANCE_BITS :
+ BROTLI_MAX_DISTANCE_BITS));
+ uint32_t max_distance_symbol = (s->large_window ?
+ BrotliMaxDistanceSymbol(
+ num_direct_codes, s->distance_postfix_bits) :
+ num_distance_codes);
+ BROTLI_BOOL allocation_success = BROTLI_TRUE;
+ result = DecodeContextMap(
+ s->num_block_types[2] << BROTLI_DISTANCE_CONTEXT_BITS,
+ &s->num_dist_htrees, &s->dist_context_map, s);
+ if (result != BROTLI_DECODER_SUCCESS) {
+ break;
+ }
+ allocation_success &= BrotliDecoderHuffmanTreeGroupInit(
+ s, &s->literal_hgroup, BROTLI_NUM_LITERAL_SYMBOLS,
+ BROTLI_NUM_LITERAL_SYMBOLS, s->num_literal_htrees);
+ allocation_success &= BrotliDecoderHuffmanTreeGroupInit(
+ s, &s->insert_copy_hgroup, BROTLI_NUM_COMMAND_SYMBOLS,
+ BROTLI_NUM_COMMAND_SYMBOLS, s->num_block_types[1]);
+ allocation_success &= BrotliDecoderHuffmanTreeGroupInit(
+ s, &s->distance_hgroup, num_distance_codes,
+ max_distance_symbol, s->num_dist_htrees);
+ if (!allocation_success) {
+ return SaveErrorCode(s,
+ BROTLI_FAILURE(BROTLI_DECODER_ERROR_ALLOC_TREE_GROUPS));
}
s->loop_counter = 0;
s->state = BROTLI_STATE_TREE_GROUP;
- /* No break, continue to next state */
- case BROTLI_STATE_TREE_GROUP:
- {
- HuffmanTreeGroup* hgroup = NULL;
- switch (s->loop_counter) {
- case 0:
- hgroup = &s->literal_hgroup;
- break;
- case 1:
- hgroup = &s->insert_copy_hgroup;
- break;
- case 2:
- hgroup = &s->distance_hgroup;
- break;
- default:
- return SaveErrorCode(s, BROTLI_FAILURE(
- BROTLI_DECODER_ERROR_UNREACHABLE));
- }
- result = HuffmanTreeGroupDecode(hgroup, s);
+ /* No break, continue to next state. */
+ }
+
+ case BROTLI_STATE_TREE_GROUP: {
+ HuffmanTreeGroup* hgroup = NULL;
+ switch (s->loop_counter) {
+ case 0: hgroup = &s->literal_hgroup; break;
+ case 1: hgroup = &s->insert_copy_hgroup; break;
+ case 2: hgroup = &s->distance_hgroup; break;
+ default: return SaveErrorCode(s, BROTLI_FAILURE(
+ BROTLI_DECODER_ERROR_UNREACHABLE));
}
+ result = HuffmanTreeGroupDecode(hgroup, s);
if (result != BROTLI_DECODER_SUCCESS) break;
s->loop_counter++;
if (s->loop_counter >= 3) {
@@ -2239,6 +2339,8 @@ BrotliDecoderResult BrotliDecoderDecompressStream(
s->state = BROTLI_STATE_COMMAND_BEGIN;
}
break;
+ }
+
case BROTLI_STATE_COMMAND_BEGIN:
case BROTLI_STATE_COMMAND_INNER:
case BROTLI_STATE_COMMAND_POST_DECODE_LITERALS:
@@ -2248,6 +2350,7 @@ BrotliDecoderResult BrotliDecoderDecompressStream(
result = SafeProcessCommands(s);
}
break;
+
case BROTLI_STATE_COMMAND_INNER_WRITE:
case BROTLI_STATE_COMMAND_POST_WRITE_1:
case BROTLI_STATE_COMMAND_POST_WRITE_2:
@@ -2262,7 +2365,7 @@ BrotliDecoderResult BrotliDecoderDecompressStream(
}
if (s->state == BROTLI_STATE_COMMAND_POST_WRITE_1) {
if (s->meta_block_remaining_len == 0) {
- /* Next metablock, if any */
+ /* Next metablock, if any. */
s->state = BROTLI_STATE_METABLOCK_DONE;
} else {
s->state = BROTLI_STATE_COMMAND_BEGIN;
@@ -2282,6 +2385,7 @@ BrotliDecoderResult BrotliDecoderDecompressStream(
s->state = BROTLI_STATE_COMMAND_INNER;
}
break;
+
case BROTLI_STATE_METABLOCK_DONE:
if (s->meta_block_remaining_len < 0) {
result = BROTLI_FAILURE(BROTLI_DECODER_ERROR_FORMAT_BLOCK_LENGTH_2);
@@ -2302,7 +2406,8 @@ BrotliDecoderResult BrotliDecoderDecompressStream(
*next_in = br->next_in;
}
s->state = BROTLI_STATE_DONE;
- /* No break, continue to next state */
+ /* No break, continue to next state. */
+
case BROTLI_STATE_DONE:
if (s->ringbuffer != 0) {
result = WriteRingBuffer(
diff --git a/c/dec/huffman.c b/c/dec/huffman.c
index 4fe7bfa..f142442 100644
--- a/c/dec/huffman.c
+++ b/c/dec/huffman.c
@@ -86,9 +86,9 @@ static BROTLI_INLINE void ReplicateValue(HuffmanCode* table,
} while (end > 0);
}
-/* Returns the table width of the next 2nd level table. count is the histogram
- of bit lengths for the remaining symbols, len is the code length of the next
- processed symbol */
+/* Returns the table width of the next 2nd level table. |count| is the histogram
+ of bit lengths for the remaining symbols, |len| is the code length of the
+ next processed symbol. */
static BROTLI_INLINE int NextTableBitSize(const uint16_t* const count,
int len, int root_bits) {
int left = 1 << (len - root_bits);
@@ -118,7 +118,7 @@ void BrotliBuildCodeLengthsHuffmanTable(HuffmanCode* table,
BROTLI_DCHECK(BROTLI_HUFFMAN_MAX_CODE_LENGTH_CODE_LENGTH <=
BROTLI_REVERSE_BITS_MAX);
- /* generate offsets into sorted symbol table by code length */
+ /* Generate offsets into sorted symbol table by code length. */
symbol = -1;
bits = 1;
BROTLI_REPEAT(BROTLI_HUFFMAN_MAX_CODE_LENGTH_CODE_LENGTH, {
@@ -129,7 +129,7 @@ void BrotliBuildCodeLengthsHuffmanTable(HuffmanCode* table,
/* Symbols with code length 0 are placed after all other symbols. */
offset[0] = BROTLI_CODE_LENGTH_CODES - 1;
- /* sort symbols by length, by symbol order within each length */
+ /* Sort symbols by length, by symbol order within each length. */
symbol = BROTLI_CODE_LENGTH_CODES;
do {
BROTLI_REPEAT(6, {
@@ -150,7 +150,7 @@ void BrotliBuildCodeLengthsHuffmanTable(HuffmanCode* table,
return;
}
- /* fill in table */
+ /* Fill in table. */
key = 0;
key_step = BROTLI_REVERSE_BITS_LOWEST;
symbol = 0;
@@ -200,9 +200,8 @@ uint32_t BrotliBuildHuffmanTable(HuffmanCode* root_table,
table_size = 1 << table_bits;
total_size = table_size;
- /* fill in root table */
- /* let's reduce the table size to a smaller size if possible, and */
- /* create the repetitions by memcpy if possible in the coming loop */
+ /* Fill in the root table. Reduce the table size if possible,
+ and create the repetitions by memcpy. */
if (table_bits > max_length) {
table_bits = max_length;
table_size = 1 << table_bits;
@@ -224,15 +223,14 @@ uint32_t BrotliBuildHuffmanTable(HuffmanCode* root_table,
key_step >>= 1;
} while (++bits <= table_bits);
- /* if root_bits != table_bits we only created one fraction of the */
- /* table, and we need to replicate it now. */
+ /* If root_bits != table_bits then replicate to fill the remaining slots. */
while (total_size != table_size) {
memcpy(&table[table_size], &table[0],
(size_t)table_size * sizeof(table[0]));
table_size <<= 1;
}
- /* fill in 2nd level tables and add pointers to root table */
+ /* Fill in 2nd level tables and add pointers to root table. */
key_step = BROTLI_REVERSE_BITS_LOWEST >> (root_bits - 1);
sub_key = (BROTLI_REVERSE_BITS_LOWEST << 1);
sub_key_step = BROTLI_REVERSE_BITS_LOWEST;
diff --git a/c/dec/huffman.h b/c/dec/huffman.h
index 730af88..521ec6e 100644
--- a/c/dec/huffman.h
+++ b/c/dec/huffman.h
@@ -19,10 +19,11 @@ extern "C" {
#define BROTLI_HUFFMAN_MAX_CODE_LENGTH 15
/* Maximum possible Huffman table size for an alphabet size of (index * 32),
- * max code length 15 and root table bits 8. */
+ max code length 15 and root table bits 8. */
static const uint16_t kMaxHuffmanTableSize[] = {
256, 402, 436, 468, 500, 534, 566, 598, 630, 662, 694, 726, 758, 790, 822,
- 854, 886, 920, 952, 984, 1016, 1048, 1080};
+ 854, 886, 920, 952, 984, 1016, 1048, 1080, 1112, 1144, 1176, 1208, 1240, 1272,
+ 1304, 1336, 1368, 1400, 1432, 1464, 1496, 1528};
/* BROTLI_NUM_BLOCK_LEN_SYMBOLS == 26 */
#define BROTLI_HUFFMAN_MAX_SIZE_26 396
/* BROTLI_MAX_BLOCK_TYPE_SYMBOLS == 258 */
@@ -41,23 +42,26 @@ typedef struct {
BROTLI_INTERNAL void BrotliBuildCodeLengthsHuffmanTable(HuffmanCode* root_table,
const uint8_t* const code_lengths, uint16_t* count);
-/* Builds Huffman lookup table assuming code lengths are in symbol order. */
-/* Returns size of resulting table. */
+/* Builds Huffman lookup table assuming code lengths are in symbol order.
+ Returns size of resulting table. */
BROTLI_INTERNAL uint32_t BrotliBuildHuffmanTable(HuffmanCode* root_table,
int root_bits, const uint16_t* const symbol_lists, uint16_t* count_arg);
-/* Builds a simple Huffman table. The num_symbols parameter is to be */
-/* interpreted as follows: 0 means 1 symbol, 1 means 2 symbols, 2 means 3 */
-/* symbols, 3 means 4 symbols with lengths 2,2,2,2, 4 means 4 symbols with */
-/* lengths 1,2,3,3. */
+/* Builds a simple Huffman table. The |num_symbols| parameter is to be
+ interpreted as follows: 0 means 1 symbol, 1 means 2 symbols,
+ 2 means 3 symbols, 3 means 4 symbols with lengths [2, 2, 2, 2],
+ 4 means 4 symbols with lengths [1, 2, 3, 3]. */
BROTLI_INTERNAL uint32_t BrotliBuildSimpleHuffmanTable(HuffmanCode* table,
int root_bits, uint16_t* symbols, uint32_t num_symbols);
/* Contains a collection of Huffman trees with the same alphabet size. */
+/* |max_symbol| is kept separately from |alphabet_size| because of simple
+ codes: log2(alphabet_size) could be greater than log2(max_symbol). */
typedef struct {
HuffmanCode** htrees;
HuffmanCode* codes;
uint16_t alphabet_size;
+ uint16_t max_symbol;
uint16_t num_htrees;
} HuffmanTreeGroup;
diff --git a/c/dec/prefix.h b/c/dec/prefix.h
index aa776c7..3ea062d 100644
--- a/c/dec/prefix.h
+++ b/c/dec/prefix.h
@@ -5,8 +5,7 @@
*/
/* Lookup tables to map prefix codes to value ranges. This is used during
- decoding of the block lengths, literal insertion lengths and copy lengths.
-*/
+ decoding of the block lengths, literal insertion lengths and copy lengths. */
#ifndef BROTLI_DEC_PREFIX_H_
#define BROTLI_DEC_PREFIX_H_
@@ -14,8 +13,8 @@
#include "../common/constants.h"
#include <brotli/types.h>
-/* Represents the range of values belonging to a prefix code: */
-/* [offset, offset + 2^nbits) */
+/* Represents the range of values belonging to a prefix code:
+ [offset, offset + 2^nbits) */
struct PrefixCodeRange {
uint16_t offset;
uint8_t nbits;
diff --git a/c/dec/state.c b/c/dec/state.c
index eaec823..e0b37c2 100644
--- a/c/dec/state.c
+++ b/c/dec/state.c
@@ -15,25 +15,11 @@
extern "C" {
#endif
-static void* DefaultAllocFunc(void* opaque, size_t size) {
- BROTLI_UNUSED(opaque);
- return malloc(size);
-}
-
-static void DefaultFreeFunc(void* opaque, void* address) {
- BROTLI_UNUSED(opaque);
- free(address);
-}
-
-void BrotliDecoderStateInit(BrotliDecoderState* s) {
- BrotliDecoderStateInitWithCustomAllocators(s, 0, 0, 0);
-}
-
-void BrotliDecoderStateInitWithCustomAllocators(BrotliDecoderState* s,
+BROTLI_BOOL BrotliDecoderStateInit(BrotliDecoderState* s,
brotli_alloc_func alloc_func, brotli_free_func free_func, void* opaque) {
if (!alloc_func) {
- s->alloc_func = DefaultAllocFunc;
- s->free_func = DefaultFreeFunc;
+ s->alloc_func = BrotliDefaultAllocFunc;
+ s->free_func = BrotliDefaultFreeFunc;
s->memory_manager_opaque = 0;
} else {
s->alloc_func = alloc_func;
@@ -45,6 +31,7 @@ void BrotliDecoderStateInitWithCustomAllocators(BrotliDecoderState* s,
BrotliInitBitReader(&s->br);
s->state = BROTLI_STATE_UNINITED;
+ s->large_window = 0;
s->substate_metablock_header = BROTLI_STATE_METABLOCK_HEADER_NONE;
s->substate_tree_group = BROTLI_STATE_TREE_GROUP_NONE;
s->substate_context_map = BROTLI_STATE_CONTEXT_MAP_NONE;
@@ -103,13 +90,16 @@ void BrotliDecoderStateInitWithCustomAllocators(BrotliDecoderState* s,
s->mtf_upper_bound = 63;
s->dictionary = BrotliGetDictionary();
+ s->transforms = BrotliGetTransforms();
+
+ return BROTLI_TRUE;
}
void BrotliDecoderStateMetablockBegin(BrotliDecoderState* s) {
s->meta_block_remaining_len = 0;
- s->block_length[0] = 1U << 28;
- s->block_length[1] = 1U << 28;
- s->block_length[2] = 1U << 28;
+ s->block_length[0] = 1U << 24;
+ s->block_length[1] = 1U << 24;
+ s->block_length[2] = 1U << 24;
s->num_block_types[0] = 1;
s->num_block_types[1] = 1;
s->num_block_types[2] = 1;
@@ -126,8 +116,7 @@ void BrotliDecoderStateMetablockBegin(BrotliDecoderState* s) {
s->literal_htree = NULL;
s->dist_context_map_slice = NULL;
s->dist_htree_index = 0;
- s->context_lookup1 = NULL;
- s->context_lookup2 = NULL;
+ s->context_lookup = NULL;
s->literal_hgroup.codes = NULL;
s->literal_hgroup.htrees = NULL;
s->insert_copy_hgroup.codes = NULL;
@@ -153,7 +142,8 @@ void BrotliDecoderStateCleanup(BrotliDecoderState* s) {
}
BROTLI_BOOL BrotliDecoderHuffmanTreeGroupInit(BrotliDecoderState* s,
- HuffmanTreeGroup* group, uint32_t alphabet_size, uint32_t ntrees) {
+ HuffmanTreeGroup* group, uint32_t alphabet_size, uint32_t max_symbol,
+ uint32_t ntrees) {
/* Pack two allocations into one */
const size_t max_table_size = kMaxHuffmanTableSize[(alphabet_size + 31) >> 5];
const size_t code_size = sizeof(HuffmanCode) * ntrees * max_table_size;
@@ -162,6 +152,7 @@ BROTLI_BOOL BrotliDecoderHuffmanTreeGroupInit(BrotliDecoderState* s,
HuffmanCode** p = (HuffmanCode**)BROTLI_DECODER_ALLOC(s,
code_size + htree_size);
group->alphabet_size = (uint16_t)alphabet_size;
+ group->max_symbol = (uint16_t)max_symbol;
group->num_htrees = (uint16_t)ntrees;
group->htrees = p;
group->codes = (HuffmanCode*)(&p[ntrees]);
diff --git a/c/dec/state.h b/c/dec/state.h
index 069beca..d28b639 100644
--- a/c/dec/state.h
+++ b/c/dec/state.h
@@ -12,6 +12,7 @@
#include "../common/constants.h"
#include "../common/dictionary.h"
#include "../common/platform.h"
+#include "../common/transform.h"
#include <brotli/types.h>
#include "./bit_reader.h"
#include "./huffman.h"
@@ -22,6 +23,8 @@ extern "C" {
typedef enum {
BROTLI_STATE_UNINITED,
+ BROTLI_STATE_LARGE_WINDOW_BITS,
+ BROTLI_STATE_INITIALIZE,
BROTLI_STATE_METABLOCK_BEGIN,
BROTLI_STATE_METABLOCK_HEADER,
BROTLI_STATE_METABLOCK_HEADER_2,
@@ -126,23 +129,22 @@ struct BrotliDecoderStateStruct {
uint8_t* ringbuffer;
uint8_t* ringbuffer_end;
HuffmanCode* htree_command;
- const uint8_t* context_lookup1;
- const uint8_t* context_lookup2;
+ const uint8_t* context_lookup;
uint8_t* context_map_slice;
uint8_t* dist_context_map_slice;
- /* This ring buffer holds a few past copy distances that will be used by */
- /* some special distance codes. */
+ /* This ring buffer holds a few past copy distances that will be used by
+ some special distance codes. */
HuffmanTreeGroup literal_hgroup;
HuffmanTreeGroup insert_copy_hgroup;
HuffmanTreeGroup distance_hgroup;
HuffmanCode* block_type_trees;
HuffmanCode* block_len_trees;
/* This is true if the literal context map histogram type always matches the
- block type. It is then not needed to keep the context (faster decoding). */
+ block type. It is then not needed to keep the context (faster decoding). */
int trivial_literal_context;
- /* Distance context is actual after command is decoded and before distance
- is computed. After distance computation it is used as a temporary variable. */
+ /* Distance context is valid after command is decoded and before distance is
+ computed. After distance computation it is used as a temporary variable. */
int distance_context;
int meta_block_remaining_len;
uint32_t block_length_index;
@@ -162,11 +164,11 @@ struct BrotliDecoderStateStruct {
int copy_length;
int distance_code;
- /* For partial write operations */
- size_t rb_roundtrips; /* How many times we went around the ring-buffer */
- size_t partial_pos_out; /* How much output to the user in total */
+ /* For partial write operations. */
+ size_t rb_roundtrips; /* how many times we went around the ring-buffer */
+ size_t partial_pos_out; /* how much output to the user in total */
- /* For ReadHuffmanCode */
+ /* For ReadHuffmanCode. */
uint32_t symbol;
uint32_t repeat;
uint32_t space;
@@ -180,25 +182,26 @@ struct BrotliDecoderStateStruct {
/* Tails of symbol chains. */
int next_symbol[32];
uint8_t code_length_code_lengths[BROTLI_CODE_LENGTH_CODES];
- /* Population counts for the code lengths */
+ /* Population counts for the code lengths. */
uint16_t code_length_histo[16];
- /* For HuffmanTreeGroupDecode */
+ /* For HuffmanTreeGroupDecode. */
int htree_index;
HuffmanCode* next;
- /* For DecodeContextMap */
+ /* For DecodeContextMap. */
uint32_t context_index;
uint32_t max_run_length_prefix;
uint32_t code;
HuffmanCode context_map_table[BROTLI_HUFFMAN_MAX_SIZE_272];
- /* For InverseMoveToFrontTransform */
+ /* For InverseMoveToFrontTransform. */
uint32_t mtf_upper_bound;
uint32_t mtf[64 + 1];
- /* less used attributes are in the end of this struct */
- /* States inside function calls */
+ /* Less used attributes are at the end of this struct. */
+
+ /* States inside function calls. */
BrotliRunningMetablockHeaderState substate_metablock_header;
BrotliRunningTreeGroupState substate_tree_group;
BrotliRunningContextMapState substate_context_map;
@@ -212,6 +215,7 @@ struct BrotliDecoderStateStruct {
unsigned int is_metadata : 1;
unsigned int should_wrap_ringbuffer : 1;
unsigned int canny_ringbuffer_allocation : 1;
+ unsigned int large_window : 1;
unsigned int size_nibbles : 8;
uint32_t window_bits;
@@ -222,6 +226,7 @@ struct BrotliDecoderStateStruct {
uint8_t* context_modes;
const BrotliDictionary* dictionary;
+ const BrotliTransforms* transforms;
uint32_t trivial_literal_contexts[8]; /* 256 bits */
};
@@ -229,17 +234,15 @@ struct BrotliDecoderStateStruct {
typedef struct BrotliDecoderStateStruct BrotliDecoderStateInternal;
#define BrotliDecoderState BrotliDecoderStateInternal
-BROTLI_INTERNAL void BrotliDecoderStateInit(BrotliDecoderState* s);
-BROTLI_INTERNAL void BrotliDecoderStateInitWithCustomAllocators(
- BrotliDecoderState* s, brotli_alloc_func alloc_func,
- brotli_free_func free_func, void* opaque);
+BROTLI_INTERNAL BROTLI_BOOL BrotliDecoderStateInit(BrotliDecoderState* s,
+ brotli_alloc_func alloc_func, brotli_free_func free_func, void* opaque);
BROTLI_INTERNAL void BrotliDecoderStateCleanup(BrotliDecoderState* s);
BROTLI_INTERNAL void BrotliDecoderStateMetablockBegin(BrotliDecoderState* s);
BROTLI_INTERNAL void BrotliDecoderStateCleanupAfterMetablock(
BrotliDecoderState* s);
BROTLI_INTERNAL BROTLI_BOOL BrotliDecoderHuffmanTreeGroupInit(
BrotliDecoderState* s, HuffmanTreeGroup* group, uint32_t alphabet_size,
- uint32_t ntrees);
+ uint32_t max_symbol, uint32_t ntrees);
#define BROTLI_DECODER_ALLOC(S, L) S->alloc_func(S->memory_manager_opaque, L)
diff --git a/c/dec/transform.h b/c/dec/transform.h
deleted file mode 100644
index e1d96ff..0000000
--- a/c/dec/transform.h
+++ /dev/null
@@ -1,300 +0,0 @@
-/* Copyright 2013 Google Inc. All Rights Reserved.
-
- Distributed under MIT license.
- See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
-*/
-
-/* Transformations on dictionary words. */
-
-#ifndef BROTLI_DEC_TRANSFORM_H_
-#define BROTLI_DEC_TRANSFORM_H_
-
-#include "../common/platform.h"
-#include <brotli/types.h>
-
-#if defined(__cplusplus) || defined(c_plusplus)
-extern "C" {
-#endif
-
-enum WordTransformType {
- kIdentity = 0,
- kOmitLast1 = 1,
- kOmitLast2 = 2,
- kOmitLast3 = 3,
- kOmitLast4 = 4,
- kOmitLast5 = 5,
- kOmitLast6 = 6,
- kOmitLast7 = 7,
- kOmitLast8 = 8,
- kOmitLast9 = 9,
- kUppercaseFirst = 10,
- kUppercaseAll = 11,
- kOmitFirst1 = 12,
- kOmitFirst2 = 13,
- kOmitFirst3 = 14,
- kOmitFirst4 = 15,
- kOmitFirst5 = 16,
- kOmitFirst6 = 17,
- kOmitFirst7 = 18,
- kOmitFirst8 = 19,
- kOmitFirst9 = 20
-};
-
-typedef struct {
- const uint8_t prefix_id;
- const uint8_t transform;
- const uint8_t suffix_id;
-} Transform;
-
-static const char kPrefixSuffix[208] =
- "\0 \0, \0 of the \0 of \0s \0.\0 and \0 in \0\"\0 to \0\">\0\n\0. \0]\0"
- " for \0 a \0 that \0\'\0 with \0 from \0 by \0(\0. The \0 on \0 as \0"
- " is \0ing \0\n\t\0:\0ed \0=\"\0 at \0ly \0,\0=\'\0.com/\0. This \0"
- " not \0er \0al \0ful \0ive \0less \0est \0ize \0\xc2\xa0\0ous ";
-
-enum {
- /* EMPTY = ""
- SP = " "
- DQUOT = "\""
- SQUOT = "'"
- CLOSEBR = "]"
- OPEN = "("
- SLASH = "/"
- NBSP = non-breaking space "\0xc2\xa0"
- */
- kPFix_EMPTY = 0,
- kPFix_SP = 1,
- kPFix_COMMASP = 3,
- kPFix_SPofSPtheSP = 6,
- kPFix_SPtheSP = 9,
- kPFix_eSP = 12,
- kPFix_SPofSP = 15,
- kPFix_sSP = 20,
- kPFix_DOT = 23,
- kPFix_SPandSP = 25,
- kPFix_SPinSP = 31,
- kPFix_DQUOT = 36,
- kPFix_SPtoSP = 38,
- kPFix_DQUOTGT = 43,
- kPFix_NEWLINE = 46,
- kPFix_DOTSP = 48,
- kPFix_CLOSEBR = 51,
- kPFix_SPforSP = 53,
- kPFix_SPaSP = 59,
- kPFix_SPthatSP = 63,
- kPFix_SQUOT = 70,
- kPFix_SPwithSP = 72,
- kPFix_SPfromSP = 79,
- kPFix_SPbySP = 86,
- kPFix_OPEN = 91,
- kPFix_DOTSPTheSP = 93,
- kPFix_SPonSP = 100,
- kPFix_SPasSP = 105,
- kPFix_SPisSP = 110,
- kPFix_ingSP = 115,
- kPFix_NEWLINETAB = 120,
- kPFix_COLON = 123,
- kPFix_edSP = 125,
- kPFix_EQDQUOT = 129,
- kPFix_SPatSP = 132,
- kPFix_lySP = 137,
- kPFix_COMMA = 141,
- kPFix_EQSQUOT = 143,
- kPFix_DOTcomSLASH = 146,
- kPFix_DOTSPThisSP = 152,
- kPFix_SPnotSP = 160,
- kPFix_erSP = 166,
- kPFix_alSP = 170,
- kPFix_fulSP = 174,
- kPFix_iveSP = 179,
- kPFix_lessSP = 184,
- kPFix_estSP = 190,
- kPFix_izeSP = 195,
- kPFix_NBSP = 200,
- kPFix_ousSP = 203
-};
-
-static const Transform kTransforms[] = {
- { kPFix_EMPTY, kIdentity, kPFix_EMPTY },
- { kPFix_EMPTY, kIdentity, kPFix_SP },
- { kPFix_SP, kIdentity, kPFix_SP },
- { kPFix_EMPTY, kOmitFirst1, kPFix_EMPTY },
- { kPFix_EMPTY, kUppercaseFirst, kPFix_SP },
- { kPFix_EMPTY, kIdentity, kPFix_SPtheSP },
- { kPFix_SP, kIdentity, kPFix_EMPTY },
- { kPFix_sSP, kIdentity, kPFix_SP },
- { kPFix_EMPTY, kIdentity, kPFix_SPofSP },
- { kPFix_EMPTY, kUppercaseFirst, kPFix_EMPTY },
- { kPFix_EMPTY, kIdentity, kPFix_SPandSP },
- { kPFix_EMPTY, kOmitFirst2, kPFix_EMPTY },
- { kPFix_EMPTY, kOmitLast1, kPFix_EMPTY },
- { kPFix_COMMASP, kIdentity, kPFix_SP },
- { kPFix_EMPTY, kIdentity, kPFix_COMMASP },
- { kPFix_SP, kUppercaseFirst, kPFix_SP },
- { kPFix_EMPTY, kIdentity, kPFix_SPinSP },
- { kPFix_EMPTY, kIdentity, kPFix_SPtoSP },
- { kPFix_eSP, kIdentity, kPFix_SP },
- { kPFix_EMPTY, kIdentity, kPFix_DQUOT },
- { kPFix_EMPTY, kIdentity, kPFix_DOT },
- { kPFix_EMPTY, kIdentity, kPFix_DQUOTGT },
- { kPFix_EMPTY, kIdentity, kPFix_NEWLINE },
- { kPFix_EMPTY, kOmitLast3, kPFix_EMPTY },
- { kPFix_EMPTY, kIdentity, kPFix_CLOSEBR },
- { kPFix_EMPTY, kIdentity, kPFix_SPforSP },
- { kPFix_EMPTY, kOmitFirst3, kPFix_EMPTY },
- { kPFix_EMPTY, kOmitLast2, kPFix_EMPTY },
- { kPFix_EMPTY, kIdentity, kPFix_SPaSP },
- { kPFix_EMPTY, kIdentity, kPFix_SPthatSP },
- { kPFix_SP, kUppercaseFirst, kPFix_EMPTY },
- { kPFix_EMPTY, kIdentity, kPFix_DOTSP },
- { kPFix_DOT, kIdentity, kPFix_EMPTY },
- { kPFix_SP, kIdentity, kPFix_COMMASP },
- { kPFix_EMPTY, kOmitFirst4, kPFix_EMPTY },
- { kPFix_EMPTY, kIdentity, kPFix_SPwithSP },
- { kPFix_EMPTY, kIdentity, kPFix_SQUOT },
- { kPFix_EMPTY, kIdentity, kPFix_SPfromSP },
- { kPFix_EMPTY, kIdentity, kPFix_SPbySP },
- { kPFix_EMPTY, kOmitFirst5, kPFix_EMPTY },
- { kPFix_EMPTY, kOmitFirst6, kPFix_EMPTY },
- { kPFix_SPtheSP, kIdentity, kPFix_EMPTY },
- { kPFix_EMPTY, kOmitLast4, kPFix_EMPTY },
- { kPFix_EMPTY, kIdentity, kPFix_DOTSPTheSP },
- { kPFix_EMPTY, kUppercaseAll, kPFix_EMPTY },
- { kPFix_EMPTY, kIdentity, kPFix_SPonSP },
- { kPFix_EMPTY, kIdentity, kPFix_SPasSP },
- { kPFix_EMPTY, kIdentity, kPFix_SPisSP },
- { kPFix_EMPTY, kOmitLast7, kPFix_EMPTY },
- { kPFix_EMPTY, kOmitLast1, kPFix_ingSP },
- { kPFix_EMPTY, kIdentity, kPFix_NEWLINETAB },
- { kPFix_EMPTY, kIdentity, kPFix_COLON },
- { kPFix_SP, kIdentity, kPFix_DOTSP },
- { kPFix_EMPTY, kIdentity, kPFix_edSP },
- { kPFix_EMPTY, kOmitFirst9, kPFix_EMPTY },
- { kPFix_EMPTY, kOmitFirst7, kPFix_EMPTY },
- { kPFix_EMPTY, kOmitLast6, kPFix_EMPTY },
- { kPFix_EMPTY, kIdentity, kPFix_OPEN },
- { kPFix_EMPTY, kUppercaseFirst, kPFix_COMMASP },
- { kPFix_EMPTY, kOmitLast8, kPFix_EMPTY },
- { kPFix_EMPTY, kIdentity, kPFix_SPatSP },
- { kPFix_EMPTY, kIdentity, kPFix_lySP },
- { kPFix_SPtheSP, kIdentity, kPFix_SPofSP },
- { kPFix_EMPTY, kOmitLast5, kPFix_EMPTY },
- { kPFix_EMPTY, kOmitLast9, kPFix_EMPTY },
- { kPFix_SP, kUppercaseFirst, kPFix_COMMASP },
- { kPFix_EMPTY, kUppercaseFirst, kPFix_DQUOT },
- { kPFix_DOT, kIdentity, kPFix_OPEN },
- { kPFix_EMPTY, kUppercaseAll, kPFix_SP },
- { kPFix_EMPTY, kUppercaseFirst, kPFix_DQUOTGT },
- { kPFix_EMPTY, kIdentity, kPFix_EQDQUOT },
- { kPFix_SP, kIdentity, kPFix_DOT },
- { kPFix_DOTcomSLASH, kIdentity, kPFix_EMPTY },
- { kPFix_SPtheSP, kIdentity, kPFix_SPofSPtheSP },
- { kPFix_EMPTY, kUppercaseFirst, kPFix_SQUOT },
- { kPFix_EMPTY, kIdentity, kPFix_DOTSPThisSP },
- { kPFix_EMPTY, kIdentity, kPFix_COMMA },
- { kPFix_DOT, kIdentity, kPFix_SP },
- { kPFix_EMPTY, kUppercaseFirst, kPFix_OPEN },
- { kPFix_EMPTY, kUppercaseFirst, kPFix_DOT },
- { kPFix_EMPTY, kIdentity, kPFix_SPnotSP },
- { kPFix_SP, kIdentity, kPFix_EQDQUOT },
- { kPFix_EMPTY, kIdentity, kPFix_erSP },
- { kPFix_SP, kUppercaseAll, kPFix_SP },
- { kPFix_EMPTY, kIdentity, kPFix_alSP },
- { kPFix_SP, kUppercaseAll, kPFix_EMPTY },
- { kPFix_EMPTY, kIdentity, kPFix_EQSQUOT },
- { kPFix_EMPTY, kUppercaseAll, kPFix_DQUOT },
- { kPFix_EMPTY, kUppercaseFirst, kPFix_DOTSP },
- { kPFix_SP, kIdentity, kPFix_OPEN },
- { kPFix_EMPTY, kIdentity, kPFix_fulSP },
- { kPFix_SP, kUppercaseFirst, kPFix_DOTSP },
- { kPFix_EMPTY, kIdentity, kPFix_iveSP },
- { kPFix_EMPTY, kIdentity, kPFix_lessSP },
- { kPFix_EMPTY, kUppercaseAll, kPFix_SQUOT },
- { kPFix_EMPTY, kIdentity, kPFix_estSP },
- { kPFix_SP, kUppercaseFirst, kPFix_DOT },
- { kPFix_EMPTY, kUppercaseAll, kPFix_DQUOTGT },
- { kPFix_SP, kIdentity, kPFix_EQSQUOT },
- { kPFix_EMPTY, kUppercaseFirst, kPFix_COMMA },
- { kPFix_EMPTY, kIdentity, kPFix_izeSP },
- { kPFix_EMPTY, kUppercaseAll, kPFix_DOT },
- { kPFix_NBSP, kIdentity, kPFix_EMPTY },
- { kPFix_SP, kIdentity, kPFix_COMMA },
- { kPFix_EMPTY, kUppercaseFirst, kPFix_EQDQUOT },
- { kPFix_EMPTY, kUppercaseAll, kPFix_EQDQUOT },
- { kPFix_EMPTY, kIdentity, kPFix_ousSP },
- { kPFix_EMPTY, kUppercaseAll, kPFix_COMMASP },
- { kPFix_EMPTY, kUppercaseFirst, kPFix_EQSQUOT },
- { kPFix_SP, kUppercaseFirst, kPFix_COMMA },
- { kPFix_SP, kUppercaseAll, kPFix_EQDQUOT },
- { kPFix_SP, kUppercaseAll, kPFix_COMMASP },
- { kPFix_EMPTY, kUppercaseAll, kPFix_COMMA },
- { kPFix_EMPTY, kUppercaseAll, kPFix_OPEN },
- { kPFix_EMPTY, kUppercaseAll, kPFix_DOTSP },
- { kPFix_SP, kUppercaseAll, kPFix_DOT },
- { kPFix_EMPTY, kUppercaseAll, kPFix_EQSQUOT },
- { kPFix_SP, kUppercaseAll, kPFix_DOTSP },
- { kPFix_SP, kUppercaseFirst, kPFix_EQDQUOT },
- { kPFix_SP, kUppercaseAll, kPFix_EQSQUOT },
- { kPFix_SP, kUppercaseFirst, kPFix_EQSQUOT },
-};
-
-static const int kNumTransforms = sizeof(kTransforms) / sizeof(kTransforms[0]);
-
-static int ToUpperCase(uint8_t* p) {
- if (p[0] < 0xc0) {
- if (p[0] >= 'a' && p[0] <= 'z') {
- p[0] ^= 32;
- }
- return 1;
- }
- /* An overly simplified uppercasing model for UTF-8. */
- if (p[0] < 0xe0) {
- p[1] ^= 32;
- return 2;
- }
- /* An arbitrary transform for three byte characters. */
- p[2] ^= 5;
- return 3;
-}
-
-static BROTLI_NOINLINE int BrotliTransformDictionaryWord(
- uint8_t* dst, const uint8_t* word, int len, int transform) {
- int idx = 0;
- {
- const char* prefix = &kPrefixSuffix[kTransforms[transform].prefix_id];
- while (*prefix) { dst[idx++] = (uint8_t)*prefix++; }
- }
- {
- const int t = kTransforms[transform].transform;
- int i = 0;
- int skip = t - (kOmitFirst1 - 1);
- if (skip > 0) {
- word += skip;
- len -= skip;
- } else if (t <= kOmitLast9) {
- len -= t;
- }
- while (i < len) { dst[idx++] = word[i++]; }
- if (t == kUppercaseFirst) {
- ToUpperCase(&dst[idx - len]);
- } else if (t == kUppercaseAll) {
- uint8_t* uppercase = &dst[idx - len];
- while (len > 0) {
- int step = ToUpperCase(uppercase);
- uppercase += step;
- len -= step;
- }
- }
- }
- {
- const char* suffix = &kPrefixSuffix[kTransforms[transform].suffix_id];
- while (*suffix) { dst[idx++] = (uint8_t)*suffix++; }
- return idx;
- }
-}
-
-#if defined(__cplusplus) || defined(c_plusplus)
-} /* extern "C" */
-#endif
-
-#endif /* BROTLI_DEC_TRANSFORM_H_ */
diff --git a/c/enc/backward_references.c b/c/enc/backward_references.c
index cce0cd4..62ecea7 100644
--- a/c/enc/backward_references.c
+++ b/c/enc/backward_references.c
@@ -102,23 +102,16 @@ static BROTLI_INLINE size_t ComputeDistanceCode(size_t distance,
#undef CAT
#undef EXPAND_CAT
-void BrotliCreateBackwardReferences(const BrotliDictionary* dictionary,
- size_t num_bytes,
- size_t position,
- const uint8_t* ringbuffer,
- size_t ringbuffer_mask,
- const BrotliEncoderParams* params,
- HasherHandle hasher,
- int* dist_cache,
- size_t* last_insert_len,
- Command* commands,
- size_t* num_commands,
- size_t* num_literals) {
+void BrotliCreateBackwardReferences(
+ size_t num_bytes, size_t position, const uint8_t* ringbuffer,
+ size_t ringbuffer_mask, const BrotliEncoderParams* params,
+ HasherHandle hasher, int* dist_cache, size_t* last_insert_len,
+ Command* commands, size_t* num_commands, size_t* num_literals) {
switch (params->hasher.type) {
#define CASE_(N) \
case N: \
- CreateBackwardReferencesNH ## N(dictionary, \
- kStaticDictionaryHash, num_bytes, position, ringbuffer, \
+ CreateBackwardReferencesNH ## N( \
+ num_bytes, position, ringbuffer, \
ringbuffer_mask, params, hasher, dist_cache, \
last_insert_len, commands, num_commands, num_literals); \
return;
diff --git a/c/enc/backward_references.h b/c/enc/backward_references.h
index 631c2f6..3a41466 100644
--- a/c/enc/backward_references.h
+++ b/c/enc/backward_references.h
@@ -26,7 +26,6 @@ extern "C" {
CreateBackwardReferences calls, and must be incremented by the amount written
by this call. */
BROTLI_INTERNAL void BrotliCreateBackwardReferences(
- const BrotliDictionary* dictionary,
size_t num_bytes, size_t position, const uint8_t* ringbuffer,
size_t ringbuffer_mask, const BrotliEncoderParams* params,
HasherHandle hasher, int* dist_cache, size_t* last_insert_len,
diff --git a/c/enc/backward_references_hq.c b/c/enc/backward_references_hq.c
index b05cb4f..f2f9918 100644
--- a/c/enc/backward_references_hq.c
+++ b/c/enc/backward_references_hq.c
@@ -25,6 +25,10 @@
extern "C" {
#endif
+#define BROTLI_SIMPLE_DISTANCE_ALPHABET_SIZE ( \
+ BROTLI_NUM_DISTANCE_SHORT_CODES + (2 * BROTLI_LARGE_MAX_DISTANCE_BITS))
+/* BROTLI_SIMPLE_DISTANCE_ALPHABET_SIZE == 74 */
+
static const float kInfinity = 1.7e38f; /* ~= 2 ^ 127 */
static const uint32_t kDistanceCacheIndex[] = {
@@ -39,40 +43,40 @@ void BrotliInitZopfliNodes(ZopfliNode* array, size_t length) {
size_t i;
stub.length = 1;
stub.distance = 0;
- stub.insert_length = 0;
+ stub.dcode_insert_length = 0;
stub.u.cost = kInfinity;
for (i = 0; i < length; ++i) array[i] = stub;
}
static BROTLI_INLINE uint32_t ZopfliNodeCopyLength(const ZopfliNode* self) {
- return self->length & 0xffffff;
+ return self->length & 0x1FFFFFF;
}
static BROTLI_INLINE uint32_t ZopfliNodeLengthCode(const ZopfliNode* self) {
- const uint32_t modifier = self->length >> 24;
+ const uint32_t modifier = self->length >> 25;
return ZopfliNodeCopyLength(self) + 9u - modifier;
}
static BROTLI_INLINE uint32_t ZopfliNodeCopyDistance(const ZopfliNode* self) {
- return self->distance & 0x7ffffff;
+ return self->distance;
}
static BROTLI_INLINE uint32_t ZopfliNodeDistanceCode(const ZopfliNode* self) {
- const uint32_t short_code = self->distance >> 27;
+ const uint32_t short_code = self->dcode_insert_length >> 27;
return short_code == 0 ?
ZopfliNodeCopyDistance(self) + BROTLI_NUM_DISTANCE_SHORT_CODES - 1 :
short_code - 1;
}
static BROTLI_INLINE uint32_t ZopfliNodeCommandLength(const ZopfliNode* self) {
- return ZopfliNodeCopyLength(self) + self->insert_length;
+ return ZopfliNodeCopyLength(self) + (self->dcode_insert_length & 0x7FFFFFF);
}
/* Histogram based cost model for zopflification. */
typedef struct ZopfliCostModel {
/* The insert and copy length symbols. */
float cost_cmd_[BROTLI_NUM_COMMAND_SYMBOLS];
- float cost_dist_[BROTLI_NUM_DISTANCE_SYMBOLS];
+ float cost_dist_[BROTLI_SIMPLE_DISTANCE_ALPHABET_SIZE];
/* Cumulative costs of literals per position in the stream. */
float* literal_costs_;
float min_cost_cmd_;
@@ -91,17 +95,26 @@ static void CleanupZopfliCostModel(MemoryManager* m, ZopfliCostModel* self) {
}
static void SetCost(const uint32_t* histogram, size_t histogram_size,
- float* cost) {
+ BROTLI_BOOL literal_histogram, float* cost) {
size_t sum = 0;
+ size_t missing_symbol_sum;
float log2sum;
+ float missing_symbol_cost;
size_t i;
for (i = 0; i < histogram_size; i++) {
sum += histogram[i];
}
log2sum = (float)FastLog2(sum);
+ missing_symbol_sum = sum;
+ if (!literal_histogram) {
+ for (i = 0; i < histogram_size; i++) {
+ if (histogram[i] == 0) missing_symbol_sum++;
+ }
+ }
+ missing_symbol_cost = (float)FastLog2(missing_symbol_sum) + 2;
for (i = 0; i < histogram_size; i++) {
if (histogram[i] == 0) {
- cost[i] = log2sum + 2;
+ cost[i] = missing_symbol_cost;
continue;
}
@@ -122,7 +135,7 @@ static void ZopfliCostModelSetFromCommands(ZopfliCostModel* self,
size_t last_insert_len) {
uint32_t histogram_literal[BROTLI_NUM_LITERAL_SYMBOLS];
uint32_t histogram_cmd[BROTLI_NUM_COMMAND_SYMBOLS];
- uint32_t histogram_dist[BROTLI_NUM_DISTANCE_SYMBOLS];
+ uint32_t histogram_dist[BROTLI_SIMPLE_DISTANCE_ALPHABET_SIZE];
float cost_literal[BROTLI_NUM_LITERAL_SYMBOLS];
size_t pos = position - last_insert_len;
float min_cost_cmd = kInfinity;
@@ -136,7 +149,7 @@ static void ZopfliCostModelSetFromCommands(ZopfliCostModel* self,
for (i = 0; i < num_commands; i++) {
size_t inslength = commands[i].insert_len_;
size_t copylength = CommandCopyLen(&commands[i]);
- size_t distcode = commands[i].dist_prefix_;
+ size_t distcode = commands[i].dist_prefix_ & 0x3FF;
size_t cmdcode = commands[i].cmd_prefix_;
size_t j;
@@ -150,9 +163,12 @@ static void ZopfliCostModelSetFromCommands(ZopfliCostModel* self,
pos += inslength + copylength;
}
- SetCost(histogram_literal, BROTLI_NUM_LITERAL_SYMBOLS, cost_literal);
- SetCost(histogram_cmd, BROTLI_NUM_COMMAND_SYMBOLS, cost_cmd);
- SetCost(histogram_dist, BROTLI_NUM_DISTANCE_SYMBOLS, self->cost_dist_);
+ SetCost(histogram_literal, BROTLI_NUM_LITERAL_SYMBOLS, BROTLI_TRUE,
+ cost_literal);
+ SetCost(histogram_cmd, BROTLI_NUM_COMMAND_SYMBOLS, BROTLI_FALSE,
+ cost_cmd);
+ SetCost(histogram_dist, BROTLI_SIMPLE_DISTANCE_ALPHABET_SIZE, BROTLI_FALSE,
+ self->cost_dist_);
for (i = 0; i < BROTLI_NUM_COMMAND_SYMBOLS; ++i) {
min_cost_cmd = BROTLI_MIN(float, min_cost_cmd, cost_cmd[i]);
@@ -161,11 +177,14 @@ static void ZopfliCostModelSetFromCommands(ZopfliCostModel* self,
{
float* literal_costs = self->literal_costs_;
+ float literal_carry = 0.0;
size_t num_bytes = self->num_bytes_;
literal_costs[0] = 0.0;
for (i = 0; i < num_bytes; ++i) {
- literal_costs[i + 1] = literal_costs[i] +
+ literal_carry +=
cost_literal[ringbuffer[(position + i) & ringbuffer_mask]];
+ literal_costs[i + 1] = literal_costs[i] + literal_carry;
+ literal_carry -= literal_costs[i + 1] - literal_costs[i];
}
}
}
@@ -175,6 +194,7 @@ static void ZopfliCostModelSetFromLiteralCosts(ZopfliCostModel* self,
const uint8_t* ringbuffer,
size_t ringbuffer_mask) {
float* literal_costs = self->literal_costs_;
+ float literal_carry = 0.0;
float* cost_dist = self->cost_dist_;
float* cost_cmd = self->cost_cmd_;
size_t num_bytes = self->num_bytes_;
@@ -183,12 +203,14 @@ static void ZopfliCostModelSetFromLiteralCosts(ZopfliCostModel* self,
ringbuffer, &literal_costs[1]);
literal_costs[0] = 0.0;
for (i = 0; i < num_bytes; ++i) {
- literal_costs[i + 1] += literal_costs[i];
+ literal_carry += literal_costs[i + 1];
+ literal_costs[i + 1] = literal_costs[i] + literal_carry;
+ literal_carry -= literal_costs[i + 1] - literal_costs[i];
}
for (i = 0; i < BROTLI_NUM_COMMAND_SYMBOLS; ++i) {
cost_cmd[i] = (float)FastLog2(11 + (uint32_t)i);
}
- for (i = 0; i < BROTLI_NUM_DISTANCE_SYMBOLS; ++i) {
+ for (i = 0; i < BROTLI_SIMPLE_DISTANCE_ALPHABET_SIZE; ++i) {
cost_dist[i] = (float)FastLog2(20 + (uint32_t)i);
}
self->min_cost_cmd_ = (float)FastLog2(11);
@@ -221,9 +243,10 @@ static BROTLI_INLINE void UpdateZopfliNode(ZopfliNode* nodes, size_t pos,
size_t start_pos, size_t len, size_t len_code, size_t dist,
size_t short_code, float cost) {
ZopfliNode* next = &nodes[pos + len];
- next->length = (uint32_t)(len | ((len + 9u - len_code) << 24));
- next->distance = (uint32_t)(dist | (short_code << 27));
- next->insert_length = (uint32_t)(pos - start_pos);
+ next->length = (uint32_t)(len | ((len + 9u - len_code) << 25));
+ next->distance = (uint32_t)dist;
+ next->dcode_insert_length = (uint32_t)(
+ (short_code << 27) | (pos - start_pos));
next->u.cost = cost;
}
@@ -303,7 +326,7 @@ static uint32_t ComputeDistanceShortcut(const size_t block_start,
const size_t gap,
const ZopfliNode* nodes) {
const size_t clen = ZopfliNodeCopyLength(&nodes[pos]);
- const size_t ilen = nodes[pos].insert_length;
+ const size_t ilen = nodes[pos].dcode_insert_length & 0x7FFFFFF;
const size_t dist = ZopfliNodeCopyDistance(&nodes[pos]);
/* Since |block_start + pos| is the end position of the command, the copy part
starts from |block_start + pos - clen|. Distances that are greater than
@@ -335,7 +358,7 @@ static void ComputeDistanceCache(const size_t pos,
int idx = 0;
size_t p = nodes[pos].u.shortcut;
while (idx < 4 && p > 0) {
- const size_t ilen = nodes[p].insert_length;
+ const size_t ilen = nodes[p].dcode_insert_length & 0x7FFFFFF;
const size_t clen = ZopfliNodeCopyLength(&nodes[p]);
const size_t dist = ZopfliNodeCopyDistance(&nodes[p]);
dist_cache[idx++] = (int)dist;
@@ -483,9 +506,9 @@ static size_t UpdateNodes(
float dist_cost;
size_t max_match_len;
PrefixEncodeCopyDistance(dist_code, 0, 0, &dist_symbol, &distextra);
- distnumextra = distextra >> 24;
+ distnumextra = dist_symbol >> 10;
dist_cost = base_cost + (float)distnumextra +
- ZopfliCostModelGetDistanceCost(model, dist_symbol);
+ ZopfliCostModelGetDistanceCost(model, dist_symbol & 0x3FF);
/* Try all copy lengths up until the maximum copy length corresponding
to this distance. If the distance refers to the static dictionary, or
@@ -517,7 +540,8 @@ static size_t ComputeShortestPathFromNodes(size_t num_bytes,
ZopfliNode* nodes) {
size_t index = num_bytes;
size_t num_commands = 0;
- while (nodes[index].insert_length == 0 && nodes[index].length == 1) --index;
+ while ((nodes[index].dcode_insert_length & 0x7FFFFFF) == 0 &&
+ nodes[index].length == 1) --index;
nodes[index].u.next = BROTLI_UINT32_MAX;
while (index != 0) {
size_t len = ZopfliNodeCommandLength(&nodes[index]);
@@ -546,7 +570,7 @@ void BrotliZopfliCreateCommands(const size_t num_bytes,
for (i = 0; offset != BROTLI_UINT32_MAX; i++) {
const ZopfliNode* next = &nodes[pos + offset];
size_t copy_length = ZopfliNodeCopyLength(next);
- size_t insert_length = next->insert_length;
+ size_t insert_length = next->dcode_insert_length & 0x7FFFFFF;
pos += insert_length;
offset = next->u.next;
if (i == 0) {
@@ -624,7 +648,6 @@ static size_t ZopfliIterate(size_t num_bytes,
/* REQUIRES: nodes != NULL and len(nodes) >= num_bytes + 1 */
size_t BrotliZopfliComputeShortestPath(MemoryManager* m,
- const BrotliDictionary* dictionary,
size_t num_bytes, size_t position, const uint8_t* ringbuffer,
size_t ringbuffer_mask, const BrotliEncoderParams* params,
const size_t max_backward_limit, const int* dist_cache, HasherHandle hasher,
@@ -649,9 +672,9 @@ size_t BrotliZopfliComputeShortestPath(MemoryManager* m,
const size_t pos = position + i;
const size_t max_distance = BROTLI_MIN(size_t, pos, max_backward_limit);
size_t skip;
- size_t num_matches = FindAllMatchesH10(hasher, dictionary, ringbuffer,
- ringbuffer_mask, pos, num_bytes - i, max_distance, gap, params,
- &matches[lz_matches_offset]);
+ size_t num_matches = FindAllMatchesH10(hasher, &params->dictionary,
+ ringbuffer, ringbuffer_mask, pos, num_bytes - i, max_distance, gap,
+ params, &matches[lz_matches_offset]);
if (num_matches > 0 &&
BackwardMatchLength(&matches[num_matches - 1]) > max_zopfli_len) {
matches[0] = matches[num_matches - 1];
@@ -683,7 +706,6 @@ size_t BrotliZopfliComputeShortestPath(MemoryManager* m,
}
void BrotliCreateZopfliBackwardReferences(MemoryManager* m,
- const BrotliDictionary* dictionary,
size_t num_bytes, size_t position, const uint8_t* ringbuffer,
size_t ringbuffer_mask, const BrotliEncoderParams* params,
HasherHandle hasher, int* dist_cache, size_t* last_insert_len,
@@ -693,7 +715,7 @@ void BrotliCreateZopfliBackwardReferences(MemoryManager* m,
nodes = BROTLI_ALLOC(m, ZopfliNode, num_bytes + 1);
if (BROTLI_IS_OOM(m)) return;
BrotliInitZopfliNodes(nodes, num_bytes + 1);
- *num_commands += BrotliZopfliComputeShortestPath(m, dictionary,
+ *num_commands += BrotliZopfliComputeShortestPath(m,
num_bytes, position, ringbuffer, ringbuffer_mask,
params, max_backward_limit, dist_cache, hasher, nodes);
if (BROTLI_IS_OOM(m)) return;
@@ -703,7 +725,6 @@ void BrotliCreateZopfliBackwardReferences(MemoryManager* m,
}
void BrotliCreateHqZopfliBackwardReferences(MemoryManager* m,
- const BrotliDictionary* dictionary,
size_t num_bytes, size_t position, const uint8_t* ringbuffer,
size_t ringbuffer_mask, const BrotliEncoderParams* params,
HasherHandle hasher, int* dist_cache, size_t* last_insert_len,
@@ -736,8 +757,8 @@ void BrotliCreateHqZopfliBackwardReferences(MemoryManager* m,
BROTLI_ENSURE_CAPACITY(m, BackwardMatch, matches, matches_size,
cur_match_pos + MAX_NUM_MATCHES_H10 + shadow_matches);
if (BROTLI_IS_OOM(m)) return;
- num_found_matches = FindAllMatchesH10(hasher, dictionary,
- ringbuffer, ringbuffer_mask, pos, max_length,
+ num_found_matches = FindAllMatchesH10(hasher,
+ &params->dictionary, ringbuffer, ringbuffer_mask, pos, max_length,
max_distance, gap, params, &matches[cur_match_pos + shadow_matches]);
cur_match_end = cur_match_pos + num_found_matches;
for (j = cur_match_pos; j + 1 < cur_match_end; ++j) {
diff --git a/c/enc/backward_references_hq.h b/c/enc/backward_references_hq.h
index cc19544..7c38bd6 100644
--- a/c/enc/backward_references_hq.h
+++ b/c/enc/backward_references_hq.h
@@ -23,29 +23,26 @@ extern "C" {
#endif
BROTLI_INTERNAL void BrotliCreateZopfliBackwardReferences(MemoryManager* m,
- const BrotliDictionary* dictionary,
size_t num_bytes, size_t position, const uint8_t* ringbuffer,
size_t ringbuffer_mask, const BrotliEncoderParams* params,
HasherHandle hasher, int* dist_cache, size_t* last_insert_len,
Command* commands, size_t* num_commands, size_t* num_literals);
BROTLI_INTERNAL void BrotliCreateHqZopfliBackwardReferences(MemoryManager* m,
- const BrotliDictionary* dictionary,
size_t num_bytes, size_t position, const uint8_t* ringbuffer,
size_t ringbuffer_mask, const BrotliEncoderParams* params,
HasherHandle hasher, int* dist_cache, size_t* last_insert_len,
Command* commands, size_t* num_commands, size_t* num_literals);
typedef struct ZopfliNode {
- /* best length to get up to this byte (not including this byte itself)
- highest 8 bit is used to reconstruct the length code */
+ /* Best length to get up to this byte (not including this byte itself);
+ the highest 7 bits are used to reconstruct the length code. */
uint32_t length;
- /* distance associated with the length; highest 5 bits contain distance
- short code + 1 (or zero if no short code); this way only distances shorter
- than 128MiB are allowed here */
+ /* Distance associated with the length. */
uint32_t distance;
- /* number of literal inserts before this copy */
- uint32_t insert_length;
+ /* Number of literal inserts before this copy; highest 5 bits contain
+ distance short code + 1 (or zero if no short code). */
+ uint32_t dcode_insert_length;
/* This union holds information used by dynamic-programming. During forward
pass |cost| is used to store the goal function. When node is processed its
@@ -78,7 +75,6 @@ BROTLI_INTERNAL void BrotliInitZopfliNodes(ZopfliNode* array, size_t length);
(2) nodes[i].command_length() <= i and
(3) nodes[i - nodes[i].command_length()].cost < kInfinity */
BROTLI_INTERNAL size_t BrotliZopfliComputeShortestPath(MemoryManager* m,
- const BrotliDictionary* dictionary,
size_t num_bytes, size_t position, const uint8_t* ringbuffer,
size_t ringbuffer_mask, const BrotliEncoderParams* params,
const size_t max_backward_limit, const int* dist_cache, HasherHandle hasher,
diff --git a/c/enc/backward_references_inc.h b/c/enc/backward_references_inc.h
index 0a715b2..967545d 100644
--- a/c/enc/backward_references_inc.h
+++ b/c/enc/backward_references_inc.h
@@ -8,8 +8,6 @@
/* template parameters: EXPORT_FN, FN */
static BROTLI_NOINLINE void EXPORT_FN(CreateBackwardReferences)(
- const BrotliDictionary* dictionary,
- const uint16_t* dictionary_hash,
size_t num_bytes, size_t position,
const uint8_t* ringbuffer, size_t ringbuffer_mask,
const BrotliEncoderParams* params, HasherHandle hasher, int* dist_cache,
@@ -43,9 +41,10 @@ static BROTLI_NOINLINE void EXPORT_FN(CreateBackwardReferences)(
sr.len_code_delta = 0;
sr.distance = 0;
sr.score = kMinScore;
- FN(FindLongestMatch)(hasher, dictionary, dictionary_hash, ringbuffer,
- ringbuffer_mask, dist_cache, position,
- max_length, max_distance, gap, &sr);
+ FN(FindLongestMatch)(hasher, &params->dictionary,
+ ringbuffer, ringbuffer_mask, dist_cache, position,
+ max_length, max_distance, gap,
+ params->dist.max_distance, &sr);
if (sr.score > kMinScore) {
/* Found a match. Let's look for something even better ahead. */
int delayed_backward_references_in_row = 0;
@@ -59,9 +58,9 @@ static BROTLI_NOINLINE void EXPORT_FN(CreateBackwardReferences)(
sr2.distance = 0;
sr2.score = kMinScore;
max_distance = BROTLI_MIN(size_t, position + 1, max_backward_limit);
- FN(FindLongestMatch)(hasher, dictionary, dictionary_hash,
+ FN(FindLongestMatch)(hasher, &params->dictionary,
ringbuffer, ringbuffer_mask, dist_cache, position + 1, max_length,
- max_distance, gap, &sr2);
+ max_distance, gap, params->dist.max_distance, &sr2);
if (sr2.score >= sr.score + cost_diff_lazy) {
/* Ok, let's just write one byte for now and start a match from the
next byte. */
diff --git a/c/enc/bit_cost.h b/c/enc/bit_cost.h
index e8b7013..6586469 100644
--- a/c/enc/bit_cost.h
+++ b/c/enc/bit_cost.h
@@ -18,11 +18,11 @@
extern "C" {
#endif
-static BROTLI_INLINE double ShannonEntropy(const uint32_t *population,
- size_t size, size_t *total) {
+static BROTLI_INLINE double ShannonEntropy(
+ const uint32_t* population, size_t size, size_t* total) {
size_t sum = 0;
double retval = 0;
- const uint32_t *population_end = population + size;
+ const uint32_t* population_end = population + size;
size_t p;
if (size & 1) {
goto odd_number_of_elements_left;
@@ -42,7 +42,7 @@ static BROTLI_INLINE double ShannonEntropy(const uint32_t *population,
}
static BROTLI_INLINE double BitsEntropy(
- const uint32_t *population, size_t size) {
+ const uint32_t* population, size_t size) {
size_t sum;
double retval = ShannonEntropy(population, size, &sum);
if (retval < sum) {
diff --git a/c/enc/block_encoder_inc.h b/c/enc/block_encoder_inc.h
index 2a08f90..8cbd5ea 100644
--- a/c/enc/block_encoder_inc.h
+++ b/c/enc/block_encoder_inc.h
@@ -13,9 +13,9 @@
stream. */
static void FN(BuildAndStoreEntropyCodes)(MemoryManager* m, BlockEncoder* self,
const HistogramType* histograms, const size_t histograms_size,
- HuffmanTree* tree, size_t* storage_ix, uint8_t* storage) {
- const size_t alphabet_size = self->alphabet_size_;
- const size_t table_size = histograms_size * alphabet_size;
+ const size_t alphabet_size, HuffmanTree* tree,
+ size_t* storage_ix, uint8_t* storage) {
+ const size_t table_size = histograms_size * self->histogram_length_;
self->depths_ = BROTLI_ALLOC(m, uint8_t, table_size);
self->bits_ = BROTLI_ALLOC(m, uint16_t, table_size);
if (BROTLI_IS_OOM(m)) return;
@@ -23,9 +23,10 @@ static void FN(BuildAndStoreEntropyCodes)(MemoryManager* m, BlockEncoder* self,
{
size_t i;
for (i = 0; i < histograms_size; ++i) {
- size_t ix = i * alphabet_size;
- BuildAndStoreHuffmanTree(&histograms[i].data_[0], alphabet_size, tree,
- &self->depths_[ix], &self->bits_[ix], storage_ix, storage);
+ size_t ix = i * self->histogram_length_;
+ BuildAndStoreHuffmanTree(&histograms[i].data_[0], self->histogram_length_,
+ alphabet_size, tree, &self->depths_[ix], &self->bits_[ix],
+ storage_ix, storage);
}
}
}
diff --git a/c/enc/block_splitter.c b/c/enc/block_splitter.c
index 6362211..d308eca 100644
--- a/c/enc/block_splitter.c
+++ b/c/enc/block_splitter.c
@@ -174,7 +174,7 @@ void BrotliSplitBlock(MemoryManager* m,
for (i = 0; i < num_commands; ++i) {
const Command* cmd = &cmds[i];
if (CommandCopyLen(cmd) && cmd->cmd_prefix_ >= 128) {
- distance_prefixes[j++] = cmd->dist_prefix_;
+ distance_prefixes[j++] = cmd->dist_prefix_ & 0x3FF;
}
}
/* Create the block split on the array of distance prefixes. */
diff --git a/c/enc/block_splitter_inc.h b/c/enc/block_splitter_inc.h
index 5712572..023712b 100644
--- a/c/enc/block_splitter_inc.h
+++ b/c/enc/block_splitter_inc.h
@@ -70,7 +70,7 @@ static size_t FN(FindBlocks)(const DataType* data, const size_t length,
double* insert_cost,
double* cost,
uint8_t* switch_signal,
- uint8_t *block_id) {
+ uint8_t* block_id) {
const size_t data_size = FN(HistogramDataSize)();
const size_t bitmaplen = (num_histograms + 7) >> 3;
size_t num_blocks = 1;
diff --git a/c/enc/brotli_bit_stream.c b/c/enc/brotli_bit_stream.c
index cd9c594..aaf2dad 100644
--- a/c/enc/brotli_bit_stream.c
+++ b/c/enc/brotli_bit_stream.c
@@ -13,12 +13,13 @@
#include <string.h> /* memcpy, memset */
#include "../common/constants.h"
+#include "../common/context.h"
#include "../common/platform.h"
#include <brotli/types.h>
-#include "./context.h"
#include "./entropy_encode.h"
#include "./entropy_encode_static.h"
#include "./fast_log.h"
+#include "./histogram.h"
#include "./memory.h"
#include "./write_bits.h"
@@ -27,12 +28,11 @@ extern "C" {
#endif
#define MAX_HUFFMAN_TREE_SIZE (2 * BROTLI_NUM_COMMAND_SYMBOLS + 1)
-/* The size of Huffman dictionary for distances assuming that NPOSTFIX = 0 and
- NDIRECT = 0. */
-#define SIMPLE_DISTANCE_ALPHABET_SIZE (BROTLI_NUM_DISTANCE_SHORT_CODES + \
- (2 * BROTLI_MAX_DISTANCE_BITS))
-/* SIMPLE_DISTANCE_ALPHABET_SIZE == 64 */
-#define SIMPLE_DISTANCE_ALPHABET_BITS 6
+/* The maximum size of Huffman dictionary for distances assuming that
+ NPOSTFIX = 0 and NDIRECT = 0. */
+#define MAX_SIMPLE_DISTANCE_ALPHABET_SIZE \
+ BROTLI_DISTANCE_ALPHABET_SIZE(0, 0, BROTLI_LARGE_MAX_DISTANCE_BITS)
+/* MAX_SIMPLE_DISTANCE_ALPHABET_SIZE == 140 */
/* Represents the range of values belonging to a prefix code:
[offset, offset + 2^nbits) */
@@ -258,7 +258,7 @@ static void StoreSimpleHuffmanTree(const uint8_t* depths,
size_t symbols[4],
size_t num_symbols,
size_t max_bits,
- size_t *storage_ix, uint8_t *storage) {
+ size_t* storage_ix, uint8_t* storage) {
/* value of 1 indicates a simple Huffman code */
BrotliWriteBits(2, 1, storage_ix, storage);
BrotliWriteBits(2, num_symbols - 1, storage_ix, storage); /* NSYM - 1 */
@@ -297,7 +297,7 @@ static void StoreSimpleHuffmanTree(const uint8_t* depths,
depths = symbol depths */
void BrotliStoreHuffmanTree(const uint8_t* depths, size_t num,
HuffmanTree* tree,
- size_t *storage_ix, uint8_t *storage) {
+ size_t* storage_ix, uint8_t* storage) {
/* Write the Huffman tree into the brotli-representation.
The command alphabet is the largest, so this allocation will fit all
alphabets. */
@@ -360,8 +360,9 @@ void BrotliStoreHuffmanTree(const uint8_t* depths, size_t num,
/* Builds a Huffman tree from histogram[0:length] into depth[0:length] and
bits[0:length] and stores the encoded tree to the bit stream. */
-static void BuildAndStoreHuffmanTree(const uint32_t *histogram,
- const size_t length,
+static void BuildAndStoreHuffmanTree(const uint32_t* histogram,
+ const size_t histogram_length,
+ const size_t alphabet_size,
HuffmanTree* tree,
uint8_t* depth,
uint16_t* bits,
@@ -371,7 +372,7 @@ static void BuildAndStoreHuffmanTree(const uint32_t *histogram,
size_t s4[4] = { 0 };
size_t i;
size_t max_bits = 0;
- for (i = 0; i < length; i++) {
+ for (i = 0; i < histogram_length; i++) {
if (histogram[i]) {
if (count < 4) {
s4[count] = i;
@@ -383,7 +384,7 @@ static void BuildAndStoreHuffmanTree(const uint32_t *histogram,
}
{
- size_t max_bits_counter = length - 1;
+ size_t max_bits_counter = alphabet_size - 1;
while (max_bits_counter) {
max_bits_counter >>= 1;
++max_bits;
@@ -398,14 +399,14 @@ static void BuildAndStoreHuffmanTree(const uint32_t *histogram,
return;
}
- memset(depth, 0, length * sizeof(depth[0]));
- BrotliCreateHuffmanTree(histogram, length, 15, tree, depth);
- BrotliConvertBitDepthsToSymbols(depth, length, bits);
+ memset(depth, 0, histogram_length * sizeof(depth[0]));
+ BrotliCreateHuffmanTree(histogram, histogram_length, 15, tree, depth);
+ BrotliConvertBitDepthsToSymbols(depth, histogram_length, bits);
if (count <= 4) {
StoreSimpleHuffmanTree(depth, s4, count, max_bits, storage_ix, storage);
} else {
- BrotliStoreHuffmanTree(depth, length, tree, storage_ix, storage);
+ BrotliStoreHuffmanTree(depth, histogram_length, tree, storage_ix, storage);
}
}
@@ -729,6 +730,7 @@ static void EncodeContextMap(MemoryManager* m,
}
}
BuildAndStoreHuffmanTree(histogram, num_clusters + max_run_length_prefix,
+ num_clusters + max_run_length_prefix,
tree, depths, bits, storage_ix, storage);
for (i = 0; i < num_rle_symbols; ++i) {
const uint32_t rle_symbol = rle_symbols[i] & kSymbolMask;
@@ -788,10 +790,11 @@ static void BuildAndStoreBlockSplitCode(const uint8_t* types,
}
StoreVarLenUint8(num_types - 1, storage_ix, storage);
if (num_types > 1) { /* TODO: else? could StoreBlockSwitch occur? */
- BuildAndStoreHuffmanTree(&type_histo[0], num_types + 2, tree,
+ BuildAndStoreHuffmanTree(&type_histo[0], num_types + 2, num_types + 2, tree,
&code->type_depths[0], &code->type_bits[0],
storage_ix, storage);
BuildAndStoreHuffmanTree(&length_histo[0], BROTLI_NUM_BLOCK_LEN_SYMBOLS,
+ BROTLI_NUM_BLOCK_LEN_SYMBOLS,
tree, &code->length_depths[0],
&code->length_bits[0], storage_ix, storage);
StoreBlockSwitch(code, lengths[0], types[0], 1, storage_ix, storage);
@@ -822,8 +825,8 @@ static void StoreTrivialContextMap(size_t num_types,
for (i = context_bits; i < alphabet_size; ++i) {
histogram[i] = 1;
}
- BuildAndStoreHuffmanTree(histogram, alphabet_size, tree,
- depths, bits, storage_ix, storage);
+ BuildAndStoreHuffmanTree(histogram, alphabet_size, alphabet_size,
+ tree, depths, bits, storage_ix, storage);
for (i = 0; i < num_types; ++i) {
size_t code = (i == 0 ? 0 : i + context_bits - 1);
BrotliWriteBits(depths[code], bits[code], storage_ix, storage);
@@ -838,7 +841,7 @@ static void StoreTrivialContextMap(size_t num_types,
/* Manages the encoding of one block category (literal, command or distance). */
typedef struct BlockEncoder {
- size_t alphabet_size_;
+ size_t histogram_length_;
size_t num_block_types_;
const uint8_t* block_types_; /* Not owned. */
const uint32_t* block_lengths_; /* Not owned. */
@@ -851,10 +854,10 @@ typedef struct BlockEncoder {
uint16_t* bits_;
} BlockEncoder;
-static void InitBlockEncoder(BlockEncoder* self, size_t alphabet_size,
+static void InitBlockEncoder(BlockEncoder* self, size_t histogram_length,
size_t num_block_types, const uint8_t* block_types,
const uint32_t* block_lengths, const size_t num_blocks) {
- self->alphabet_size_ = alphabet_size;
+ self->histogram_length_ = histogram_length;
self->num_block_types_ = num_block_types;
self->block_types_ = block_types;
self->block_lengths_ = block_lengths;
@@ -890,7 +893,7 @@ static void StoreSymbol(BlockEncoder* self, size_t symbol, size_t* storage_ix,
uint32_t block_len = self->block_lengths_[block_ix];
uint8_t block_type = self->block_types_[block_ix];
self->block_len_ = block_len;
- self->entropy_ix_ = block_type * self->alphabet_size_;
+ self->entropy_ix_ = block_type * self->histogram_length_;
StoreBlockSwitch(&self->block_split_code_, block_len, block_type, 0,
storage_ix, storage);
}
@@ -919,7 +922,7 @@ static void StoreSymbolWithContext(BlockEncoder* self, size_t symbol,
--self->block_len_;
{
size_t histo_ix = context_map[self->entropy_ix_ + context];
- size_t ix = histo_ix * self->alphabet_size_ + symbol;
+ size_t ix = histo_ix * self->histogram_length_ + symbol;
BrotliWriteBits(self->depths_[ix], self->bits_[ix], storage_ix, storage);
}
}
@@ -945,42 +948,38 @@ static void JumpToByteBoundary(size_t* storage_ix, uint8_t* storage) {
}
void BrotliStoreMetaBlock(MemoryManager* m,
- const uint8_t* input,
- size_t start_pos,
- size_t length,
- size_t mask,
- uint8_t prev_byte,
- uint8_t prev_byte2,
- BROTLI_BOOL is_last,
- uint32_t num_direct_distance_codes,
- uint32_t distance_postfix_bits,
- ContextType literal_context_mode,
- const Command *commands,
- size_t n_commands,
- const MetaBlockSplit* mb,
- size_t *storage_ix,
- uint8_t *storage) {
+ const uint8_t* input, size_t start_pos, size_t length, size_t mask,
+ uint8_t prev_byte, uint8_t prev_byte2, BROTLI_BOOL is_last,
+ const BrotliEncoderParams* params, ContextType literal_context_mode,
+ const Command* commands, size_t n_commands, const MetaBlockSplit* mb,
+ size_t* storage_ix, uint8_t* storage) {
+
size_t pos = start_pos;
size_t i;
- size_t num_distance_codes =
- BROTLI_NUM_DISTANCE_SHORT_CODES + num_direct_distance_codes +
- (48u << distance_postfix_bits);
+ uint32_t num_distance_symbols = params->dist.alphabet_size;
+ uint32_t num_effective_distance_symbols = num_distance_symbols;
HuffmanTree* tree;
+ ContextLut literal_context_lut = BROTLI_CONTEXT_LUT(literal_context_mode);
BlockEncoder literal_enc;
BlockEncoder command_enc;
BlockEncoder distance_enc;
+ const BrotliDistanceParams* dist = &params->dist;
+ if (params->large_window &&
+ num_effective_distance_symbols > BROTLI_NUM_HISTOGRAM_DISTANCE_SYMBOLS) {
+ num_effective_distance_symbols = BROTLI_NUM_HISTOGRAM_DISTANCE_SYMBOLS;
+ }
StoreCompressedMetaBlockHeader(is_last, length, storage_ix, storage);
tree = BROTLI_ALLOC(m, HuffmanTree, MAX_HUFFMAN_TREE_SIZE);
if (BROTLI_IS_OOM(m)) return;
- InitBlockEncoder(&literal_enc, 256, mb->literal_split.num_types,
- mb->literal_split.types, mb->literal_split.lengths,
- mb->literal_split.num_blocks);
+ InitBlockEncoder(&literal_enc, BROTLI_NUM_LITERAL_SYMBOLS,
+ mb->literal_split.num_types, mb->literal_split.types,
+ mb->literal_split.lengths, mb->literal_split.num_blocks);
InitBlockEncoder(&command_enc, BROTLI_NUM_COMMAND_SYMBOLS,
mb->command_split.num_types, mb->command_split.types,
mb->command_split.lengths, mb->command_split.num_blocks);
- InitBlockEncoder(&distance_enc, num_distance_codes,
+ InitBlockEncoder(&distance_enc, num_effective_distance_symbols,
mb->distance_split.num_types, mb->distance_split.types,
mb->distance_split.lengths, mb->distance_split.num_blocks);
@@ -989,9 +988,10 @@ void BrotliStoreMetaBlock(MemoryManager* m,
BuildAndStoreBlockSwitchEntropyCodes(
&distance_enc, tree, storage_ix, storage);
- BrotliWriteBits(2, distance_postfix_bits, storage_ix, storage);
- BrotliWriteBits(4, num_direct_distance_codes >> distance_postfix_bits,
- storage_ix, storage);
+ BrotliWriteBits(2, dist->distance_postfix_bits, storage_ix, storage);
+ BrotliWriteBits(
+ 4, dist->num_direct_distance_codes >> dist->distance_postfix_bits,
+ storage_ix, storage);
for (i = 0; i < mb->literal_split.num_types; ++i) {
BrotliWriteBits(2, literal_context_mode, storage_ix, storage);
}
@@ -1017,13 +1017,16 @@ void BrotliStoreMetaBlock(MemoryManager* m,
}
BuildAndStoreEntropyCodesLiteral(m, &literal_enc, mb->literal_histograms,
- mb->literal_histograms_size, tree, storage_ix, storage);
+ mb->literal_histograms_size, BROTLI_NUM_LITERAL_SYMBOLS, tree,
+ storage_ix, storage);
if (BROTLI_IS_OOM(m)) return;
BuildAndStoreEntropyCodesCommand(m, &command_enc, mb->command_histograms,
- mb->command_histograms_size, tree, storage_ix, storage);
+ mb->command_histograms_size, BROTLI_NUM_COMMAND_SYMBOLS, tree,
+ storage_ix, storage);
if (BROTLI_IS_OOM(m)) return;
BuildAndStoreEntropyCodesDistance(m, &distance_enc, mb->distance_histograms,
- mb->distance_histograms_size, tree, storage_ix, storage);
+ mb->distance_histograms_size, num_distance_symbols, tree,
+ storage_ix, storage);
if (BROTLI_IS_OOM(m)) return;
BROTLI_FREE(m, tree);
@@ -1041,7 +1044,8 @@ void BrotliStoreMetaBlock(MemoryManager* m,
} else {
size_t j;
for (j = cmd.insert_len_; j != 0; --j) {
- size_t context = Context(prev_byte, prev_byte2, literal_context_mode);
+ size_t context =
+ BROTLI_CONTEXT(prev_byte, prev_byte2, literal_context_lut);
uint8_t literal = input[pos & mask];
StoreSymbolWithContext(&literal_enc, literal, context,
mb->literal_context_map, storage_ix, storage,
@@ -1056,9 +1060,9 @@ void BrotliStoreMetaBlock(MemoryManager* m,
prev_byte2 = input[(pos - 2) & mask];
prev_byte = input[(pos - 1) & mask];
if (cmd.cmd_prefix_ >= 128) {
- size_t dist_code = cmd.dist_prefix_;
- uint32_t distnumextra = cmd.dist_extra_ >> 24;
- uint64_t distextra = cmd.dist_extra_ & 0xffffff;
+ size_t dist_code = cmd.dist_prefix_ & 0x3FF;
+ uint32_t distnumextra = cmd.dist_prefix_ >> 10;
+ uint64_t distextra = cmd.dist_extra_;
if (mb->distance_context_map_size == 0) {
StoreSymbol(&distance_enc, dist_code, storage_ix, storage);
} else {
@@ -1082,7 +1086,7 @@ void BrotliStoreMetaBlock(MemoryManager* m,
static void BuildHistograms(const uint8_t* input,
size_t start_pos,
size_t mask,
- const Command *commands,
+ const Command* commands,
size_t n_commands,
HistogramLiteral* lit_histo,
HistogramCommand* cmd_histo,
@@ -1099,7 +1103,7 @@ static void BuildHistograms(const uint8_t* input,
}
pos += CommandCopyLen(&cmd);
if (CommandCopyLen(&cmd) && cmd.cmd_prefix_ >= 128) {
- HistogramAddDistance(dist_histo, cmd.dist_prefix_);
+ HistogramAddDistance(dist_histo, cmd.dist_prefix_ & 0x3FF);
}
}
}
@@ -1107,7 +1111,7 @@ static void BuildHistograms(const uint8_t* input,
static void StoreDataWithHuffmanCodes(const uint8_t* input,
size_t start_pos,
size_t mask,
- const Command *commands,
+ const Command* commands,
size_t n_commands,
const uint8_t* lit_depth,
const uint16_t* lit_bits,
@@ -1134,9 +1138,9 @@ static void StoreDataWithHuffmanCodes(const uint8_t* input,
}
pos += CommandCopyLen(&cmd);
if (CommandCopyLen(&cmd) && cmd.cmd_prefix_ >= 128) {
- const size_t dist_code = cmd.dist_prefix_;
- const uint32_t distnumextra = cmd.dist_extra_ >> 24;
- const uint32_t distextra = cmd.dist_extra_ & 0xffffff;
+ const size_t dist_code = cmd.dist_prefix_ & 0x3FF;
+ const uint32_t distnumextra = cmd.dist_prefix_ >> 10;
+ const uint32_t distextra = cmd.dist_extra_;
BrotliWriteBits(dist_depth[dist_code], dist_bits[dist_code],
storage_ix, storage);
BrotliWriteBits(distnumextra, distextra, storage_ix, storage);
@@ -1145,15 +1149,10 @@ static void StoreDataWithHuffmanCodes(const uint8_t* input,
}
void BrotliStoreMetaBlockTrivial(MemoryManager* m,
- const uint8_t* input,
- size_t start_pos,
- size_t length,
- size_t mask,
- BROTLI_BOOL is_last,
- const Command *commands,
- size_t n_commands,
- size_t *storage_ix,
- uint8_t *storage) {
+ const uint8_t* input, size_t start_pos, size_t length, size_t mask,
+ BROTLI_BOOL is_last, const BrotliEncoderParams* params,
+ const Command* commands, size_t n_commands,
+ size_t* storage_ix, uint8_t* storage) {
HistogramLiteral lit_histo;
HistogramCommand cmd_histo;
HistogramDistance dist_histo;
@@ -1161,9 +1160,10 @@ void BrotliStoreMetaBlockTrivial(MemoryManager* m,
uint16_t lit_bits[BROTLI_NUM_LITERAL_SYMBOLS];
uint8_t cmd_depth[BROTLI_NUM_COMMAND_SYMBOLS];
uint16_t cmd_bits[BROTLI_NUM_COMMAND_SYMBOLS];
- uint8_t dist_depth[SIMPLE_DISTANCE_ALPHABET_SIZE];
- uint16_t dist_bits[SIMPLE_DISTANCE_ALPHABET_SIZE];
+ uint8_t dist_depth[MAX_SIMPLE_DISTANCE_ALPHABET_SIZE];
+ uint16_t dist_bits[MAX_SIMPLE_DISTANCE_ALPHABET_SIZE];
HuffmanTree* tree;
+ uint32_t num_distance_symbols = params->dist.alphabet_size;
StoreCompressedMetaBlockHeader(is_last, length, storage_ix, storage);
@@ -1178,14 +1178,16 @@ void BrotliStoreMetaBlockTrivial(MemoryManager* m,
tree = BROTLI_ALLOC(m, HuffmanTree, MAX_HUFFMAN_TREE_SIZE);
if (BROTLI_IS_OOM(m)) return;
- BuildAndStoreHuffmanTree(lit_histo.data_, BROTLI_NUM_LITERAL_SYMBOLS, tree,
+ BuildAndStoreHuffmanTree(lit_histo.data_, BROTLI_NUM_LITERAL_SYMBOLS,
+ BROTLI_NUM_LITERAL_SYMBOLS, tree,
lit_depth, lit_bits,
storage_ix, storage);
- BuildAndStoreHuffmanTree(cmd_histo.data_, BROTLI_NUM_COMMAND_SYMBOLS, tree,
+ BuildAndStoreHuffmanTree(cmd_histo.data_, BROTLI_NUM_COMMAND_SYMBOLS,
+ BROTLI_NUM_COMMAND_SYMBOLS, tree,
cmd_depth, cmd_bits,
storage_ix, storage);
- BuildAndStoreHuffmanTree(dist_histo.data_, SIMPLE_DISTANCE_ALPHABET_SIZE,
- tree,
+ BuildAndStoreHuffmanTree(dist_histo.data_, MAX_SIMPLE_DISTANCE_ALPHABET_SIZE,
+ num_distance_symbols, tree,
dist_depth, dist_bits,
storage_ix, storage);
BROTLI_FREE(m, tree);
@@ -1200,15 +1202,14 @@ void BrotliStoreMetaBlockTrivial(MemoryManager* m,
}
void BrotliStoreMetaBlockFast(MemoryManager* m,
- const uint8_t* input,
- size_t start_pos,
- size_t length,
- size_t mask,
- BROTLI_BOOL is_last,
- const Command *commands,
- size_t n_commands,
- size_t *storage_ix,
- uint8_t *storage) {
+ const uint8_t* input, size_t start_pos, size_t length, size_t mask,
+ BROTLI_BOOL is_last, const BrotliEncoderParams* params,
+ const Command* commands, size_t n_commands,
+ size_t* storage_ix, uint8_t* storage) {
+ uint32_t num_distance_symbols = params->dist.alphabet_size;
+ uint32_t distance_alphabet_bits =
+ Log2FloorNonZero(num_distance_symbols - 1) + 1;
+
StoreCompressedMetaBlockHeader(is_last, length, storage_ix, storage);
BrotliWriteBits(13, 0, storage_ix, storage);
@@ -1252,8 +1253,8 @@ void BrotliStoreMetaBlockFast(MemoryManager* m,
uint16_t lit_bits[BROTLI_NUM_LITERAL_SYMBOLS];
uint8_t cmd_depth[BROTLI_NUM_COMMAND_SYMBOLS];
uint16_t cmd_bits[BROTLI_NUM_COMMAND_SYMBOLS];
- uint8_t dist_depth[SIMPLE_DISTANCE_ALPHABET_SIZE];
- uint16_t dist_bits[SIMPLE_DISTANCE_ALPHABET_SIZE];
+ uint8_t dist_depth[MAX_SIMPLE_DISTANCE_ALPHABET_SIZE];
+ uint16_t dist_bits[MAX_SIMPLE_DISTANCE_ALPHABET_SIZE];
HistogramClearLiteral(&lit_histo);
HistogramClearCommand(&cmd_histo);
HistogramClearDistance(&dist_histo);
@@ -1274,7 +1275,7 @@ void BrotliStoreMetaBlockFast(MemoryManager* m,
BrotliBuildAndStoreHuffmanTreeFast(m, dist_histo.data_,
dist_histo.total_count_,
/* max_bits = */
- SIMPLE_DISTANCE_ALPHABET_BITS,
+ distance_alphabet_bits,
dist_depth, dist_bits,
storage_ix, storage);
if (BROTLI_IS_OOM(m)) return;
@@ -1293,11 +1294,11 @@ void BrotliStoreMetaBlockFast(MemoryManager* m,
/* This is for storing uncompressed blocks (simple raw storage of
bytes-as-bytes). */
void BrotliStoreUncompressedMetaBlock(BROTLI_BOOL is_final_block,
- const uint8_t * BROTLI_RESTRICT input,
+ const uint8_t* BROTLI_RESTRICT input,
size_t position, size_t mask,
size_t len,
- size_t * BROTLI_RESTRICT storage_ix,
- uint8_t * BROTLI_RESTRICT storage) {
+ size_t* BROTLI_RESTRICT storage_ix,
+ uint8_t* BROTLI_RESTRICT storage) {
size_t masked_pos = position & mask;
BrotliStoreUncompressedMetaBlockHeader(len, storage_ix, storage);
JumpToByteBoundary(storage_ix, storage);
diff --git a/c/enc/brotli_bit_stream.h b/c/enc/brotli_bit_stream.h
index 1324b18..9089b1d 100644
--- a/c/enc/brotli_bit_stream.h
+++ b/c/enc/brotli_bit_stream.h
@@ -16,10 +16,10 @@
#ifndef BROTLI_ENC_BROTLI_BIT_STREAM_H_
#define BROTLI_ENC_BROTLI_BIT_STREAM_H_
+#include "../common/context.h"
#include "../common/platform.h"
#include <brotli/types.h>
#include "./command.h"
-#include "./context.h"
#include "./entropy_encode.h"
#include "./memory.h"
#include "./metablock.h"
@@ -32,7 +32,7 @@ extern "C" {
position for the current storage. */
BROTLI_INTERNAL void BrotliStoreHuffmanTree(const uint8_t* depths, size_t num,
- HuffmanTree* tree, size_t *storage_ix, uint8_t *storage);
+ HuffmanTree* tree, size_t* storage_ix, uint8_t* storage);
BROTLI_INTERNAL void BrotliBuildAndStoreHuffmanTreeFast(
MemoryManager* m, const uint32_t* histogram, const size_t histogram_total,
@@ -42,51 +42,31 @@ BROTLI_INTERNAL void BrotliBuildAndStoreHuffmanTreeFast(
/* REQUIRES: length > 0 */
/* REQUIRES: length <= (1 << 24) */
BROTLI_INTERNAL void BrotliStoreMetaBlock(MemoryManager* m,
- const uint8_t* input,
- size_t start_pos,
- size_t length,
- size_t mask,
- uint8_t prev_byte,
- uint8_t prev_byte2,
- BROTLI_BOOL is_final_block,
- uint32_t num_direct_distance_codes,
- uint32_t distance_postfix_bits,
- ContextType literal_context_mode,
- const Command* commands,
- size_t n_commands,
- const MetaBlockSplit* mb,
- size_t* storage_ix,
- uint8_t* storage);
+ const uint8_t* input, size_t start_pos, size_t length, size_t mask,
+ uint8_t prev_byte, uint8_t prev_byte2, BROTLI_BOOL is_last,
+ const BrotliEncoderParams* params, ContextType literal_context_mode,
+ const Command* commands, size_t n_commands, const MetaBlockSplit* mb,
+ size_t* storage_ix, uint8_t* storage);
/* Stores the meta-block without doing any block splitting, just collects
one histogram per block category and uses that for entropy coding.
REQUIRES: length > 0
REQUIRES: length <= (1 << 24) */
BROTLI_INTERNAL void BrotliStoreMetaBlockTrivial(MemoryManager* m,
- const uint8_t* input,
- size_t start_pos,
- size_t length,
- size_t mask,
- BROTLI_BOOL is_last,
- const Command *commands,
- size_t n_commands,
- size_t* storage_ix,
- uint8_t* storage);
+ const uint8_t* input, size_t start_pos, size_t length, size_t mask,
+ BROTLI_BOOL is_last, const BrotliEncoderParams* params,
+ const Command* commands, size_t n_commands,
+ size_t* storage_ix, uint8_t* storage);
/* Same as above, but uses static prefix codes for histograms with only a few
symbols, and uses static code length prefix codes for all other histograms.
REQUIRES: length > 0
REQUIRES: length <= (1 << 24) */
BROTLI_INTERNAL void BrotliStoreMetaBlockFast(MemoryManager* m,
- const uint8_t* input,
- size_t start_pos,
- size_t length,
- size_t mask,
- BROTLI_BOOL is_last,
- const Command *commands,
- size_t n_commands,
- size_t* storage_ix,
- uint8_t* storage);
+ const uint8_t* input, size_t start_pos, size_t length, size_t mask,
+ BROTLI_BOOL is_last, const BrotliEncoderParams* params,
+ const Command* commands, size_t n_commands,
+ size_t* storage_ix, uint8_t* storage);
/* This is for storing uncompressed blocks (simple raw storage of
bytes-as-bytes).
diff --git a/c/enc/command.h b/c/enc/command.h
index 3bf0cf7..0526815 100644
--- a/c/enc/command.h
+++ b/c/enc/command.h
@@ -105,10 +105,13 @@ static BROTLI_INLINE uint32_t GetCopyExtra(uint16_t copycode) {
typedef struct Command {
uint32_t insert_len_;
- /* Stores copy_len in low 24 bits and copy_len XOR copy_code in high 8 bit. */
+ /* Stores copy_len in low 25 bits and copy_code - copy_len in high 7 bits. */
uint32_t copy_len_;
+ /* Stores distance extra bits. */
uint32_t dist_extra_;
uint16_t cmd_prefix_;
+ /* Stores distance code in low 10 bits
+ and number of extra bits in high 6 bits. */
uint16_t dist_prefix_;
} Command;
@@ -118,7 +121,7 @@ static BROTLI_INLINE void InitCommand(Command* self, size_t insertlen,
/* Don't rely on signed int representation, use honest casts. */
uint32_t delta = (uint8_t)((int8_t)copylen_code_delta);
self->insert_len_ = (uint32_t)insertlen;
- self->copy_len_ = (uint32_t)(copylen | (delta << 24));
+ self->copy_len_ = (uint32_t)(copylen | (delta << 25));
/* The distance prefix and extra bits are stored in this Command as if
npostfix and ndirect were 0, they are only recomputed later after the
clustering if needed. */
@@ -126,29 +129,29 @@ static BROTLI_INLINE void InitCommand(Command* self, size_t insertlen,
distance_code, 0, 0, &self->dist_prefix_, &self->dist_extra_);
GetLengthCode(
insertlen, (size_t)((int)copylen + copylen_code_delta),
- TO_BROTLI_BOOL(self->dist_prefix_ == 0), &self->cmd_prefix_);
+ TO_BROTLI_BOOL((self->dist_prefix_ & 0x3FF) == 0), &self->cmd_prefix_);
}
static BROTLI_INLINE void InitInsertCommand(Command* self, size_t insertlen) {
self->insert_len_ = (uint32_t)insertlen;
- self->copy_len_ = 4 << 24;
+ self->copy_len_ = 4 << 25;
self->dist_extra_ = 0;
self->dist_prefix_ = BROTLI_NUM_DISTANCE_SHORT_CODES;
GetLengthCode(insertlen, 4, BROTLI_FALSE, &self->cmd_prefix_);
}
static BROTLI_INLINE uint32_t CommandRestoreDistanceCode(const Command* self) {
- if (self->dist_prefix_ < BROTLI_NUM_DISTANCE_SHORT_CODES) {
- return self->dist_prefix_;
+ if ((self->dist_prefix_ & 0x3FF) < BROTLI_NUM_DISTANCE_SHORT_CODES) {
+ return self->dist_prefix_ & 0x3FF;
} else {
- uint32_t nbits = self->dist_extra_ >> 24;
- uint32_t extra = self->dist_extra_ & 0xffffff;
+ uint32_t nbits = self->dist_prefix_ >> 10;
+ uint32_t extra = self->dist_extra_;
/* It is assumed that the distance was first encoded with NPOSTFIX = 0 and
NDIRECT = 0, so the code itself is of this form:
BROTLI_NUM_DISTANCE_SHORT_CODES + 2 * (nbits - 1) + prefix_bit
Therefore, the following expression results in (2 + prefix_bit). */
- uint32_t prefix =
- self->dist_prefix_ + 4u - BROTLI_NUM_DISTANCE_SHORT_CODES - 2u * nbits;
+ uint32_t prefix = (self->dist_prefix_ & 0x3FF) + 4u -
+ BROTLI_NUM_DISTANCE_SHORT_CODES - 2u * nbits;
/* Subtract 4 for offset (Chapter 4.) and
increase by BROTLI_NUM_DISTANCE_SHORT_CODES - 1 */
return (prefix << nbits) + extra + BROTLI_NUM_DISTANCE_SHORT_CODES - 4u;
@@ -165,12 +168,13 @@ static BROTLI_INLINE uint32_t CommandDistanceContext(const Command* self) {
}
static BROTLI_INLINE uint32_t CommandCopyLen(const Command* self) {
- return self->copy_len_ & 0xFFFFFF;
+ return self->copy_len_ & 0x1FFFFFF;
}
static BROTLI_INLINE uint32_t CommandCopyLenCode(const Command* self) {
- int32_t delta = (int8_t)((uint8_t)(self->copy_len_ >> 24));
- return (uint32_t)((int32_t)(self->copy_len_ & 0xFFFFFF) + delta);
+ uint32_t modifier = self->copy_len_ >> 25;
+ int32_t delta = (int8_t)((uint8_t)(modifier | ((modifier & 0x40) << 1)));
+ return (uint32_t)((int32_t)(self->copy_len_ & 0x1FFFFFF) + delta);
}
#if defined(__cplusplus) || defined(c_plusplus)
diff --git a/c/enc/compress_fragment.c b/c/enc/compress_fragment.c
index 40dce3e..f75069b 100644
--- a/c/enc/compress_fragment.c
+++ b/c/enc/compress_fragment.c
@@ -38,7 +38,7 @@ extern "C" {
* There is no effort to ensure that it is a prime, the oddity is enough
for this use.
* The number has been tuned heuristically against compression benchmarks. */
-static const uint32_t kHashMul32 = 0x1e35a7bd;
+static const uint32_t kHashMul32 = 0x1E35A7BD;
static BROTLI_INLINE uint32_t Hash(const uint8_t* p, size_t shift) {
const uint64_t h = (BROTLI_UNALIGNED_LOAD64LE(p) << 24) * kHashMul32;
@@ -343,7 +343,7 @@ static void BrotliStoreMetaBlockHeader(
}
static void UpdateBits(size_t n_bits, uint32_t bits, size_t pos,
- uint8_t *array) {
+ uint8_t* array) {
while (n_bits > 0) {
size_t byte_pos = pos >> 3;
size_t n_unchanged_bits = pos & 7;
diff --git a/c/enc/compress_fragment_two_pass.c b/c/enc/compress_fragment_two_pass.c
index 8259817..b8bd6e8 100644
--- a/c/enc/compress_fragment_two_pass.c
+++ b/c/enc/compress_fragment_two_pass.c
@@ -37,7 +37,7 @@ extern "C" {
* There is no effort to ensure that it is a prime, the oddity is enough
for this use.
* The number has been tuned heuristically against compression benchmarks. */
-static const uint32_t kHashMul32 = 0x1e35a7bd;
+static const uint32_t kHashMul32 = 0x1E35A7BD;
static BROTLI_INLINE uint32_t Hash(const uint8_t* p, size_t shift) {
const uint64_t h = (BROTLI_UNALIGNED_LOAD64LE(p) << 16) * kHashMul32;
diff --git a/c/enc/context.h b/c/enc/context.h
deleted file mode 100644
index caa4230..0000000
--- a/c/enc/context.h
+++ /dev/null
@@ -1,184 +0,0 @@
-/* Copyright 2013 Google Inc. All Rights Reserved.
-
- Distributed under MIT license.
- See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
-*/
-
-/* Functions to map previous bytes into a context id. */
-
-#ifndef BROTLI_ENC_CONTEXT_H_
-#define BROTLI_ENC_CONTEXT_H_
-
-#include "../common/platform.h"
-#include <brotli/types.h>
-
-#if defined(__cplusplus) || defined(c_plusplus)
-extern "C" {
-#endif
-
-/* Second-order context lookup table for UTF8 byte streams.
-
- If p1 and p2 are the previous two bytes, we calculate the context as
-
- context = kUTF8ContextLookup[p1] | kUTF8ContextLookup[p2 + 256].
-
- If the previous two bytes are ASCII characters (i.e. < 128), this will be
- equivalent to
-
- context = 4 * context1(p1) + context2(p2),
-
- where context1 is based on the previous byte in the following way:
-
- 0 : non-ASCII control
- 1 : \t, \n, \r
- 2 : space
- 3 : other punctuation
- 4 : " '
- 5 : %
- 6 : ( < [ {
- 7 : ) > ] }
- 8 : , ; :
- 9 : .
- 10 : =
- 11 : number
- 12 : upper-case vowel
- 13 : upper-case consonant
- 14 : lower-case vowel
- 15 : lower-case consonant
-
- and context2 is based on the second last byte:
-
- 0 : control, space
- 1 : punctuation
- 2 : upper-case letter, number
- 3 : lower-case letter
-
- If the last byte is ASCII, and the second last byte is not (in a valid UTF8
- stream it will be a continuation byte, value between 128 and 191), the
- context is the same as if the second last byte was an ASCII control or space.
-
- If the last byte is a UTF8 lead byte (value >= 192), then the next byte will
- be a continuation byte and the context id is 2 or 3 depending on the LSB of
- the last byte and to a lesser extent on the second last byte if it is ASCII.
-
- If the last byte is a UTF8 continuation byte, the second last byte can be:
- - continuation byte: the next byte is probably ASCII or lead byte (assuming
- 4-byte UTF8 characters are rare) and the context id is 0 or 1.
- - lead byte (192 - 207): next byte is ASCII or lead byte, context is 0 or 1
- - lead byte (208 - 255): next byte is continuation byte, context is 2 or 3
-
- The possible value combinations of the previous two bytes, the range of
- context ids and the type of the next byte is summarized in the table below:
-
- |--------\-----------------------------------------------------------------|
- | \ Last byte |
- | Second \---------------------------------------------------------------|
- | last byte \ ASCII | cont. byte | lead byte |
- | \ (0-127) | (128-191) | (192-) |
- |=============|===================|=====================|==================|
- | ASCII | next: ASCII/lead | not valid | next: cont. |
- | (0-127) | context: 4 - 63 | | context: 2 - 3 |
- |-------------|-------------------|---------------------|------------------|
- | cont. byte | next: ASCII/lead | next: ASCII/lead | next: cont. |
- | (128-191) | context: 4 - 63 | context: 0 - 1 | context: 2 - 3 |
- |-------------|-------------------|---------------------|------------------|
- | lead byte | not valid | next: ASCII/lead | not valid |
- | (192-207) | | context: 0 - 1 | |
- |-------------|-------------------|---------------------|------------------|
- | lead byte | not valid | next: cont. | not valid |
- | (208-) | | context: 2 - 3 | |
- |-------------|-------------------|---------------------|------------------|
-*/
-static const uint8_t kUTF8ContextLookup[512] = {
- /* Last byte. */
- /* */
- /* ASCII range. */
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 0, 0, 4, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 8, 12, 16, 12, 12, 20, 12, 16, 24, 28, 12, 12, 32, 12, 36, 12,
- 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 32, 32, 24, 40, 28, 12,
- 12, 48, 52, 52, 52, 48, 52, 52, 52, 48, 52, 52, 52, 52, 52, 48,
- 52, 52, 52, 52, 52, 48, 52, 52, 52, 52, 52, 24, 12, 28, 12, 12,
- 12, 56, 60, 60, 60, 56, 60, 60, 60, 56, 60, 60, 60, 60, 60, 56,
- 60, 60, 60, 60, 60, 56, 60, 60, 60, 60, 60, 24, 12, 28, 12, 0,
- /* UTF8 continuation byte range. */
- 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1,
- 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1,
- 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1,
- 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1,
- /* UTF8 lead byte range. */
- 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3,
- 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3,
- 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3,
- 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3,
- /* Second last byte. */
- /* */
- /* ASCII range. */
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1,
- 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1,
- 1, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
- 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 1, 1, 1, 1, 0,
- /* UTF8 continuation byte range. */
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- /* UTF8 lead byte range. */
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
-};
-
-/* Context lookup table for small signed integers. */
-static const uint8_t kSigned3BitContextLookup[] = {
- 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
- 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
- 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
- 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
- 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
- 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
- 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
- 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
- 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
- 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
- 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
- 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7,
-};
-
-typedef enum ContextType {
- CONTEXT_LSB6 = 0,
- CONTEXT_MSB6 = 1,
- CONTEXT_UTF8 = 2,
- CONTEXT_SIGNED = 3
-} ContextType;
-
-static BROTLI_INLINE uint8_t Context(uint8_t p1, uint8_t p2, ContextType mode) {
- switch (mode) {
- case CONTEXT_LSB6:
- return p1 & 0x3f;
- case CONTEXT_MSB6:
- return (uint8_t)(p1 >> 2);
- case CONTEXT_UTF8:
- return kUTF8ContextLookup[p1] | kUTF8ContextLookup[p2 + 256];
- case CONTEXT_SIGNED:
- return (uint8_t)((kSigned3BitContextLookup[p1] << 3) +
- kSigned3BitContextLookup[p2]);
- default:
- return 0;
- }
-}
-
-#if defined(__cplusplus) || defined(c_plusplus)
-} /* extern "C" */
-#endif
-
-#endif /* BROTLI_ENC_CONTEXT_H_ */
diff --git a/c/enc/encode.c b/c/enc/encode.c
index 794a409..4fd28d0 100644
--- a/c/enc/encode.c
+++ b/c/enc/encode.c
@@ -11,6 +11,8 @@
#include <stdlib.h> /* free, malloc */
#include <string.h> /* memcpy, memset */
+#include "../common/constants.h"
+#include "../common/context.h"
#include "../common/platform.h"
#include "../common/version.h"
#include "./backward_references.h"
@@ -19,7 +21,7 @@
#include "./brotli_bit_stream.h"
#include "./compress_fragment.h"
#include "./compress_fragment_two_pass.h"
-#include "./context.h"
+#include "./encoder_dict.h"
#include "./entropy_encode.h"
#include "./fast_log.h"
#include "./hash.h"
@@ -69,8 +71,8 @@ typedef struct BrotliEncoderStateStruct {
uint64_t last_processed_pos_;
int dist_cache_[BROTLI_NUM_DISTANCE_SHORT_CODES];
int saved_dist_cache_[4];
- uint8_t last_byte_;
- uint8_t last_byte_bits_;
+ uint16_t last_bytes_;
+ uint8_t last_bytes_bits_;
uint8_t prev_byte_;
uint8_t prev_byte2_;
size_t storage_size_;
@@ -161,6 +163,10 @@ BROTLI_BOOL BrotliEncoderSetParameter(
state->params.size_hint = value;
return BROTLI_TRUE;
+ case BROTLI_PARAM_LARGE_WINDOW:
+ state->params.large_window = TO_BROTLI_BOOL(!!value);
+ return BROTLI_TRUE;
+
default: return BROTLI_FALSE;
}
}
@@ -251,20 +257,25 @@ static int* GetHashTable(BrotliEncoderState* s, int quality,
return table;
}
-static void EncodeWindowBits(int lgwin, uint8_t* last_byte,
- uint8_t* last_byte_bits) {
- if (lgwin == 16) {
- *last_byte = 0;
- *last_byte_bits = 1;
- } else if (lgwin == 17) {
- *last_byte = 1;
- *last_byte_bits = 7;
- } else if (lgwin > 17) {
- *last_byte = (uint8_t)(((lgwin - 17) << 1) | 1);
- *last_byte_bits = 4;
+static void EncodeWindowBits(int lgwin, BROTLI_BOOL large_window,
+ uint16_t* last_bytes, uint8_t* last_bytes_bits) {
+ if (large_window) {
+ *last_bytes = (uint16_t)(((lgwin & 0x3F) << 8) | 0x11);
+ *last_bytes_bits = 14;
} else {
- *last_byte = (uint8_t)(((lgwin - 8) << 4) | 1);
- *last_byte_bits = 7;
+ if (lgwin == 16) {
+ *last_bytes = 0;
+ *last_bytes_bits = 1;
+ } else if (lgwin == 17) {
+ *last_bytes = 1;
+ *last_bytes_bits = 7;
+ } else if (lgwin > 17) {
+ *last_bytes = (uint16_t)(((lgwin - 17) << 1) | 0x01);
+ *last_bytes_bits = 4;
+ } else {
+ *last_bytes = (uint16_t)(((lgwin - 8) << 4) | 0x01);
+ *last_bytes_bits = 7;
+ }
}
}
@@ -420,6 +431,7 @@ static BROTLI_BOOL ShouldUseComplexStaticContextMap(const uint8_t* input,
double entropy[3];
size_t dummy;
size_t i;
+ ContextLut utf8_lut = BROTLI_CONTEXT_LUT(CONTEXT_UTF8);
for (; start_pos + 64 <= end_pos; start_pos += 4096) {
const size_t stride_end_pos = start_pos + 64;
uint8_t prev2 = input[start_pos & mask];
@@ -430,7 +442,7 @@ static BROTLI_BOOL ShouldUseComplexStaticContextMap(const uint8_t* input,
for (pos = start_pos + 2; pos < stride_end_pos; ++pos) {
const uint8_t literal = input[pos & mask];
const uint8_t context = (uint8_t)kStaticContextMapComplexUTF8[
- Context(prev1, prev2, CONTEXT_UTF8)];
+ BROTLI_CONTEXT(prev1, prev2, utf8_lut)];
++total;
++combined_histo[literal >> 3];
++context_histo[context][literal >> 3];
@@ -519,12 +531,26 @@ static BROTLI_BOOL ShouldCompress(
return BROTLI_TRUE;
}
+/* Chooses the literal context mode for a metablock */
+static ContextType ChooseContextMode(const BrotliEncoderParams* params,
+ const uint8_t* data, const size_t pos, const size_t mask,
+ const size_t length) {
+ /* We only do the computation for the option of something else than
+ CONTEXT_UTF8 for the highest qualities */
+ if (params->quality >= MIN_QUALITY_FOR_HQ_BLOCK_SPLITTING &&
+ !BrotliIsMostlyUTF8(data, pos, mask, length, kMinUTF8Ratio)) {
+ return CONTEXT_SIGNED;
+ }
+ return CONTEXT_UTF8;
+}
+
static void WriteMetaBlockInternal(MemoryManager* m,
const uint8_t* data,
const size_t mask,
const uint64_t last_flush_pos,
const size_t bytes,
const BROTLI_BOOL is_last,
+ ContextType literal_context_mode,
const BrotliEncoderParams* params,
const uint8_t prev_byte,
const uint8_t prev_byte2,
@@ -536,10 +562,9 @@ static void WriteMetaBlockInternal(MemoryManager* m,
size_t* storage_ix,
uint8_t* storage) {
const uint32_t wrapped_last_flush_pos = WrapPosition(last_flush_pos);
- uint8_t last_byte;
- uint8_t last_byte_bits;
- uint32_t num_direct_distance_codes = 0;
- uint32_t distance_postfix_bits = 0;
+ uint16_t last_bytes;
+ uint8_t last_bytes_bits;
+ ContextLut literal_context_lut = BROTLI_CONTEXT_LUT(literal_context_mode);
if (bytes == 0) {
/* Write the ISLAST and ISEMPTY bits. */
@@ -559,31 +584,29 @@ static void WriteMetaBlockInternal(MemoryManager* m,
return;
}
- last_byte = storage[0];
- last_byte_bits = (uint8_t)(*storage_ix & 0xff);
- if (params->quality >= MIN_QUALITY_FOR_RECOMPUTE_DISTANCE_PREFIXES &&
- params->mode == BROTLI_MODE_FONT) {
- num_direct_distance_codes = 12;
- distance_postfix_bits = 1;
+ BROTLI_DCHECK(*storage_ix <= 14);
+ last_bytes = (uint16_t)((storage[1] << 8) | storage[0]);
+ last_bytes_bits = (uint8_t)(*storage_ix);
+ if (params->dist.num_direct_distance_codes != 0 ||
+ params->dist.distance_postfix_bits != 0) {
RecomputeDistancePrefixes(commands,
num_commands,
- num_direct_distance_codes,
- distance_postfix_bits);
+ params->dist.num_direct_distance_codes,
+ params->dist.distance_postfix_bits);
}
if (params->quality <= MAX_QUALITY_FOR_STATIC_ENTROPY_CODES) {
BrotliStoreMetaBlockFast(m, data, wrapped_last_flush_pos,
- bytes, mask, is_last,
+ bytes, mask, is_last, params,
commands, num_commands,
storage_ix, storage);
if (BROTLI_IS_OOM(m)) return;
} else if (params->quality < MIN_QUALITY_FOR_BLOCK_SPLIT) {
BrotliStoreMetaBlockTrivial(m, data, wrapped_last_flush_pos,
- bytes, mask, is_last,
+ bytes, mask, is_last, params,
commands, num_commands,
storage_ix, storage);
if (BROTLI_IS_OOM(m)) return;
} else {
- ContextType literal_context_mode = CONTEXT_UTF8;
MetaBlockSplit mb;
InitMetaBlockSplit(&mb);
if (params->quality < MIN_QUALITY_FOR_HQ_BLOCK_SPLITTING) {
@@ -596,14 +619,10 @@ static void WriteMetaBlockInternal(MemoryManager* m,
&literal_context_map);
}
BrotliBuildMetaBlockGreedy(m, data, wrapped_last_flush_pos, mask,
- prev_byte, prev_byte2, literal_context_mode, num_literal_contexts,
+ prev_byte, prev_byte2, literal_context_lut, num_literal_contexts,
literal_context_map, commands, num_commands, &mb);
if (BROTLI_IS_OOM(m)) return;
} else {
- if (!BrotliIsMostlyUTF8(data, wrapped_last_flush_pos, mask, bytes,
- kMinUTF8Ratio)) {
- literal_context_mode = CONTEXT_SIGNED;
- }
BrotliBuildMetaBlock(m, data, wrapped_last_flush_pos, mask, params,
prev_byte, prev_byte2,
commands, num_commands,
@@ -612,15 +631,18 @@ static void WriteMetaBlockInternal(MemoryManager* m,
if (BROTLI_IS_OOM(m)) return;
}
if (params->quality >= MIN_QUALITY_FOR_OPTIMIZE_HISTOGRAMS) {
- BrotliOptimizeHistograms(num_direct_distance_codes,
- distance_postfix_bits,
- &mb);
+ /* The number of distance symbols effectively used by
+ "Large Window Brotli" (32-bit). */
+ uint32_t num_effective_dist_codes = params->dist.alphabet_size;
+ if (num_effective_dist_codes > BROTLI_NUM_HISTOGRAM_DISTANCE_SYMBOLS) {
+ num_effective_dist_codes = BROTLI_NUM_HISTOGRAM_DISTANCE_SYMBOLS;
+ }
+ BrotliOptimizeHistograms(num_effective_dist_codes, &mb);
}
BrotliStoreMetaBlock(m, data, wrapped_last_flush_pos, bytes, mask,
prev_byte, prev_byte2,
is_last,
- num_direct_distance_codes,
- distance_postfix_bits,
+ params,
literal_context_mode,
commands, num_commands,
&mb,
@@ -631,20 +653,54 @@ static void WriteMetaBlockInternal(MemoryManager* m,
if (bytes + 4 < (*storage_ix >> 3)) {
/* Restore the distance cache and last byte. */
memcpy(dist_cache, saved_dist_cache, 4 * sizeof(dist_cache[0]));
- storage[0] = last_byte;
- *storage_ix = last_byte_bits;
+ storage[0] = (uint8_t)last_bytes;
+ storage[1] = (uint8_t)(last_bytes >> 8);
+ *storage_ix = last_bytes_bits;
BrotliStoreUncompressedMetaBlock(is_last, data,
wrapped_last_flush_pos, mask,
bytes, storage_ix, storage);
}
}
+static void ChooseDistanceParams(BrotliEncoderParams* params) {
+ uint32_t num_direct_distance_codes = 0;
+ uint32_t distance_postfix_bits = 0;
+ uint32_t alphabet_size;
+ size_t max_distance = BROTLI_MAX_DISTANCE;
+
+ if (params->quality >= MIN_QUALITY_FOR_RECOMPUTE_DISTANCE_PREFIXES &&
+ params->mode == BROTLI_MODE_FONT) {
+ num_direct_distance_codes = 12;
+ distance_postfix_bits = 1;
+ max_distance = (1U << 27) + 4;
+ }
+
+ alphabet_size = BROTLI_DISTANCE_ALPHABET_SIZE(
+ num_direct_distance_codes, distance_postfix_bits,
+ BROTLI_MAX_DISTANCE_BITS);
+ if (params->large_window) {
+ max_distance = BROTLI_MAX_ALLOWED_DISTANCE;
+ if (num_direct_distance_codes != 0 || distance_postfix_bits != 0) {
+ max_distance = (3U << 29) - 4;
+ }
+ alphabet_size = BROTLI_DISTANCE_ALPHABET_SIZE(
+ num_direct_distance_codes, distance_postfix_bits,
+ BROTLI_LARGE_MAX_DISTANCE_BITS);
+ }
+
+ params->dist.num_direct_distance_codes = num_direct_distance_codes;
+ params->dist.distance_postfix_bits = distance_postfix_bits;
+ params->dist.alphabet_size = alphabet_size;
+ params->dist.max_distance = max_distance;
+}
+
static BROTLI_BOOL EnsureInitialized(BrotliEncoderState* s) {
if (BROTLI_IS_OOM(&s->memory_manager_)) return BROTLI_FALSE;
if (s->is_initialized_) return BROTLI_TRUE;
SanitizeParams(&s->params);
s->params.lgblock = ComputeLgBlock(&s->params);
+ ChooseDistanceParams(&s->params);
s->remaining_metadata_bytes_ = BROTLI_UINT32_MAX;
@@ -657,7 +713,8 @@ static BROTLI_BOOL EnsureInitialized(BrotliEncoderState* s) {
s->params.quality == FAST_TWO_PASS_COMPRESSION_QUALITY) {
lgwin = BROTLI_MAX(int, lgwin, 18);
}
- EncodeWindowBits(lgwin, &s->last_byte_, &s->last_byte_bits_);
+ EncodeWindowBits(lgwin, s->params.large_window,
+ &s->last_bytes_, &s->last_bytes_bits_);
}
if (s->params.quality == FAST_ONE_PASS_COMPRESSION_QUALITY) {
@@ -671,11 +728,18 @@ static BROTLI_BOOL EnsureInitialized(BrotliEncoderState* s) {
static void BrotliEncoderInitParams(BrotliEncoderParams* params) {
params->mode = BROTLI_DEFAULT_MODE;
+ params->large_window = BROTLI_FALSE;
params->quality = BROTLI_DEFAULT_QUALITY;
params->lgwin = BROTLI_DEFAULT_WINDOW;
params->lgblock = 0;
params->size_hint = 0;
params->disable_literal_context_modeling = BROTLI_FALSE;
+ BrotliInitEncoderDictionary(&params->dictionary);
+ params->dist.num_direct_distance_codes = 0;
+ params->dist.distance_postfix_bits = 0;
+ params->dist.alphabet_size =
+ BROTLI_DISTANCE_ALPHABET_SIZE(0, 0, BROTLI_MAX_DISTANCE_BITS);
+ params->dist.max_distance = BROTLI_MAX_DISTANCE;
}
static void BrotliEncoderInitState(BrotliEncoderState* s) {
@@ -837,6 +901,37 @@ static BROTLI_BOOL UpdateLastProcessedPos(BrotliEncoderState* s) {
return TO_BROTLI_BOOL(wrapped_input_pos < wrapped_last_processed_pos);
}
+static void ExtendLastCommand(BrotliEncoderState* s, uint32_t* bytes,
+ uint32_t* wrapped_last_processed_pos) {
+ Command* last_command = &s->commands_[s->num_commands_ - 1];
+ const uint8_t* data = s->ringbuffer_.buffer_;
+ const uint32_t mask = s->ringbuffer_.mask_;
+ uint64_t max_backward_distance = (1u << s->params.lgwin) - BROTLI_WINDOW_GAP;
+ uint64_t last_copy_len = last_command->copy_len_ & 0x1FFFFFF;
+ uint64_t last_processed_pos = s->last_processed_pos_ - last_copy_len;
+ uint64_t max_distance = last_processed_pos < max_backward_distance ?
+ last_processed_pos : max_backward_distance;
+ uint64_t cmd_dist = (uint64_t)s->dist_cache_[0];
+ uint32_t distance_code = CommandRestoreDistanceCode(last_command);
+ if (distance_code < BROTLI_NUM_DISTANCE_SHORT_CODES ||
+ distance_code - (BROTLI_NUM_DISTANCE_SHORT_CODES - 1) == cmd_dist) {
+ if (cmd_dist <= max_distance) {
+ while (*bytes != 0 && data[*wrapped_last_processed_pos & mask] ==
+ data[(*wrapped_last_processed_pos - cmd_dist) & mask]) {
+ last_command->copy_len_++;
+ (*bytes)--;
+ (*wrapped_last_processed_pos)++;
+ }
+ }
+ /* The copy length is at most the metablock size, and thus expressible. */
+ GetLengthCode(last_command->insert_len_,
+ (size_t)((int)(last_command->copy_len_ & 0x1FFFFFF) +
+ (int)(last_command->copy_len_ >> 25)),
+ TO_BROTLI_BOOL((last_command->dist_prefix_ & 0x3FF) == 0),
+ &last_command->cmd_prefix_);
+ }
+}
+
/*
Processes the accumulated input data and sets |*out_size| to the length of
the new output meta-block, or to zero if no new output meta-block has been
@@ -853,13 +948,12 @@ static BROTLI_BOOL EncodeData(
BrotliEncoderState* s, const BROTLI_BOOL is_last,
const BROTLI_BOOL force_flush, size_t* out_size, uint8_t** output) {
const uint64_t delta = UnprocessedInputSize(s);
- const uint32_t bytes = (uint32_t)delta;
- const uint32_t wrapped_last_processed_pos =
- WrapPosition(s->last_processed_pos_);
+ uint32_t bytes = (uint32_t)delta;
+ uint32_t wrapped_last_processed_pos = WrapPosition(s->last_processed_pos_);
uint8_t* data;
uint32_t mask;
MemoryManager* m = &s->memory_manager_;
- const BrotliDictionary* dictionary = BrotliGetDictionary();
+ ContextType literal_context_mode;
if (!EnsureInitialized(s)) return BROTLI_FALSE;
data = s->ringbuffer_.buffer_;
@@ -884,7 +978,7 @@ static BROTLI_BOOL EncodeData(
if (s->params.quality == FAST_ONE_PASS_COMPRESSION_QUALITY ||
s->params.quality == FAST_TWO_PASS_COMPRESSION_QUALITY) {
uint8_t* storage;
- size_t storage_ix = s->last_byte_bits_;
+ size_t storage_ix = s->last_bytes_bits_;
size_t table_size;
int* table;
@@ -894,9 +988,10 @@ static BROTLI_BOOL EncodeData(
*out_size = 0;
return BROTLI_TRUE;
}
- storage = GetBrotliStorage(s, 2 * bytes + 502);
+ storage = GetBrotliStorage(s, 2 * bytes + 503);
if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
- storage[0] = s->last_byte_;
+ storage[0] = (uint8_t)s->last_bytes_;
+ storage[1] = (uint8_t)(s->last_bytes_ >> 8);
table = GetHashTable(s, s->params.quality, bytes, &table_size);
if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
if (s->params.quality == FAST_ONE_PASS_COMPRESSION_QUALITY) {
@@ -917,8 +1012,8 @@ static BROTLI_BOOL EncodeData(
&storage_ix, storage);
if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
}
- s->last_byte_ = storage[storage_ix >> 3];
- s->last_byte_bits_ = storage_ix & 7u;
+ s->last_bytes_ = (uint16_t)(storage[storage_ix >> 3]);
+ s->last_bytes_bits_ = storage_ix & 7u;
UpdateLastProcessedPos(s);
*output = &storage[0];
*out_size = storage_ix >> 3;
@@ -946,27 +1041,36 @@ static BROTLI_BOOL EncodeData(
InitOrStitchToPreviousBlock(m, &s->hasher_, data, mask, &s->params,
wrapped_last_processed_pos, bytes, is_last);
+
+ literal_context_mode = ChooseContextMode(
+ &s->params, data, WrapPosition(s->last_flush_pos_),
+ mask, (size_t)(s->input_pos_ - s->last_flush_pos_));
+
if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
+ if (s->num_commands_ && s->last_insert_len_ == 0) {
+ ExtendLastCommand(s, &bytes, &wrapped_last_processed_pos);
+ }
+
if (s->params.quality == ZOPFLIFICATION_QUALITY) {
BROTLI_DCHECK(s->params.hasher.type == 10);
- BrotliCreateZopfliBackwardReferences(
- m, dictionary, bytes, wrapped_last_processed_pos,
+ BrotliCreateZopfliBackwardReferences(m,
+ bytes, wrapped_last_processed_pos,
data, mask, &s->params, s->hasher_, s->dist_cache_,
&s->last_insert_len_, &s->commands_[s->num_commands_],
&s->num_commands_, &s->num_literals_);
if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
} else if (s->params.quality == HQ_ZOPFLIFICATION_QUALITY) {
BROTLI_DCHECK(s->params.hasher.type == 10);
- BrotliCreateHqZopfliBackwardReferences(
- m, dictionary, bytes, wrapped_last_processed_pos,
+ BrotliCreateHqZopfliBackwardReferences(m,
+ bytes, wrapped_last_processed_pos,
data, mask, &s->params, s->hasher_, s->dist_cache_,
&s->last_insert_len_, &s->commands_[s->num_commands_],
&s->num_commands_, &s->num_literals_);
if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
} else {
BrotliCreateBackwardReferences(
- dictionary, bytes, wrapped_last_processed_pos,
+ bytes, wrapped_last_processed_pos,
data, mask, &s->params, s->hasher_, s->dist_cache_,
&s->last_insert_len_, &s->commands_[s->num_commands_],
&s->num_commands_, &s->num_literals_);
@@ -1018,18 +1122,19 @@ static BROTLI_BOOL EncodeData(
{
const uint32_t metablock_size =
(uint32_t)(s->input_pos_ - s->last_flush_pos_);
- uint8_t* storage = GetBrotliStorage(s, 2 * metablock_size + 502);
- size_t storage_ix = s->last_byte_bits_;
+ uint8_t* storage = GetBrotliStorage(s, 2 * metablock_size + 503);
+ size_t storage_ix = s->last_bytes_bits_;
if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
- storage[0] = s->last_byte_;
+ storage[0] = (uint8_t)s->last_bytes_;
+ storage[1] = (uint8_t)(s->last_bytes_ >> 8);
WriteMetaBlockInternal(
m, data, mask, s->last_flush_pos_, metablock_size, is_last,
- &s->params, s->prev_byte_, s->prev_byte2_,
+ literal_context_mode, &s->params, s->prev_byte_, s->prev_byte2_,
s->num_literals_, s->num_commands_, s->commands_, s->saved_dist_cache_,
s->dist_cache_, &storage_ix, storage);
if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
- s->last_byte_ = storage[storage_ix >> 3];
- s->last_byte_bits_ = storage_ix & 7u;
+ s->last_bytes_ = (uint16_t)(storage[storage_ix >> 3]);
+ s->last_bytes_bits_ = storage_ix & 7u;
s->last_flush_pos_ = s->input_pos_;
if (UpdateLastProcessedPos(s)) {
HasherReset(s->hasher_);
@@ -1058,10 +1163,11 @@ static BROTLI_BOOL EncodeData(
static size_t WriteMetadataHeader(
BrotliEncoderState* s, const size_t block_size, uint8_t* header) {
size_t storage_ix;
- storage_ix = s->last_byte_bits_;
- header[0] = s->last_byte_;
- s->last_byte_ = 0;
- s->last_byte_bits_ = 0;
+ storage_ix = s->last_bytes_bits_;
+ header[0] = (uint8_t)s->last_bytes_;
+ header[1] = (uint8_t)(s->last_bytes_ >> 8);
+ s->last_bytes_ = 0;
+ s->last_bytes_bits_ = 0;
BrotliWriteBits(1, 0, &storage_ix, header);
BrotliWriteBits(2, 3, &storage_ix, header);
@@ -1091,15 +1197,14 @@ static BROTLI_BOOL BrotliCompressBufferQuality10(
BROTLI_BOOL ok = BROTLI_TRUE;
const size_t max_out_size = *encoded_size;
size_t total_out_size = 0;
- uint8_t last_byte;
- uint8_t last_byte_bits;
+ uint16_t last_bytes;
+ uint8_t last_bytes_bits;
HasherHandle hasher = NULL;
const size_t hasher_eff_size =
BROTLI_MIN(size_t, input_size, max_backward_limit + BROTLI_WINDOW_GAP);
BrotliEncoderParams params;
- const BrotliDictionary* dictionary = BrotliGetDictionary();
const int lgmetablock = BROTLI_MIN(int, 24, lgwin + 1);
size_t max_block_size;
@@ -1113,14 +1218,18 @@ static BROTLI_BOOL BrotliCompressBufferQuality10(
BrotliEncoderInitParams(&params);
params.quality = 10;
params.lgwin = lgwin;
+ if (lgwin > BROTLI_MAX_WINDOW_BITS) {
+ params.large_window = BROTLI_TRUE;
+ }
SanitizeParams(&params);
params.lgblock = ComputeLgBlock(&params);
+ ChooseDistanceParams(&params);
max_block_size = (size_t)1 << params.lgblock;
BrotliInitMemoryManager(m, 0, 0, 0);
BROTLI_DCHECK(input_size <= mask + 1);
- EncodeWindowBits(lgwin, &last_byte, &last_byte_bits);
+ EncodeWindowBits(lgwin, params.large_window, &last_bytes, &last_bytes_bits);
InitOrStitchToPreviousBlock(m, &hasher, input_buffer, mask, &params,
0, hasher_eff_size, BROTLI_TRUE);
if (BROTLI_IS_OOM(m)) goto oom;
@@ -1140,6 +1249,9 @@ static BROTLI_BOOL BrotliCompressBufferQuality10(
uint8_t* storage;
size_t storage_ix;
+ ContextType literal_context_mode = ChooseContextMode(&params,
+ input_buffer, metablock_start, mask, metablock_end - metablock_start);
+
size_t block_start;
for (block_start = metablock_start; block_start < metablock_end; ) {
size_t block_size =
@@ -1151,10 +1263,9 @@ static BROTLI_BOOL BrotliCompressBufferQuality10(
BrotliInitZopfliNodes(nodes, block_size + 1);
StitchToPreviousBlockH10(hasher, block_size, block_start,
input_buffer, mask);
- path_size = BrotliZopfliComputeShortestPath(
- m, dictionary, block_size, block_start,
- input_buffer, mask, &params, max_backward_limit, dist_cache, hasher,
- nodes);
+ path_size = BrotliZopfliComputeShortestPath(m,
+ block_size, block_start, input_buffer, mask, &params,
+ max_backward_limit, dist_cache, hasher, nodes);
if (BROTLI_IS_OOM(m)) goto oom;
/* We allocate a command buffer in the first iteration of this loop that
will be likely big enough for the whole metablock, so that for most
@@ -1197,13 +1308,14 @@ static BROTLI_BOOL BrotliCompressBufferQuality10(
is_last = TO_BROTLI_BOOL(metablock_start + metablock_size == input_size);
storage = NULL;
- storage_ix = last_byte_bits;
+ storage_ix = last_bytes_bits;
if (metablock_size == 0) {
/* Write the ISLAST and ISEMPTY bits. */
storage = BROTLI_ALLOC(m, uint8_t, 16);
if (BROTLI_IS_OOM(m)) goto oom;
- storage[0] = last_byte;
+ storage[0] = (uint8_t)last_bytes;
+ storage[1] = (uint8_t)(last_bytes >> 8);
BrotliWriteBits(2, 3, &storage_ix, storage);
storage_ix = (storage_ix + 7u) & ~7u;
} else if (!ShouldCompress(input_buffer, mask, metablock_start,
@@ -1213,37 +1325,35 @@ static BROTLI_BOOL BrotliCompressBufferQuality10(
memcpy(dist_cache, saved_dist_cache, 4 * sizeof(dist_cache[0]));
storage = BROTLI_ALLOC(m, uint8_t, metablock_size + 16);
if (BROTLI_IS_OOM(m)) goto oom;
- storage[0] = last_byte;
+ storage[0] = (uint8_t)last_bytes;
+ storage[1] = (uint8_t)(last_bytes >> 8);
BrotliStoreUncompressedMetaBlock(is_last, input_buffer,
metablock_start, mask, metablock_size,
&storage_ix, storage);
} else {
- uint32_t num_direct_distance_codes = 0;
- uint32_t distance_postfix_bits = 0;
- ContextType literal_context_mode = CONTEXT_UTF8;
MetaBlockSplit mb;
- InitMetaBlockSplit(&mb);
- if (!BrotliIsMostlyUTF8(input_buffer, metablock_start, mask,
- metablock_size, kMinUTF8Ratio)) {
- literal_context_mode = CONTEXT_SIGNED;
+ /* The number of distance symbols effectively used by
+ "Large Window Brotli" (32-bit). */
+ uint32_t num_effective_dist_codes = params.dist.alphabet_size;
+ if (num_effective_dist_codes > BROTLI_NUM_HISTOGRAM_DISTANCE_SYMBOLS) {
+ num_effective_dist_codes = BROTLI_NUM_HISTOGRAM_DISTANCE_SYMBOLS;
}
+ InitMetaBlockSplit(&mb);
BrotliBuildMetaBlock(m, input_buffer, metablock_start, mask, &params,
prev_byte, prev_byte2,
commands, num_commands,
literal_context_mode,
&mb);
if (BROTLI_IS_OOM(m)) goto oom;
- BrotliOptimizeHistograms(num_direct_distance_codes,
- distance_postfix_bits,
- &mb);
- storage = BROTLI_ALLOC(m, uint8_t, 2 * metablock_size + 502);
+ BrotliOptimizeHistograms(num_effective_dist_codes, &mb);
+ storage = BROTLI_ALLOC(m, uint8_t, 2 * metablock_size + 503);
if (BROTLI_IS_OOM(m)) goto oom;
- storage[0] = last_byte;
+ storage[0] = (uint8_t)last_bytes;
+ storage[1] = (uint8_t)(last_bytes >> 8);
BrotliStoreMetaBlock(m, input_buffer, metablock_start, metablock_size,
mask, prev_byte, prev_byte2,
is_last,
- num_direct_distance_codes,
- distance_postfix_bits,
+ &params,
literal_context_mode,
commands, num_commands,
&mb,
@@ -1252,16 +1362,17 @@ static BROTLI_BOOL BrotliCompressBufferQuality10(
if (metablock_size + 4 < (storage_ix >> 3)) {
/* Restore the distance cache and last byte. */
memcpy(dist_cache, saved_dist_cache, 4 * sizeof(dist_cache[0]));
- storage[0] = last_byte;
- storage_ix = last_byte_bits;
+ storage[0] = (uint8_t)last_bytes;
+ storage[1] = (uint8_t)(last_bytes >> 8);
+ storage_ix = last_bytes_bits;
BrotliStoreUncompressedMetaBlock(is_last, input_buffer,
metablock_start, mask,
metablock_size, &storage_ix, storage);
}
DestroyMetaBlockSplit(m, &mb);
}
- last_byte = storage[storage_ix >> 3];
- last_byte_bits = storage_ix & 7u;
+ last_bytes = (uint16_t)(storage[storage_ix >> 3]);
+ last_bytes_bits = storage_ix & 7u;
metablock_start += metablock_size;
if (metablock_start < input_size) {
prev_byte = input_buffer[metablock_start - 1];
@@ -1296,8 +1407,8 @@ oom:
size_t BrotliEncoderMaxCompressedSize(size_t input_size) {
/* [window bits / empty metadata] + N * [uncompressed] + [last empty] */
- size_t num_small_blocks = input_size >> 14;
- size_t overhead = 2 + (4 * num_small_blocks) + 3 + 1;
+ size_t num_large_blocks = input_size >> 14;
+ size_t overhead = 2 + (4 * num_large_blocks) + 3 + 1;
size_t result = input_size + overhead;
if (input_size == 0) return 2;
return (result < input_size) ? 0 : result;
@@ -1360,7 +1471,7 @@ BROTLI_BOOL BrotliEncoderCompress(
}
if (quality == 10) {
/* TODO: Implement this direct path for all quality levels. */
- const int lg_win = BROTLI_MIN(int, BROTLI_MAX_WINDOW_BITS,
+ const int lg_win = BROTLI_MIN(int, BROTLI_LARGE_MAX_WINDOW_BITS,
BROTLI_MAX(int, 16, lgwin));
int ok = BrotliCompressBufferQuality10(lg_win, input_size, input_buffer,
encoded_size, encoded_buffer);
@@ -1384,6 +1495,9 @@ BROTLI_BOOL BrotliEncoderCompress(
BrotliEncoderSetParameter(s, BROTLI_PARAM_LGWIN, (uint32_t)lgwin);
BrotliEncoderSetParameter(s, BROTLI_PARAM_MODE, (uint32_t)mode);
BrotliEncoderSetParameter(s, BROTLI_PARAM_SIZE_HINT, (uint32_t)input_size);
+ if (lgwin > BROTLI_MAX_WINDOW_BITS) {
+ BrotliEncoderSetParameter(s, BROTLI_PARAM_LARGE_WINDOW, BROTLI_TRUE);
+ }
result = BrotliEncoderCompressStream(s, BROTLI_OPERATION_FINISH,
&available_in, &next_in, &available_out, &next_out, &total_out);
if (!BrotliEncoderIsFinished(s)) result = 0;
@@ -1406,11 +1520,11 @@ fallback:
}
static void InjectBytePaddingBlock(BrotliEncoderState* s) {
- uint32_t seal = s->last_byte_;
- size_t seal_bits = s->last_byte_bits_;
+ uint32_t seal = s->last_bytes_;
+ size_t seal_bits = s->last_bytes_bits_;
uint8_t* destination;
- s->last_byte_ = 0;
- s->last_byte_bits_ = 0;
+ s->last_bytes_ = 0;
+ s->last_bytes_bits_ = 0;
/* is_last = 0, data_nibbles = 11, reserved = 0, meta_nibbles = 00 */
seal |= 0x6u << seal_bits;
seal_bits += 6;
@@ -1424,6 +1538,7 @@ static void InjectBytePaddingBlock(BrotliEncoderState* s) {
}
destination[0] = (uint8_t)seal;
if (seal_bits > 8) destination[1] = (uint8_t)(seal >> 8);
+ if (seal_bits > 16) destination[2] = (uint8_t)(seal >> 16);
s->available_out_ += (seal_bits + 7) >> 3;
}
@@ -1432,7 +1547,7 @@ static void InjectBytePaddingBlock(BrotliEncoderState* s) {
static BROTLI_BOOL InjectFlushOrPushOutput(BrotliEncoderState* s,
size_t* available_out, uint8_t** next_out, size_t* total_out) {
if (s->stream_state_ == BROTLI_STREAM_FLUSH_REQUESTED &&
- s->last_byte_bits_ != 0) {
+ s->last_bytes_bits_ != 0) {
InjectBytePaddingBlock(s);
return BROTLI_TRUE;
}
@@ -1513,10 +1628,10 @@ static BROTLI_BOOL BrotliEncoderCompressStreamFast(
(*available_in == block_size) && (op == BROTLI_OPERATION_FINISH);
BROTLI_BOOL force_flush =
(*available_in == block_size) && (op == BROTLI_OPERATION_FLUSH);
- size_t max_out_size = 2 * block_size + 502;
+ size_t max_out_size = 2 * block_size + 503;
BROTLI_BOOL inplace = BROTLI_TRUE;
uint8_t* storage = NULL;
- size_t storage_ix = s->last_byte_bits_;
+ size_t storage_ix = s->last_bytes_bits_;
size_t table_size;
int* table;
@@ -1531,7 +1646,8 @@ static BROTLI_BOOL BrotliEncoderCompressStreamFast(
storage = GetBrotliStorage(s, max_out_size);
if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
}
- storage[0] = s->last_byte_;
+ storage[0] = (uint8_t)s->last_bytes_;
+ storage[1] = (uint8_t)(s->last_bytes_ >> 8);
table = GetHashTable(s, s->params.quality, block_size, &table_size);
if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
@@ -1561,8 +1677,8 @@ static BROTLI_BOOL BrotliEncoderCompressStreamFast(
s->next_out_ = storage;
s->available_out_ = out_bytes;
}
- s->last_byte_ = storage[storage_ix >> 3];
- s->last_byte_bits_ = storage_ix & 7u;
+ s->last_bytes_ = (uint16_t)(storage[storage_ix >> 3]);
+ s->last_bytes_bits_ = storage_ix & 7u;
if (force_flush) s->stream_state_ = BROTLI_STREAM_FLUSH_REQUESTED;
if (is_last) s->stream_state_ = BROTLI_STREAM_FINISHED;
diff --git a/c/enc/encoder_dict.c b/c/enc/encoder_dict.c
new file mode 100755
index 0000000..8b2f6ad
--- /dev/null
+++ b/c/enc/encoder_dict.c
@@ -0,0 +1,32 @@
+/* Copyright 2017 Google Inc. All Rights Reserved.
+
+ Distributed under MIT license.
+ See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+#include "./encoder_dict.h"
+
+#include "../common/dictionary.h"
+#include "../common/transform.h"
+#include "./dictionary_hash.h"
+#include "./hash.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+/* Fills |dict|: stores pointers to static tables, copies cutoff constants. */
+void BrotliInitEncoderDictionary(BrotliEncoderDictionary* dict) {
+ dict->words = BrotliGetDictionary();
+ /* |hash_table| serves the fast encoder; the other two the slow encoder. */
+ dict->hash_table = kStaticDictionaryHash;
+ dict->buckets = kStaticDictionaryBuckets;
+ dict->dict_words = kStaticDictionaryWords;
+ /* Fast-encoder limits for matching a dictionary word with its tail cut. */
+ dict->cutoffTransformsCount = kCutoffTransformsCount;
+ dict->cutoffTransforms = kCutoffTransforms;
+
+}
+
+#if defined(__cplusplus) || defined(c_plusplus)
+} /* extern "C" */
+#endif
diff --git a/c/enc/encoder_dict.h b/c/enc/encoder_dict.h
new file mode 100755
index 0000000..9ac8b4a
--- /dev/null
+++ b/c/enc/encoder_dict.h
@@ -0,0 +1,42 @@
+/* Copyright 2017 Google Inc. All Rights Reserved.
+
+ Distributed under MIT license.
+ See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+#ifndef BROTLI_ENC_ENCODER_DICT_H_
+#define BROTLI_ENC_ENCODER_DICT_H_
+
+#include "../common/dictionary.h"
+#include "../common/platform.h"
+#include <brotli/types.h>
+#include "./static_dict_lut.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+/* Dictionary data (words and transforms) for 1 possible context. */
+typedef struct BrotliEncoderDictionary {
+ const BrotliDictionary* words;  /* word bytes and per-length tables */
+ /* Fast encoder, word matched with |cut| tail bytes missing: */
+ /* needs cut < Count; 6-bit field number |cut| feeds the transform id. */
+ uint32_t cutoffTransformsCount;
+ uint64_t cutoffTransforms;
+
+ /* From dictionary_hash.h, for fast encoder; probed at Hash14(data) << 1. */
+ const uint16_t* hash_table;
+
+ /* From static_dict_lut.h, for slow encoder. */
+ const uint16_t* buckets;
+ const DictWord* dict_words;
+} BrotliEncoderDictionary;
+/* Fills every field of |dict|; see encoder_dict.c for the data sources. */
+BROTLI_INTERNAL void BrotliInitEncoderDictionary(BrotliEncoderDictionary* dict);
+
+
+#if defined(__cplusplus) || defined(c_plusplus)
+} /* extern "C" */
+#endif
+
+#endif /* BROTLI_ENC_ENCODER_DICT_H_ */
diff --git a/c/enc/entropy_encode.c b/c/enc/entropy_encode.c
index 9e0ea11..97f9dfb 100644
--- a/c/enc/entropy_encode.c
+++ b/c/enc/entropy_encode.c
@@ -66,11 +66,11 @@ static BROTLI_INLINE BROTLI_BOOL SortHuffmanTree(
we are not planning to use this with extremely long blocks.
See http://en.wikipedia.org/wiki/Huffman_coding */
-void BrotliCreateHuffmanTree(const uint32_t *data,
+void BrotliCreateHuffmanTree(const uint32_t* data,
const size_t length,
const int tree_limit,
HuffmanTree* tree,
- uint8_t *depth) {
+ uint8_t* depth) {
uint32_t count_limit;
HuffmanTree sentinel;
InitHuffmanTree(&sentinel, BROTLI_UINT32_MAX, -1, -1);
@@ -371,8 +371,8 @@ void BrotliOptimizeHuffmanCountsForRle(size_t length, uint32_t* counts,
}
static void DecideOverRleUse(const uint8_t* depth, const size_t length,
- BROTLI_BOOL *use_rle_for_non_zero,
- BROTLI_BOOL *use_rle_for_zero) {
+ BROTLI_BOOL* use_rle_for_non_zero,
+ BROTLI_BOOL* use_rle_for_zero) {
size_t total_reps_zero = 0;
size_t total_reps_non_zero = 0;
size_t count_reps_zero = 1;
@@ -454,26 +454,26 @@ void BrotliWriteHuffmanTree(const uint8_t* depth,
static uint16_t BrotliReverseBits(size_t num_bits, uint16_t bits) {
static const size_t kLut[16] = { /* Pre-reversed 4-bit values. */
- 0x0, 0x8, 0x4, 0xc, 0x2, 0xa, 0x6, 0xe,
- 0x1, 0x9, 0x5, 0xd, 0x3, 0xb, 0x7, 0xf
+ 0x00, 0x08, 0x04, 0x0C, 0x02, 0x0A, 0x06, 0x0E,
+ 0x01, 0x09, 0x05, 0x0D, 0x03, 0x0B, 0x07, 0x0F
};
- size_t retval = kLut[bits & 0xf];
+ size_t retval = kLut[bits & 0x0F];
size_t i;
for (i = 4; i < num_bits; i += 4) {
retval <<= 4;
bits = (uint16_t)(bits >> 4);
- retval |= kLut[bits & 0xf];
+ retval |= kLut[bits & 0x0F];
}
- retval >>= ((0 - num_bits) & 0x3);
+ retval >>= ((0 - num_bits) & 0x03);
return (uint16_t)retval;
}
/* 0..15 are values for bits */
#define MAX_HUFFMAN_BITS 16
-void BrotliConvertBitDepthsToSymbols(const uint8_t *depth,
+void BrotliConvertBitDepthsToSymbols(const uint8_t* depth,
size_t len,
- uint16_t *bits) {
+ uint16_t* bits) {
/* In Brotli, all bit depths are [1..15]
0 bit depth means that the symbol does not exist. */
uint16_t bl_count[MAX_HUFFMAN_BITS] = { 0 };
diff --git a/c/enc/entropy_encode.h b/c/enc/entropy_encode.h
index ef7c216..f23d9c3 100644
--- a/c/enc/entropy_encode.h
+++ b/c/enc/entropy_encode.h
@@ -46,11 +46,11 @@ BROTLI_INTERNAL BROTLI_BOOL BrotliSetDepth(
be at least 2 * length + 1 long.
See http://en.wikipedia.org/wiki/Huffman_coding */
-BROTLI_INTERNAL void BrotliCreateHuffmanTree(const uint32_t *data,
+BROTLI_INTERNAL void BrotliCreateHuffmanTree(const uint32_t* data,
const size_t length,
const int tree_limit,
HuffmanTree* tree,
- uint8_t *depth);
+ uint8_t* depth);
/* Change the population counts in a way that the consequent
Huffman tree compression, especially its RLE-part will be more
@@ -72,9 +72,9 @@ BROTLI_INTERNAL void BrotliWriteHuffmanTree(const uint8_t* depth,
uint8_t* extra_bits_data);
/* Get the actual bit values for a tree of bit depths. */
-BROTLI_INTERNAL void BrotliConvertBitDepthsToSymbols(const uint8_t *depth,
+BROTLI_INTERNAL void BrotliConvertBitDepthsToSymbols(const uint8_t* depth,
size_t len,
- uint16_t *bits);
+ uint16_t* bits);
/* Input size optimized Shell sort. */
typedef BROTLI_BOOL (*HuffmanTreeComparator)(
diff --git a/c/enc/entropy_encode_static.h b/c/enc/entropy_encode_static.h
index b2c1fbb..62b99a9 100644
--- a/c/enc/entropy_encode_static.h
+++ b/c/enc/entropy_encode_static.h
@@ -83,7 +83,7 @@ static const uint32_t kCodeLengthBits[18] = {
static BROTLI_INLINE void StoreStaticCodeLengthCode(
size_t* storage_ix, uint8_t* storage) {
BrotliWriteBits(
- 40, BROTLI_MAKE_UINT64_T(0x0000ffU, 0x55555554U), storage_ix, storage);
+ 40, BROTLI_MAKE_UINT64_T(0x0000FFu, 0x55555554u), storage_ix, storage);
}
static const uint64_t kZeroRepsBits[BROTLI_NUM_COMMAND_SYMBOLS] = {
@@ -529,7 +529,7 @@ static const uint16_t kStaticDistanceCodeBits[64] = {
static BROTLI_INLINE void StoreStaticDistanceHuffmanTree(
size_t* storage_ix, uint8_t* storage) {
- BrotliWriteBits(28, 0x0369dc03U, storage_ix, storage);
+ BrotliWriteBits(28, 0x0369DC03u, storage_ix, storage);
}
#if defined(__cplusplus) || defined(c_plusplus)
diff --git a/c/enc/hash.h b/c/enc/hash.h
index 2a1634d..1827ce6 100644
--- a/c/enc/hash.h
+++ b/c/enc/hash.h
@@ -16,6 +16,7 @@
#include "../common/dictionary.h"
#include "../common/platform.h"
#include <brotli/types.h>
+#include "./encoder_dict.h"
#include "./fast_log.h"
#include "./find_match_length.h"
#include "./memory.h"
@@ -73,10 +74,10 @@ typedef struct HasherSearchResult {
* There is no effort to ensure that it is a prime, the oddity is enough
for this use.
* The number has been tuned heuristically against compression benchmarks. */
-static const uint32_t kHashMul32 = 0x1e35a7bd;
-static const uint64_t kHashMul64 = BROTLI_MAKE_UINT64_T(0x1e35a7bd, 0x1e35a7bd);
+static const uint32_t kHashMul32 = 0x1E35A7BD;
+static const uint64_t kHashMul64 = BROTLI_MAKE_UINT64_T(0x1E35A7BD, 0x1E35A7BD);
static const uint64_t kHashMul64Long =
- BROTLI_MAKE_UINT64_T(0x1fe35a7bU, 0xd3579bd3U);
+ BROTLI_MAKE_UINT64_T(0x1FE35A7Bu, 0xD3579BD3u);
static BROTLI_INLINE uint32_t Hash14(const uint8_t* data) {
uint32_t h = BROTLI_UNALIGNED_LOAD32LE(data) * kHashMul32;
@@ -146,8 +147,9 @@ static BROTLI_INLINE score_t BackwardReferencePenaltyUsingLastDistance(
}
static BROTLI_INLINE BROTLI_BOOL TestStaticDictionaryItem(
- const BrotliDictionary* dictionary, size_t item, const uint8_t* data,
- size_t max_length, size_t max_backward, HasherSearchResult* out) {
+ const BrotliEncoderDictionary* dictionary, size_t item, const uint8_t* data,
+ size_t max_length, size_t max_backward, size_t max_distance,
+ HasherSearchResult* out) {
size_t len;
size_t dist;
size_t offset;
@@ -156,24 +158,24 @@ static BROTLI_INLINE BROTLI_BOOL TestStaticDictionaryItem(
score_t score;
len = item & 0x1F;
dist = item >> 5;
- offset = dictionary->offsets_by_length[len] + len * dist;
+ offset = dictionary->words->offsets_by_length[len] + len * dist;
if (len > max_length) {
return BROTLI_FALSE;
}
matchlen =
- FindMatchLengthWithLimit(data, &dictionary->data[offset], len);
- if (matchlen + kCutoffTransformsCount <= len || matchlen == 0) {
+ FindMatchLengthWithLimit(data, &dictionary->words->data[offset], len);
+ if (matchlen + dictionary->cutoffTransformsCount <= len || matchlen == 0) {
return BROTLI_FALSE;
}
{
size_t cut = len - matchlen;
- size_t transform_id =
- (cut << 2) + (size_t)((kCutoffTransforms >> (cut * 6)) & 0x3F);
+ size_t transform_id = (cut << 2) +
+ (size_t)((dictionary->cutoffTransforms >> (cut * 6)) & 0x3F);
backward = max_backward + dist + 1 +
- (transform_id << dictionary->size_bits_by_length[len]);
+ (transform_id << dictionary->words->size_bits_by_length[len]);
}
- if (backward >= BROTLI_MAX_DISTANCE) {
+ if (backward > max_distance) {
return BROTLI_FALSE;
}
score = BackwardReferenceScore(matchlen, backward);
@@ -188,9 +190,10 @@ static BROTLI_INLINE BROTLI_BOOL TestStaticDictionaryItem(
}
static BROTLI_INLINE void SearchInStaticDictionary(
- const BrotliDictionary* dictionary, const uint16_t* dictionary_hash,
+ const BrotliEncoderDictionary* dictionary,
HasherHandle handle, const uint8_t* data, size_t max_length,
- size_t max_backward, HasherSearchResult* out, BROTLI_BOOL shallow) {
+ size_t max_backward, size_t max_distance,
+ HasherSearchResult* out, BROTLI_BOOL shallow) {
size_t key;
size_t i;
HasherCommon* self = GetHasherCommon(handle);
@@ -199,11 +202,11 @@ static BROTLI_INLINE void SearchInStaticDictionary(
}
key = Hash14(data) << 1;
for (i = 0; i < (shallow ? 1u : 2u); ++i, ++key) {
- size_t item = dictionary_hash[key];
+ size_t item = dictionary->hash_table[key];
self->dict_num_lookups++;
if (item != 0) {
BROTLI_BOOL item_matches = TestStaticDictionaryItem(
- dictionary, item, data, max_length, max_backward, out);
+ dictionary, item, data, max_length, max_backward, max_distance, out);
if (item_matches) {
self->dict_num_matches++;
}
diff --git a/c/enc/hash_forgetful_chain_inc.h b/c/enc/hash_forgetful_chain_inc.h
index 46d363c..41cb3ff 100644
--- a/c/enc/hash_forgetful_chain_inc.h
+++ b/c/enc/hash_forgetful_chain_inc.h
@@ -28,7 +28,7 @@ static BROTLI_INLINE size_t FN(HashTypeLength)(void) { return 4; }
static BROTLI_INLINE size_t FN(StoreLookahead)(void) { return 4; }
/* HashBytes is the function that chooses the bucket to place the address in.*/
-static BROTLI_INLINE size_t FN(HashBytes)(const uint8_t *data) {
+static BROTLI_INLINE size_t FN(HashBytes)(const uint8_t* data) {
const uint32_t h = BROTLI_UNALIGNED_LOAD32LE(data) * kHashMul32;
/* The higher bits contain more mixture from the multiplication,
so we take our results from there. */
@@ -115,7 +115,7 @@ static BROTLI_INLINE void FN(Store)(HasherHandle BROTLI_RESTRICT handle,
}
static BROTLI_INLINE void FN(StoreRange)(HasherHandle handle,
- const uint8_t *data, const size_t mask, const size_t ix_start,
+ const uint8_t* data, const size_t mask, const size_t ix_start,
const size_t ix_end) {
size_t i;
for (i = ix_start; i < ix_end; ++i) {
@@ -154,11 +154,12 @@ static BROTLI_INLINE void FN(PrepareDistanceCache)(
Writes the best match into |out|.
|out|->score is updated only if a better match is found. */
static BROTLI_INLINE void FN(FindLongestMatch)(HasherHandle handle,
- const BrotliDictionary* dictionary, const uint16_t* dictionary_hash,
+ const BrotliEncoderDictionary* dictionary,
const uint8_t* BROTLI_RESTRICT data, const size_t ring_buffer_mask,
const int* BROTLI_RESTRICT distance_cache,
const size_t cur_ix, const size_t max_length, const size_t max_backward,
- const size_t gap, HasherSearchResult* BROTLI_RESTRICT out) {
+ const size_t gap, const size_t max_distance,
+ HasherSearchResult* BROTLI_RESTRICT out) {
HashForgetfulChain* self = FN(Self)(handle);
const size_t cur_ix_masked = cur_ix & ring_buffer_mask;
/* Don't accept a short copy from far away. */
@@ -240,9 +241,9 @@ static BROTLI_INLINE void FN(FindLongestMatch)(HasherHandle handle,
FN(Store)(handle, data, ring_buffer_mask, cur_ix);
}
if (out->score == min_score) {
- SearchInStaticDictionary(dictionary, dictionary_hash,
- handle, &data[cur_ix_masked], max_length, max_backward + gap, out,
- BROTLI_FALSE);
+ SearchInStaticDictionary(dictionary,
+ handle, &data[cur_ix_masked], max_length, max_backward + gap,
+ max_distance, out, BROTLI_FALSE);
}
}
diff --git a/c/enc/hash_longest_match64_inc.h b/c/enc/hash_longest_match64_inc.h
index 6b0697b..e099edf 100644
--- a/c/enc/hash_longest_match64_inc.h
+++ b/c/enc/hash_longest_match64_inc.h
@@ -20,7 +20,7 @@ static BROTLI_INLINE size_t FN(HashTypeLength)(void) { return 8; }
static BROTLI_INLINE size_t FN(StoreLookahead)(void) { return 8; }
/* HashBytes is the function that chooses the bucket to place the address in. */
-static BROTLI_INLINE uint32_t FN(HashBytes)(const uint8_t *data,
+static BROTLI_INLINE uint32_t FN(HashBytes)(const uint8_t* data,
const uint64_t mask,
const int shift) {
const uint64_t h = (BROTLI_UNALIGNED_LOAD64LE(data) & mask) * kHashMul64Long;
@@ -105,7 +105,7 @@ static BROTLI_INLINE size_t FN(HashMemAllocInBytes)(
/* Look at up to 8 bytes at &data[ix & mask] (HashBytes masks a 64-bit load).
Compute a hash from these, and store the value of ix at that position. */
-static BROTLI_INLINE void FN(Store)(HasherHandle handle, const uint8_t *data,
+static BROTLI_INLINE void FN(Store)(HasherHandle handle, const uint8_t* data,
const size_t mask, const size_t ix) {
HashLongestMatch* self = FN(Self)(handle);
uint16_t* num = FN(Num)(self);
@@ -119,7 +119,7 @@ static BROTLI_INLINE void FN(Store)(HasherHandle handle, const uint8_t *data,
}
static BROTLI_INLINE void FN(StoreRange)(HasherHandle handle,
- const uint8_t *data, const size_t mask, const size_t ix_start,
+ const uint8_t* data, const size_t mask, const size_t ix_start,
const size_t ix_end) {
size_t i;
for (i = ix_start; i < ix_end; ++i) {
@@ -158,11 +158,11 @@ static BROTLI_INLINE void FN(PrepareDistanceCache)(
Writes the best match into |out|.
|out|->score is updated only if a better match is found. */
static BROTLI_INLINE void FN(FindLongestMatch)(HasherHandle handle,
- const BrotliDictionary* dictionary, const uint16_t* dictionary_hash,
+ const BrotliEncoderDictionary* dictionary,
const uint8_t* BROTLI_RESTRICT data, const size_t ring_buffer_mask,
const int* BROTLI_RESTRICT distance_cache, const size_t cur_ix,
const size_t max_length, const size_t max_backward, const size_t gap,
- HasherSearchResult* BROTLI_RESTRICT out) {
+ const size_t max_distance, HasherSearchResult* BROTLI_RESTRICT out) {
HasherCommon* common = GetHasherCommon(handle);
HashLongestMatch* self = FN(Self)(handle);
uint16_t* num = FN(Num)(self);
@@ -257,9 +257,9 @@ static BROTLI_INLINE void FN(FindLongestMatch)(HasherHandle handle,
++num[key];
}
if (min_score == out->score) {
- SearchInStaticDictionary(dictionary, dictionary_hash,
- handle, &data[cur_ix_masked], max_length, max_backward + gap, out,
- BROTLI_FALSE);
+ SearchInStaticDictionary(dictionary,
+ handle, &data[cur_ix_masked], max_length, max_backward + gap,
+ max_distance, out, BROTLI_FALSE);
}
}
diff --git a/c/enc/hash_longest_match_inc.h b/c/enc/hash_longest_match_inc.h
index d24576d..951d7a4 100644
--- a/c/enc/hash_longest_match_inc.h
+++ b/c/enc/hash_longest_match_inc.h
@@ -20,7 +20,7 @@ static BROTLI_INLINE size_t FN(HashTypeLength)(void) { return 4; }
static BROTLI_INLINE size_t FN(StoreLookahead)(void) { return 4; }
/* HashBytes is the function that chooses the bucket to place the address in. */
-static uint32_t FN(HashBytes)(const uint8_t *data, const int shift) {
+static uint32_t FN(HashBytes)(const uint8_t* data, const int shift) {
uint32_t h = BROTLI_UNALIGNED_LOAD32LE(data) * kHashMul32;
/* The higher bits contain more mixture from the multiplication,
so we take our results from there. */
@@ -112,7 +112,7 @@ static BROTLI_INLINE void FN(Store)(HasherHandle handle, const uint8_t* data,
}
static BROTLI_INLINE void FN(StoreRange)(HasherHandle handle,
- const uint8_t *data, const size_t mask, const size_t ix_start,
+ const uint8_t* data, const size_t mask, const size_t ix_start,
const size_t ix_end) {
size_t i;
for (i = ix_start; i < ix_end; ++i) {
@@ -151,11 +151,11 @@ static BROTLI_INLINE void FN(PrepareDistanceCache)(
Writes the best match into |out|.
|out|->score is updated only if a better match is found. */
static BROTLI_INLINE void FN(FindLongestMatch)(HasherHandle handle,
- const BrotliDictionary* dictionary, const uint16_t* dictionary_hash,
+ const BrotliEncoderDictionary* dictionary,
const uint8_t* BROTLI_RESTRICT data, const size_t ring_buffer_mask,
const int* BROTLI_RESTRICT distance_cache, const size_t cur_ix,
const size_t max_length, const size_t max_backward, const size_t gap,
- HasherSearchResult* BROTLI_RESTRICT out) {
+ const size_t max_distance, HasherSearchResult* BROTLI_RESTRICT out) {
HasherCommon* common = GetHasherCommon(handle);
HashLongestMatch* self = FN(Self)(handle);
uint16_t* num = FN(Num)(self);
@@ -249,9 +249,9 @@ static BROTLI_INLINE void FN(FindLongestMatch)(HasherHandle handle,
++num[key];
}
if (min_score == out->score) {
- SearchInStaticDictionary(dictionary, dictionary_hash,
- handle, &data[cur_ix_masked], max_length, max_backward + gap, out,
- BROTLI_FALSE);
+ SearchInStaticDictionary(dictionary,
+ handle, &data[cur_ix_masked], max_length, max_backward + gap,
+ max_distance, out, BROTLI_FALSE);
}
}
diff --git a/c/enc/hash_longest_match_quickly_inc.h b/c/enc/hash_longest_match_quickly_inc.h
index 2c78351..a7b9639 100644
--- a/c/enc/hash_longest_match_quickly_inc.h
+++ b/c/enc/hash_longest_match_quickly_inc.h
@@ -81,7 +81,7 @@ static BROTLI_INLINE size_t FN(HashMemAllocInBytes)(
Compute a hash from these, and store the value somewhere within
[ix .. ix+3]. */
static BROTLI_INLINE void FN(Store)(HasherHandle handle,
- const uint8_t *data, const size_t mask, const size_t ix) {
+ const uint8_t* data, const size_t mask, const size_t ix) {
const uint32_t key = FN(HashBytes)(&data[ix & mask]);
/* Wiggle the value with the bucket sweep range. */
const uint32_t off = (ix >> 3) % BUCKET_SWEEP;
@@ -89,7 +89,7 @@ static BROTLI_INLINE void FN(Store)(HasherHandle handle,
}
static BROTLI_INLINE void FN(StoreRange)(HasherHandle handle,
- const uint8_t *data, const size_t mask, const size_t ix_start,
+ const uint8_t* data, const size_t mask, const size_t ix_start,
const size_t ix_end) {
size_t i;
for (i = ix_start; i < ix_end; ++i) {
@@ -125,11 +125,12 @@ static BROTLI_INLINE void FN(PrepareDistanceCache)(
Writes the best match into |out|.
|out|->score is updated only if a better match is found. */
static BROTLI_INLINE void FN(FindLongestMatch)(
- HasherHandle handle, const BrotliDictionary* dictionary,
- const uint16_t* dictionary_hash, const uint8_t* BROTLI_RESTRICT data,
+ HasherHandle handle, const BrotliEncoderDictionary* dictionary,
+ const uint8_t* BROTLI_RESTRICT data,
const size_t ring_buffer_mask, const int* BROTLI_RESTRICT distance_cache,
const size_t cur_ix, const size_t max_length, const size_t max_backward,
- const size_t gap, HasherSearchResult* BROTLI_RESTRICT out) {
+ const size_t gap, const size_t max_distance,
+ HasherSearchResult* BROTLI_RESTRICT out) {
HashLongestMatchQuickly* self = FN(Self)(handle);
const size_t best_len_in = out->len;
const size_t cur_ix_masked = cur_ix & ring_buffer_mask;
@@ -191,7 +192,7 @@ static BROTLI_INLINE void FN(FindLongestMatch)(
}
}
} else {
- uint32_t *bucket = self->buckets_ + key;
+ uint32_t* bucket = self->buckets_ + key;
int i;
prev_ix = *bucket++;
for (i = 0; i < BUCKET_SWEEP; ++i, prev_ix = *bucket++) {
@@ -221,9 +222,9 @@ static BROTLI_INLINE void FN(FindLongestMatch)(
}
}
if (USE_DICTIONARY && min_score == out->score) {
- SearchInStaticDictionary(dictionary, dictionary_hash,
- handle, &data[cur_ix_masked], max_length, max_backward + gap, out,
- BROTLI_TRUE);
+ SearchInStaticDictionary(dictionary,
+ handle, &data[cur_ix_masked], max_length, max_backward + gap,
+ max_distance, out, BROTLI_TRUE);
}
self->buckets_[key + ((cur_ix >> 3) % BUCKET_SWEEP)] = (uint32_t)cur_ix;
}
diff --git a/c/enc/hash_to_binary_tree_inc.h b/c/enc/hash_to_binary_tree_inc.h
index 73774b2..48097b1 100644
--- a/c/enc/hash_to_binary_tree_inc.h
+++ b/c/enc/hash_to_binary_tree_inc.h
@@ -24,7 +24,7 @@ static BROTLI_INLINE size_t FN(StoreLookahead)(void) {
return MAX_TREE_COMP_LENGTH;
}
-static uint32_t FN(HashBytes)(const uint8_t *data) {
+static uint32_t FN(HashBytes)(const uint8_t* data) {
uint32_t h = BROTLI_UNALIGNED_LOAD32LE(data) * kHashMul32;
/* The higher bits contain more mixture from the multiplication,
so we take our results from there. */
@@ -200,7 +200,7 @@ static BROTLI_INLINE BackwardMatch* FN(StoreAndFindMatches)(
sorted by strictly increasing length and (non-strictly) increasing
distance. */
static BROTLI_INLINE size_t FN(FindAllMatches)(HasherHandle handle,
- const BrotliDictionary* dictionary, const uint8_t* data,
+ const BrotliEncoderDictionary* dictionary, const uint8_t* data,
const size_t ring_buffer_mask, const size_t cur_ix,
const size_t max_length, const size_t max_backward, const size_t gap,
const BrotliEncoderParams* params, BackwardMatch* matches) {
@@ -252,7 +252,7 @@ static BROTLI_INLINE size_t FN(FindAllMatches)(HasherHandle handle,
uint32_t dict_id = dict_matches[l];
if (dict_id < kInvalidMatch) {
size_t distance = max_backward + gap + (dict_id >> 5) + 1;
- if (distance < BROTLI_MAX_DISTANCE) {
+ if (distance <= params->dist.max_distance) {
InitDictionaryBackwardMatch(matches++, distance, l, dict_id & 31);
}
}
@@ -265,7 +265,7 @@ static BROTLI_INLINE size_t FN(FindAllMatches)(HasherHandle handle,
/* Stores the hash of the next 4 bytes and re-roots the binary tree at the
current sequence, without returning any matches.
REQUIRES: ix + MAX_TREE_COMP_LENGTH <= end-of-current-block */
-static BROTLI_INLINE void FN(Store)(HasherHandle handle, const uint8_t *data,
+static BROTLI_INLINE void FN(Store)(HasherHandle handle, const uint8_t* data,
const size_t mask, const size_t ix) {
HashToBinaryTree* self = FN(Self)(handle);
/* Maximum distance is window size - 16, see section 9.1. of the spec. */
@@ -275,7 +275,7 @@ static BROTLI_INLINE void FN(Store)(HasherHandle handle, const uint8_t *data,
}
static BROTLI_INLINE void FN(StoreRange)(HasherHandle handle,
- const uint8_t *data, const size_t mask, const size_t ix_start,
+ const uint8_t* data, const size_t mask, const size_t ix_start,
const size_t ix_end) {
size_t i = ix_start;
size_t j = ix_start;
diff --git a/c/enc/histogram.c b/c/enc/histogram.c
index bb7b4c5..6da2ff6 100644
--- a/c/enc/histogram.c
+++ b/c/enc/histogram.c
@@ -8,9 +8,9 @@
#include "./histogram.h"
+#include "../common/context.h"
#include "./block_splitter.h"
#include "./command.h"
-#include "./context.h"
#if defined(__cplusplus) || defined(c_plusplus)
extern "C" {
@@ -63,13 +63,16 @@ void BrotliBuildHistogramsWithContext(
BlockSplitIteratorNext(&insert_and_copy_it);
HistogramAddCommand(&insert_and_copy_histograms[insert_and_copy_it.type_],
cmd->cmd_prefix_);
+ /* TODO: unwrap iterator blocks. */
for (j = cmd->insert_len_; j != 0; --j) {
size_t context;
BlockSplitIteratorNext(&literal_it);
- context = context_modes ?
- ((literal_it.type_ << BROTLI_LITERAL_CONTEXT_BITS) +
- Context(prev_byte, prev_byte2, context_modes[literal_it.type_])) :
- literal_it.type_;
+ context = literal_it.type_;
+ if (context_modes) {
+ ContextLut lut = BROTLI_CONTEXT_LUT(context_modes[context]);
+ context = (context << BROTLI_LITERAL_CONTEXT_BITS) +
+ BROTLI_CONTEXT(prev_byte, prev_byte2, lut);
+ }
HistogramAddLiteral(&literal_histograms[context],
ringbuffer[pos & mask]);
prev_byte2 = prev_byte;
@@ -86,7 +89,7 @@ void BrotliBuildHistogramsWithContext(
context = (dist_it.type_ << BROTLI_DISTANCE_CONTEXT_BITS) +
CommandDistanceContext(cmd);
HistogramAddDistance(&copy_dist_histograms[context],
- cmd->dist_prefix_);
+ cmd->dist_prefix_ & 0x3FF);
}
}
}
diff --git a/c/enc/histogram.h b/c/enc/histogram.h
index b1b8d11..42af3c3 100644
--- a/c/enc/histogram.h
+++ b/c/enc/histogram.h
@@ -12,16 +12,19 @@
#include <string.h> /* memset */
#include "../common/constants.h"
+#include "../common/context.h"
#include "../common/platform.h"
#include <brotli/types.h>
#include "./block_splitter.h"
#include "./command.h"
-#include "./context.h"
#if defined(__cplusplus) || defined(c_plusplus)
extern "C" {
#endif
+/* The distance symbols effectively used by "Large Window Brotli" (32-bit). */
+#define BROTLI_NUM_HISTOGRAM_DISTANCE_SYMBOLS 544
+
#define FN(X) X ## Literal
#define DATA_SIZE BROTLI_NUM_LITERAL_SYMBOLS
#define DataType uint8_t
@@ -38,7 +41,7 @@ extern "C" {
#undef FN
#define FN(X) X ## Distance
-#define DATA_SIZE BROTLI_NUM_DISTANCE_SYMBOLS
+#define DATA_SIZE BROTLI_NUM_HISTOGRAM_DISTANCE_SYMBOLS
#include "./histogram_inc.h" /* NOLINT(build/include) */
#undef DataType
#undef DATA_SIZE
diff --git a/c/enc/histogram_inc.h b/c/enc/histogram_inc.h
index 7807036..50eaf74 100644
--- a/c/enc/histogram_inc.h
+++ b/c/enc/histogram_inc.h
@@ -33,7 +33,7 @@ static BROTLI_INLINE void FN(HistogramAdd)(FN(Histogram)* self, size_t val) {
}
static BROTLI_INLINE void FN(HistogramAddVector)(FN(Histogram)* self,
- const DataType *p, size_t n) {
+ const DataType* p, size_t n) {
self->total_count_ += n;
n += 1;
while (--n) ++self->data_[*p++];
diff --git a/c/enc/literal_cost.c b/c/enc/literal_cost.c
index 9bcb680..c231100 100644
--- a/c/enc/literal_cost.c
+++ b/c/enc/literal_cost.c
@@ -25,7 +25,7 @@ static size_t UTF8Position(size_t last, size_t c, size_t clamp) {
return BROTLI_MIN(size_t, 1, clamp);
} else {
/* Let's decide over the last byte if this ends the sequence. */
- if (last < 0xe0) {
+ if (last < 0xE0) {
return 0; /* Completed two or three byte coding. */
} else { /* Next one is the 'Byte 3' of utf-8 encoding. */
return BROTLI_MIN(size_t, 2, clamp);
@@ -34,7 +34,7 @@ static size_t UTF8Position(size_t last, size_t c, size_t clamp) {
}
static size_t DecideMultiByteStatsLevel(size_t pos, size_t len, size_t mask,
- const uint8_t *data) {
+ const uint8_t* data) {
size_t counts[3] = { 0 };
size_t max_utf8 = 1; /* should be 2, but 1 compresses better. */
size_t last_c = 0;
@@ -54,7 +54,7 @@ static size_t DecideMultiByteStatsLevel(size_t pos, size_t len, size_t mask,
}
static void EstimateBitCostsForLiteralsUTF8(size_t pos, size_t len, size_t mask,
- const uint8_t *data, float *cost) {
+ const uint8_t* data, float* cost) {
/* max_utf8 is 0 (normal ASCII single byte modeling),
1 (for 2-byte UTF-8 modeling), or 2 (for 3-byte UTF-8 modeling). */
const size_t max_utf8 = DecideMultiByteStatsLevel(pos, len, mask, data);
@@ -125,7 +125,7 @@ static void EstimateBitCostsForLiteralsUTF8(size_t pos, size_t len, size_t mask,
}
void BrotliEstimateBitCostsForLiterals(size_t pos, size_t len, size_t mask,
- const uint8_t *data, float *cost) {
+ const uint8_t* data, float* cost) {
if (BrotliIsMostlyUTF8(data, pos, mask, len, kMinUTF8Ratio)) {
EstimateBitCostsForLiteralsUTF8(pos, len, mask, data, cost);
return;
diff --git a/c/enc/literal_cost.h b/c/enc/literal_cost.h
index d2f430c..8f53f39 100644
--- a/c/enc/literal_cost.h
+++ b/c/enc/literal_cost.h
@@ -21,7 +21,7 @@ extern "C" {
ring-buffer (data, mask) will take entropy coded and writes these estimates
to the cost[0..len) array. */
BROTLI_INTERNAL void BrotliEstimateBitCostsForLiterals(
- size_t pos, size_t len, size_t mask, const uint8_t *data, float *cost);
+ size_t pos, size_t len, size_t mask, const uint8_t* data, float* cost);
#if defined(__cplusplus) || defined(c_plusplus)
} /* extern "C" */
diff --git a/c/enc/memory.c b/c/enc/memory.c
index 3716b98..f6ed7e3 100644
--- a/c/enc/memory.c
+++ b/c/enc/memory.c
@@ -27,22 +27,12 @@ extern "C" {
#define NEW_ALLOCATED_OFFSET MAX_PERM_ALLOCATED
#define NEW_FREED_OFFSET (MAX_PERM_ALLOCATED + MAX_NEW_ALLOCATED)
-static void* DefaultAllocFunc(void* opaque, size_t size) {
- BROTLI_UNUSED(opaque);
- return malloc(size);
-}
-
-static void DefaultFreeFunc(void* opaque, void* address) {
- BROTLI_UNUSED(opaque);
- free(address);
-}
-
void BrotliInitMemoryManager(
MemoryManager* m, brotli_alloc_func alloc_func, brotli_free_func free_func,
void* opaque) {
if (!alloc_func) {
- m->alloc_func = DefaultAllocFunc;
- m->free_func = DefaultFreeFunc;
+ m->alloc_func = BrotliDefaultAllocFunc;
+ m->free_func = BrotliDefaultFreeFunc;
m->opaque = 0;
} else {
m->alloc_func = alloc_func;
diff --git a/c/enc/metablock.c b/c/enc/metablock.c
index 50f2ea2..6219292 100644
--- a/c/enc/metablock.c
+++ b/c/enc/metablock.c
@@ -10,12 +10,12 @@
#include "./metablock.h"
#include "../common/constants.h"
+#include "../common/context.h"
#include "../common/platform.h"
#include <brotli/types.h>
#include "./bit_cost.h"
#include "./block_splitter.h"
#include "./cluster.h"
-#include "./context.h"
#include "./entropy_encode.h"
#include "./histogram.h"
#include "./memory.h"
@@ -398,9 +398,9 @@ static void MapStaticContexts(MemoryManager* m,
static BROTLI_INLINE void BrotliBuildMetaBlockGreedyInternal(
MemoryManager* m, const uint8_t* ringbuffer, size_t pos, size_t mask,
- uint8_t prev_byte, uint8_t prev_byte2, ContextType literal_context_mode,
+ uint8_t prev_byte, uint8_t prev_byte2, ContextLut literal_context_lut,
const size_t num_contexts, const uint32_t* static_context_map,
- const Command *commands, size_t n_commands, MetaBlockSplit* mb) {
+ const Command* commands, size_t n_commands, MetaBlockSplit* mb) {
union {
BlockSplitterLiteral plain;
ContextBlockSplitter ctx;
@@ -441,7 +441,8 @@ static BROTLI_INLINE void BrotliBuildMetaBlockGreedyInternal(
if (num_contexts == 1) {
BlockSplitterAddSymbolLiteral(&lit_blocks.plain, literal);
} else {
- size_t context = Context(prev_byte, prev_byte2, literal_context_mode);
+ size_t context =
+ BROTLI_CONTEXT(prev_byte, prev_byte2, literal_context_lut);
ContextBlockSplitterAddSymbol(&lit_blocks.ctx, m, literal,
static_context_map[context]);
if (BROTLI_IS_OOM(m)) return;
@@ -455,7 +456,7 @@ static BROTLI_INLINE void BrotliBuildMetaBlockGreedyInternal(
prev_byte2 = ringbuffer[(pos - 2) & mask];
prev_byte = ringbuffer[(pos - 1) & mask];
if (cmd.cmd_prefix_ >= 128) {
- BlockSplitterAddSymbolDistance(&dist_blocks, cmd.dist_prefix_);
+ BlockSplitterAddSymbolDistance(&dist_blocks, cmd.dist_prefix_ & 0x3FF);
}
}
}
@@ -482,7 +483,7 @@ void BrotliBuildMetaBlockGreedy(MemoryManager* m,
size_t mask,
uint8_t prev_byte,
uint8_t prev_byte2,
- ContextType literal_context_mode,
+ ContextLut literal_context_lut,
size_t num_contexts,
const uint32_t* static_context_map,
const Command* commands,
@@ -490,19 +491,17 @@ void BrotliBuildMetaBlockGreedy(MemoryManager* m,
MetaBlockSplit* mb) {
if (num_contexts == 1) {
BrotliBuildMetaBlockGreedyInternal(m, ringbuffer, pos, mask, prev_byte,
- prev_byte2, literal_context_mode, 1, NULL, commands, n_commands, mb);
+ prev_byte2, literal_context_lut, 1, NULL, commands, n_commands, mb);
} else {
BrotliBuildMetaBlockGreedyInternal(m, ringbuffer, pos, mask, prev_byte,
- prev_byte2, literal_context_mode, num_contexts, static_context_map,
+ prev_byte2, literal_context_lut, num_contexts, static_context_map,
commands, n_commands, mb);
}
}
-void BrotliOptimizeHistograms(size_t num_direct_distance_codes,
- size_t distance_postfix_bits,
+void BrotliOptimizeHistograms(uint32_t num_distance_codes,
MetaBlockSplit* mb) {
uint8_t good_for_rle[BROTLI_NUM_COMMAND_SYMBOLS];
- size_t num_distance_codes;
size_t i;
for (i = 0; i < mb->literal_histograms_size; ++i) {
BrotliOptimizeHuffmanCountsForRle(256, mb->literal_histograms[i].data_,
@@ -513,9 +512,6 @@ void BrotliOptimizeHistograms(size_t num_direct_distance_codes,
mb->command_histograms[i].data_,
good_for_rle);
}
- num_distance_codes = BROTLI_NUM_DISTANCE_SHORT_CODES +
- num_direct_distance_codes +
- ((2 * BROTLI_MAX_DISTANCE_BITS) << distance_postfix_bits);
for (i = 0; i < mb->distance_histograms_size; ++i) {
BrotliOptimizeHuffmanCountsForRle(num_distance_codes,
mb->distance_histograms[i].data_,
diff --git a/c/enc/metablock.h b/c/enc/metablock.h
index 3fa6d65..76a6594 100644
--- a/c/enc/metablock.h
+++ b/c/enc/metablock.h
@@ -10,11 +10,11 @@
#ifndef BROTLI_ENC_METABLOCK_H_
#define BROTLI_ENC_METABLOCK_H_
+#include "../common/context.h"
#include "../common/platform.h"
#include <brotli/types.h>
#include "./block_splitter.h"
#include "./command.h"
-#include "./context.h"
#include "./histogram.h"
#include "./memory.h"
#include "./quality.h"
@@ -85,12 +85,11 @@ BROTLI_INTERNAL void BrotliBuildMetaBlock(MemoryManager* m,
is the same for all block types. */
BROTLI_INTERNAL void BrotliBuildMetaBlockGreedy(
MemoryManager* m, const uint8_t* ringbuffer, size_t pos, size_t mask,
- uint8_t prev_byte, uint8_t prev_byte2, ContextType literal_context_mode,
+ uint8_t prev_byte, uint8_t prev_byte2, ContextLut literal_context_lut,
size_t num_contexts, const uint32_t* static_context_map,
const Command* commands, size_t n_commands, MetaBlockSplit* mb);
-BROTLI_INTERNAL void BrotliOptimizeHistograms(size_t num_direct_distance_codes,
- size_t distance_postfix_bits,
+BROTLI_INTERNAL void BrotliOptimizeHistograms(uint32_t num_distance_codes,
MetaBlockSplit* mb);
#if defined(__cplusplus) || defined(c_plusplus)
diff --git a/c/enc/params.h b/c/enc/params.h
index acb3668..9bcf236 100755
--- a/c/enc/params.h
+++ b/c/enc/params.h
@@ -10,6 +10,7 @@
#define BROTLI_ENC_PARAMS_H_
#include <brotli/encode.h>
+#include "./encoder_dict.h"
typedef struct BrotliHasherParams {
int type;
@@ -19,6 +20,13 @@ typedef struct BrotliHasherParams {
int num_last_distances_to_check;
} BrotliHasherParams;
+typedef struct BrotliDistanceParams {
+ uint32_t num_direct_distance_codes;
+ uint32_t distance_postfix_bits;
+ uint32_t alphabet_size;
+ size_t max_distance;
+} BrotliDistanceParams;
+
/* Encoding parameters */
typedef struct BrotliEncoderParams {
BrotliEncoderMode mode;
@@ -27,7 +35,10 @@ typedef struct BrotliEncoderParams {
int lgblock;
size_t size_hint;
BROTLI_BOOL disable_literal_context_modeling;
+ BROTLI_BOOL large_window;
BrotliHasherParams hasher;
+ BrotliDistanceParams dist;
+ BrotliEncoderDictionary dictionary;
} BrotliEncoderParams;
#endif /* BROTLI_ENC_PARAMS_H_ */
diff --git a/c/enc/prefix.h b/c/enc/prefix.h
index 0168d4e..fd359a4 100644
--- a/c/enc/prefix.h
+++ b/c/enc/prefix.h
@@ -39,11 +39,10 @@ static BROTLI_INLINE void PrefixEncodeCopyDistance(size_t distance_code,
size_t prefix = (dist >> bucket) & 1;
size_t offset = (2 + prefix) << bucket;
size_t nbits = bucket - postfix_bits;
- *code = (uint16_t)(
+ *code = (uint16_t)((nbits << 10) |
(BROTLI_NUM_DISTANCE_SHORT_CODES + num_direct_codes +
((2 * (nbits - 1) + prefix) << postfix_bits) + postfix));
- *extra_bits = (uint32_t)(
- (nbits << 24) | ((dist - offset) >> postfix_bits));
+ *extra_bits = (uint32_t)((dist - offset) >> postfix_bits);
}
}
diff --git a/c/enc/quality.h b/c/enc/quality.h
index 80b7051..f9b1111 100644
--- a/c/enc/quality.h
+++ b/c/enc/quality.h
@@ -31,7 +31,7 @@
/* For quality below MIN_QUALITY_FOR_BLOCK_SPLIT there is no block splitting,
so we buffer at most this many literals and commands. */
-#define MAX_NUM_DELAYED_SYMBOLS 0x2fff
+#define MAX_NUM_DELAYED_SYMBOLS 0x2FFF
/* Returns hash-table size for quality levels 0 and 1. */
static BROTLI_INLINE size_t MaxHashTableSize(int quality) {
@@ -60,10 +60,15 @@ static BROTLI_INLINE size_t MaxZopfliCandidates(
static BROTLI_INLINE void SanitizeParams(BrotliEncoderParams* params) {
params->quality = BROTLI_MIN(int, BROTLI_MAX_QUALITY,
BROTLI_MAX(int, BROTLI_MIN_QUALITY, params->quality));
+ if (params->quality <= MAX_QUALITY_FOR_STATIC_ENTROPY_CODES) {
+ params->large_window = BROTLI_FALSE;
+ }
if (params->lgwin < BROTLI_MIN_WINDOW_BITS) {
params->lgwin = BROTLI_MIN_WINDOW_BITS;
- } else if (params->lgwin > BROTLI_MAX_WINDOW_BITS) {
- params->lgwin = BROTLI_MAX_WINDOW_BITS;
+ } else {
+ int max_lgwin = params->large_window ? BROTLI_LARGE_MAX_WINDOW_BITS :
+ BROTLI_MAX_WINDOW_BITS;
+ if (params->lgwin > max_lgwin) params->lgwin = max_lgwin;
}
}
diff --git a/c/enc/ringbuffer.h b/c/enc/ringbuffer.h
index 4e58749..86079a8 100644
--- a/c/enc/ringbuffer.h
+++ b/c/enc/ringbuffer.h
@@ -41,9 +41,9 @@ typedef struct RingBuffer {
uint32_t pos_;
/* The actual ring buffer containing the copy of the last two bytes, the data,
and the copy of the beginning as a tail. */
- uint8_t *data_;
+ uint8_t* data_;
/* The start of the ring-buffer. */
- uint8_t *buffer_;
+ uint8_t* buffer_;
} RingBuffer;
static BROTLI_INLINE void RingBufferInit(RingBuffer* rb) {
@@ -91,7 +91,7 @@ static BROTLI_INLINE void RingBufferInitBuffer(
}
static BROTLI_INLINE void RingBufferWriteTail(
- const uint8_t *bytes, size_t n, RingBuffer* rb) {
+ const uint8_t* bytes, size_t n, RingBuffer* rb) {
const size_t masked_pos = rb->pos_ & rb->mask_;
if (BROTLI_PREDICT_FALSE(masked_pos < rb->tail_size_)) {
/* Just fill the tail buffer with the beginning data. */
@@ -103,7 +103,7 @@ static BROTLI_INLINE void RingBufferWriteTail(
/* Push bytes into the ring buffer. */
static BROTLI_INLINE void RingBufferWrite(
- MemoryManager* m, const uint8_t *bytes, size_t n, RingBuffer* rb) {
+ MemoryManager* m, const uint8_t* bytes, size_t n, RingBuffer* rb) {
if (rb->pos_ == 0 && n < rb->tail_size_) {
/* Special case for the first write: to process the first block, we don't
need to allocate the whole ring-buffer and we don't need the tail
@@ -144,12 +144,16 @@ static BROTLI_INLINE void RingBufferWrite(
n - (rb->size_ - masked_pos));
}
}
- rb->buffer_[-2] = rb->buffer_[rb->size_ - 2];
- rb->buffer_[-1] = rb->buffer_[rb->size_ - 1];
- rb->pos_ += (uint32_t)n;
- if (rb->pos_ > (1u << 30)) {
- /* Wrap, but preserve not-a-first-lap feature. */
- rb->pos_ = (rb->pos_ & ((1u << 30) - 1)) | (1u << 30);
+ {
+ BROTLI_BOOL not_first_lap = (rb->pos_ & (1u << 31)) != 0;
+ uint32_t rb_pos_mask = (1u << 31) - 1;
+ rb->buffer_[-2] = rb->buffer_[rb->size_ - 2];
+ rb->buffer_[-1] = rb->buffer_[rb->size_ - 1];
+ rb->pos_ = (rb->pos_ & rb_pos_mask) + (uint32_t)(n & rb_pos_mask);
+ if (not_first_lap) {
+ /* Wrap, but preserve not-a-first-lap feature. */
+ rb->pos_ |= 1u << 31;
+ }
}
}
diff --git a/c/enc/static_dict.c b/c/enc/static_dict.c
index 36caa61..758ef80 100644
--- a/c/enc/static_dict.c
+++ b/c/enc/static_dict.c
@@ -8,19 +8,20 @@
#include "../common/dictionary.h"
#include "../common/platform.h"
+#include "../common/transform.h"
+#include "./encoder_dict.h"
#include "./find_match_length.h"
-#include "./static_dict_lut.h"
#if defined(__cplusplus) || defined(c_plusplus)
extern "C" {
#endif
-static const uint8_t kUppercaseFirst = 10;
+/* TODO: use BrotliTransforms.cutOffTransforms instead. */
static const uint8_t kOmitLastNTransforms[10] = {
0, 12, 27, 23, 42, 63, 56, 48, 59, 64,
};
-static BROTLI_INLINE uint32_t Hash(const uint8_t *data) {
+static BROTLI_INLINE uint32_t Hash(const uint8_t* data) {
uint32_t h = BROTLI_UNALIGNED_LOAD32LE(data) * kDictHashMul32;
/* The higher bits contain more mixture from the multiplication,
so we take our results from there. */
@@ -79,32 +80,33 @@ static BROTLI_INLINE BROTLI_BOOL IsMatch(const BrotliDictionary* dictionary,
}
BROTLI_BOOL BrotliFindAllStaticDictionaryMatches(
- const BrotliDictionary* dictionary, const uint8_t* data,
+ const BrotliEncoderDictionary* dictionary, const uint8_t* data,
size_t min_length, size_t max_length, uint32_t* matches) {
BROTLI_BOOL has_found_match = BROTLI_FALSE;
{
- size_t offset = kStaticDictionaryBuckets[Hash(data)];
+ size_t offset = dictionary->buckets[Hash(data)];
BROTLI_BOOL end = !offset;
while (!end) {
- DictWord w = kStaticDictionaryWords[offset++];
+ DictWord w = dictionary->dict_words[offset++];
const size_t l = w.len & 0x1F;
- const size_t n = (size_t)1 << dictionary->size_bits_by_length[l];
+ const size_t n = (size_t)1 << dictionary->words->size_bits_by_length[l];
const size_t id = w.idx;
end = !!(w.len & 0x80);
w.len = (uint8_t)l;
if (w.transform == 0) {
const size_t matchlen =
- DictMatchLength(dictionary, data, id, l, max_length);
+ DictMatchLength(dictionary->words, data, id, l, max_length);
const uint8_t* s;
size_t minlen;
size_t maxlen;
size_t len;
- /* Transform "" + kIdentity + "" */
+ /* Transform "" + BROTLI_TRANSFORM_IDENTITY + "" */
if (matchlen == l) {
AddMatch(id, l, l, matches);
has_found_match = BROTLI_TRUE;
}
- /* Transforms "" + kOmitLast1 + "" and "" + kOmitLast1 + "ing " */
+ /* Transforms "" + BROTLI_TRANSFORM_OMIT_LAST_1 + "" and
+ "" + BROTLI_TRANSFORM_OMIT_LAST_1 + "ing " */
if (matchlen >= l - 1) {
AddMatch(id + 12 * n, l - 1, l, matches);
if (l + 2 < max_length &&
@@ -114,7 +116,7 @@ BROTLI_BOOL BrotliFindAllStaticDictionaryMatches(
}
has_found_match = BROTLI_TRUE;
}
- /* Transform "" + kOmitLastN + "" (N = 2 .. 9) */
+ /* Transform "" + BROTLI_TRANSFORM_OMIT_LAST_# + "" (# = 2 .. 9) */
minlen = min_length;
if (l > 9) minlen = BROTLI_MAX(size_t, minlen, l - 9);
maxlen = BROTLI_MIN(size_t, matchlen, l - 2);
@@ -126,7 +128,7 @@ BROTLI_BOOL BrotliFindAllStaticDictionaryMatches(
continue;
}
s = &data[l];
- /* Transforms "" + kIdentity + <suffix> */
+ /* Transforms "" + BROTLI_TRANSFORM_IDENTITY + <suffix> */
if (s[0] == ' ') {
AddMatch(id + n, l + 1, l, matches);
if (s[1] == 'a') {
@@ -273,12 +275,13 @@ BROTLI_BOOL BrotliFindAllStaticDictionaryMatches(
}
}
} else {
- /* Set is_all_caps=0 for kUppercaseFirst and
- is_all_caps=1 otherwise (kUppercaseAll) transform. */
+ /* Set is_all_caps=0 for BROTLI_TRANSFORM_UPPERCASE_FIRST and
+ is_all_caps=1 otherwise (BROTLI_TRANSFORM_UPPERCASE_ALL)
+ transform. */
const BROTLI_BOOL is_all_caps =
- TO_BROTLI_BOOL(w.transform != kUppercaseFirst);
+ TO_BROTLI_BOOL(w.transform != BROTLI_TRANSFORM_UPPERCASE_FIRST);
const uint8_t* s;
- if (!IsMatch(dictionary, w, data, max_length)) {
+ if (!IsMatch(dictionary->words, w, data, max_length)) {
continue;
}
/* Transform "" + kUppercase{First,All} + "" */
@@ -323,27 +326,29 @@ BROTLI_BOOL BrotliFindAllStaticDictionaryMatches(
/* Transforms with prefixes " " and "." */
if (max_length >= 5 && (data[0] == ' ' || data[0] == '.')) {
BROTLI_BOOL is_space = TO_BROTLI_BOOL(data[0] == ' ');
- size_t offset = kStaticDictionaryBuckets[Hash(&data[1])];
+ size_t offset = dictionary->buckets[Hash(&data[1])];
BROTLI_BOOL end = !offset;
while (!end) {
- DictWord w = kStaticDictionaryWords[offset++];
+ DictWord w = dictionary->dict_words[offset++];
const size_t l = w.len & 0x1F;
- const size_t n = (size_t)1 << dictionary->size_bits_by_length[l];
+ const size_t n = (size_t)1 << dictionary->words->size_bits_by_length[l];
const size_t id = w.idx;
end = !!(w.len & 0x80);
w.len = (uint8_t)l;
if (w.transform == 0) {
const uint8_t* s;
- if (!IsMatch(dictionary, w, &data[1], max_length - 1)) {
+ if (!IsMatch(dictionary->words, w, &data[1], max_length - 1)) {
continue;
}
- /* Transforms " " + kIdentity + "" and "." + kIdentity + "" */
+ /* Transforms " " + BROTLI_TRANSFORM_IDENTITY + "" and
+ "." + BROTLI_TRANSFORM_IDENTITY + "" */
AddMatch(id + (is_space ? 6 : 32) * n, l + 1, l, matches);
has_found_match = BROTLI_TRUE;
if (l + 2 >= max_length) {
continue;
}
- /* Transforms " " + kIdentity + <suffix> and "." + kIdentity + <suffix>
+ /* Transforms " " + BROTLI_TRANSFORM_IDENTITY + <suffix> and
+ "." + BROTLI_TRANSFORM_IDENTITY + <suffix>
*/
s = &data[l + 1];
if (s[0] == ' ') {
@@ -370,12 +375,13 @@ BROTLI_BOOL BrotliFindAllStaticDictionaryMatches(
}
}
} else if (is_space) {
- /* Set is_all_caps=0 for kUppercaseFirst and
- is_all_caps=1 otherwise (kUppercaseAll) transform. */
+ /* Set is_all_caps=0 for BROTLI_TRANSFORM_UPPERCASE_FIRST and
+ is_all_caps=1 otherwise (BROTLI_TRANSFORM_UPPERCASE_ALL)
+ transform. */
const BROTLI_BOOL is_all_caps =
- TO_BROTLI_BOOL(w.transform != kUppercaseFirst);
+ TO_BROTLI_BOOL(w.transform != BROTLI_TRANSFORM_UPPERCASE_FIRST);
const uint8_t* s;
- if (!IsMatch(dictionary, w, &data[1], max_length - 1)) {
+ if (!IsMatch(dictionary->words, w, &data[1], max_length - 1)) {
continue;
}
/* Transforms " " + kUppercase{First,All} + "" */
@@ -411,22 +417,22 @@ BROTLI_BOOL BrotliFindAllStaticDictionaryMatches(
}
}
if (max_length >= 6) {
- /* Transforms with prefixes "e ", "s ", ", " and "\xc2\xa0" */
+ /* Transforms with prefixes "e ", "s ", ", " and "\xC2\xA0" */
if ((data[1] == ' ' &&
(data[0] == 'e' || data[0] == 's' || data[0] == ',')) ||
- (data[0] == 0xc2 && data[1] == 0xa0)) {
- size_t offset = kStaticDictionaryBuckets[Hash(&data[2])];
+ (data[0] == 0xC2 && data[1] == 0xA0)) {
+ size_t offset = dictionary->buckets[Hash(&data[2])];
BROTLI_BOOL end = !offset;
while (!end) {
- DictWord w = kStaticDictionaryWords[offset++];
+ DictWord w = dictionary->dict_words[offset++];
const size_t l = w.len & 0x1F;
- const size_t n = (size_t)1 << dictionary->size_bits_by_length[l];
+ const size_t n = (size_t)1 << dictionary->words->size_bits_by_length[l];
const size_t id = w.idx;
end = !!(w.len & 0x80);
w.len = (uint8_t)l;
if (w.transform == 0 &&
- IsMatch(dictionary, w, &data[2], max_length - 2)) {
- if (data[0] == 0xc2) {
+ IsMatch(dictionary->words, w, &data[2], max_length - 2)) {
+ if (data[0] == 0xC2) {
AddMatch(id + 102 * n, l + 2, l, matches);
has_found_match = BROTLI_TRUE;
} else if (l + 2 < max_length && data[l + 2] == ' ') {
@@ -444,17 +450,17 @@ BROTLI_BOOL BrotliFindAllStaticDictionaryMatches(
data[3] == 'e' && data[4] == ' ') ||
(data[0] == '.' && data[1] == 'c' && data[2] == 'o' &&
data[3] == 'm' && data[4] == '/')) {
- size_t offset = kStaticDictionaryBuckets[Hash(&data[5])];
+ size_t offset = dictionary->buckets[Hash(&data[5])];
BROTLI_BOOL end = !offset;
while (!end) {
- DictWord w = kStaticDictionaryWords[offset++];
+ DictWord w = dictionary->dict_words[offset++];
const size_t l = w.len & 0x1F;
- const size_t n = (size_t)1 << dictionary->size_bits_by_length[l];
+ const size_t n = (size_t)1 << dictionary->words->size_bits_by_length[l];
const size_t id = w.idx;
end = !!(w.len & 0x80);
w.len = (uint8_t)l;
if (w.transform == 0 &&
- IsMatch(dictionary, w, &data[5], max_length - 5)) {
+ IsMatch(dictionary->words, w, &data[5], max_length - 5)) {
AddMatch(id + (data[0] == ' ' ? 41 : 72) * n, l + 5, l, matches);
has_found_match = BROTLI_TRUE;
if (l + 5 < max_length) {
diff --git a/c/enc/static_dict.h b/c/enc/static_dict.h
index 0b3f6b3..6b5d4eb 100644
--- a/c/enc/static_dict.h
+++ b/c/enc/static_dict.h
@@ -12,13 +12,14 @@
#include "../common/dictionary.h"
#include "../common/platform.h"
#include <brotli/types.h>
+#include "./encoder_dict.h"
#if defined(__cplusplus) || defined(c_plusplus)
extern "C" {
#endif
#define BROTLI_MAX_STATIC_DICTIONARY_MATCH_LEN 37
-static const uint32_t kInvalidMatch = 0xfffffff;
+static const uint32_t kInvalidMatch = 0xFFFFFFF;
/* Matches data against static dictionary words, and for each length l,
for which a match is found, updates matches[l] to be the minimum possible
@@ -28,7 +29,7 @@ static const uint32_t kInvalidMatch = 0xfffffff;
matches array is at least BROTLI_MAX_STATIC_DICTIONARY_MATCH_LEN + 1 long
all elements are initialized to kInvalidMatch */
BROTLI_INTERNAL BROTLI_BOOL BrotliFindAllStaticDictionaryMatches(
- const BrotliDictionary* dictionary,
+ const BrotliEncoderDictionary* dictionary,
const uint8_t* data, size_t min_length, size_t max_length,
uint32_t* matches);
diff --git a/c/enc/static_dict_lut.h b/c/enc/static_dict_lut.h
index ba94f76..e299cda 100644
--- a/c/enc/static_dict_lut.h
+++ b/c/enc/static_dict_lut.h
@@ -23,7 +23,7 @@ typedef struct DictWord {
} DictWord;
static const int kDictNumBits = 15;
-static const uint32_t kDictHashMul32 = 0x1e35a7bd;
+static const uint32_t kDictHashMul32 = 0x1E35A7BD;
static const uint16_t kStaticDictionaryBuckets[32768] = {
1,0,0,0,0,0,0,0,0,3,6,0,0,0,0,0,20,0,0,0,21,0,22,0,0,0,0,0,0,0,0,23,0,0,25,0,29,
diff --git a/c/enc/utf8_util.c b/c/enc/utf8_util.c
index a334927..04a7805 100644
--- a/c/enc/utf8_util.c
+++ b/c/enc/utf8_util.c
@@ -25,37 +25,37 @@ static size_t BrotliParseAsUTF8(
}
/* 2-byte UTF8 */
if (size > 1u &&
- (input[0] & 0xe0) == 0xc0 &&
- (input[1] & 0xc0) == 0x80) {
- *symbol = (((input[0] & 0x1f) << 6) |
- (input[1] & 0x3f));
- if (*symbol > 0x7f) {
+ (input[0] & 0xE0) == 0xC0 &&
+ (input[1] & 0xC0) == 0x80) {
+ *symbol = (((input[0] & 0x1F) << 6) |
+ (input[1] & 0x3F));
+ if (*symbol > 0x7F) {
return 2;
}
}
/* 3-byte UTF8 */
if (size > 2u &&
- (input[0] & 0xf0) == 0xe0 &&
- (input[1] & 0xc0) == 0x80 &&
- (input[2] & 0xc0) == 0x80) {
- *symbol = (((input[0] & 0x0f) << 12) |
- ((input[1] & 0x3f) << 6) |
- (input[2] & 0x3f));
- if (*symbol > 0x7ff) {
+ (input[0] & 0xF0) == 0xE0 &&
+ (input[1] & 0xC0) == 0x80 &&
+ (input[2] & 0xC0) == 0x80) {
+ *symbol = (((input[0] & 0x0F) << 12) |
+ ((input[1] & 0x3F) << 6) |
+ (input[2] & 0x3F));
+ if (*symbol > 0x7FF) {
return 3;
}
}
/* 4-byte UTF8 */
if (size > 3u &&
- (input[0] & 0xf8) == 0xf0 &&
- (input[1] & 0xc0) == 0x80 &&
- (input[2] & 0xc0) == 0x80 &&
- (input[3] & 0xc0) == 0x80) {
+ (input[0] & 0xF8) == 0xF0 &&
+ (input[1] & 0xC0) == 0x80 &&
+ (input[2] & 0xC0) == 0x80 &&
+ (input[3] & 0xC0) == 0x80) {
*symbol = (((input[0] & 0x07) << 18) |
- ((input[1] & 0x3f) << 12) |
- ((input[2] & 0x3f) << 6) |
- (input[3] & 0x3f));
- if (*symbol > 0xffff && *symbol <= 0x10ffff) {
+ ((input[1] & 0x3F) << 12) |
+ ((input[2] & 0x3F) << 6) |
+ (input[3] & 0x3F));
+ if (*symbol > 0xFFFF && *symbol <= 0x10FFFF) {
return 4;
}
}
diff --git a/c/enc/write_bits.h b/c/enc/write_bits.h
index efa66f8..7733d92 100644
--- a/c/enc/write_bits.h
+++ b/c/enc/write_bits.h
@@ -35,25 +35,27 @@ extern "C" {
and locate the rest in BYTE+1, BYTE+2, etc. */
static BROTLI_INLINE void BrotliWriteBits(size_t n_bits,
uint64_t bits,
- size_t * BROTLI_RESTRICT pos,
- uint8_t * BROTLI_RESTRICT array) {
+ size_t* BROTLI_RESTRICT pos,
+ uint8_t* BROTLI_RESTRICT array) {
#ifdef BROTLI_LITTLE_ENDIAN
/* This branch of the code can write up to 56 bits at a time,
7 bits are lost by being perhaps already in *p and at least
1 bit is needed to initialize the bit-stream ahead (i.e. if 7
bits are in *p and we write 57 bits, then the next write will
access a byte that was never initialized). */
- uint8_t *p = &array[*pos >> 3];
+ uint8_t* p = &array[*pos >> 3];
uint64_t v = *p;
- BROTLI_LOG(("WriteBits %2d 0x%016llx %10d\n", n_bits, bits, *pos));
+ BROTLI_LOG(("WriteBits %2d 0x%08x%08x %10d\n", (int)n_bits,
+ (uint32_t)(bits >> 32), (uint32_t)(bits & 0xFFFFFFFF),
+ (int)*pos));
BROTLI_DCHECK((bits >> n_bits) == 0);
BROTLI_DCHECK(n_bits <= 56);
v |= bits << (*pos & 7);
BROTLI_UNALIGNED_STORE64LE(p, v); /* Set some bits. */
*pos += n_bits;
#else
- /* implicit & 0xff is assumed for uint8_t arithmetics */
- uint8_t *array_pos = &array[*pos >> 3];
+ /* implicit & 0xFF is assumed for uint8_t arithmetics */
+ uint8_t* array_pos = &array[*pos >> 3];
const size_t bits_reserved_in_first_byte = (*pos & 7);
size_t bits_left_to_write;
bits <<= bits_reserved_in_first_byte;
@@ -70,8 +72,8 @@ static BROTLI_INLINE void BrotliWriteBits(size_t n_bits,
}
static BROTLI_INLINE void BrotliWriteBitsPrepareStorage(
- size_t pos, uint8_t *array) {
- BROTLI_LOG(("WriteBitsPrepareStorage %10d\n", pos));
+ size_t pos, uint8_t* array) {
+ BROTLI_LOG(("WriteBitsPrepareStorage %10d\n", (int)pos));
BROTLI_DCHECK((pos & 7) == 0);
array[pos >> 3] = 0;
}
diff --git a/c/include/brotli/decode.h b/c/include/brotli/decode.h
index 1acf605..61a4326 100644
--- a/c/include/brotli/decode.h
+++ b/c/include/brotli/decode.h
@@ -34,11 +34,11 @@ typedef struct BrotliDecoderStateStruct BrotliDecoderState;
typedef enum {
/** Decoding error, e.g. corrupted input or memory allocation problem. */
BROTLI_DECODER_RESULT_ERROR = 0,
- /** Decoding successfully completed */
+ /** Decoding successfully completed. */
BROTLI_DECODER_RESULT_SUCCESS = 1,
- /** Partially done; should be called again with more input */
+ /** Partially done; should be called again with more input. */
BROTLI_DECODER_RESULT_NEEDS_MORE_INPUT = 2,
- /** Partially done; should be called again with more output */
+ /** Partially done; should be called again with more output. */
BROTLI_DECODER_RESULT_NEEDS_MORE_OUTPUT = 3
} BrotliDecoderResult;
@@ -83,8 +83,9 @@ typedef enum {
BROTLI_ERROR_CODE(_ERROR_FORMAT_, WINDOW_BITS, -13) SEPARATOR \
BROTLI_ERROR_CODE(_ERROR_FORMAT_, PADDING_1, -14) SEPARATOR \
BROTLI_ERROR_CODE(_ERROR_FORMAT_, PADDING_2, -15) SEPARATOR \
+ BROTLI_ERROR_CODE(_ERROR_FORMAT_, DISTANCE, -16) SEPARATOR \
\
- /* -16..-17 codes are reserved */ \
+ /* -17 code is reserved */ \
\
BROTLI_ERROR_CODE(_ERROR_, COMPOUND_DICTIONARY, -18) SEPARATOR \
BROTLI_ERROR_CODE(_ERROR_, DICTIONARY_NOT_SET, -19) SEPARATOR \
@@ -135,7 +136,11 @@ typedef enum BrotliDecoderParameter {
* Ring buffer is allocated according to window size, despite the real size of
* the content.
*/
- BROTLI_DECODER_PARAM_DISABLE_RING_BUFFER_REALLOCATION = 0
+ BROTLI_DECODER_PARAM_DISABLE_RING_BUFFER_REALLOCATION = 0,
+ /**
+ * Flag that determines if "Large Window Brotli" is used.
+ */
+ BROTLI_DECODER_PARAM_LARGE_WINDOW = 1
} BrotliDecoderParameter;
/**
diff --git a/c/include/brotli/encode.h b/c/include/brotli/encode.h
index 1fa85cc..0b5c8c7 100644
--- a/c/include/brotli/encode.h
+++ b/c/include/brotli/encode.h
@@ -27,6 +27,11 @@ extern "C" {
* @note equal to @c BROTLI_MAX_DISTANCE_BITS constant.
*/
#define BROTLI_MAX_WINDOW_BITS 24
+/**
+ * Maximal value for ::BROTLI_PARAM_LGWIN parameter
+ * in "Large Window Brotli" (32-bit).
+ */
+#define BROTLI_LARGE_MAX_WINDOW_BITS 30
/** Minimal value for ::BROTLI_PARAM_LGBLOCK parameter. */
#define BROTLI_MIN_INPUT_BLOCK_BITS 16
/** Maximal value for ::BROTLI_PARAM_LGBLOCK parameter. */
@@ -176,7 +181,11 @@ typedef enum BrotliEncoderParameter {
*
* The default value is 0, which means that the total input size is unknown.
*/
- BROTLI_PARAM_SIZE_HINT = 5
+ BROTLI_PARAM_SIZE_HINT = 5,
+ /**
+ * Flag that determines if "Large Window Brotli" is used.
+ */
+ BROTLI_PARAM_LARGE_WINDOW = 6
} BrotliEncoderParameter;
/**
diff --git a/c/tools/brotli.c b/c/tools/brotli.c
index 497ae65..2abfc27 100644
--- a/c/tools/brotli.c
+++ b/c/tools/brotli.c
@@ -111,8 +111,15 @@ typedef struct {
uint8_t* output;
const char* current_input_path;
const char* current_output_path;
+ int64_t input_file_length; /* -1, if impossible to calculate */
FILE* fin;
FILE* fout;
+
+ /* I/O buffers */
+ size_t available_in;
+ const uint8_t* next_in;
+ size_t available_out;
+ uint8_t* next_out;
} Context;
/* Parse up to 5 decimal digits. */
@@ -185,8 +192,8 @@ static Command ParseParams(Context* params) {
/* Too many options. The expected longest option list is:
"-q 0 -w 10 -o f -D d -S b -d -f -k -n -v --", i.e. 16 items in total.
- This check is an additinal guard that is never triggered, but provides an
- additional guard for future changes. */
+ This check is an additional guard that is never triggered, but provides
+ a guard for future changes. */
if (next_option_index > (MAX_OPTIONS - 2)) {
return COMMAND_INVALID;
}
@@ -414,8 +421,8 @@ static void PrintHelp(const char* name) {
" -t, --test test compressed file integrity\n"
" -v, --verbose verbose mode\n");
fprintf(stdout,
-" -w NUM, --lgwin=NUM set LZ77 window size (0, %d-%d) (default:%d)\n",
- BROTLI_MIN_WINDOW_BITS, BROTLI_MAX_WINDOW_BITS, DEFAULT_LGWIN);
+" -w NUM, --lgwin=NUM set LZ77 window size (0, %d-%d)\n",
+ BROTLI_MIN_WINDOW_BITS, BROTLI_MAX_WINDOW_BITS);
fprintf(stdout,
" window size = 2**NUM - 16\n"
" 0 lets compressor choose the optimal value\n");
@@ -473,6 +480,23 @@ static BROTLI_BOOL OpenOutputFile(const char* output_path, FILE** f,
return BROTLI_TRUE;
}
+static int64_t FileSize(const char* path) {
+ FILE* f = fopen(path, "rb");
+ int64_t retval;
+ if (f == NULL) {
+ return -1;
+ }
+ if (fseek(f, 0L, SEEK_END) != 0) {
+ fclose(f);
+ return -1;
+ }
+ retval = ftell(f);
+ if (fclose(f) != 0) {
+ return -1;
+ }
+ return retval;
+}
+
/* Copy file times and permissions.
TODO: this is a "best effort" implementation; honest cross-platform
fully featured implementation is way too hacky; add more hacks by request. */
@@ -513,6 +537,8 @@ static BROTLI_BOOL NextFile(Context* context) {
/* Iterator points to last used arg; increment to search for the next one. */
context->iterator++;
+ context->input_file_length = -1;
+
/* No input path; read from console. */
if (context->input_count == 0) {
if (context->iterator > 1) return BROTLI_FALSE;
@@ -542,6 +568,7 @@ static BROTLI_BOOL NextFile(Context* context) {
}
context->current_input_path = arg;
+ context->input_file_length = FileSize(arg);
context->current_output_path = context->output_path;
if (context->output_path) return BROTLI_TRUE;
@@ -626,44 +653,73 @@ static BROTLI_BOOL CloseFiles(Context* context, BROTLI_BOOL success) {
static const size_t kFileBufferSize = 1 << 16;
+static void InitializeBuffers(Context* context) {
+ context->available_in = 0;
+ context->next_in = NULL;
+ context->available_out = kFileBufferSize;
+ context->next_out = context->output;
+}
+
+static BROTLI_BOOL HasMoreInput(Context* context) {
+ return feof(context->fin) ? BROTLI_FALSE : BROTLI_TRUE;
+}
+
+static BROTLI_BOOL ProvideInput(Context* context) {
+ context->available_in =
+ fread(context->input, 1, kFileBufferSize, context->fin);
+ context->next_in = context->input;
+ if (ferror(context->fin)) {
+ fprintf(stderr, "failed to read input [%s]: %s\n",
+ PrintablePath(context->current_input_path), strerror(errno));
+ return BROTLI_FALSE;
+ }
+ return BROTLI_TRUE;
+}
+
+/* Internal: should be used only in Provide-/Flush-Output. */
+static BROTLI_BOOL WriteOutput(Context* context) {
+ size_t out_size = (size_t)(context->next_out - context->output);
+ if (out_size == 0) return BROTLI_TRUE;
+ if (context->test_integrity) return BROTLI_TRUE;
+
+ fwrite(context->output, 1, out_size, context->fout);
+ if (ferror(context->fout)) {
+ fprintf(stderr, "failed to write output [%s]: %s\n",
+ PrintablePath(context->current_output_path), strerror(errno));
+ return BROTLI_FALSE;
+ }
+ return BROTLI_TRUE;
+}
+
+static BROTLI_BOOL ProvideOutput(Context* context) {
+ if (!WriteOutput(context)) return BROTLI_FALSE;
+ context->available_out = kFileBufferSize;
+ context->next_out = context->output;
+ return BROTLI_TRUE;
+}
+
+static BROTLI_BOOL FlushOutput(Context* context) {
+ if (!WriteOutput(context)) return BROTLI_FALSE;
+ context->available_out = 0;
+ return BROTLI_TRUE;
+}
+
static BROTLI_BOOL DecompressFile(Context* context, BrotliDecoderState* s) {
- size_t available_in = 0;
- const uint8_t* next_in = NULL;
- size_t available_out = kFileBufferSize;
- uint8_t* next_out = context->output;
BrotliDecoderResult result = BROTLI_DECODER_RESULT_NEEDS_MORE_INPUT;
+ InitializeBuffers(context);
for (;;) {
- if (next_out != context->output) {
- if (!context->test_integrity) {
- size_t out_size = (size_t)(next_out - context->output);
- fwrite(context->output, 1, out_size, context->fout);
- if (ferror(context->fout)) {
- fprintf(stderr, "failed to write output [%s]: %s\n",
- PrintablePath(context->current_output_path), strerror(errno));
- return BROTLI_FALSE;
- }
- }
- available_out = kFileBufferSize;
- next_out = context->output;
- }
-
if (result == BROTLI_DECODER_RESULT_NEEDS_MORE_INPUT) {
- if (feof(context->fin)) {
+ if (!HasMoreInput(context)) {
fprintf(stderr, "corrupt input [%s]\n",
PrintablePath(context->current_input_path));
return BROTLI_FALSE;
}
- available_in = fread(context->input, 1, kFileBufferSize, context->fin);
- next_in = context->input;
- if (ferror(context->fin)) {
- fprintf(stderr, "failed to read input [%s]: %s\n",
- PrintablePath(context->current_input_path), strerror(errno));
- return BROTLI_FALSE;
- }
+ if (!ProvideInput(context)) return BROTLI_FALSE;
} else if (result == BROTLI_DECODER_RESULT_NEEDS_MORE_OUTPUT) {
- /* Nothing to do - output is already written. */
+ if (!ProvideOutput(context)) return BROTLI_FALSE;
} else if (result == BROTLI_DECODER_RESULT_SUCCESS) {
- if (available_in != 0 || !feof(context->fin)) {
+ if (!FlushOutput(context)) return BROTLI_FALSE;
+ if (context->available_in != 0 || HasMoreInput(context)) {
fprintf(stderr, "corrupt input [%s]\n",
PrintablePath(context->current_input_path));
return BROTLI_FALSE;
@@ -675,8 +731,8 @@ static BROTLI_BOOL DecompressFile(Context* context, BrotliDecoderState* s) {
return BROTLI_FALSE;
}
- result = BrotliDecoderDecompressStream(
- s, &available_in, &next_in, &available_out, &next_out, 0);
+ result = BrotliDecoderDecompressStream(s, &context->available_in,
+ &context->next_in, &context->available_out, &context->next_out, 0);
}
}
@@ -703,46 +759,31 @@ static BROTLI_BOOL DecompressFiles(Context* context) {
}
static BROTLI_BOOL CompressFile(Context* context, BrotliEncoderState* s) {
- size_t available_in = 0;
- const uint8_t* next_in = NULL;
- size_t available_out = kFileBufferSize;
- uint8_t* next_out = context->output;
BROTLI_BOOL is_eof = BROTLI_FALSE;
-
+ InitializeBuffers(context);
for (;;) {
- if (available_in == 0 && !is_eof) {
- available_in = fread(context->input, 1, kFileBufferSize, context->fin);
- next_in = context->input;
- if (ferror(context->fin)) {
- fprintf(stderr, "failed to read input [%s]: %s\n",
- PrintablePath(context->current_input_path), strerror(errno));
- return BROTLI_FALSE;
- }
- is_eof = feof(context->fin) ? BROTLI_TRUE : BROTLI_FALSE;
+ if (context->available_in == 0 && !is_eof) {
+ if (!ProvideInput(context)) return BROTLI_FALSE;
+ is_eof = !HasMoreInput(context);
}
if (!BrotliEncoderCompressStream(s,
is_eof ? BROTLI_OPERATION_FINISH : BROTLI_OPERATION_PROCESS,
- &available_in, &next_in, &available_out, &next_out, NULL)) {
+ &context->available_in, &context->next_in,
+ &context->available_out, &context->next_out, NULL)) {
/* Should detect OOM? */
fprintf(stderr, "failed to compress data [%s]\n",
PrintablePath(context->current_input_path));
return BROTLI_FALSE;
}
- if (available_out != kFileBufferSize) {
- size_t out_size = kFileBufferSize - available_out;
- fwrite(context->output, 1, out_size, context->fout);
- if (ferror(context->fout)) {
- fprintf(stderr, "failed to write output [%s]: %s\n",
- PrintablePath(context->current_output_path), strerror(errno));
- return BROTLI_FALSE;
- }
- available_out = kFileBufferSize;
- next_out = context->output;
+ if (context->available_out == 0) {
+ if (!ProvideOutput(context)) return BROTLI_FALSE;
}
- if (BrotliEncoderIsFinished(s)) return BROTLI_TRUE;
+ if (BrotliEncoderIsFinished(s)) {
+ return FlushOutput(context);
+ }
}
}
@@ -756,8 +797,30 @@ static BROTLI_BOOL CompressFiles(Context* context) {
}
BrotliEncoderSetParameter(s,
BROTLI_PARAM_QUALITY, (uint32_t)context->quality);
- BrotliEncoderSetParameter(s,
- BROTLI_PARAM_LGWIN, (uint32_t)context->lgwin);
+ if (context->lgwin > 0) {
+ /* Specified by user. */
+ BrotliEncoderSetParameter(s,
+ BROTLI_PARAM_LGWIN, (uint32_t)context->lgwin);
+ } else {
+ /* 0, or not specified by user; could be chosen by compressor. */
+ uint32_t lgwin = DEFAULT_LGWIN;
+ /* Use file size to limit lgwin. */
+ if (context->input_file_length >= 0) {
+ int32_t size = 1 << BROTLI_MIN_WINDOW_BITS;
+ lgwin = BROTLI_MIN_WINDOW_BITS;
+ while (size < context->input_file_length) {
+ size <<= 1;
+ lgwin++;
+ if (lgwin == BROTLI_MAX_WINDOW_BITS) break;
+ }
+ }
+ BrotliEncoderSetParameter(s, BROTLI_PARAM_LGWIN, lgwin);
+ }
+ if (context->input_file_length > 0) {
+ uint32_t size_hint = context->input_file_length < (1 << 30) ?
+ (uint32_t)context->input_file_length : (1u << 30);
+ BrotliEncoderSetParameter(s, BROTLI_PARAM_SIZE_HINT, size_hint);
+ }
is_ok = OpenFiles(context);
if (is_ok && !context->current_output_path &&
!context->force_overwrite && isatty(STDOUT_FILENO)) {
@@ -779,7 +842,7 @@ int main(int argc, char** argv) {
int i;
context.quality = 11;
- context.lgwin = DEFAULT_LGWIN;
+ context.lgwin = -1;
context.force_overwrite = BROTLI_FALSE;
context.junk_source = BROTLI_FALSE;
context.copy_stat = BROTLI_TRUE;
diff --git a/docs/brotli.1 b/docs/brotli.1
index c55b906..7242a32 100644
--- a/docs/brotli.1
+++ b/docs/brotli.1
@@ -1,4 +1,4 @@
-.TH "BROTLI" "1" "August 2017" "brotli 1.0.0" "User commands"
+.TH "BROTLI" "1" "February 2018" "brotli 1.0.0" "User commands"
.SH "NAME"
\fBbrotli\fR \- brotli, unbrotli \- compress or decompress files
.SH SYNOPSIS
diff --git a/docs/decode.h.3 b/docs/decode.h.3
index 0948bf7..7b8581c 100644
--- a/docs/decode.h.3
+++ b/docs/decode.h.3
@@ -1,4 +1,4 @@
-.TH "decode.h" 3 "Fri Dec 8 2017" "Brotli" \" -*- nroff -*-
+.TH "decode.h" 3 "Thu Feb 22 2018" "Brotli" \" -*- nroff -*-
.ad l
.nh
.SH NAME
@@ -143,6 +143,9 @@ Options to be used with \fBBrotliDecoderSetParameter\fP\&.
.TP
\fB\fIBROTLI_DECODER_PARAM_DISABLE_RING_BUFFER_REALLOCATION \fP\fP
Disable 'canny' ring buffer allocation strategy\&. Ring buffer is allocated according to window size, despite the real size of the content\&.
+.TP
+\fB\fIBROTLI_DECODER_PARAM_LARGE_WINDOW \fP\fP
+Flag that determines if 'Large Window Brotli' is used\&.
.SS "enum \fBBrotliDecoderResult\fP"
.PP
diff --git a/docs/encode.h.3 b/docs/encode.h.3
index 1e1193e..906ce07 100644
--- a/docs/encode.h.3
+++ b/docs/encode.h.3
@@ -1,4 +1,4 @@
-.TH "encode.h" 3 "Fri Dec 8 2017" "Brotli" \" -*- nroff -*-
+.TH "encode.h" 3 "Thu Feb 22 2018" "Brotli" \" -*- nroff -*-
.ad l
.nh
.SH NAME
@@ -23,6 +23,10 @@ encode.h \- API for Brotli compression\&.
.br
.RI "\fIDefault value for \fBBROTLI_PARAM_LGWIN\fP parameter\&. \fP"
.ti -1c
+.RI "#define \fBBROTLI_LARGE_MAX_WINDOW_BITS\fP 30"
+.br
+.RI "\fIMaximal value for \fBBROTLI_PARAM_LGWIN\fP parameter in 'Large Window Brotli' (32-bit)\&. \fP"
+.ti -1c
.RI "#define \fBBROTLI_MAX_INPUT_BLOCK_BITS\fP 24"
.br
.RI "\fIMaximal value for \fBBROTLI_PARAM_LGBLOCK\fP parameter\&. \fP"
@@ -287,6 +291,9 @@ Flag that affects usage of 'literal context modeling' format feature\&. This fla
.TP
\fB\fIBROTLI_PARAM_SIZE_HINT \fP\fP
Estimated total input size for all \fBBrotliEncoderCompressStream\fP calls\&. The default value is 0, which means that the total input size is unknown\&.
+.TP
+\fB\fIBROTLI_PARAM_LARGE_WINDOW \fP\fP
+Flag that determines if 'Large Window Brotli' is used\&.
.SH "Function Documentation"
.PP
.SS "\fBBROTLI_BOOL\fP BrotliEncoderCompress (int quality, int lgwin, \fBBrotliEncoderMode\fP mode, size_t input_size, const uint8_t input_buffer[input_size], size_t * encoded_size, uint8_t encoded_buffer[*encoded_size])"
diff --git a/docs/types.h.3 b/docs/types.h.3
index e72ae6e..bef9313 100644
--- a/docs/types.h.3
+++ b/docs/types.h.3
@@ -1,4 +1,4 @@
-.TH "types.h" 3 "Wed Aug 2 2017" "Brotli" \" -*- nroff -*-
+.TH "types.h" 3 "Thu Feb 22 2018" "Brotli" \" -*- nroff -*-
.ad l
.nh
.SH NAME
diff --git a/java/org/brotli/dec/BUILD b/java/org/brotli/dec/BUILD
index 8a2558c..e6d3a4d 100644
--- a/java/org/brotli/dec/BUILD
+++ b/java/org/brotli/dec/BUILD
@@ -43,6 +43,12 @@ java_test(
)
java_test(
+ name = "EagerStreamTest",
+ test_class = "org.brotli.dec.EagerStreamTest",
+ runtime_deps = [":test_lib"],
+)
+
+java_test(
name = "SynthTest",
test_class = "org.brotli.dec.SynthTest",
runtime_deps = [":test_lib"],
diff --git a/java/org/brotli/dec/BrotliInputStream.java b/java/org/brotli/dec/BrotliInputStream.java
index a2bca95..a27e928 100644
--- a/java/org/brotli/dec/BrotliInputStream.java
+++ b/java/org/brotli/dec/BrotliInputStream.java
@@ -16,7 +16,7 @@ import java.io.InputStream;
*/
public class BrotliInputStream extends InputStream {
- public static final int DEFAULT_INTERNAL_BUFFER_SIZE = 16384;
+ public static final int DEFAULT_INTERNAL_BUFFER_SIZE = 256;
/**
* Internal buffer used for efficient byte-by-byte reading.
@@ -44,7 +44,8 @@ public class BrotliInputStream extends InputStream {
* <p> For byte-by-byte reading ({@link #read()}) internal buffer with
* {@link #DEFAULT_INTERNAL_BUFFER_SIZE} size is allocated and used.
*
- * <p> Will block the thread until first kilobyte of data of source is available.
+ * <p> Will block the thread until the first {@link BitReader#CAPACITY} bytes of source data
+ * are available.
*
* @param source underlying data source
* @throws IOException in case of corrupted data or source stream problems
@@ -59,7 +60,8 @@ public class BrotliInputStream extends InputStream {
* <p> For byte-by-byte reading ({@link #read()}) internal buffer of specified size is
* allocated and used.
*
- * <p> Will block the thread until first kilobyte of data of source is available.
+ * <p> Will block the thread until the first {@link BitReader#CAPACITY} bytes of source data
+ * are available.
*
* @param source compressed data source
* @param byteReadBufferSize size of internal buffer used in case of
@@ -82,6 +84,10 @@ public class BrotliInputStream extends InputStream {
}
}
+ public void setEager(boolean eager) {
+ state.isEager = eager ? 1 : 0;
+ }
+
/**
* {@inheritDoc}
*/
diff --git a/java/org/brotli/dec/Decode.java b/java/org/brotli/dec/Decode.java
index 4a1ded6..9e3d43b 100644
--- a/java/org/brotli/dec/Decode.java
+++ b/java/org/brotli/dec/Decode.java
@@ -25,10 +25,10 @@ final class Decode {
private static final int COPY_UNCOMPRESSED = 5;
private static final int INSERT_LOOP = 6;
private static final int COPY_LOOP = 7;
- private static final int COPY_WRAP_BUFFER = 8;
- private static final int TRANSFORM = 9;
- private static final int FINISHED = 10;
- private static final int CLOSED = 11;
+ private static final int TRANSFORM = 8;
+ private static final int FINISHED = 9;
+ private static final int CLOSED = 10;
+ private static final int INIT_WRITE = 11;
private static final int WRITE = 12;
private static final int DEFAULT_CODE_LENGTH = 8;
@@ -550,9 +550,7 @@ final class Decode {
private static void readNextMetablockHeader(State s) {
if (s.inputEnd != 0) {
s.nextRunningState = FINISHED;
- s.bytesToWrite = s.pos;
- s.bytesWritten = 0;
- s.runningState = WRITE;
+ s.runningState = INIT_WRITE;
return;
}
// TODO: Reset? Do we need this?
@@ -674,9 +672,7 @@ final class Decode {
s.pos += chunkLength;
if (s.pos == s.ringBufferSize) {
s.nextRunningState = COPY_UNCOMPRESSED;
- s.bytesToWrite = s.ringBufferSize;
- s.bytesWritten = 0;
- s.runningState = WRITE;
+ s.runningState = INIT_WRITE;
return;
}
@@ -686,12 +682,12 @@ final class Decode {
private static int writeRingBuffer(State s) {
int toWrite = Math.min(s.outputLength - s.outputUsed,
- s.bytesToWrite - s.bytesWritten);
+ s.ringBufferBytesReady - s.ringBufferBytesWritten);
if (toWrite != 0) {
- System.arraycopy(s.ringBuffer, s.bytesWritten, s.output,
+ System.arraycopy(s.ringBuffer, s.ringBufferBytesWritten, s.output,
s.outputOffset + s.outputUsed, toWrite);
s.outputUsed += toWrite;
- s.bytesWritten += toWrite;
+ s.ringBufferBytesWritten += toWrite;
}
if (s.outputUsed < s.outputLength) {
@@ -712,6 +708,15 @@ final class Decode {
return group;
}
+ // Returns offset in ringBuffer that should trigger WRITE when filled.
+ private static int calculateFence(State s) {
+ int result = s.ringBufferSize;
+ if (s.isEager != 0) {
+ result = Math.min(result, s.ringBufferBytesWritten + s.outputLength - s.outputUsed);
+ }
+ return result;
+ }
+
/**
* Actual decompress implementation.
*/
@@ -722,6 +727,7 @@ final class Decode {
if (s.runningState == CLOSED) {
throw new IllegalStateException("Can't decompress after close");
}
+ int fence = calculateFence(s);
int ringBufferMask = s.ringBufferSize - 1;
byte[] ringBuffer = s.ringBuffer;
@@ -734,6 +740,7 @@ final class Decode {
}
readNextMetablockHeader(s);
/* Ring-buffer would be reallocated here. */
+ fence = calculateFence(s);
ringBufferMask = s.ringBufferSize - 1;
ringBuffer = s.ringBuffer;
continue;
@@ -787,12 +794,11 @@ final class Decode {
BitReader.fillBitWindow(s);
ringBuffer[s.pos] =
(byte) readSymbol(s.hGroup0, s.literalTree, s);
+ s.pos++;
s.j++;
- if (s.pos++ == ringBufferMask) {
+ if (s.pos >= fence) {
s.nextRunningState = INSERT_LOOP;
- s.bytesToWrite = s.ringBufferSize;
- s.bytesWritten = 0;
- s.runningState = WRITE;
+ s.runningState = INIT_WRITE;
break;
}
}
@@ -813,12 +819,11 @@ final class Decode {
prevByte1 = readSymbol(
s.hGroup0, s.hGroup0[literalTreeIndex], s);
ringBuffer[s.pos] = (byte) prevByte1;
+ s.pos++;
s.j++;
- if (s.pos++ == ringBufferMask) {
+ if (s.pos >= fence) {
s.nextRunningState = INSERT_LOOP;
- s.bytesToWrite = s.ringBufferSize;
- s.bytesWritten = 0;
- s.runningState = WRITE;
+ s.runningState = INIT_WRITE;
break;
}
}
@@ -868,7 +873,6 @@ final class Decode {
s.maxDistance = s.maxBackwardDistance;
}
- s.copyDst = s.pos;
if (s.distance > s.maxDistance) {
s.runningState = TRANSFORM;
continue;
@@ -907,12 +911,11 @@ final class Decode {
ringBuffer[s.pos] =
ringBuffer[(s.pos - s.distance) & ringBufferMask];
s.metaBlockLength--;
+ s.pos++;
s.j++;
- if (s.pos++ == ringBufferMask) {
+ if (s.pos >= fence) {
s.nextRunningState = COPY_LOOP;
- s.bytesToWrite = s.ringBufferSize;
- s.bytesWritten = 0;
- s.runningState = WRITE;
+ s.runningState = INIT_WRITE;
break;
}
}
@@ -933,16 +936,13 @@ final class Decode {
int transformIdx = wordId >>> shift;
offset += wordIdx * s.copyLength;
if (transformIdx < Transform.NUM_TRANSFORMS) {
- int len = Transform.transformDictionaryWord(ringBuffer, s.copyDst,
+ int len = Transform.transformDictionaryWord(ringBuffer, s.pos,
Dictionary.getData(), offset, s.copyLength, transformIdx);
- s.copyDst += len;
s.pos += len;
s.metaBlockLength -= len;
- if (s.copyDst >= s.ringBufferSize) {
- s.nextRunningState = COPY_WRAP_BUFFER;
- s.bytesToWrite = s.ringBufferSize;
- s.bytesWritten = 0;
- s.runningState = WRITE;
+ if (s.pos >= fence) {
+ s.nextRunningState = MAIN_LOOP;
+ s.runningState = INIT_WRITE;
continue;
}
} else {
@@ -954,11 +954,6 @@ final class Decode {
s.runningState = MAIN_LOOP;
continue;
- case COPY_WRAP_BUFFER:
- Utils.copyBytesWithin(ringBuffer, 0, s.ringBufferSize, s.copyDst);
- s.runningState = MAIN_LOOP;
- continue;
-
case READ_METADATA:
while (s.metaBlockLength > 0) {
BitReader.readMoreInput(s);
@@ -975,6 +970,10 @@ final class Decode {
copyUncompressedData(s);
continue;
+ case INIT_WRITE:
+ s.ringBufferBytesReady = Math.min(s.pos, s.ringBufferSize);
+ s.runningState = WRITE;
+ // fall through
case WRITE:
if (writeRingBuffer(s) == 0) {
// Output buffer is full.
@@ -983,7 +982,14 @@ final class Decode {
if (s.pos >= s.maxBackwardDistance) {
s.maxDistance = s.maxBackwardDistance;
}
- s.pos &= ringBufferMask;
+ // Wrap the ringBuffer.
+ if (s.pos >= s.ringBufferSize) {
+ if (s.pos > s.ringBufferSize) {
+ Utils.copyBytesWithin(ringBuffer, 0, s.ringBufferSize, s.pos);
+ }
+ s.pos &= ringBufferMask;
+ s.ringBufferBytesWritten = 0;
+ }
s.runningState = s.nextRunningState;
continue;
diff --git a/java/org/brotli/dec/DictionaryData.java b/java/org/brotli/dec/DictionaryData.java
index 9ac6e55..2355b28 100644
--- a/java/org/brotli/dec/DictionaryData.java
+++ b/java/org/brotli/dec/DictionaryData.java
@@ -6,6 +6,7 @@
package org.brotli.dec;
+import java.io.UnsupportedEncodingException;
import java.nio.ByteBuffer;
/**
@@ -20,31 +21,33 @@ final class DictionaryData {
private static void unpackDictionaryData(
ByteBuffer dictionary, String data0, String data1, String skipFlip) {
- int n0 = data0.length();
- int n1 = data1.length();
- if (n0 + n1 != dictionary.capacity()) {
+ // Initialize lower 7 bits of every byte in the dictionary.
+ byte[] dict;
+ try {
+ // NB: String#getBytes(String) is present in JDK 1.1, while other variants require JDK 1.6 and
+ // above.
+ dict = (data0 + data1).getBytes("US-ASCII");
+ } catch (UnsupportedEncodingException e) {
+ throw new RuntimeException(e); // cannot happen
+ }
+ if (dict.length != dictionary.capacity()) {
throw new RuntimeException("Corrupted brotli dictionary");
}
+
+ // Toggle high bit using run-length delta encoded "skipFlip".
int offset = 0;
- for (int i = 0; i < n0; ++i) {
- dictionary.put(offset, (byte) data0.charAt(i));
- offset++;
- }
- for (int i = 0; i < n1; ++i) {
- dictionary.put(offset, (byte) data1.charAt(i));
- offset++;
- }
- offset = 0;
int n = skipFlip.length();
for (int i = 0; i < n; i += 2) {
int skip = skipFlip.charAt(i) - 36;
int flip = skipFlip.charAt(i + 1) - 36;
offset += skip;
for (int j = 0; j < flip; ++j) {
- dictionary.put(offset, (byte) (dictionary.get(offset) | 0x80));
+ dict[offset] |= 0x80;
offset++;
}
}
+
+ dictionary.put(dict);
}
static {
diff --git a/java/org/brotli/dec/EagerStreamTest.java b/java/org/brotli/dec/EagerStreamTest.java
new file mode 100755
index 0000000..069ae34
--- /dev/null
+++ b/java/org/brotli/dec/EagerStreamTest.java
@@ -0,0 +1,386 @@
+/* Copyright 2018 Google Inc. All Rights Reserved.
+
+ Distributed under MIT license.
+ See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+package org.brotli.dec;
+
+import static org.junit.Assert.assertTrue;
+
+import java.io.ByteArrayInputStream;
+import java.io.FilterInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.JUnit4;
+
+/**
+ * Tests for {@link Decode}.
+ */
+@RunWith(JUnit4.class)
+public class EagerStreamTest {
+
+ private static final byte[] DATA = {
+ 31, 118, -122, 17, -43, -92, 84, 0, -76, 42, -80, -101, 95, -74, -104, -120, -89, -127, 30, 58,
+ -4, 11, 91, -104, -99, -81, 44, 86, 61, 108, -74, -97, 68, 32, -120, -78, 97, -107, 88, -52,
+ -22, -55, -8, -56, -106, -117, 49, 113, -106, -82, -43, -12, -11, -91, -66, 55, 68, 118, -127,
+ -77, -104, -12, 103, -14, -94, -30, -112, 100, 79, -72, -42, 121, 62, -99, 76, -39, -89, 42, 58,
+ -110, 91, 65, 32, -102, -113, 49, 4, 73, 60, 122, -106, 107, 16, -123, 30, -97, 90, -102, -83,
+ -65, -90, 34, 26, -26, 52, 75, -118, 43, -47, -47, 52, 84, -10, -121, -68, -2, 20, 80, 101, 53,
+ 101, -119, -17, -111, -75, -21, -66, -96, -80, -114, 4, -65, 124, -89, -3, -25, -25, -21, -35,
+ -15, -114, 55, 14, -76, -68, 71, 9, 123, 46, 78, 67, -18, -127, 70, -93, -128, -44, -87, 3, 36,
+ -107, -3, 62, 83, 75, -123, -125, -11, 50, 46, -68, 80, 54, 9, -116, -29, 82, -14, -87, -94, 92,
+ -88, -86, -18, -1, 22, 3, 46, -98, -128, -27, 121, -56, 88, -37, 85, -43, 61, -60, 12, -122,
+ 107, -64, -27, -45, -110, 123, 60, 99, 108, 46, -29, -77, 76, 65, 100, 92, -104, 40, 63, 19, 36,
+ -89, 80, -39, 37, -95, -74, 97, 90, -109, 54, 105, -10, -38, 100, 95, 27, 36, 33, -60, 39, 100,
+ 32, -18, 93, -46, -99, 103, 127, -91, -62, 82, 76, 56, -66, -110, -16, 83, -116, -76, -9, -47,
+ -5, -32, -65, 111, 0, 55, 47, -60, -95, -56, -100, 65, 125, 38, 77, 38, -32, -62, 55, 119, 10,
+ 120, -69, 33, -111, -62, -87, 17, 102, -95, -106, 26, -50, -16, -109, 94, 83, -79, 90, 2, 42,
+ -47, 37, -124, 114, -68, 6, 104, -65, 38, -108, -114, -110, 73, -95, -83, -90, -86, -36, -48,
+ -63, -97, -120, -25, -53, 93, -77, -50, 59, -74, -9, 36, 85, 11, 76, 95, 74, -61, -9, 116, -14,
+ -38, 73, 78, 44, -92, 58, -27, -54, 38, 81, 50, -36, -46, -117, 126, 89, 53, -37, -58, -12, 61,
+ 77, -56, -85, -21, -128, 43, -111, 14, 54, 57, 116, 52, -85, 70, 88, -72, -26, 54, 109, -70,
+ -84, -13, -1, -54, 91, 81, 101, -65, 49, -48, -16, 26, -115, -39, 100, -21, 105, -121, 38, 72,
+ -115, 104, -100, 36, 120, 15, -109, 115, 64, 118, -68, -14, -26, -57, -71, 9, -118, -113, 15,
+ 94, 108, 114, 109, -14, -80, -31, -57, -6, 57, 111, -36, -92, -25, -23, -71, -61, 120, 93, -65,
+ 104, -123, -53, 35, -77, -8, -23, -31, 99, -3, 73, 75, 98, -2, -94, 73, 91, -109, -38, -78,
+ -106, -121, -17, -21, 55, 45, -26, -7, -93, 38, 59, -90, -116, 3, -68, -2, -110, 19, -96, 28,
+ -23, -39, 102, 99, 8, -82, -41, 63, 88, -70, 115, -123, -11, 111, 92, 47, -12, -16, -70, -2,
+ -29, 101, 61, -45, -57, 54, 24, -125, 20, -37, -75, 89, -56, 52, 125, 22, -68, -63, 105, -91,
+ -20, 91, 56, -99, -56, 35, -77, -78, -24, -79, 57, 5, -55, 101, -127, 75, -35, -113, -51, -103,
+ 79, 102, 16, -124, -79, -128, -45, -65, -84, -97, -91, -90, -105, 76, 90, -93, 90, -49, -41,
+ 104, 44, 81, -37, -84, 103, -120, -51, 79, -43, -114, -101, 38, -78, -94, -1, 15, 109, -62, 34,
+ -65, -127, 98, 32, 46, -72, 70, 58, -61, -55, 90, 30, -103, 5, 109, -105, -119, 81, 92, -40,
+ -75, -23, -77, 36, 18, 62, -33, -51, -38, -19, -12, 89, -101, 117, 94, 71, 127, -43, 54, 115,
+ -67, 34, -83, -115, 127, 127, 42, 126, -121, -121, -40, 56, -113, 60, -27, 30, 44, -21, 98,
+ -123, -14, 91, -69, 15, -81, 119, -101, 25, -73, 40, 105, 26, -86, -31, 86, -75, 74, 94, -74,
+ 19, -4, -20, 69, 24, 43, -5, -91, 6, -89, 52, 77, -65, -71, 82, -81, -52, 22, -61, -15, 51, 22,
+ 1, 70, -43, -3, -39, -27, 123, -13, -127, -86, 65, 51, 45, 127, -101, -27, -3, -44, -34, 75, 69,
+ 77, 71, -34, 7, -51, 93, -83, -84, -57, -38, -100, 59, -105, -1, 44, -47, 63, 96, -127, 32, -63,
+ 16, 80, -64, -127, 6, 54, 12, 44, 28, 48, -128, 4, 10, -104, 64, 3, 11, -47, 59, -79, -125, 52,
+ -16, -78, -66, 19, -6, -33, -107, -10, -4, -42, 102, -31, -32, 99, 115, -22, -96, -45, -112, 28,
+ 126, -44, -4, -47, -99, 127, -84, 37, -112, -34, 36, 1, -68, -14, -16, 55, 83, -99, 120, -69,
+ -30, 89, 48, 126, -80, -43, 15, 13, -18, 14, -4, -126, -120, -118, -11, 100, 16, 76, 17, 54,
+ -75, 114, 101, 37, 121, -23, -65, 39, 94, -48, -78, 67, -61, 75, 48, 23, -127, 83, -124, 95, -5,
+ 67, 13, 87, 18, -2, 117, -36, -121, 115, -112, -107, -54, -36, 14, -4, -68, 35, 32, 79, -118,
+ 81, 94, -56, -110, 37, -84, -121, 72, -7, -52, -40, -44, -1, 73, 123, 12, 42, -67, -87, 63, -2,
+ -100, 29, -41, 112, 98, -125, 88, 97, -56, 90, 7, -40, -111, -126, 74, 121, -95, -45, -69, 48,
+ -98, 18, -20, -124, 3, 46, -5, 26, 24, -79, 109, 4, 43, 60, 97, 96, -76, -21, 95, -52, -40, -45,
+ 2, -103, -107, -9, 79, -79, -82, -73, -51, -74, -10, 81, -77, 111, -96, -41, -120, -38, 24, -87,
+ 93, -41, 64, 72, 57, -81, -32, 60, -79, 36, -84, -89, -7, -25, 81, -98, 36, -22, -69, 86, 123,
+ 120, -16, -113, -70, 47, -125, 2, 97, 78, -91, 102, 120, -91, 5, -71, 39, 116, -12, -79, -29,
+ -9, 87, -5, -37, 87, -73, 116, -15, -10, -106, -49, -3, -21, 5, 120, 47, 72, -40, 79, -3, 85,
+ -84, -87, 57, -83, -67, -64, 122, -39, 36, 70, -27, 71, -73, 42, -100, -99, 124, -90, 90, -29,
+ -54, -115, 7, 89, -51, 9, -43, 32, 79, -104, 127, -38, 7, 93, -80, -124, 27, 96, 54, -51, -7,
+ 57, 57, 63, 21, 110, 70, 122, 76, 51, 124, 78, -5, 126, -100, -98, 116, 59, 125, -106, -113,
+ -111, -128, 92, 43, -19, -2, 105, -90, 96, -116, -116, -30, 115, -20, -106, 64, -108, -111, 94,
+ -9, -123, 52, -71, -88, -84, 87, -25, -54, -117, -2, -29, 29, -85, -22, -20, -94, -25, 98, 101,
+ 114, 80, -55, -51, 97, 99, 117, -86, 2, 79, 48, 110, 44, -94, -127, -85, 61, -95, 30, -91, -125,
+ 83, 113, -93, -4, -126, -98, -93, -68, -99, -70, -37, -73, -90, 4, 53, -2, 78, -35, 101, 42, -6,
+ -3, 106, -117, -127, 48, 31, 88, 117, 116, 106, -98, -23, -117, -7, -57, -128, -118, -117, -118,
+ 115, 30, -61, 6, -38, -114, -103, 37, 53, -4, -100, -121, 98, -110, -113, 2, -20, 26, -88, -118,
+ 19, -71, 39, -54, -11, -28, 47, 28, 89, 35, -13, -20, -48, 14, -6, -91, -85, -119, -7, 116, 112,
+ 114, 41, 44, -1, -39, 60, -85, -54, 101, -119, 95, -77, -64, -121, 47, 75, -78, -30, -66, -38,
+ -15, 98, 14, 82, -60, 85, -90, -78, 112, -7, 64, 5, 28, 64, 41, -64, 57, 85, 21, 122, -52, 90,
+ 70, -73, 17, 47, -125, 40, -45, -7, -91, 100, -21, -120, -51, 21, 65, 31, 110, -105, -79, -80,
+ 105, -43, 73, -61, 45, -30, -4, 83, 95, 3, 109, 55, -92, 120, 74, -36, -111, 54, -26, 76, -69,
+ 7, -20, 55, 4, 70, -124, -31, -32, 127, -63, -58, 73, 106, 109, -41, -45, 96, 30, 63, 14, 8, 16,
+ -88, 69, -115, -17, -14, -116, 115, -88, 119, -65, 16, -64, -112, -73, -10, -46, -7, 113, 5, 54,
+ -38, -47, -18, 106, 23, 12, -117, 120, -107, 121, -62, -35, -6, -56, 112, 81, 3, 5, 31, -11,
+ -92, -85, -29, 102, 43, 108, 88, -69, 55, -74, 110, 97, -128, 29, -63, -114, -19, 77, 123, 23,
+ 76, 81, 57, 51, 117, -74, -1, 74, 84, 70, 86, -109, -127, 122, 10, 9, 23, 71, 110, -116, -30,
+ -85, -104, 2, 40, -62, 20, 46, 8, 95, 46, 13, 113, -83, 124, 33, 38, 105, -99, 72, -62, -80,
+ -16, -118, 92, -66, -14, -124, 112, 79, 103, 53, -127, 61, -31, -92, 92, -42, -37, -37, -24,
+ -116, 2, -81, 40, 46, -44, 23, -68, -113, 88, 92, 95, 11, 118, 98, 19, -80, -102, 96, 73, -20,
+ 47, -105, -120, -74, -83, -77, -87, -59, -97, 112, 99, -52, 80, 116, -119, -44, 18, 62, 108, 73,
+ -34, 70, 28, 73, 81, -26, 87, -125, -55, 64, -53, -73, 114, -3, -45, -109, 19, -2, 68, 119, 14,
+ 26, 72, 19, 13, -121, -98, 26, -52, 85, 34, 17, -95, -7, 20, -12, 106, -11, 104, 20, -106, -42,
+ -26, 107, -106, 112, 103, -53, -62, 13, -58, -23, 23, 65, -104, -55, -90, 107, 55, -77, -25,
+ -125, 63, -61, -21, 117, -102, -70, -93, -67, -45, -61, 18, -63, 7, -127, 90, 16, -25, 116, 80,
+ 35, 105, 80, -93, 105, -44, 114, -126, -103, 88, -102, -76, -94, -66, 69, -35, 22, 36, 95, -55,
+ 22, 43, 78, -111, 109, 72, 104, -49, -9, -48, 59, 102, -54, -43, -128, 111, 127, -9, 35, 23,
+ -79, 40, -122, -52, 36, -81, -4, -102, -2, -62, 53, -111, -117, 40, 122, -95, 55, 32, -127, -9,
+ -91, 79, -109, -81, -3, 98, -78, 56, -119, 69, -41, 76, -102, 18, 90, -15, 12, -60, 86, -106,
+ 34, -118, -43, -13, 61, -106, -56, 48, 27, -15, -70, -41, 127, 61, -2, -80, -13, 86, 28, 91,
+ -10, -8, 98, -20, 54, 122, -116, -55, -70, -94, 54, -64, 71, 102, -106, -1, 99, -73, -71, -18,
+ -11, 56, 11, -27, -5, 11, -86, 126, 8, 46, -21, 63, -66, -43, 88, 46, -113, -5, 113, 26, -9,
+ -32, 18, -3, -6, -38, 81, 38, -110, 111, 97, 34, 65, 114, -71, -118, 9, -110, -109, -61, -113,
+ 31, -82, -102, -127, 16, -7, -16, -11, -87, -76, -41, -52, 58, -116, 100, 102, -127, 6, 127, 64,
+ 14, 110, 112, 43, 44, 87, 42, -118, -119, -39, 64, -7, 57, 16, 2, -69, -12, -54, -94, 36, -48,
+ 123, -119, 82, 46, 26, -62, 30, 97, -17, 34, 80, 32, -15, 116, -96, -3, 33, 34, 51, 59, -63,
+ -100, -7, -79, -126, -21, -15, -18, -113, 30, -25, 107, -25, -125, 53, 82, -15, -80, 96, -24,
+ -47, 94, -25, -109, -94, 114, -62, 112, -104, 26, -107, -68, -14, -36, -9, -89, 27, -75, 62, 62,
+ -20, -125, -77, -57, -127, 80, 58, -118, 63, -27, -82, -126, 74, -23, -91, -28, -95, 8, -122,
+ -73, 28, -87, -74, 80, -15, -119, 14, 32, 124, 73, 15, 61, -32, -68, 81, 56, -119, 66, 105, 3,
+ -15, 20, -86, 124, -70, -113, 100, -72, -117, -97, 127, 103, 16, 105, 8, 39, -128, -64, -47, 66,
+ 123, -110, 13, 123, -124, -24, 42, 102, -4, 47, 107, 125, 63, -52, -35, 113, -74, 13, 8, 17, 16,
+ -106, -21, -69, 47, -3, 103, -2, 19, -100, 111, -11, 1, 112, 90, -38, -31, -45, -55, 25, 92,
+ -122, 66, -18, -98, -82, -49, 119, -35, -128, 26, 60, -79, -23, 127, 82, -52, 115, 77, -109,
+ -111, 17, -99, 31, 33, 41, 35, 87, -47, -126, -18, -25, 81, -71, 9, -72, -92, 64, -92, 23, 116,
+ 96, 40, 55, -87, 119, -105, 66, 49, 46, -10, 26, -25, -105, 127, -124, 86, -2, 39, 116, -108, 6,
+ 21, 15, 1, 75, -5, 101, 13, 57, 70, 126, -50, -97, 123, -73, -77, 53, -11, -73, 44, -99, 91, 85,
+ 21, -59, -1, 117, 64, -100, 47, 75, 93, 9, -4, 83, -55, 15, 99, 31, 43, -49, 15, -89, -115,
+ -114, -50, -35, -19, -65, 122, -39, 92, -21, -3, -66, 8, -70, 107, -55, -86, -36, -23, -21, 80,
+ -79, 48, 116, 57, -71, 33, -111, -68, -75, 37, 55, 39, 124, 96, -66, 10, 14, 118, 50, 85, -33,
+ 54, -101, -7, 21, 88, -122, 50, -92, 123, 37, 109, -60, -127, 26, 110, -20, -31, -66, -56, -24,
+ 47, -14, -60, -101, 69, -38, 78, 0, 44, -71, 108, 4, 25, -68, -106, 20, -40, -103, 108, -70,
+ -56, 78, 12, 82, 81, 46, -105, -123, -46, -20, -127, -67, -77, 76, -74, 40, 105, 2, 27, 112,
+ -107, -121, -53, 6, -88, -11, 26, 41, 64, -69, -44, 27, 47, 24, -31, -86, -4, 4, -46, 42, 50,
+ -55, 37, -11, -95, 108, 54, 37, 67, 37, -14, -40, 41, 124, 22, 108, 99, 16, 55, 88, 19, 49, -87,
+ 27, 17, -68, -107, 15, -62, 84, 109, 72, -26, 71, 63, -17, -72, -63, -101, -8, 62, 24, -112,
+ 126, -102, -64, 29, -19, -75, 74, 29, 90, -90, 83, -22, 106, -27, -114, 56, -111, -33, 11, 3,
+ -16, 94, 115, -97, 67, -78, 62, -93, -36, 60, -65, -54, 72, 70, 44, -77, 73, 29, -106, 38, 72,
+ -37, -110, 79, -98, -15, -58, 96, -85, -68, -15, 73, 57, -127, 14, -123, -40, 70, 63, -64, 115,
+ -63, 127, 94, 85, 52, 30, -62, 83, -30, -97, 82, 39, 2, 36, -50, 106, 116, 66, 104, -14, 73, 14,
+ -106, -127, 11, 41, -27, 56, -99, -74, 55, 123, 124, 9, 46, 12, -97, -37, -10, 122, 124, -27,
+ -64, 93, -70, 9, 119, 13, -9, -71, -118, 19, 50, -36, 114, 120, -24, -62, 40, 127, 9, -62, 84,
+ 57, 66, 91, -114, 120, -49, 63, 99, -73, -66, -64, 84, -31, 67, -52, 12, 38, -62, 37, -122, -50,
+ -95, 24, 19, 54, -80, 57, -118, -84, 124, 90, 53, 72, 29, -123, 67, -65, 99, -58, -28, 20, -110,
+ -103, 92, -91, -108, 23, -118, 44, 74, 76, -29, 94, -121, -37, -32, 107, -62, -67, -55, -45,
+ -50, -44, 25, -77, -102, 90, -128, -31, -5, -64, 110, 122, 88, -18, -53, -85, 122, -11, 100,
+ -106, 97, 59, -103, -110, 5, -16, 59, -126, -74, 9, -119, 115, 49, -73, -42, 32, 100, 59, -98,
+ 106, 55, -101, 87, 126, 59, -23, 106, -102, 100, -69, -46, 76, 53, -107, -119, -113, 104, 117,
+ -27, 75, -32, 8, -81, -10, 50, 108, -32, 51, -79, -53, -2, 66, -9, 113, 14, 99, -100, -34, -21,
+ 13, 2, 45, -33, 0, -16, -64, -126, 69, -25, -34, 28, 105, -48, -38, 82, 12, 27, -71, 35, 13, 11,
+ 21, 26, -19, -4, 44, -52, -126, -63, -32, -84, -22, -63, -29, 96, -97, -82, -12, -53, 98, 41,
+ -69, -38, 101, -31, 47, -9, 16, -10, 9, -36, -103, -91, -65, -36, -93, -45, 94, 110, 54, -94,
+ 68, -39, -116, -40, 61, -112, -91, -79, 98, -36, 87, 35, 88, -61, 125, 112, -84, 48, -38, 105,
+ -92, 69, -68, 92, 0, 27, -72, -65, 97, 98, 66, 97, -74, 29, 46, -21, 102, 61, 120, -62, 38,
+ -125, -60, -43, 4, 5, -27, 113, -43, 105, -22, -110, 68, 13, -14, -23, 18, 95, -79, -108, 87,
+ 19, -80, 16, 54, -121, 88, -64, -113, 73, 3, -20, 17, 0, 26, -88, -49, -2, 21, 120, -105, -85,
+ -113, 76, 106, 37, -13, -75, 29, -127, 10, -17, -53, -124, 24, 37, -31, 26, -1, 109, 88, -88,
+ -37, -51, -32, -125, 48, -40, 123, -108, 55, -120, -62, -91, 47, 62, -127, -25, 99, 68, 22, -40,
+ 58, 119, -31, -93, -122, 39, -92, 25, -127, -42, 97, 69, -6, 110, -61, -21, -94, 82, 123, -93,
+ -51, -90, 50, -96, 127, -32, 125, -76, 117, 75, -52, 79, 110, -51, -15, -81, 49, 62, 118, 120,
+ -27, 22, 84, -22, 77, -105, 87, -7, -23, 47, -8, 108, 82, -12, 84, -52, -85, 68, -89, -24, -32,
+ 6, -34, -83, 80, 44, 12, -51, 50, 74, -121, -106, 6, 85, 32, 42, 76, -59, -52, -99, 102, 108,
+ -127, -49, 0, 60, 62, 2, 13, -19, -92, -41, -69, 55, -70, 94, 23, 36, 89, 70, -115, -51, 26,
+ -95, 13, -69, 42, 62, 59, -24, -63, -50, -6, -86, -97, -115, -58, -107, 69, -12, -109, 73, 4,
+ 63, 12, 32, 13, -123, -72, -41, -7, -81, 37, -91, -128, 109, -79, -80, 88, -22, 108, 126, 103,
+ 27, -29, -81, 52, 55, -91, -13, -43, -75, -59, 80, -6, 6, 83, -103, -64, 8, 63, -34, -59, 21,
+ 55, -115, 62, 77, 30, -50, -71, -66, 87, 99, -47, 0, 124, 76, 120, 79, -12, 54, -16, -98, -72,
+ -41, -66, -14, 114, -27, 108, 57, -49, 107, -73, 90, 107, -103, 25, -107, 112, -119, -54, 106,
+ -54, -8, -13, -81, -62, 92, -84, 113, 77, 74, -63, 104, 92, -94, -128, -43, -54, -71, 117, 27,
+ 14, 98, 52, 119, -93, -77, -80, -46, 88, 35, 123, 86, 87, 122, 62, 108, 19, 27, 111, 2, 62, -67,
+ 89, 14, -82, 41, 123, -117, 74, 109, -124, -115, 15, 123, -65, 42, -81, -105, 19, -30, 86, -72,
+ 84, 63, -109, 34, -65, -127, 6, -104, 77, 103, -111, 90, 16, 31, -74, -33, 122, 58, 52, 10, 2,
+ 65, 72, 68, 79, 52, 31, -19, 100, -86, 21, -49, 116, 101, 82, 111, -96, -76, 67, -40, -62, -15,
+ -79, 109, -58, 6, 11, -91, -29, 65, 21, 75, 74, -28, 21, 103, 46, 48, -42, 51, -110, 80, -95,
+ -102, -9, 8, -95, 102, 102, 16, 105, 103, 92, -106, -109, 77, 93, 32, -12, -25, 5, 17, -86, -34,
+ 58, -50, 55, 63, -8, -72, 3, 26, 91, 72, 71, -77, 94, 91, 39, 45, 7, 0, 30, -45, -100, 35, 43,
+ -41, -72, 16, -103, -115, -4, 51, 39, -23, -89, -84, 105, 94, -91, -88, 82, 123, -26, 51, -16,
+ 97, 47, -39, 35, 46, -89, 74, 7, -80, 116, -21, 82, -84, -13, -99, 31, -58, -93, 36, 99, 36, 44,
+ -65, 45, 94, -91, -41, 115, -10, 116, -67, 45, 19, -20, 113, -62, 111, 124, 108, 71, -121, -64,
+ 122, -121, -105, 114, 115, -126, -93, -108, -113, -1, -80, -86, 116, -111, -29, 53, -76, 87, 19,
+ 45, -30, 91, 91, -7, -49, 12, 112, -8, -26, 82, 58, -82, -76, 119, -50, 14, 85, 113, 20, 48,
+ -102, 37, 24, -120, -107, -52, 67, -44, -92, -79, -40, 28, 21, 55, 116, 88, 19, -49, -78, 86,
+ -89, 74, -4, 118, 75, 11, -103, -127, -47, -16, -77, -78, 8, 2, -88, 50, 23, -99, 102, -100,
+ -116, -99, -109, -112, -115, 78, 55, -39, -84, 100, -91, -101, 73, -9, 39, -23, 62, -125, -106,
+ -55, 119, -118, 114, -33, -99, 20, -53, 91, 115, 47, -93, 51, -99, -9, 92, -71, 120, 57, -44,
+ -87, -11, 108, 30, 43, -4, 118, 90, 126, -54, -99, -47, -2, -61, -3, -62, 45, 92, -70, -105, 30,
+ 98, 112, -94, 56, 35, -22, 32, -93, -6, -36, -5, -77, -78, 120, 45, 104, 69, -49, -30, 39, 75,
+ 38, -94, -12, 34, 34, -44, 48, -100, 74, 34, 69, 94, -12, 73, 27, -111, 90, 33, -38, 93, 40,
+ -16, 89, 26, -110, -116, -10, -65, 85, -57, 48, -86, 121, 118, -41, 63, 33, 109, -78, -26, 122,
+ 111, -115, -52, 95, 26, -70, -14, -86, -80, -27, -6, 12, -44, 123, 28, 93, -74, 14, -124, 87,
+ -28, -12, 111, -117, -83, 48, -41, -3, 60, -51, -91, 118, 54, 110, 18, -2, -120, -66, 46, -35,
+ -91, 106, 94, -91, -11, 41, -92, -22, 96, -113, -109, 105, 56, -80, 17, -118, 124, -16, 30, 30,
+ 117, 126, -99, -106, -69, -28, 85, 85, -41, 21, -95, -85, -112, -125, -45, 69, 10, -34, -120,
+ 33, -58, 120, 51, -22, -7, 31, -34, 4, 55, -102, -70, 118, -83, 49, 111, -45, -9, 69, -95, -66,
+ 116, -3, 104, -61, 17, 21, -20, 121, 117, 127, -70, 5, 89, -89, 51, 15, 64, 126, -73, 97, 90,
+ 119, -22, -37, -54, 52, -33, 26, -54, 75, 79, 73, 100, 44, 3, 53, -25, 49, -123, -101, -80, -54,
+ -81, -32, 88, 49, -14, -4, 18, 42, 52, -65, -33, 68, 83, -89, -11, 57, 102, 71, 122, 74, -92,
+ -44, -94, -108, 14, 104, -107, -124, -63, 8, 32, 85, -18, -16, -91, -63, -38, 27, -108, 24, 19,
+ -33, 53, 70, -32, 41, 38, -77, -30, 89, 28, -15, -89, -86, 32, 51, 28, 67, 124, -96, -103, -34,
+ -113, 22, 15, -8, 104, -38, -56, 65, -96, -111, 104, -9, -38, 107, 55, 112, 47, 99, 50, -18, 90,
+ -69, 116, 80, 95, 52, -27, -98, 6, 12, -11, 124, -120, -96, -91, 118, -51, -120, 90, -92, -104,
+ -83, -73, 84, 61, 78, -39, -99, 33, 58, -45, -14, 127, -20, -44, 125, 21, -26, -21, -36, 51, 73,
+ 71, 73, -17, 83, 11, 107, 91, 36, -65, -24, 56, 117, 114, -126, -34, 1, 120, 66, -50, 14, 91,
+ 97, -35, 75, 87, 123, -53, 63, -38, -74, -62, -117, -45, -40, 125, -5, 53, 50, 0, -110, 7, 7,
+ 45, 37, -71, -21, 70, -95, -60, 74, -55, -54, -96, 115, -62, -32, -3, -121, -18, 27, -107, 49,
+ -39, 58, -39, 91, 107, 65, -99, -64, -19, -10, -126, 38, -40, -112, 0, 16, 107, -59, 119, -70,
+ 79, 49, -18, -76, -22, -38, -98, 35, -99, 61, 67, -100, 29, -104, -17, 22, 108, 105, 88, -114,
+ -65, 84, 99, -69, -84, -87, -81, -28, 68, -66, 3, 69, -69, 83, 16, 61, -102, 50, 67, 46, -98,
+ -77, -40, -78, 48, 68, -85, 123, -92, 37, 14, 75, 13, -23, -110, 23, 26, 90, -81, -1, -109, 85,
+ 121, -68, -55, -7, 21, -81, -35, 41, 3, -72, -52, 36, 35, -83, -9, -81, -124, -104, 31, 54, 8,
+ -32, 80, 73, 89, -41, 116, 127, -110, 68, -82, 82, -79, 105, 113, -110, -70, 121, -24, -54, 37,
+ -12, -70, -77, 15, 14, 105, -19, 16, -6, 73, 102, 121, -116, -62, 54, 65, 119, 43, 60, -79, -66,
+ -17, 1, 97, -1, -11, -5, 104, 10, 59, -108, 21, -8, 64, -71, -86, 14, -98, -87, -49, 30, -45,
+ 109, 43, -67, 10, -122, 25, 98, -102, 127, -27, -52, -61, -66, -47, 114, -94, -126, 4, 0, -65,
+ -11, -51, -67, 84, -43, 44, 88, 53, -6, 124, 11, -123, 34, 12, 102, -13, -106, 47, 62, -71, 43,
+ -65, 28, 37, 32, 80, 23, 6, 75, -103, 73, 112, 33, 84, -89, 12, -81, 42, 65, 58, 14, -102, 90,
+ 29, -116, 104, 107, -99, -1, -43, 122, 118, 88, -2, 117, 84, 1, -123, -2, 2, -32, -18, -122,
+ -36, -58, 16, 76, 115, 27, -121, -2, -79, -44, -39, 33, -29, 33, -34, 55, 71, 61, 117, -22,
+ -126, 51, 29, 55, -34, -48, 17, -57, 74, 71, -33, -50, 60, 41, -119, -93, -45, -127, -30, 104,
+ 35, 60, -117, -113, 81, -59, -39, -84, -39, -46, -106, 57, 77, 62, -11, -44, -87, 71, 35, -117,
+ -87, -77, -98, 68, -29, -121, -16, -16, 39, 48, -74, 23, 82, -62, 32, 62, 27, 125, 84, 39, -91,
+ -91, -93, 76, -24, 98, 123, -58, -114, 17, 28, 93, -17, 74, 92, -17, 9, -86, -116, -72, 54, -74,
+ 71, 9, -97, -33, -20, -126, -50, 117, 102, 54, 123, 124, -70, 30, -102, 27, 23, 105, -40, -35,
+ -89, -33, 89, 3, 44, 18, -15, 10, 116, -111, 1, -81, -31, -125, -102, 103, -93, -15, 72, 84, 19,
+ -30, -17, -115, 99, 43, 5, -92, 52, 59, -55, -105, -128, 19, 8, -78, 43, 7, -55, -126, -106, 11,
+ 69, 118, 24, -128, -54, -86, 22, -121, -43, 69, -15, 96, 52, 52, 90, -118, -10, -58, 121, 63,
+ -48, -13, 22, -101, 17, 42, -28, -54, -63, 121, -96, 111, 113, 103, 126, 37, -52, -40, -106,
+ -104, 123, -48, -92, 83, 100, -70, -52, -59, -93, -116, -90, -93, 82, -117, 103, 52, -71, -42,
+ 57, 25, 57, -74, 71, 7, 32, 96, -60, 11, 121, 58, 71, 40, -92, 35, 88, -12, -109, -56, -122,
+ -30, -118, 103, 65, -5, -90, -97, 103, -117, 66, -20, -42, -46, 67, -29, -23, 72, -97, 26, -54,
+ -103, -76, -47, -71, 23, -83, -20, 95, 111, 101, -83, 106, -71, -70, -63, 55, -85, -41, 117, -9,
+ 37, 96, -71, -118, -44, -43, 2, 107, 113, -39, -107, 41, -13, 0, -87, 77, 83, 99, 68, -84, -6,
+ -1, 67, 124, -57, 115, 29, 24, 26, -42, 104, 58, -87, -38, 12, -98, 11, 109, 62, 59, -66, -48,
+ -20, 70, -111, 11, 120, 21, -58, -29, -76, 44, -7, 26, -119, -59, -87, 44, 122, 8, 114, -58,
+ -109, -119, -63, -58, -51, 33, 35, -109, 81, 110, -90, 121, -21, 64, -60, 68, 18, 75, -82, -81,
+ -103, -76, -116, 23, 53, 58, -41, -23, 49, -102, 81, 101, 39, -59, -91, -98, 111, 2, 65, 110,
+ 121, 5, 13, 97, -119, 109, 40, 82, 47, -51, 47, -57, 35, -109, 53, -42, 10, 3, -15, 122, -25,
+ -67, -62, -121, -120, -31, 18, -20, 87, -88, 75, 95, -121, -93, 33, 61, -88, -96, 88, -69, -54,
+ -121, -99, 49, 122, -53, -49, -125, 53, -79, -46, -128, 109, 125, -93, -83, 44, -101, 69, 68,
+ -91, -17, 55, -13, -75, -80, 21, 32, -13, 40, 86, -65, 85, 80, -82, -38, -52, 110, -119, 100, 8,
+ 77, -23, 67, -41, 73, 27, 38, 9, -11, -32, -30, 75, -15, 67, -41, 46, 27, -89, 9, 117, -38, -14,
+ -81, -4, 71, 113, -79, 81, -36, 63, 15, -70, 104, 34, -56, -39, 93, -34, -127, 90, -36, 73, 47,
+ -76, 113, 55, 123, -92, 48, 116, 108, -123, 31, -67, -39, 3, -9, 6, 13, -17, -50, -125, 1, 105,
+ 121, 100, 79, 82, -85, 123, -33, -73, 54, -61, -113, 121, -110, 69, 119, 94, -112, -120, -34,
+ -35, -104, -116, 44, 85, 109, -104, 127, 120, 87, 75, -48, -115, 74, 85, -47, -53, 16, -5, 92,
+ 67, -32, 12, 79, 109, 105, 5, -92, 51, 46, 96, -96, 63, 106, 82, -54, -95, 20, -60, -23, 48, -5,
+ -128, 22, 23, -93, 93, -64, 35, 21, -121, -79, 59, -1, -50, 55, -7, -10, -85, 3, -7, 121, 98, 5,
+ -19, 76, -78, -128, -47, -42, 61, -59, -46, -24, -16, -51, -48, 122, -26, 74, -91, 54, 53, 46,
+ 74, 25, -30, -74, 52, -22, 118, -103, -53, -113, 44, -19, 70, -86, 106, 72, -68, -86, 110, 34,
+ -35, 57, -43, 32, -4, 14, 102, 25, -76, -84, -86, -83, -2, -107, -4, 49, -97, -83, -95, 6, 100,
+ -73, 6, 34, 49, 59, 50, 30, -8, 6, -55, 24, -6, 67, -121, 115, 40, -50, -75, -46, -26, 111, -20,
+ -75, -83, -16, -48, 65, -64, 119, 62, -59, 3, -12, 109, 0, -118, -94, 17, -51, 124, 63, 42, -3,
+ 44, 53, -81, -35, -33, -83, 115, -114, -4, -104, 44, 7, -81, -97, -102, 104, 29, -97, 70, 91, 3,
+ 88, 67, -127, 78, -92, -16, -34, -18, -81, -125, -38, 117, -78, -36, 9, 76, -85, 121, 2, 10,
+ 114, 65, -5, -29, -34, 101, 20, -108, 46, -90, -98, 85, -62, -51, 108, -72, -51, 44, 22, 112,
+ 121, 58, -58, 109, -96, 58, 103, 27, -88, -81, 99, -7, -33, -113, 64, -122, 115, 19, -93, 37,
+ -19, 93, -98, 78, 115, 91, -88, -82, -36, 61, 90, 77, 27, 26, -116, 80, 90, 85, 6, -87, 59, 110,
+ 63, 20, -81, -127, -53, 18, -73, 39, 75, 79, -106, 29, -50, -13, 43, -99, -92, 109, 80, -83, 69,
+ -102, 38, 90, -41, 48, -47, -93, 18, 116, 32, 90, -73, -96, 90, 49, 19, 73, -35, 60, 53, -72,
+ -52, 84, 52, 27, -67, -114, 82, 79, -89, -80, -111, 124, -51, 80, 110, -76, 125, 18, -73, 44,
+ -100, 118, -16, -64, -35, 22, -86, -116, -19, -101, -35, 42, 85, -83, 69, -65, 37, -104, -88,
+ -108, -25, -9, 15, 91, -100, -86, 8, -75, -37, 103, 3, -69, -9, 114, -25, 25, -87, 118, -75,
+ -115, -8, 74, 53, 73, 46, -22, -108, 30, 71, -96, 40, -76, 121, 71, -63, 95, 96, 113, -54, 87,
+ 1, -79, 2, -40, 11, 22, -118, -117, 94, -44, -112, -27, -86, 96, -4, -58, 121, -71, 54, -58,
+ -71, -125, -65, 126, -116, -107, 125, -28, -74, 97, 15, -76, 59, -26, 58, -38, -39, 122, 55, 85,
+ -109, -114, 75, 25, -74, 57, -78, -10, -76, -115, -12, 29, 84, 86, 97, 5, 116, -114, 62, -98,
+ -36, 105, -119, -19, 12, 11, 49, 76, 21, 56, 1, 115, 115, 42, -67, 60, -40, 19, 38, 50, 33, 112,
+ 98, 123, -76, -74, 50, 66, 18, -61, -114, 36, -95, 92, 124, 20, -56, 29, -41, 28, -4, -106, 115,
+ -83, 98, -47, 96, 87, -72, 96, -83, -93, 1, 112, -43, 59, -80, -24, 46, -45, 87, 92, -108, -78,
+ 101, -112, 111, -119, -67, 26, 97, 1, 36, -128, 120, 8, -20, 84, 107, -9, -104, 25, 0, -36, 58,
+ 111, 81, -83, 65, 42, 51, 61, -71, 118, 111, 29, -93, 39, -56, -72, -18, -53, 0, 34, -77, -59,
+ 112, -79, 51, 86, 82, -24, 64, -120, -1, -102, -3, 42, -93, 16, 38, 100, 39, -124, 92, -89, 31,
+ 94, -32, 40, 19, -8, 48, -83, -66, -68, 110, -72, 36, -38, -91, -63, 33, 35, -96, -121, -119,
+ -59, 56, 89, -117, -123, -79, -68, 42, -4, -116, -108, -104, -84, -111, -26, 94, -38, 61, 94,
+ -72, -85, -18, -30, 118, -14, -94, -74, -24, -21, -90, -83, -116, -38, -8, 9, -17, 72, -62, -78,
+ -75, 47, -117, 109, 127, -87, -36, 53, 90, 16, -72, -50, 40, 87, 97, -51, -96, -55, -120, -32,
+ -58, -21, 102, 117, -121, -98, 74, -67, 104, -122, 108, -3, -96, 64, -114, -3, 30, 48, -14, 44,
+ -41, 91, 54, 58, 80, -13, -88, 121, 32, 122, 25, 24, 9, 72, 17, -1, -93, -66, 96, -84, 4, 37,
+ 69, 91, 64, 32, 46, 89, 7, -32, -120, 10, -38, -3, -59, -75, 14, 116, 115, 121, 99, 122, -95,
+ 107, 1, 65, 70, -45, 35, -52, -87, -56, 43, 121, 12, -93, -8, 83, -118, 15, -33, -67, 45, 74,
+ -66, -31, -28, 5, 104, -13, 113, 19, -89, 105, 66, -82, 74, 54, -104, 69, 103, 86, 118, -44,
+ -75, -47, 81, -75, 8, -32, -95, 121, 48, -121, -106, -88, -15, -52, -99, -78, 58, 113, 16, 71,
+ -48, 76, 80, 81, 59, 43, -106, 27, -49, 2, -11, -71, -30, -80, -44, 62, -113, -20, 12, -60, -87,
+ 22, -30, 64, -120, 127, 121, 47, 127, 58, -98, -4, 79, -72, -117, 115, 52, 95, 40, -59, -125,
+ -33, 125, -96, -93, -92, 17, -99, -85, 10, -119, 91, -115, -63, -32, -11, -102, -105, -93, 90,
+ 37, 94, -104, -47, -63, -94, 15, -34, 20, 73, -59, 85, -31, 6, 106, -67, 14, -125, 28, -63, 40,
+ 86, -68, 104, -22, 124, -27, -84, -13, 43, -45, -30, -95, 95, 16, 79, 23, -66, -78, -74, 43, 86,
+ 70, -95, 90, -65, -1, -58, 54, 12, 47, -47, 28, 91, -54, -19, -75, -43, 12, -108, 12, 71, 38,
+ 118, -8, 1, 42, -113, -6, 1, -93, 118, 67, -79, 25, -80, 118, 34, -29, 0, -23, 86, 53, -118, 89,
+ 112, 0, -61, -88, 76, -24, 59, -75, 23, -1, 64, -80, -52, -40, 34, -50, -19, -127, 57, 79, 43,
+ 92, -113, -96, 73, 0, 33, 122, 42, 104, -62, -66, -108, -104, 45, -120, 69, -3, -20, -113, -40,
+ -70, -96, 72, -21, -95, 1, -16, -124, -87, 125, 56, -108, 7, -112, -104, 105, 80, -34, -93, 24,
+ -6, 35, -38, 42, -4, 23, -112, 40, 45, 106, -72, 29, 44, -36, -61, -8, -93, -34, 3, -41, -26,
+ 121, 6, 100, -14, -112, -117, -15, -120, -92, 44, -43, 94, -13, 121, -59, -82, -68, 7, -19,
+ -110, -121, -58, -118, -121, 92, -8, 33, -120, -28, -95, -31, -120, -62, 49, 51, 3, 68, 4, -56,
+ 51, -13, -90, 47, -16, -24, 63, 125, -11, -94, 99, 69, -84, -54, 127, 81, -120, 42, -47, -128,
+ -13, 38, 115, 59, -112, -30, -9, -116, 121, 63, 111, 32, -116, -2, 0, -33, 79, -67, 90, -65,
+ -108, -107, -5, -107, 11, -102, 91, 106, -42, 74, 45, -80, -65, 54, 36, 121, -125, -118, -34,
+ -51, 36, -85, -78, 86, 121, -103, -39, 35, -76, 17, 59, 68, -40, -43, -27, 63, -76, 126, -94,
+ 18, 87, 20, 92, 38, -6, -54, 9, 45, 93, -57, 53, -11, -44, -38, -24, -126, -40, -24, -35, -121,
+ -55, -87, -63, 70, -88, 13, -78, -89, 2, 50, 59, 4, -14, 81, 25, 34, -20, 87, 116, -76, -31,
+ -93, 15, 112, 61, -43, -11, -86, -25, 10, 41, 1, 60, 105, -42, -90, -44, 38, 98, 126, -128, 28,
+ 99, 20, -97, 105, -101, 27, -106, 13, -108, -18, 23, -79, 121, 57, 93, -16, -37, -82, -1, -128,
+ -67, 99, 117, 79, 85, 83, 12, 53, -101, -52, -75, 72, -128, -62, 45, -54, 11, 0, -58, -88, 11,
+ 121, 33, 86, -87, 31, -54, 109, -37, 10, 119, -9, 55, -7, 77, -52, 93, 64, -62, 115, -88, -4,
+ 67, -1, -37, 31, 107, 90, -109, -121, 71, 105, -123, 61, 75, 89, 108, -91, -6, 115, 45, 109, 10,
+ -35, -84, 41, 127, 104, -84, -70, -6, -118, 6, 110, 99, -7, -112, 15, -79, -20, 51, -41, 78, 25,
+ -97, -2, -121, -117, 7, -87, -76, 60, -7, -7, 0, 51, 91, 34, 85, 21, -1, 108, 41, 8, 126, -25,
+ -30, 68, 109, -52, -51, 1, -111, 11, -22, -70, -33, 95, 40, 6, 63, 52, -66, -20, -6, -104, 81,
+ 57, 22, 82, 119, 126, 76, -10, -108, -63, -123, 19, 23, -106, -1, 117, 26, 112, -85, -78, 81,
+ -116, 53, 86, -126, -80, 122, 36, 67, 18, 19, -114, 73, 125, -3, -69, 99, 10, -30, 19, 112,
+ -103, 0, -61, 47, -106, -45, -105, -107, -56, 23, 14, 51, -70, 30, -32, 30, 7, 22, -31, -41, 19,
+ -47, -64, -52, 119, -66, 54, -109, -87, 3, 95, -124, 94, -48, 36, -40, 13, 19, 91, -14, -115,
+ 103, 66, 20, 44, 47, 8, -40, 4, -114, -110, -47, -28, -108, 89, 0, -7, -71, -91, -43, 98, 8,
+ -85, -98, -113, 103, -71, 69, 14, -95, -36, 92, -17, -66, -95, 123, -15, 52, 88, -60, -23, 123,
+ -61, -4, -33, -45, 77, 57, -121, 119, 116, -40, -31, -15, 96, 54, -49, -44, 36, -37, 111, -45,
+ -17, 12, 14, 21, 105, 48, 51, 42, -89, 55, 61, -5, -2, -36, -88, 36, -35, -29, -7, -68, -28,
+ -76, 5, -38, -66, -72, 24, -120, 8, -86, -28, 0, 71, -89, 20, -40, -100, 61, -57, 52, 23, 66,
+ -2, -24, -7, 86, -100, 111, -114, -47, -25, -40, -61, -67, -104, 33, 49, 16, -115, 9, -64, 27,
+ 122, 34, -33, -89, -113, -50, 42, 111, -14, 110, 43, 32, -112, 101, -59, 28, 76, -2, -117, 47,
+ 5, -73, -75, 21, -91, 99, 81, 93, -17, -119, 68, -21, -84, -51, -64, -98, 58, -33, 77, 4, 18,
+ 116, 62, 111, -105, -13, 91, -92, 81, -34, 40, 17, -128, 85, -19, 20, 8, 92, 83, 10, 3, 40, 89,
+ 60, 109, -23, 59, -66, -22, 43, 124, 25, -105, 77, 14, 75, -111, 13, 45, -90, -108, -79, 78,
+ -45, -55, -44, -86, -20, -41, -11, 65, 76, -79, 91, -23, 77, -84, 114, -109, 2, -71, 68, 8, -31,
+ 99, 97, -104, -94, 69, 64, -16, -48, -78, 99, -58, -17, 95, 96, -64, 47, 96, -69, 60, 28, 114,
+ 64, -128, -128, 114, 28, -124, 72, -41, -48, 82, -6, 63, -27, -126, -86, -121, 0, 4, 4, 35,
+ -111, 66, 64, -61, 117, -92, 48, 88, -128, 116, 7, -24, -111, -55, 96, -59, -96, 49, -70, -41,
+ -47, 85, 86, -37, -32, 53, -49, 62, 68, 80, -37, 95, 29, -114, 11, -65, 90, -99, -97, 101, 96,
+ -88, 5, 34, 3, 23, -22, 42, 4, -4, 17, -121, 106, -60, 33, -38, -32, -8, 41, -87, -4, -35, -102,
+ 7, 18, 35, -7, 85, -18, 60, 15, 34, 82, 46, 68, 63, 80, -38, 4, 51, -74, -34, 83, -33, -8, 44,
+ 87, -18, -8, 46, -53, -109, -121, -114, 10, 63, -36, -1, -123, 69, 107, -58, 33, -11, 63, -117,
+ 60, 22, 73, -36, 22, -76, -92, -74, -37, -35, 87, 40, -97, -6, 95, -25, -2, -99, -101, 102, -48,
+ 45, -55, 85, 94, -48, 57, -100, 34, 16, -63, -16, 106, -75, -7, -109, 71, -74, 20, -16, 37, 90,
+ -61, 69, 19, -111, 95, -104, 116, 75, -68, 85, -80, 66, 127, 127, 67, -98, 121, 53, 23, -3, 56,
+ -89, 99, 57, 9, 122, 76, 119, 1, -117, 47, -105, -42, -7, 51, -8, -81, 48, -60, -69, -29, 24,
+ 19, -81, 43, 31, -36, 62, 96, 20, -58, 39, -122, -115, 7, -114, 118, 27, 27, 78, -101, 75, -93,
+ -104, -8, 119, 121, -97, -84, 58, 33, 18, -35, -29, 20, 20, 7, 112, 60, 31, -12, 7, -128, -55,
+ -68, -7, -12, -115, 97, 115, 44, -46, -68, 108, 36, 121, -1, 84, -4, -26, -126, 85, -32, 36, 26,
+ -19, 71, -121, -92, -51, -116, 81, -71, -83, -50, 21, -119, -60, -78, -84, 102, 19, -26, 118,
+ -53, -13, 16, 36, -64, -83, -66, 32, -99, 54, 83, 104, 61, -19, 107, 95, -66, -42, -6, 25, 86,
+ -13, -53, -49, -9, 74, -13, 58, 125, -96, -32, -22, -21, -12, -38, -114, -88, -100, 35, -87,
+ -108, -2, -103, 87, -119, -109, 50, -28, -101, -4, -43, 105, 119, -118, 103, -104, 41, 47, 71,
+ 53, 11, -53, 59, -13, -11, 83, -33, 28, 11, 78, -59, 73, -33, -60, 119, -73, -127, 98, 39, 77,
+ 21, -8, -103, 103, 44, -87, -52, -74, 56, -63, -70, -121, 40, 103, 7, -100, 113, 53, -46, 44,
+ 16, 31, 102, -31, 104, -38, -120, 118, -122, -55, 25, 1, 92, 22, -14, 24, 108, 92, -90, -93,
+ -16, -99, -13, -127, 75, 101, -42, -86, -29, -51, -49, -105, -118, 91, -56, -51, -73, 117, 53,
+ -39, -73, 121, 83, -49, -10, -86, 11, -97, 40, -33, 6, -40, -9, -32, 92, -101, -83, 116, -5,
+ -57, -93, -121, 2, 38, -65, -6, 45, 100, 92, 92, 74, 115, 45, -33, 92, -11, 70, 33, 76, 85, 94,
+ 1, -111, -103, 6, -4, -31, 44, -53, -77, -45, 100, -83, 92, -11, 10, -7, 126, 23, 36, 61, -18,
+ -28, 67, 126, 53, -45, -77, 95, 43, -73, 30, -37, 122, -53, -79, -77, -42, 71, -124, 43, -89,
+ 60, -80, -89, -68, 96, 29, 103, -50, -93, 105, 7
+ };
+
+ static class ProxyStream extends FilterInputStream {
+ int readBytes;
+
+ ProxyStream(InputStream is) {
+ super(is);
+ }
+
+ @Override
+ public int read(byte[] b, int off, int len) throws IOException {
+ int result = super.read(b, off, len);
+ if (result > 0) {
+ readBytes += result;
+ }
+ return result;
+ }
+ }
+
+ @Test
+ public void testEagerStream() throws IOException {
+ ProxyStream ps = new ProxyStream(new ByteArrayInputStream(DATA));
+ BrotliInputStream reader = new BrotliInputStream(ps, 1);
+ byte[] buffer = new byte[1];
+ reader.read(buffer);
+ reader.close();
+ int normalReadBytes = ps.readBytes;
+
+ ps = new ProxyStream(new ByteArrayInputStream(DATA));
+ reader = new BrotliInputStream(ps, 1);
+ reader.setEager(true);
+ reader.read(buffer);
+ reader.close();
+ int eagerReadBytes = ps.readBytes;
+
+ // Did not continue decoding - suspended as soon as enough data was decoded.
+ assertTrue(eagerReadBytes < normalReadBytes);
+ }
+}
diff --git a/java/org/brotli/dec/State.java b/java/org/brotli/dec/State.java
index 183df44..16d1072 100644
--- a/java/org/brotli/dec/State.java
+++ b/java/org/brotli/dec/State.java
@@ -63,16 +63,17 @@ final class State {
int distancePostfixBits;
int distance;
int copyLength;
- int copyDst;
int maxBackwardDistance;
int maxRingBufferSize;
int ringBufferSize;
+ int ringBufferFence;
int expectedTotalSize;
int outputOffset;
int outputLength;
int outputUsed;
- int bytesWritten;
- int bytesToWrite;
+ int ringBufferBytesWritten;
+ int ringBufferBytesReady;
+ int isEager;
InputStream input; // BitReader
diff --git a/java/org/brotli/wrapper/dec/BrotliInputStream.java b/java/org/brotli/wrapper/dec/BrotliInputStream.java
index d1aa76b..76bcf1d 100644
--- a/java/org/brotli/wrapper/dec/BrotliInputStream.java
+++ b/java/org/brotli/wrapper/dec/BrotliInputStream.java
@@ -34,6 +34,10 @@ public class BrotliInputStream extends InputStream {
this(source, DEFAULT_BUFFER_SIZE);
}
+ public void setEager(boolean eager) {
+ decoder.setEager(eager);
+ }
+
@Override
public void close() throws IOException {
decoder.close();
diff --git a/java/org/brotli/wrapper/dec/Decoder.java b/java/org/brotli/wrapper/dec/Decoder.java
index 95060ae..0326403 100644
--- a/java/org/brotli/wrapper/dec/Decoder.java
+++ b/java/org/brotli/wrapper/dec/Decoder.java
@@ -19,6 +19,7 @@ public class Decoder {
private final DecoderJNI.Wrapper decoder;
ByteBuffer buffer;
boolean closed;
+ boolean eager;
/**
* Creates a Decoder wrapper.
@@ -47,6 +48,10 @@ public class Decoder {
throw new IOException(message);
}
+ public void setEager(boolean eager) {
+ this.eager = eager;
+ }
+
/**
* Continue decoding.
*
@@ -71,6 +76,11 @@ public class Decoder {
break;
case NEEDS_MORE_INPUT:
+ // In "eager" mode, pulling preempts pushing.
+ if (eager && decoder.hasOutput()) {
+ buffer = decoder.pull();
+ break;
+ }
ByteBuffer inputBuffer = decoder.getInputBuffer();
inputBuffer.clear();
int bytesRead = source.read(inputBuffer);
diff --git a/java/org/brotli/wrapper/dec/DecoderJNI.java b/java/org/brotli/wrapper/dec/DecoderJNI.java
index 3a59053..320705c 100644
--- a/java/org/brotli/wrapper/dec/DecoderJNI.java
+++ b/java/org/brotli/wrapper/dec/DecoderJNI.java
@@ -12,13 +12,13 @@ import java.nio.ByteBuffer;
/**
* JNI wrapper for brotli decoder.
*/
-class DecoderJNI {
+public class DecoderJNI {
private static native ByteBuffer nativeCreate(long[] context);
private static native void nativePush(long[] context, int length);
private static native ByteBuffer nativePull(long[] context);
private static native void nativeDestroy(long[] context);
- enum Status {
+ public enum Status {
ERROR,
DONE,
NEEDS_MORE_INPUT,
@@ -26,12 +26,12 @@ class DecoderJNI {
OK
};
- static class Wrapper {
- private final long[] context = new long[2];
+ public static class Wrapper {
+ private final long[] context = new long[3];
private final ByteBuffer inputBuffer;
private Status lastStatus = Status.NEEDS_MORE_INPUT;
- Wrapper(int inputBufferSize) throws IOException {
+ public Wrapper(int inputBufferSize) throws IOException {
this.context[1] = inputBufferSize;
this.inputBuffer = nativeCreate(this.context);
if (this.context[0] == 0) {
@@ -39,7 +39,7 @@ class DecoderJNI {
}
}
- void push(int length) {
+ public void push(int length) {
if (length < 0) {
throw new IllegalArgumentException("negative block length");
}
@@ -71,19 +71,23 @@ class DecoderJNI {
}
}
- Status getStatus() {
+ public Status getStatus() {
return lastStatus;
}
- ByteBuffer getInputBuffer() {
+ public ByteBuffer getInputBuffer() {
return inputBuffer;
}
- ByteBuffer pull() {
+ public boolean hasOutput() {
+ return context[2] != 0;
+ }
+
+ public ByteBuffer pull() {
if (context[0] == 0) {
throw new IllegalStateException("brotli decoder is already destroyed");
}
- if (lastStatus != Status.NEEDS_MORE_OUTPUT) {
+ if (lastStatus != Status.NEEDS_MORE_OUTPUT && !hasOutput()) {
throw new IllegalStateException("pulling output from decoder in " + lastStatus + " state");
}
ByteBuffer result = nativePull(context);
@@ -94,7 +98,7 @@ class DecoderJNI {
/**
* Releases native resources.
*/
- void destroy() {
+ public void destroy() {
if (context[0] == 0) {
throw new IllegalStateException("brotli decoder is already destroyed");
}
diff --git a/java/org/brotli/wrapper/dec/EagerStreamTest.java b/java/org/brotli/wrapper/dec/EagerStreamTest.java
new file mode 100755
index 0000000..9166092
--- /dev/null
+++ b/java/org/brotli/wrapper/dec/EagerStreamTest.java
@@ -0,0 +1,75 @@
+/* Copyright 2017 Google Inc. All Rights Reserved.
+
+ Distributed under MIT license.
+ See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+package org.brotli.wrapper.dec;
+
+import static org.junit.Assert.assertEquals;
+
+import org.brotli.integration.BrotliJniTestBase;
+import java.io.IOException;
+import java.io.InputStream;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.JUnit4;
+
+/** Tests for {@link org.brotli.wrapper.dec.BrotliInputStream}. */
+@RunWith(JUnit4.class)
+public class EagerStreamTest extends BrotliJniTestBase {
+
+ @Test
+ public void testEagerReading() throws IOException {
+ final StringBuilder log = new StringBuilder();
+ final byte[] data = {0, 0, 16, 42, 3};
+ InputStream source = new InputStream() {
+ int index;
+
+ @Override
+ public int read() {
+ if (index < data.length) {
+ log.append("<").append(index);
+ return data[index++];
+ } else {
+ log.append("<#");
+ return -1;
+ }
+ }
+
+ @Override
+ public int read(byte[] b) throws IOException {
+ return read(b, 0, b.length);
+ }
+
+ @Override
+ public int read(byte[] b, int off, int len) throws IOException {
+ if (len < 1) {
+ return 0;
+ }
+ int d = read();
+ if (d == -1) {
+ return 0;
+ }
+ b[off] = (byte) d;
+ return 1;
+ }
+ };
+ BrotliInputStream reader = new BrotliInputStream(source);
+ reader.setEager(true);
+ int count = 0;
+ while (true) {
+ log.append("^").append(count);
+ int b = reader.read();
+ if (b == -1) {
+ log.append(">#");
+ break;
+ } else {
+ log.append(">").append(count++);
+ }
+ }
+ // Lazy log: ^0<0<1<2<3<4>0^1>#
+ assertEquals("^0<0<1<2<3>0^1<4>#", log.toString());
+ }
+
+}
diff --git a/java/org/brotli/wrapper/dec/decoder_jni.cc b/java/org/brotli/wrapper/dec/decoder_jni.cc
index 19c022b..268a10b 100644
--- a/java/org/brotli/wrapper/dec/decoder_jni.cc
+++ b/java/org/brotli/wrapper/dec/decoder_jni.cc
@@ -45,10 +45,11 @@ Java_org_brotli_wrapper_dec_DecoderJNI_nativeCreate(
JNIEnv* env, jobject /*jobj*/, jlongArray ctx) {
bool ok = true;
DecoderHandle* handle = nullptr;
- jlong context[2];
- env->GetLongArrayRegion(ctx, 0, 2, context);
+ jlong context[3];
+ env->GetLongArrayRegion(ctx, 0, 3, context);
size_t input_size = context[1];
context[0] = 0;
+ context[2] = 0;
handle = new (std::nothrow) DecoderHandle();
ok = !!handle;
@@ -79,7 +80,7 @@ Java_org_brotli_wrapper_dec_DecoderJNI_nativeCreate(
delete handle;
}
- env->SetLongArrayRegion(ctx, 0, 2, context);
+ env->SetLongArrayRegion(ctx, 0, 3, context);
if (!ok) {
return nullptr;
@@ -105,11 +106,12 @@ Java_org_brotli_wrapper_dec_DecoderJNI_nativeCreate(
JNIEXPORT void JNICALL
Java_org_brotli_wrapper_dec_DecoderJNI_nativePush(
JNIEnv* env, jobject /*jobj*/, jlongArray ctx, jint input_length) {
- jlong context[2];
- env->GetLongArrayRegion(ctx, 0, 2, context);
+ jlong context[3];
+ env->GetLongArrayRegion(ctx, 0, 3, context);
DecoderHandle* handle = getHandle(reinterpret_cast<void*>(context[0]));
context[1] = 0; /* ERROR */
- env->SetLongArrayRegion(ctx, 0, 2, context);
+ context[2] = 0;
+ env->SetLongArrayRegion(ctx, 0, 3, context);
if (input_length != 0) {
/* Still have unconsumed data. Workflow is broken. */
@@ -145,7 +147,8 @@ Java_org_brotli_wrapper_dec_DecoderJNI_nativePush(
context[1] = 0;
break;
}
- env->SetLongArrayRegion(ctx, 0, 2, context);
+ context[2] = BrotliDecoderHasMoreOutput(handle->state) ? 1 : 0;
+ env->SetLongArrayRegion(ctx, 0, 3, context);
}
/**
@@ -158,12 +161,13 @@ Java_org_brotli_wrapper_dec_DecoderJNI_nativePush(
JNIEXPORT jobject JNICALL
Java_org_brotli_wrapper_dec_DecoderJNI_nativePull(
JNIEnv* env, jobject /*jobj*/, jlongArray ctx) {
- jlong context[2];
- env->GetLongArrayRegion(ctx, 0, 2, context);
+ jlong context[3];
+ env->GetLongArrayRegion(ctx, 0, 3, context);
DecoderHandle* handle = getHandle(reinterpret_cast<void*>(context[0]));
size_t data_length = 0;
const uint8_t* data = BrotliDecoderTakeOutput(handle->state, &data_length);
- if (BrotliDecoderHasMoreOutput(handle->state)) {
+ bool hasMoreOutput = !!BrotliDecoderHasMoreOutput(handle->state);
+ if (hasMoreOutput) {
context[1] = 3;
} else if (BrotliDecoderIsFinished(handle->state)) {
/* Bytes after stream end are not allowed. */
@@ -172,7 +176,8 @@ Java_org_brotli_wrapper_dec_DecoderJNI_nativePull(
/* Can proceed, or more data is required? */
context[1] = (handle->input_offset == handle->input_length) ? 2 : 4;
}
- env->SetLongArrayRegion(ctx, 0, 2, context);
+ context[2] = hasMoreOutput ? 1 : 0;
+ env->SetLongArrayRegion(ctx, 0, 3, context);
return env->NewDirectByteBuffer(const_cast<uint8_t*>(data), data_length);
}
@@ -184,8 +189,8 @@ Java_org_brotli_wrapper_dec_DecoderJNI_nativePull(
JNIEXPORT void JNICALL
Java_org_brotli_wrapper_dec_DecoderJNI_nativeDestroy(
JNIEnv* env, jobject /*jobj*/, jlongArray ctx) {
- jlong context[2];
- env->GetLongArrayRegion(ctx, 0, 2, context);
+ jlong context[3];
+ env->GetLongArrayRegion(ctx, 0, 3, context);
DecoderHandle* handle = getHandle(reinterpret_cast<void*>(context[0]));
BrotliDecoderDestroyInstance(handle->state);
delete[] handle->input_start;
diff --git a/research/BUILD b/research/BUILD
index 6ff5ac2..211b3e7 100755
--- a/research/BUILD
+++ b/research/BUILD
@@ -14,6 +14,13 @@ cc_library(
)
cc_library(
+ name = "durchschlag",
+ srcs = ["durchschlag.cc"],
+ hdrs = ["durchschlag.h"],
+ deps = ["@divsufsort//:libdivsufsort"],
+)
+
+cc_library(
name = "sieve",
srcs = ["sieve.cc"],
hdrs = ["sieve.h"],
@@ -24,6 +31,7 @@ cc_binary(
srcs = ["dictionary_generator.cc"],
deps = [
":dm",
+ ":durchschlag",
":sieve",
],
)
diff --git a/research/BUILD.libdivsufsort b/research/BUILD.libdivsufsort
new file mode 100644
index 0000000..ce60e9c
--- /dev/null
+++ b/research/BUILD.libdivsufsort
@@ -0,0 +1,55 @@
+package(
+ default_visibility = ["//visibility:public"],
+)
+
+cc_library(
+ name = "libdivsufsort",
+ srcs = [
+ "lib/divsufsort.c",
+ "lib/sssort.c",
+ "lib/trsort.c",
+ "lib/utils.c",
+ ],
+ hdrs = [
+ "include/config.h",
+ "include/divsufsort.h",
+ "include/divsufsort_private.h",
+ ],
+ copts = [
+ "-DHAVE_CONFIG_H=1",
+ ],
+ includes = ["include"],
+)
+
+commom_awk_replaces = (
+ "gsub(/#cmakedefine/, \"#define\"); " +
+ "gsub(/@DIVSUFSORT_EXPORT@/, \"\"); " +
+ "gsub(/@DIVSUFSORT_IMPORT@/, \"\"); " +
+ "gsub(/@INLINE@/, \"inline\"); " +
+ "gsub(/@INCFILE@/, \"#include <inttypes.h>\"); " +
+ "gsub(/@SAUCHAR_TYPE@/, \"uint8_t\"); " +
+ "gsub(/@SAINT32_TYPE@/, \"int32_t\"); " +
+ "gsub(/@SAINT_PRId@/, \"PRId32\"); "
+)
+
+genrule(
+ name = "config_h",
+ srcs = ["include/config.h.cmake"],
+ outs = ["include/config.h"],
+ cmd = ("awk '{ " +
+ "gsub(/@HAVE_IO_H 1@/, \"HAVE_IO_H 0\"); " +
+ commom_awk_replaces +
+ "print; }' $(<) > $(@)"),
+)
+
+genrule(
+ name = "divsufsort_h",
+ srcs = ["include/divsufsort.h.cmake"],
+ outs = ["include/divsufsort.h"],
+ cmd = ("awk '{ " +
+ "gsub(/@W64BIT@/, \"\"); " +
+ "gsub(/@SAINDEX_TYPE@/, \"int32_t\"); " +
+ "gsub(/@SAINDEX_PRId@/, \"PRId32\"); " +
+ commom_awk_replaces +
+ "print; }' $(<) > $(@)"),
+)
diff --git a/research/deorummolae.cc b/research/deorummolae.cc
index c53a53c..d15b7ee 100644
--- a/research/deorummolae.cc
+++ b/research/deorummolae.cc
@@ -15,20 +15,31 @@
/* Non tunable definitions. */
#define CHUNK_MASK (CHUNK_SIZE - 1)
-#define COVERAGE_SIZE (1 << (LOG_MAX_FILES - 6))
+#define COVERAGE_SIZE (1 << (DM_LOG_MAX_FILES - 6))
/* File coverage: every bit set to 1 denotes a file covered by an isle. */
typedef std::array<uint64_t, COVERAGE_SIZE> Coverage;
-static int popcount(uint64_t u) { return __builtin_popcountll(u); }
+/* Symbol of text alphabet. */
+typedef int32_t TextChar;
+
+/* Pointer to position in text. */
+typedef uint32_t TextIdx;
+
+/* SAIS sarray_type; unfortunately, must be a signed type. */
+typedef int32_t TextSaIdx;
+
+static size_t popcount(uint64_t u) {
+ return static_cast<size_t>(__builtin_popcountll(u));
+}
/* Condense terminators and pad file entries. */
-static void rewriteText(std::vector<int>* text) {
- int terminator = text->back();
- int prev = terminator;
- size_t to = 0;
- for (size_t from = 0; from < text->size(); ++from) {
- int next = text->at(from);
+static void rewriteText(std::vector<TextChar>* text) {
+ TextChar terminator = text->back();
+ TextChar prev = terminator;
+ TextIdx to = 0;
+ for (TextIdx from = 0; from < text->size(); ++from) {
+ TextChar next = text->at(from);
if (next < 256 || prev < 256) {
text->at(to++) = next;
if (next >= 256) terminator = next;
@@ -41,11 +52,12 @@ static void rewriteText(std::vector<int>* text) {
}
/* Reenumerate terminators for smaller alphabet. */
-static void remapTerminators(std::vector<int>* text, int* next_terminator) {
- int prev = -1;
- int x = 256;
- for (size_t i = 0; i < text->size(); ++i) {
- int next = text->at(i);
+static void remapTerminators(std::vector<TextChar>* text,
+ TextChar* next_terminator) {
+ TextChar prev = -1;
+ TextChar x = 256;
+ for (TextIdx i = 0; i < text->size(); ++i) {
+ TextChar next = text->at(i);
if (next < 256) { // Char.
// Do nothing.
} else if (prev < 256) { // Terminator after char.
@@ -60,15 +72,15 @@ static void remapTerminators(std::vector<int>* text, int* next_terminator) {
}
/* Combine all file entries; create mapping position->file. */
-static void buildFullText(std::vector<std::vector<int>>* data,
- std::vector<int>* full_text, std::vector<size_t>* file_map,
- std::vector<size_t>* file_offset, int* next_terminator) {
+static void buildFullText(std::vector<std::vector<TextChar>>* data,
+ std::vector<TextChar>* full_text, std::vector<TextIdx>* file_map,
+ std::vector<TextIdx>* file_offset, TextChar* next_terminator) {
file_map->resize(0);
file_offset->resize(0);
full_text->resize(0);
- for (size_t i = 0; i < data->size(); ++i) {
+ for (TextIdx i = 0; i < data->size(); ++i) {
file_offset->push_back(full_text->size());
- std::vector<int>& file = data->at(i);
+ std::vector<TextChar>& file = data->at(i);
rewriteText(&file);
full_text->insert(full_text->end(), file.begin(), file.end());
file_map->insert(file_map->end(), file.size() / CHUNK_SIZE, i);
@@ -78,18 +90,19 @@ static void buildFullText(std::vector<std::vector<int>>* data,
/* Build longest-common-prefix based on suffix array and text.
TODO: borrowed -> unknown efficiency. */
-static void buildLcp(std::vector<int>* text, std::vector<int>* sa,
- std::vector<int>* lcp, std::vector<int>* invese_sa) {
- int size = static_cast<int>(text->size());
+static void buildLcp(std::vector<TextChar>* text, std::vector<TextIdx>* sa,
+ std::vector<TextIdx>* lcp, std::vector<TextIdx>* invese_sa) {
+ TextIdx size = static_cast<TextIdx>(text->size());
lcp->resize(size);
- int k = 0;
+ TextIdx k = 0;
lcp->at(size - 1) = 0;
- for (int i = 0; i < size; ++i) {
+ for (TextIdx i = 0; i < size; ++i) {
if (invese_sa->at(i) == size - 1) {
k = 0;
continue;
}
- int j = sa->at(invese_sa->at(i) + 1); // Suffix which follow i-th suffix.
+ // Suffix which follows the i-th suffix in the suffix array.
+ TextIdx j = sa->at(invese_sa->at(i) + 1);
while (i + k < size && j + k < size && text->at(i + k) == text->at(j + k)) {
++k;
}
@@ -102,21 +115,21 @@ static void buildLcp(std::vector<int>* text, std::vector<int>* sa,
When we raise the LCP requirement, the isle sinks and smaller isles appear
instead. */
typedef struct {
- int lcp;
- int l;
- int r;
+ TextIdx lcp;
+ TextIdx l;
+ TextIdx r;
Coverage coverage;
} Isle;
/* Helper routine for `cutMatch`. */
-static void poisonData(int pos, int length, std::vector<std::vector<int>>* data,
- std::vector<size_t>* file_map, std::vector<size_t>* file_offset,
- int* next_terminator) {
- size_t f = file_map->at(pos / CHUNK_SIZE);
+static void poisonData(TextIdx pos, TextIdx length,
+ std::vector<std::vector<TextChar>>* data, std::vector<TextIdx>* file_map,
+ std::vector<TextIdx>* file_offset, TextChar* next_terminator) {
+ TextIdx f = file_map->at(pos / CHUNK_SIZE);
pos -= file_offset->at(f);
- std::vector<int>& file = data->at(f);
- int l = (length == CUT_MATCH) ? CUT_MATCH : 1;
- for (int j = 0; j < l; j++, pos++) {
+ std::vector<TextChar>& file = data->at(f);
+ TextIdx l = (length == CUT_MATCH) ? CUT_MATCH : 1;
+ for (TextIdx j = 0; j < l; j++, pos++) {
if (file[pos] >= 256) continue;
if (file[pos + 1] >= 256) {
file[pos] = file[pos + 1];
@@ -131,12 +144,12 @@ static void poisonData(int pos, int length, std::vector<std::vector<int>>* data,
/* Remove substrings of a given match from files.
Substrings are replaced with unique terminators, so the SA built on the next
iteration cannot produce matches that cross the removed areas. */
-static void cutMatch(std::vector<std::vector<int>>* data, int index, int length,
- std::vector<int>* sa, std::vector<int>* lcp, std::vector<int>* invese_sa,
- int* next_terminator, std::vector<size_t>* file_map,
- std::vector<size_t>* file_offset) {
+static void cutMatch(std::vector<std::vector<TextChar>>* data, TextIdx index,
+ TextIdx length, std::vector<TextIdx>* sa, std::vector<TextIdx>* lcp,
+ std::vector<TextIdx>* invese_sa, TextChar* next_terminator,
+ std::vector<TextIdx>* file_map, std::vector<TextIdx>* file_offset) {
while (length >= CUT_MATCH) {
- int i = index;
+ TextIdx i = index;
while (lcp->at(i) >= length) {
i++;
poisonData(
@@ -156,54 +169,70 @@ static void cutMatch(std::vector<std::vector<int>>* data, int index, int length,
std::string DM_generate(size_t dictionary_size_limit,
const std::vector<size_t>& sample_sizes, const uint8_t* sample_data) {
{
- uint64_t tmp = 0;
- if (popcount(tmp - 1u) != 64) {
- fprintf(stderr, "64-bit platform is required\n");
- return 0;
+ TextIdx tmp = static_cast<TextIdx>(dictionary_size_limit);
+ if ((tmp != dictionary_size_limit) || (tmp > 1u << 30)) {
+ fprintf(stderr, "dictionary_size_limit is too large\n");
+ return "";
}
}
/* Could use 256 + '0' for easier debugging. */
- int next_terminator = 256;
+ TextChar next_terminator = 256;
std::string output;
- std::vector<std::vector<int>> data;
+ std::vector<std::vector<TextChar>> data;
- size_t offset = 0;
+ TextIdx offset = 0;
size_t num_samples = sample_sizes.size();
- if (num_samples > MAX_FILES) num_samples = MAX_FILES;
+ if (num_samples > DM_MAX_FILES) num_samples = DM_MAX_FILES;
for (size_t n = 0; n < num_samples; ++n) {
- size_t next_offset = offset + sample_sizes[n];
+ TextIdx delta = static_cast<TextIdx>(sample_sizes[n]);
+ if (delta != sample_sizes[n]) {
+ fprintf(stderr, "sample is too large\n");
+ return "";
+ }
+ if (delta == 0) {
+ fprintf(stderr, "0-length samples are prohibited\n");
+ return "";
+ }
+ TextIdx next_offset = offset + delta;
+ if (next_offset <= offset) {
+ fprintf(stderr, "corpus is too large\n");
+ return "";
+ }
data.push_back(
- std::vector<int>(sample_data + offset, sample_data + next_offset));
+ std::vector<TextChar>(sample_data + offset, sample_data + next_offset));
offset = next_offset;
data.back().push_back(next_terminator++);
}
/* Most arrays are allocated once, and then just resized to smaller and
smaller sizes. */
- std::vector<int> full_text;
- std::vector<size_t> file_map;
- std::vector<size_t> file_offset;
- std::vector<int> sa;
- std::vector<int> invese_sa;
- std::vector<int> lcp;
+ std::vector<TextChar> full_text;
+ std::vector<TextIdx> file_map;
+ std::vector<TextIdx> file_offset;
+ std::vector<TextIdx> sa;
+ std::vector<TextIdx> invese_sa;
+ std::vector<TextIdx> lcp;
std::vector<Isle> isles;
std::vector<char> output_data;
- size_t total = 0;
- size_t total_cost = 0;
- size_t best_cost;
+ TextIdx total = 0;
+ TextIdx total_cost = 0;
+ TextIdx best_cost;
Isle best_isle;
- int min_count = num_samples;
+ size_t min_count = num_samples;
while (true) {
- size_t max_match = dictionary_size_limit - total;
+ TextIdx max_match = static_cast<TextIdx>(dictionary_size_limit) - total;
buildFullText(&data, &full_text, &file_map, &file_offset, &next_terminator);
sa.resize(full_text.size());
- saisxx(full_text.data(), sa.data(), static_cast<int>(full_text.size()),
- next_terminator);
+ /* Hopefully, a non-negative TextSaIdx is the same as its TextIdx counterpart. */
+ saisxx(full_text.data(), reinterpret_cast<TextSaIdx*>(sa.data()),
+ static_cast<TextChar>(full_text.size()), next_terminator);
invese_sa.resize(full_text.size());
- for (int i = 0; i < full_text.size(); ++i) invese_sa[sa[i]] = i;
+ for (TextIdx i = 0; i < full_text.size(); ++i) {
+ invese_sa[sa[i]] = i;
+ }
buildLcp(&full_text, &sa, &lcp, &invese_sa);
/* Do not rebuild SA/LCP, just use different selection. */
@@ -213,22 +242,22 @@ std::string DM_generate(size_t dictionary_size_limit,
isles.resize(0);
isles.push_back(best_isle);
- for (int i = 0; i < static_cast<int>(lcp.size()); ++i) {
- int l = i;
+ for (TextIdx i = 0; i < lcp.size(); ++i) {
+ TextIdx l = i;
Coverage cov = {{0}};
- int f = file_map[sa[i] / CHUNK_SIZE];
- cov[f >> 6] = ((uint64_t)1) << (f & 63);
+ size_t f = file_map[sa[i] / CHUNK_SIZE];
+ cov[f >> 6] = (static_cast<uint64_t>(1)) << (f & 63);
while (lcp[i] < isles.back().lcp) {
Isle& top = isles.back();
top.r = i;
l = top.l;
for (size_t x = 0; x < cov.size(); ++x) cov[x] |= top.coverage[x];
- int count = 0;
+ size_t count = 0;
for (size_t x = 0; x < cov.size(); ++x) count += popcount(cov[x]);
- int effective_lcp = top.lcp;
+ TextIdx effective_lcp = top.lcp;
/* Restrict (last) dictionary entry length. */
if (effective_lcp > max_match) effective_lcp = max_match;
- int cost = count * effective_lcp;
+ TextIdx cost = count * effective_lcp;
if (cost > best_cost && count >= min_count &&
effective_lcp >= MIN_MATCH) {
best_cost = cost;
@@ -251,14 +280,14 @@ std::string DM_generate(size_t dictionary_size_limit,
if (best_cost == 0 || best_isle.lcp < MIN_MATCH) {
if (min_count >= 8) {
min_count = (min_count * 7) / 8;
- fprintf(stderr, "Retry: min_count=%d\n", min_count);
+ fprintf(stderr, "Retry: min_count=%zu\n", min_count);
goto retry;
}
break;
}
/* Save the entry. */
- fprintf(stderr, "Savings: %zu+%zu, dictionary: %zu+%d\n",
+ fprintf(stderr, "Savings: %d+%d, dictionary: %d+%d\n",
total_cost, best_cost, total, best_isle.lcp);
int* piece = &full_text[sa[best_isle.l]];
output.insert(output.end(), piece, piece + best_isle.lcp);
diff --git a/research/deorummolae.h b/research/deorummolae.h
index 7f24add..5815097 100644
--- a/research/deorummolae.h
+++ b/research/deorummolae.h
@@ -1,17 +1,16 @@
#ifndef BROTLI_RESEARCH_DEORUMMOLAE_H_
#define BROTLI_RESEARCH_DEORUMMOLAE_H_
-#include <stddef.h>
-#include <stdint.h>
-
+#include <cstddef>
+#include <cstdint>
#include <string>
#include <vector>
/* log2(maximal number of files). Value 6 provides some speedups. */
-#define LOG_MAX_FILES 6
+#define DM_LOG_MAX_FILES 6
/* Non tunable definitions. */
-#define MAX_FILES (1 << LOG_MAX_FILES)
+#define DM_MAX_FILES (1 << DM_LOG_MAX_FILES)
/**
* Generate a dictionary for given samples.
diff --git a/research/dictionary_generator.cc b/research/dictionary_generator.cc
index b3ee89c..00cfaba 100755
--- a/research/dictionary_generator.cc
+++ b/research/dictionary_generator.cc
@@ -1,15 +1,20 @@
+#include <cstddef>
#include <cstdio>
#include <cstring>
#include <fstream>
#include <vector>
#include "./deorummolae.h"
+#include "./durchschlag.h"
#include "./sieve.h"
#define METHOD_DM 0
#define METHOD_SIEVE 1
+#define METHOD_DURCHSCHLAG 2
+#define METHOD_DISTILL 3
+#define METHOD_PURIFY 4
-size_t readInt(const char* str) {
+static size_t readInt(const char* str) {
size_t result = 0;
if (str[0] == 0 || str[0] == '0') {
return 0;
@@ -51,10 +56,25 @@ static std::string readFile(const std::string& path) {
static void writeFile(const char* file, const std::string& content) {
std::ofstream outfile(file, std::ofstream::binary);
- outfile.write(content.c_str(), content.size());
+ outfile.write(content.c_str(), static_cast<std::streamsize>(content.size()));
outfile.close();
}
+/* Writes the (possibly rewritten) samples back to the files they were read
+   from. `pathArgs[n]` is the index in `argv` of the n-th sample path and
+   `sizes[n]` its current length; samples are stored back-to-back in `data`. */
+static void writeSamples(char const* argv[], const std::vector<int>& pathArgs,
+    const std::vector<size_t>& sizes, const uint8_t* data) {
+  const char* cursor = reinterpret_cast<const char*>(data);
+  for (size_t n = 0; n != pathArgs.size(); ++n) {
+    const size_t length = sizes[n];
+    std::ofstream sink(argv[pathArgs[n]], std::ofstream::binary);
+    sink.write(cursor, static_cast<std::streamsize>(length));
+    sink.close();
+    /* Advance to the first byte of the next sample. */
+    cursor += length;
+  }
+}
+
+
/* Returns "base file name" or its tail, if it contains '/' or '\'. */
static const char* fileName(const char* path) {
const char* separator_position = strrchr(path, '/');
@@ -68,21 +88,32 @@ static void printHelp(const char* name) {
fprintf(stderr, "Usage: %s [OPTION]... DICTIONARY [SAMPLE]...\n", name);
fprintf(stderr,
"Options:\n"
- " --dm use 'deorummolae' engine\n"
- " --sieve use 'sieve' engine (default)\n"
- " -t# set target dictionary size (limit); default: 16K\n"
- " -s# set slize length for 'sieve'; default: 33\n"
- "# is a decimal number with optional k/K/m/M suffix.\n\n");
+ " --dm use 'deorummolae' engine\n"
+ " --distill rewrite samples; unique text parts are removed\n"
+ " --dsh use 'durchschlag' engine (default)\n"
+ " --purify rewrite samples; unique text parts are zeroed out\n"
+ " --sieve use 'sieve' engine\n"
+ " -b# set block length for 'durchschlag'; default: 1024\n"
+ " -s# set slice length for 'distill', 'durchschlag', 'purify'\n"
+ " and 'sieve'; default: 16\n"
+ " -t# set target dictionary size (limit); default: 16K\n"
+ " -u# set minimum slice population (for rewrites); default: 2\n"
+ "# is a decimal number with optional k/K/m/M suffix.\n"
+ "WARNING: 'distill' and 'purify' will overwrite original samples!\n"
+ " Completely unique samples might become empty files.\n\n");
}
int main(int argc, char const* argv[]) {
int dictionaryArg = -1;
- int method = METHOD_SIEVE;
- int sieveSliceLen = 33;
- int targetSize = 16 << 10;
+ int method = METHOD_DURCHSCHLAG;
+ size_t sliceLen = 16;
+ size_t targetSize = 16 << 10;
+ size_t blockSize = 1024;
+ size_t minimumPopulation = 2;
std::vector<uint8_t> data;
std::vector<size_t> sizes;
+ std::vector<int> pathArgs;
size_t total = 0;
for (int i = 1; i < argc; ++i) {
if (argv[i] == nullptr) {
@@ -90,6 +121,12 @@ int main(int argc, char const* argv[]) {
}
if (argv[i][0] == '-') {
if (argv[i][1] == '-') {
+ if (dictionaryArg != -1) {
+ fprintf(stderr,
+ "Method should be specified before dictionary / sample '%s'\n",
+ argv[i]);
+ exit(1);
+ }
if (std::strcmp("--sieve", argv[i]) == 0) {
method = METHOD_SIEVE;
continue;
@@ -98,13 +135,32 @@ int main(int argc, char const* argv[]) {
method = METHOD_DM;
continue;
}
+ if (std::strcmp("--dsh", argv[i]) == 0) {
+ method = METHOD_DURCHSCHLAG;
+ continue;
+ }
+ if (std::strcmp("--distill", argv[i]) == 0) {
+ method = METHOD_DISTILL;
+ continue;
+ }
+ if (std::strcmp("--purify", argv[i]) == 0) {
+ method = METHOD_PURIFY;
+ continue;
+ }
printHelp(fileName(argv[0]));
fprintf(stderr, "Invalid option '%s'\n", argv[i]);
exit(1);
}
- if (argv[i][1] == 's') {
- sieveSliceLen = readInt(&argv[i][2]);
- if (sieveSliceLen < 4 || sieveSliceLen > 256) {
+ if (argv[i][1] == 'b') {
+ blockSize = readInt(&argv[i][2]);
+ if (blockSize < 16 || blockSize > 65536) {
+ printHelp(fileName(argv[0]));
+ fprintf(stderr, "Invalid option '%s'\n", argv[i]);
+ exit(1);
+ }
+ } else if (argv[i][1] == 's') {
+ sliceLen = readInt(&argv[i][2]);
+ if (sliceLen < 4 || sliceLen > 256) {
printHelp(fileName(argv[0]));
fprintf(stderr, "Invalid option '%s'\n", argv[i]);
exit(1);
@@ -116,6 +172,13 @@ int main(int argc, char const* argv[]) {
fprintf(stderr, "Invalid option '%s'\n", argv[i]);
exit(1);
}
+ } else if (argv[i][1] == 'u') {
+ minimumPopulation = readInt(&argv[i][2]);
+ if (minimumPopulation < 256 || minimumPopulation > 65536) {
+ printHelp(fileName(argv[0]));
+ fprintf(stderr, "Invalid option '%s'\n", argv[i]);
+ exit(1);
+ }
} else {
printHelp(fileName(argv[0]));
fprintf(stderr, "Unrecognized option '%s'\n", argv[i]);
@@ -124,26 +187,42 @@ int main(int argc, char const* argv[]) {
continue;
}
if (dictionaryArg == -1) {
- dictionaryArg = i;
- continue;
+ if (method != METHOD_DISTILL && method != METHOD_PURIFY) {
+ dictionaryArg = i;
+ continue;
+ }
}
std::string content = readFile(argv[i]);
data.insert(data.end(), content.begin(), content.end());
total += content.size();
+ pathArgs.push_back(i);
sizes.push_back(content.size());
}
- if (dictionaryArg == -1 || total == 0) {
+ bool wantDictionary = (dictionaryArg == -1);
+ if (method == METHOD_DISTILL || method == METHOD_PURIFY) {
+ wantDictionary = false;
+ }
+ if (wantDictionary || total == 0) {
printHelp(fileName(argv[0]));
fprintf(stderr, "Not enough arguments\n");
exit(1);
}
if (method == METHOD_SIEVE) {
- writeFile(argv[dictionaryArg],
- sieve_generate(targetSize, sieveSliceLen, sizes, data.data()));
+ writeFile(argv[dictionaryArg], sieve_generate(
+ targetSize, sliceLen, sizes, data.data()));
} else if (method == METHOD_DM) {
- writeFile(argv[dictionaryArg],
- DM_generate(targetSize, sizes, data.data()));
+ writeFile(argv[dictionaryArg], DM_generate(
+ targetSize, sizes, data.data()));
+ } else if (method == METHOD_DURCHSCHLAG) {
+ writeFile(argv[dictionaryArg], durchschlag_generate(
+ targetSize, sliceLen, blockSize, sizes, data.data()));
+ } else if (method == METHOD_DISTILL) {
+ durchschlag_distill(sliceLen, minimumPopulation, &sizes, data.data());
+ writeSamples(argv, pathArgs, sizes, data.data());
+ } else if (method == METHOD_PURIFY) {
+ durchschlag_purify(sliceLen, minimumPopulation, sizes, data.data());
+ writeSamples(argv, pathArgs, sizes, data.data());
} else {
printHelp(fileName(argv[0]));
fprintf(stderr, "Unknown generator\n");
diff --git a/research/draw_diff.cc b/research/draw_diff.cc
index 01b6716..6541dac 100644
--- a/research/draw_diff.cc
+++ b/research/draw_diff.cc
@@ -20,18 +20,23 @@
#define CHECK(X) if (!(X)) exit(EXIT_FAILURE);
#endif
-void ReadPGM(FILE* f, uint8_t*** image, size_t* height, size_t* width) {
+typedef uint8_t* ScanLine;
+typedef ScanLine* Image;
+
+void ReadPGM(FILE* f, Image* image, size_t* height, size_t* width) {
int colors;
CHECK(fscanf(f, "P5\n%lu %lu\n%d\n", width, height, &colors) == 3);
assert(colors == 255);
- *image = new uint8_t*[*height];
+ ScanLine* lines = new ScanLine[*height];
+ *image = lines;
for (int i = *height - 1; i >= 0; --i) {
- (*image)[i] = new uint8_t[*width];
- CHECK(fread((*image)[i], 1, *width, f) == *width);
+ ScanLine line = new uint8_t[*width];
+ lines[i] = line;
+ CHECK(fread(line, 1, *width, f) == *width);
}
}
-void CalculateDiff(int** diff, uint8_t** image1, uint8_t** image2,
+void CalculateDiff(int** diff, Image image1, Image image2,
size_t height, size_t width) {
for (size_t i = 0; i < height; ++i) {
for (size_t j = 0; j < width; ++j) {
@@ -40,7 +45,7 @@ void CalculateDiff(int** diff, uint8_t** image1, uint8_t** image2,
}
}
-void DrawDiff(int** diff, uint8_t** image1, uint8_t** image2,
+void DrawDiff(int** diff, Image image1, Image image2,
size_t height, size_t width, FILE* f) {
int max = -1234;
int min = +1234;
@@ -78,13 +83,13 @@ void DrawDiff(int** diff, uint8_t** image1, uint8_t** image2,
delete[] row;
}
-int main(int argc, char* argv[]) {
+int main(int argc, char** argv) {
if (argc != 4) {
printf("usage: %s pgm1 pgm2 diff_ppm_path\n", argv[0]);
return 1;
}
- uint8_t **image1, **image2;
+ Image image1, image2;
size_t h1, w1, h2, w2;
FILE* fimage1 = fopen(argv[1], "rb");
diff --git a/research/durchschlag.cc b/research/durchschlag.cc
new file mode 100755
index 0000000..cc4ed68
--- /dev/null
+++ b/research/durchschlag.cc
@@ -0,0 +1,714 @@
+#include "./durchschlag.h"
+
+#include <algorithm>
+#include <exception> /* terminate */
+
+#include "divsufsort.h"
+
+/* Pointer to position in text. */
+typedef DurchschlagTextIdx TextIdx;
+
+/* (Sum of) value(s) of slice(s). */
+typedef uint32_t Score;
+
+/* Node of a hash-bucket chain used to deduplicate slices. */
+struct HashSlot {
+  TextIdx next;    /* Id of the next slot in the same chain; 0 ends the chain. */
+  TextIdx offset;  /* Text position of the slice this slot stands for. */
+};
+
+/* Per-unique-slice bookkeeping used while scoring and selecting blocks. */
+struct MetaSlot {
+  TextIdx mark;  /* Scratch stamp / counter; its meaning depends on the pass. */
+  Score score;   /* Current value of the slice. */
+};
+
+/* Half-open interval [start, end) of text positions. */
+struct Range {
+  TextIdx start;
+  TextIdx end;
+};
+
+/* A block start position together with the score computed for it. */
+struct Candidate {
+  Score score;
+  TextIdx position;
+};
+
+/* Orders candidates by descending score; equal scores are ordered by
+   ascending position. */
+struct greaterScore {
+  bool operator()(const Candidate& a, const Candidate& b) const {
+    if (a.score != b.score) {
+      return a.score > b.score;
+    }
+    return a.position < b.position;
+  }
+};
+
+/* Orders candidates by ascending score; equal scores are ordered by
+   descending position (the exact mirror of greaterScore). */
+struct lessScore {
+  bool operator()(const Candidate& a, const Candidate& b) const {
+    if (a.score != b.score) {
+      return a.score < b.score;
+    }
+    return a.position > b.position;
+  }
+};
+
+#define CANDIDATE_BUNDLE_SIZE (1 << 18)
+
+static void fatal(const char* error) {
+ fprintf(stderr, "%s\n", error);
+ std::terminate();
+}
+
+/* Returns the total number of bytes covered by `ranges`, i.e. the sum of
+   (end - start) over all entries. */
+static TextIdx calculateDictionarySize(const std::vector<Range>& ranges) {
+  TextIdx total = 0;
+  for (const Range& range : ranges) {
+    total += range.end - range.start;
+  }
+  return total;
+}
+
+/* Concatenates the bytes of `data` selected by `ranges` (in list order) and
+   truncates the result to at most `limit` bytes. */
+static std::string createDictionary(
+    const uint8_t* data, const std::vector<Range>& ranges, size_t limit) {
+  std::string dictionary;
+  dictionary.reserve(calculateDictionarySize(ranges));
+  const char* text = reinterpret_cast<const char*>(data);
+  for (const Range& range : ranges) {
+    dictionary.append(text + range.start, text + range.end);
+  }
+  if (limit < dictionary.size()) {
+    dictionary.resize(limit);
+  }
+  return dictionary;
+}
+
+/* Scores every block start position in [0, end) with a sliding window over
+ `shortcut` (slice id per text position) and keeps the best-scoring positions
+ in `candidates`.
+ A slice adds its MetaSlot::score to the window score once, no matter how many
+ times it occurs inside the window; MetaSlot::mark serves as the in-window
+ occurrence counter and is reset to 0 on entry and on exit.
+ Roughly CANDIDATE_BUNDLE_SIZE entries are retained; entries tied with the
+ lowest admitted score are only dropped in bulk, so the list may be longer.
+ On return `candidates` is a heap ordered by lessScore (best candidate at the
+ front). Returns the lowest score that was still admitted (`minScore`).
+ NOTE(review): the score recorded for position i covers shortcut[i .. i + span]
+ inclusive, so the last iteration reads shortcut[end - 1 + span]; confirm that
+ the slice map supplied by the callers has that many entries. */
+static Score buildCandidatesList(std::vector<Candidate>* candidates,
+ std::vector<MetaSlot>* map, TextIdx span, const TextIdx* shortcut,
+ TextIdx end) {
+ candidates->resize(0);
+
+ /* Clear the occurrence counters. */
+ size_t n = map->size();
+ MetaSlot* slots = map->data();
+ for (size_t j = 0; j < n; ++j) {
+ slots[j].mark = 0;
+ }
+
+ /* Prime the window with the first `span` slices. */
+ Score score = 0;
+ for (size_t j = 0; j < span; ++j) {
+ MetaSlot& item = slots[shortcut[j]];
+ if (item.mark == 0) {
+ score += item.score;
+ }
+ item.mark++;
+ }
+
+ /* Phase 1: the first `limit` positions are admitted unconditionally. */
+ TextIdx i = 0;
+ TextIdx limit = std::min<TextIdx>(end, CANDIDATE_BUNDLE_SIZE);
+ Score maxScore = 0;
+ for (; i < limit; ++i) {
+ /* Slice entering the window. */
+ MetaSlot& pick = slots[shortcut[i + span]];
+ if (pick.mark == 0) {
+ score += pick.score;
+ }
+ pick.mark++;
+
+ if (score > maxScore) {
+ maxScore = score;
+ }
+ candidates->push_back({score, i});
+
+ /* Slice leaving the window. */
+ MetaSlot& drop = slots[shortcut[i]];
+ drop.mark--;
+ if (drop.mark == 0) {
+ score -= drop.score;
+ }
+ }
+
+ /* Phase 2: heap ordered by greaterScore keeps the lowest score at the front,
+ which makes it the admission threshold for the remaining positions. */
+ std::make_heap(candidates->begin(), candidates->end(), greaterScore());
+ Score minScore = candidates->at(0).score;
+ for (; i < end; ++i) {
+ MetaSlot& pick = slots[shortcut[i + span]];
+ if (pick.mark == 0) {
+ score += pick.score;
+ }
+ pick.mark++;
+
+ if (score > maxScore) {
+ maxScore = score;
+ }
+ if (score >= minScore) {
+ candidates->push_back({score, i});
+ std::push_heap(candidates->begin(), candidates->end(), greaterScore());
+ /* Over budget: evict every entry tied at the current minimum, unless all
+ entries share the same score (then nothing could remain). */
+ if (candidates->size() > CANDIDATE_BUNDLE_SIZE && maxScore != minScore) {
+ while (candidates->at(0).score == minScore) {
+ std::pop_heap(candidates->begin(), candidates->end(), greaterScore());
+ candidates->pop_back();
+ }
+ minScore = candidates->at(0).score;
+ }
+ }
+
+ MetaSlot& drop = slots[shortcut[i]];
+ drop.mark--;
+ if (drop.mark == 0) {
+ score -= drop.score;
+ }
+ }
+
+ /* Leave the counters clean for the caller. */
+ for (size_t j = 0; j < n; ++j) {
+ slots[j].mark = 0;
+ }
+
+ /* Re-heapify so that the best candidate is at the front. */
+ std::make_heap(candidates->begin(), candidates->end(), lessScore());
+ return minScore;
+}
+
+/* Scores every block start position in [0, end) with a sliding window over
+ `shortcut` and files the positions into per-score singly linked lists:
+ candidates->at(s) holds the most recently filed position with score s and
+ next[p] holds the position filed before p in the same list.
+ A slice adds its MetaSlot::score to the window score once per window;
+ MetaSlot::mark is the in-window occurrence counter and is reset to 0 on entry
+ and on exit. `next` must have room for `end` entries.
+ Returns the highest score seen; `candidates` is resized to maxScore + 1.
+ NOTE(review): 0 is also the cleared value of every list head, so position 0
+ cannot be told apart from an empty list; presumably the consumer treats 0 as
+ "no candidate" - confirm.
+ NOTE(review): the score recorded for position i covers shortcut[i .. i + span]
+ inclusive, so the last iteration reads shortcut[end - 1 + span]; confirm that
+ the slice map supplied by the caller has that many entries. */
+static Score rebuildCandidatesList(std::vector<TextIdx>* candidates,
+ std::vector<MetaSlot>* map, TextIdx span, const TextIdx* shortcut,
+ TextIdx end, TextIdx* next) {
+ /* Empty all existing lists. */
+ size_t n = candidates->size();
+ TextIdx* data = candidates->data();
+ for (size_t i = 0; i < n; ++i) {
+ data[i] = 0;
+ }
+
+ /* Clear the occurrence counters. */
+ n = map->size();
+ MetaSlot* slots = map->data();
+ for (size_t i = 0; i < n; ++i) {
+ slots[i].mark = 0;
+ }
+
+ /* Prime the window with the first `span` slices. */
+ Score score = 0;
+ for (TextIdx i = 0; i < span; ++i) {
+ MetaSlot& item = slots[shortcut[i]];
+ if (item.mark == 0) {
+ score += item.score;
+ }
+ item.mark++;
+ }
+
+ Score maxScore = 0;
+ for (TextIdx i = 0; i < end; ++i) {
+ /* Slice entering the window. */
+ MetaSlot& pick = slots[shortcut[i + span]];
+ if (pick.mark == 0) {
+ score += pick.score;
+ }
+ pick.mark++;
+
+ /* Grow the table of list heads on demand; new heads are 0 (empty). */
+ if (candidates->size() <= score) {
+ candidates->resize(score + 1);
+ }
+ if (score > maxScore) {
+ maxScore = score;
+ }
+ /* Push position i onto the front of the list for `score`. */
+ next[i] = candidates->at(score);
+ candidates->at(score) = i;
+
+ /* Slice leaving the window. */
+ MetaSlot& drop = slots[shortcut[i]];
+ drop.mark--;
+ if (drop.mark == 0) {
+ score -= drop.score;
+ }
+ }
+
+ /* Leave the counters clean for the caller. */
+ for (size_t i = 0; i < n; ++i) {
+ slots[i].mark = 0;
+ }
+
+ /* Drop list heads above the best score (left over from a previous build). */
+ candidates->resize(maxScore + 1);
+ return maxScore;
+}
+
+/* Adds [start, end) to `ranges`, which is kept sorted by position. Every
+   existing range that overlaps or touches the new one is merged into it, so
+   the list stays free of overlapping entries. */
+static void addRange(std::vector<Range>* ranges, TextIdx start, TextIdx end) {
+  auto cursor = ranges->begin();
+  while (cursor != ranges->end()) {
+    if (end < cursor->start) {
+      /* Strictly before the current range: this is the insertion point. */
+      ranges->insert(cursor, {start, end});
+      return;
+    } else if (cursor->end < start) {
+      /* Strictly after the current range: keep looking. */
+      ++cursor;
+    } else {
+      /* Overlap or adjacency: absorb the existing range and drop it. */
+      start = std::min(start, cursor->start);
+      end = std::max(end, cursor->end);
+      cursor = ranges->erase(cursor);
+    }
+  }
+  ranges->push_back({start, end});
+}
+
+/* Convenience entry point: prepares a context for `slice_len`-byte slices of
+   the given samples and then generates a dictionary from it using the
+   DURCHSCHLAG_COLLABORATIVE strategy. */
+std::string durchschlag_generate(
+    size_t dictionary_size_limit, size_t slice_len, size_t block_len,
+    const std::vector<size_t>& sample_sizes, const uint8_t* sample_data) {
+  DurchschlagContext prepared =
+      durchschlag_prepare(slice_len, sample_sizes, sample_data);
+  return durchschlag_generate(
+      DURCHSCHLAG_COLLABORATIVE, dictionary_size_limit, block_len, prepared,
+      sample_data);
+}
+
+/* Builds the slice map for a corpus directly from its bytes.
+ `sample_sizes` are the lengths of the samples stored back-to-back in
+ `sample_data`; every text position that can start a `slice_len`-byte slice is
+ assigned an id such that equal slices get equal ids. Ids are handed out
+ starting from 1 in order of first occurrence; id 0 is reserved.
+ Invalid input (zero or oversized slice length, empty or oversized samples,
+ corpus shorter than a slice) ends the program through fatal().
+ The returned aggregate is filled with {total size, slice length, number of
+ hash slots (unique slices + 1), cumulative sample end offsets, slice map};
+ presumably these are the dataSize / sliceLen / numUniqueSlices / offsets /
+ sliceMap members of DurchschlagContext - confirm against the header. */
+DurchschlagContext durchschlag_prepare(size_t slice_len,
+ const std::vector<size_t>& sample_sizes, const uint8_t* sample_data) {
+ /* Parameters aliasing */
+ TextIdx sliceLen = static_cast<TextIdx>(slice_len);
+ if (sliceLen != slice_len) fatal("slice_len is too large");
+ if (sliceLen < 1) fatal("slice_len is too small");
+ const uint8_t* data = sample_data;
+
+ /* offsets[i] is the position just past the end of sample i. */
+ TextIdx total = 0;
+ std::vector<TextIdx> offsets;
+ offsets.reserve(sample_sizes.size());
+ for (size_t i = 0; i < sample_sizes.size(); ++i) {
+ TextIdx delta = static_cast<TextIdx>(sample_sizes[i]);
+ if (delta != sample_sizes[i]) fatal("sample is too large");
+ if (delta == 0) fatal("0-length samples are prohibited");
+ TextIdx next_total = total + delta;
+ /* Unsigned wrap-around means the running total no longer fits TextIdx. */
+ if (next_total <= total) fatal("corpus is too large");
+ total = next_total;
+ offsets.push_back(total);
+ }
+
+ if (total < sliceLen) fatal("slice_len is larger than corpus size");
+ /* Number of positions at which a whole slice fits. */
+ TextIdx end = total - static_cast<TextIdx>(sliceLen) + 1;
+ /* Pick the hash width from 8, 11, ..., 26 bits depending on `end`. */
+ TextIdx hashLen = 11;
+ while (hashLen < 29 && ((1u << hashLen) < end)) {
+ hashLen += 3;
+ }
+ hashLen -= 3;
+ TextIdx hashMask = (1u << hashLen) - 1u;
+ /* Chain heads, one per bucket; 0 means "empty bucket". */
+ std::vector<TextIdx> hashHead(1 << hashLen);
+ /* Rolling hash: rotate left by lShift within hashLen bits, then XOR in the
+ next byte. Pre-load it with the first sliceLen - 1 bytes. */
+ TextIdx hash = 0;
+ TextIdx lShift = 3;
+ TextIdx rShift = hashLen - lShift;
+ for (TextIdx i = 0; i < sliceLen - 1; ++i) {
+ TextIdx v = data[i];
+ hash = (((hash << lShift) | (hash >> rShift)) & hashMask) ^ v;
+ }
+ /* Total rotation the oldest byte of a slice has received; used to XOR that
+ byte back out when the window advances. */
+ TextIdx lShiftX = (lShift * (sliceLen - 1)) % hashLen;
+ TextIdx rShiftX = hashLen - lShiftX;
+
+ std::vector<HashSlot> map;
+ /* Slot 0 is a dummy so that real slot ids never collide with "empty". */
+ map.push_back({0, 0});
+ TextIdx hashSlot = 1;
+ std::vector<TextIdx> sliceMap;
+ sliceMap.reserve(end);
+ for (TextIdx i = 0; i < end; ++i) {
+ /* Append the last byte of the slice starting at i -> bucket index. */
+ TextIdx v = data[i + sliceLen - 1];
+ TextIdx bucket = (((hash << lShift) | (hash >> rShift)) & hashMask) ^ v;
+ /* Remove the first byte of this slice to prepare the hash for i + 1. */
+ v = data[i];
+ hash = bucket ^ (((v << lShiftX) | (v >> rShiftX)) & hashMask);
+ /* Walk the bucket chain looking for a byte-wise identical slice. */
+ TextIdx slot = hashHead[bucket];
+ while (slot != 0) {
+ HashSlot& item = map[slot];
+ TextIdx start = item.offset;
+ bool miss = false;
+ for (TextIdx j = 0; j < sliceLen; ++j) {
+ if (data[i + j] != data[start + j]) {
+ miss = true;
+ break;
+ }
+ }
+ if (!miss) {
+ sliceMap.push_back(slot);
+ break;
+ }
+ slot = item.next;
+ }
+ /* Not seen before: allocate a new slot at the head of the chain. */
+ if (slot == 0) {
+ map.push_back({hashHead[bucket], i});
+ hashHead[bucket] = hashSlot;
+ sliceMap.push_back(hashSlot);
+ hashSlot++;
+ }
+ }
+
+ return {total, sliceLen, static_cast<TextIdx>(map.size()),
+ std::move(offsets), std::move(sliceMap)};
+}
+
+/* Builds the slice map for a corpus from a prebuilt suffix-array index
+ (`index.sa`, `index.lcp`) instead of hashing the raw bytes.
+ Suffixes that are neighbours in the suffix array and share at least
+ `slice_len` leading bytes receive the same slice id. Ids are then renumbered
+ from 1 in order of first appearance in the text; id 0 stays unused.
+ Invalid input (zero or oversized slice length, empty or oversized samples,
+ corpus shorter than a slice) ends the program through fatal().
+ The returned aggregate is filled with {total size, slice length, number of
+ ids handed out + 1, cumulative sample end offsets, slice map}; presumably
+ these are the dataSize / sliceLen / numUniqueSlices / offsets / sliceMap
+ members of DurchschlagContext - confirm against the header.
+ NOTE(review): assumes `index` was built for the same corpus, i.e. both arrays
+ hold `total` entries - this is not checked here. */
+DurchschlagContext durchschlag_prepare(size_t slice_len,
+ const std::vector<size_t>& sample_sizes, const DurchschlagIndex& index) {
+ /* Parameters aliasing */
+ TextIdx sliceLen = static_cast<TextIdx>(slice_len);
+ if (sliceLen != slice_len) fatal("slice_len is too large");
+ if (sliceLen < 1) fatal("slice_len is too small");
+ const TextIdx* lcp = index.lcp.data();
+ const TextIdx* sa = index.sa.data();
+
+ /* offsets[i] is the position just past the end of sample i. */
+ TextIdx total = 0;
+ std::vector<TextIdx> offsets;
+ offsets.reserve(sample_sizes.size());
+ for (size_t i = 0; i < sample_sizes.size(); ++i) {
+ TextIdx delta = static_cast<TextIdx>(sample_sizes[i]);
+ if (delta != sample_sizes[i]) fatal("sample is too large");
+ if (delta == 0) fatal("0-length samples are prohibited");
+ TextIdx next_total = total + delta;
+ /* Unsigned wrap-around means the running total no longer fits TextIdx. */
+ if (next_total <= total) fatal("corpus is too large");
+ total = next_total;
+ offsets.push_back(total);
+ }
+
+ if (total < sliceLen) fatal("slice_len is larger than corpus size");
+ TextIdx counter = 1;
+ /* Number of positions at which a whole slice fits. */
+ TextIdx end = total - sliceLen + 1;
+ std::vector<TextIdx> sliceMap(total);
+ /* Scan the suffix array in rank order; [last, current) is the current run of
+ ranks whose suffixes share at least sliceLen leading bytes. */
+ TextIdx last = 0;
+ TextIdx current = 1;
+ while (current <= total) {
+ /* lcp[current - 1] < sliceLen closes the run: label it and start anew. */
+ if (lcp[current - 1] < sliceLen) {
+ for (TextIdx i = last; i < current; ++i) {
+ sliceMap[sa[i]] = counter;
+ }
+ counter++;
+ last = current;
+ }
+ current++;
+ }
+ /* Positions past `end` cannot start a whole slice. */
+ sliceMap.resize(end);
+
+ // Reorder items for the better locality.
+ std::vector<TextIdx> reorder(counter);
+ counter = 1;
+ for (TextIdx i = 0; i < end; ++i) {
+ if (reorder[sliceMap[i]] == 0) {
+ reorder[sliceMap[i]] = counter++;
+ }
+ }
+ for (TextIdx i = 0; i < end; ++i) {
+ sliceMap[i] = reorder[sliceMap[i]];
+ }
+
+ return {total, sliceLen, counter, std::move(offsets), std::move(sliceMap)};
+}
+
+/* Builds the suffix array (`sa`) and the longest-common-prefix array (`lcp`)
+   of `data` and returns them as {lcp, sa}.
+   lcp[r] is the length of the common prefix of the suffixes ranked r and
+   r + 1 in the suffix array; the entry for the last rank is 0.
+   The program is ended through fatal() when the corpus does not fit the index
+   types or when the suffix sort reports an error. An empty corpus yields an
+   empty index. */
+DurchschlagIndex durchschlag_index(const std::vector<uint8_t>& data) {
+  TextIdx total = static_cast<TextIdx>(data.size());
+  if (total != data.size()) fatal("corpus is too large");
+  saidx_t saTotal = static_cast<saidx_t>(total);
+  if (saTotal < 0) fatal("corpus is too large");
+  if (static_cast<TextIdx>(saTotal) != total) fatal("corpus is too large");
+  std::vector<TextIdx> sa(total);
+  /* Hopefully, non-negative int32_t values match TextIdx ones. */
+  if (sizeof(TextIdx) != sizeof(int32_t)) fatal("type length mismatch");
+  std::vector<TextIdx> lcp(total);
+  /* Nothing to sort; this also keeps `total - 1` below from wrapping around
+     and indexing an empty vector. */
+  if (total == 0) return {std::move(lcp), std::move(sa)};
+  int32_t* saData = reinterpret_cast<int32_t*>(sa.data());
+  /* divsufsort reports failure through a non-zero return value. */
+  if (divsufsort(data.data(), saData, saTotal) != 0) {
+    fatal("suffix array construction failed");
+  }
+
+  /* Inverse suffix array: isa[p] is the rank of the suffix starting at p. */
+  std::vector<TextIdx> isa(total);
+  for (TextIdx i = 0; i < total; ++i) isa[sa[i]] = i;
+
+  // TODO: borrowed -> unknown efficiency.
+  /* Suffixes are visited in text order; the match length `k` found for
+     position i is carried over (minus one) as the starting point for i + 1. */
+  TextIdx k = 0;
+  lcp[total - 1] = 0;
+  for (TextIdx i = 0; i < total; ++i) {
+    TextIdx current = isa[i];
+    if (current == total - 1) {
+      /* Last-ranked suffix has no successor to compare with. */
+      k = 0;
+      continue;
+    }
+    TextIdx j = sa[current + 1];  // Suffix which follows the i-th suffix.
+    while ((i + k < total) && (j + k < total) && (data[i + k] == data[j + k])) {
+      ++k;
+    }
+    lcp[current] = k;
+    if (k > 0) --k;
+  }
+
+  return {std::move(lcp), std::move(sa)};
+}
+
+/* Sets the score of every slice to the number of distinct samples in which
+   the slice starts at least once. `offsets` holds the cumulative end offset of
+   each sample, `shortcut[p]` is the slice id at text position p, and only
+   positions in [0, end) are visited. `map` is expected to be zero-filled. */
+static void ScoreSlices(const std::vector<TextIdx>& offsets,
+    std::vector<MetaSlot>& map, const TextIdx* shortcut, TextIdx end) {
+  TextIdx sample = 0;
+  /* A fresh map holds zeroes, so stamps start from 1 to differ from them. */
+  TextIdx stamp = 1;
+  TextIdx pos = 0;
+  while (pos < end) {
+    /* Crossing a sample boundary: switch to a new stamp. */
+    if (pos == offsets[sample]) {
+      ++sample;
+      ++stamp;
+    }
+    MetaSlot& slot = map[shortcut[pos]];
+    /* Count each slice at most once per sample. */
+    if (slot.mark != stamp) {
+      slot.mark = stamp;
+      ++slot.score;
+    }
+    ++pos;
+  }
+}
+
+/* Greedy dictionary builder working on per-score candidate lists.
+ Repeatedly picks a block of `block_len` bytes whose slices have the highest
+ total score, adds it to the dictionary and sets the score of every slice it
+ contains to 0, so later picks are only rewarded for slices not yet covered.
+ Selection stops when another whole block may not fit into
+ `dictionary_size_limit` or when no block with a positive score is left.
+ Returns the selected bytes (at most `dictionary_size_limit`), or an empty
+ string after printing a message to stderr when the parameters are
+ inconsistent or an internal invariant is broken. */
+static std::string durchschlagGenerateExclusive(
+ size_t dictionary_size_limit, size_t block_len,
+ const DurchschlagContext& context, const uint8_t* sample_data) {
+ /* Parameters aliasing */
+ TextIdx targetSize = static_cast<TextIdx>(dictionary_size_limit);
+ if (targetSize != dictionary_size_limit) {
+ fprintf(stderr, "dictionary_size_limit is too large\n");
+ return "";
+ }
+ TextIdx sliceLen = context.sliceLen;
+ TextIdx total = context.dataSize;
+ TextIdx blockLen = static_cast<TextIdx>(block_len);
+ if (blockLen != block_len) {
+ fprintf(stderr, "block_len is too large\n");
+ return "";
+ }
+ const uint8_t* data = sample_data;
+ const std::vector<TextIdx>& offsets = context.offsets;
+ /* One zero-initialized bookkeeping entry per slice id. */
+ std::vector<MetaSlot> map(context.numUniqueSlices);
+ const TextIdx* shortcut = context.sliceMap.data();
+
+ /* Initialization */
+ if (blockLen < sliceLen) {
+ fprintf(stderr, "sliceLen is larger than block_len\n");
+ return "";
+ }
+ if (targetSize < blockLen || total < blockLen) {
+ fprintf(stderr, "block_len is too large\n");
+ return "";
+ }
+ /* First `end`: number of slice positions; used for scoring slices. */
+ TextIdx end = total - sliceLen + 1;
+ ScoreSlices(offsets, map, shortcut, end);
+ /* Second `end`: number of positions at which a whole block fits. */
+ end = total - blockLen + 1;
+ std::vector<TextIdx> candidates;
+ std::vector<TextIdx> next(end);
+ TextIdx span = blockLen - sliceLen + 1;
+ Score maxScore = rebuildCandidatesList(
+ &candidates, &map, span, shortcut, end, next.data());
+
+ /* Block selection */
+ /* Budgets of re-evaluations after which the lists are rebuilt from scratch
+ instead of re-filing stale candidates one by one. */
+ const size_t triesLimit = (600 * 1000000) / span;
+ const size_t candidatesLimit = (150 * 1000000) / span;
+ std::vector<Range> ranges;
+ TextIdx mark = 0;
+ size_t numTries = 0;
+ while (true) {
+ TextIdx dictSize = calculateDictionarySize(ranges);
+ size_t numCandidates = 0;
+ /* Stop once another whole block is not guaranteed to fit. */
+ if (dictSize > targetSize - blockLen) {
+ break;
+ }
+ if (maxScore == 0) {
+ break;
+ }
+ while (true) {
+ /* Pop the head of the highest non-empty list; 0 means "empty". */
+ TextIdx candidate = 0;
+ while (maxScore > 0) {
+ if (candidates[maxScore] != 0) {
+ candidate = candidates[maxScore];
+ candidates[maxScore] = next[candidate];
+ break;
+ }
+ maxScore--;
+ }
+ if (maxScore == 0) {
+ break;
+ }
+ /* A fresh stamp lets each slice be counted once for this candidate. */
+ mark++;
+ numTries++;
+ numCandidates++;
+ /* Recompute the score: slices zeroed by earlier picks make the filed
+ score stale. */
+ Score score = 0;
+ for (size_t j = candidate; j <= candidate + span; ++j) {
+ MetaSlot& item = map[shortcut[j]];
+ if (item.mark != mark) {
+ score += item.score;
+ item.mark = mark;
+ }
+ }
+ if (score < maxScore) {
+ if (numTries < triesLimit && numCandidates < candidatesLimit) {
+ /* Re-file the candidate under its actual score. */
+ next[candidate] = candidates[score];
+ candidates[score] = candidate;
+ } else {
+ /* Too many stale hits: rebuild all lists with current scores. */
+ maxScore = rebuildCandidatesList(
+ &candidates, &map, span, shortcut, end, next.data());
+ mark = 0;
+ numTries = 0;
+ numCandidates = 0;
+ }
+ continue;
+ } else if (score > maxScore) {
+ /* Slice scores are only ever zeroed, so a score cannot grow. */
+ fprintf(stderr, "Broken invariant\n");
+ return "";
+ }
+ /* Accept: the slices of this block bring no further gain. */
+ for (TextIdx j = candidate; j <= candidate + span; ++j) {
+ MetaSlot& item = map[shortcut[j]];
+ item.score = 0;
+ }
+ addRange(&ranges, candidate, candidate + blockLen);
+ break;
+ }
+ }
+
+ return createDictionary(data, ranges, targetSize);
+}
+ /**
+  * Generates a dictionary with the "collaborative" strategy: candidate blocks
+  * are kept in a heap (std::push_heap / std::pop_heap ordered by lessScore)
+  * that is refilled by buildCandidatesList() whenever it runs empty.
+  *
+  * Selection is lazy: the front candidate is popped and its score recomputed
+  * over the slots of its window, counting every slot once. If the fresh score
+  * is lower than the queued one the candidate is re-queued (only while it still
+  * reaches minScore) and the next front is tried; if it is equal the block is
+  * accepted, the scores of its slots are zeroed and the block is added to the
+  * result ranges; if it is higher fatal("Broken invariant") is called.
+  *
+  * @param dictionary_size_limit maximal dictionary size; must fit into TextIdx
+  * @param block_len length of a selected block; must fit into TextIdx, must not
+  *        be smaller than context.sliceLen and must not exceed either the size
+  *        limit or context.dataSize
+  * @param context prepared slice map of the corpus
+  * @param sample_data concatenated samples
+  * @return result of createDictionary() for the selected ranges, or an empty
+  *         string when one of the parameter checks fails
+  */
+ static std::string durchschlagGenerateCollaborative(
+ size_t dictionary_size_limit, size_t block_len,
+ const DurchschlagContext& context, const uint8_t* sample_data) {
+ /* Parameters aliasing: size_t arguments are narrowed to TextIdx; a value that
+  * does not survive the round trip is rejected with an empty result. */
+ TextIdx targetSize = static_cast<TextIdx>(dictionary_size_limit);
+ if (targetSize != dictionary_size_limit) {
+ fprintf(stderr, "dictionary_size_limit is too large\n");
+ return "";
+ }
+ TextIdx sliceLen = context.sliceLen;
+ TextIdx total = context.dataSize;
+ TextIdx blockLen = static_cast<TextIdx>(block_len);
+ if (blockLen != block_len) {
+ fprintf(stderr, "block_len is too large\n");
+ return "";
+ }
+ const uint8_t* data = sample_data;
+ const std::vector<TextIdx>& offsets = context.offsets;
+ std::vector<MetaSlot> map(context.numUniqueSlices); /* one slot per unique slice */
+ const TextIdx* shortcut = context.sliceMap.data(); /* text position -> slot index */
+
+ /* Initialization: validate the geometry, score the slices, then build the
+  * first candidate list. */
+ if (blockLen < sliceLen) {
+ fprintf(stderr, "sliceLen is larger than block_len\n");
+ return "";
+ }
+ if (targetSize < blockLen || total < blockLen) {
+ fprintf(stderr, "block_len is too large\n");
+ return "";
+ }
+ TextIdx end = total - sliceLen + 1; /* one past the last position where a whole slice fits */
+ ScoreSlices(offsets, map, shortcut, end);
+ end = total - blockLen + 1; /* from here on: one past the last position where a whole block fits */
+ std::vector<Candidate> candidates;
+ candidates.reserve(CANDIDATE_BUNDLE_SIZE + 1024);
+ TextIdx span = blockLen - sliceLen + 1;
+ Score minScore = buildCandidatesList(&candidates, &map, span, shortcut, end); /* used below as the re-queue threshold */
+
+ /* Block selection: every pass of the outer loop accepts at most one block. */
+ std::vector<Range> ranges;
+ TextIdx mark = 0;
+ while (true) {
+ TextIdx dictSize = calculateDictionarySize(ranges);
+ if (dictSize > targetSize - blockLen) { /* no room left for another whole block */
+ break;
+ }
+ if (minScore == 0 && candidates.empty()) { /* list drained while its threshold was already 0 */
+ break;
+ }
+ while (true) {
+ if (candidates.empty()) { /* refill the list and restart the mark counter */
+ minScore = buildCandidatesList(&candidates, &map, span, shortcut, end);
+ mark = 0;
+ }
+ TextIdx candidate = candidates[0].position; /* front of the heap */
+ Score expectedScore = candidates[0].score; /* score the candidate was queued with */
+ if (expectedScore == 0) { /* front candidate is worth nothing: drop the whole list */
+ candidates.resize(0);
+ break;
+ }
+ std::pop_heap(candidates.begin(), candidates.end(), lessScore());
+ candidates.pop_back();
+ mark++; /* fresh mark, so that each slot is counted once for this candidate */
+ Score score = 0;
+ for (TextIdx j = candidate; j <= candidate + span; ++j) { /* re-score the window; bounds are inclusive */
+ MetaSlot& item = map[shortcut[j]];
+ if (item.mark != mark) {
+ score += item.score;
+ item.mark = mark;
+ }
+ }
+ if (score < expectedScore) { /* stale entry: its score dropped since it was queued */
+ if (score >= minScore) { /* still admissible: re-queue with the fresh score */
+ candidates.push_back({score, candidate});
+ std::push_heap(candidates.begin(), candidates.end(), lessScore());
+ }
+ continue;
+ } else if (score > expectedScore) { /* this function only ever zeroes scores, so growth is a bug */
+ fatal("Broken invariant");
+ }
+ for (TextIdx j = candidate; j <= candidate + span; ++j) { /* accepted: consume the slots of the window */
+ MetaSlot& item = map[shortcut[j]];
+ item.score = 0;
+ }
+ addRange(&ranges, candidate, candidate + blockLen); /* record the chosen block */
+ break;
+ }
+ }
+
+ return createDictionary(data, ranges, targetSize);
+ }
+
+ /* Entry point of the lower level API: forwards all arguments, unchanged, to
+  * the generator that implements the requested resource strategy. */
+ std::string durchschlag_generate(DurchschalgResourceStrategy strategy,
+     size_t dictionary_size_limit, size_t block_len,
+     const DurchschlagContext& context, const uint8_t* sample_data) {
+   /* Only COLLABORATIVE is singled out; every other value takes the
+    * EXCLUSIVE path. */
+   const bool collaborative = (strategy == DURCHSCHLAG_COLLABORATIVE);
+   if (!collaborative) {
+     return durchschlagGenerateExclusive(
+         dictionary_size_limit, block_len, context, sample_data);
+   }
+   return durchschlagGenerateCollaborative(
+       dictionary_size_limit, block_len, context, sample_data);
+ }
+
+ /* Condenses the samples in place: a byte survives only when it is covered by
+  * a slice whose score reaches minimum_population. Surviving bytes are moved
+  * towards the start of sample_data and every entry of sample_sizes is
+  * replaced by the number of bytes kept from that sample. */
+ void durchschlag_distill(size_t slice_len, size_t minimum_population,
+     std::vector<size_t>* sample_sizes, uint8_t* sample_data) {
+   /* Slice map of the original (not yet condensed) corpus. */
+   DurchschlagContext context =
+       durchschlag_prepare(slice_len, *sample_sizes, sample_data);
+
+   /* Slice population: one slot per unique slice. */
+   std::vector<MetaSlot> slots(context.numUniqueSlices);
+   const std::vector<TextIdx>& sampleOffsets = context.offsets;
+   const TextIdx* slotOf = context.sliceMap.data();
+   TextIdx sliceLen = context.sliceLen;
+   TextIdx sliceStarts = context.dataSize - sliceLen + 1;
+   ScoreSlices(sampleOffsets, slots, slotOf, sliceStarts);
+
+   /* src walks the old text, dst the condensed one; bytes located before
+    * keepUntil are covered by a sufficiently populated slice. */
+   TextIdx src = 0;
+   TextIdx dst = 0;
+   TextIdx keepUntil = 0;
+   for (TextIdx i = 0; i < sample_sizes->size(); ++i) {
+     size_t& sampleSize = sample_sizes->at(i);
+     TextIdx dstStart = dst;
+     TextIdx srcEnd = src + static_cast<TextIdx>(sampleSize);
+     for (; src < srcEnd; ++src) {
+       /* Only positions below sliceStarts begin a whole slice. */
+       if (src < sliceStarts &&
+           slots[slotOf[src]].score >= minimum_population) {
+         keepUntil = src + sliceLen;
+       }
+       if (src < keepUntil) {
+         sample_data[dst] = sample_data[src];
+         ++dst;
+       }
+     }
+     sampleSize = dst - dstStart;
+   }
+ }
+
+ /* Blanks the samples in place: every byte that is not covered by a slice
+  * whose score reaches minimum_population is overwritten with zero. Sample
+  * sizes are only needed to build the slice map and are left untouched. */
+ void durchschlag_purify(size_t slice_len, size_t minimum_population,
+     const std::vector<size_t>& sample_sizes, uint8_t* sample_data) {
+   /* Slice map of the corpus. */
+   DurchschlagContext context =
+       durchschlag_prepare(slice_len, sample_sizes, sample_data);
+
+   /* Slice population: one slot per unique slice. */
+   std::vector<MetaSlot> slots(context.numUniqueSlices);
+   const std::vector<TextIdx>& sampleOffsets = context.offsets;
+   const TextIdx* slotOf = context.sliceMap.data();
+   TextIdx sliceLen = context.sliceLen;
+   TextIdx total = context.dataSize;
+   TextIdx sliceStarts = total - sliceLen + 1;
+   ScoreSlices(sampleOffsets, slots, slotOf, sliceStarts);
+
+   /* Bytes located before keepUntil are covered by a sufficiently populated
+    * slice; everything else is zeroed. */
+   TextIdx keepUntil = 0;
+   TextIdx pos = 0;
+   while (pos < total) {
+     /* Only positions below sliceStarts begin a whole slice. */
+     if (pos < sliceStarts &&
+         slots[slotOf[pos]].score >= minimum_population) {
+       keepUntil = pos + sliceLen;
+     }
+     if (keepUntil <= pos) {
+       sample_data[pos] = 0;
+     }
+     ++pos;
+   }
+ }
diff --git a/research/durchschlag.h b/research/durchschlag.h
new file mode 100755
index 0000000..adbc531
--- /dev/null
+++ b/research/durchschlag.h
@@ -0,0 +1,99 @@
+#ifndef BROTLI_RESEARCH_DURCHSCHLAG_H_
+#define BROTLI_RESEARCH_DURCHSCHLAG_H_
+
+#include <cstddef>
+#include <cstdint>
+#include <string>
+#include <vector>
+
+/**
+ * Generate a dictionary for given samples.
+ *
+ * @param dictionary_size_limit maximal dictionary size
+ * @param slice_len text slice size
+ * @param block_len score block length
+ * @param sample_sizes vector with sample sizes
+ * @param sample_data concatenated samples
+ * @return generated dictionary
+ */
+std::string durchschlag_generate(
+ size_t dictionary_size_limit, size_t slice_len, size_t block_len,
+ const std::vector<size_t>& sample_sizes, const uint8_t* sample_data);
+
+//------------------------------------------------------------------------------
+// Lower level API for repetitive dictionary generation.
+//------------------------------------------------------------------------------
+
+ /* Pointer to position in text. */
+ typedef uint32_t DurchschlagTextIdx;
+
+ /* Context is made public for flexible serialization / deserialization. */
+ typedef struct DurchschlagContext {
+ DurchschlagTextIdx dataSize; /* size of the corpus text; the generators read it as `total` */
+ DurchschlagTextIdx sliceLen; /* presumably the slice_len given to durchschlag_prepare - TODO confirm */
+ DurchschlagTextIdx numUniqueSlices; /* number of per-slice slots; sliceMap values index such slots */
+ std::vector<DurchschlagTextIdx> offsets; /* handed to slice scoring; presumably sample boundaries - TODO confirm */
+ std::vector<DurchschlagTextIdx> sliceMap; /* text position -> index of its slice slot */
+ } DurchschlagContext;
+ /* Builds the slice map (context) for the given samples. */
+ DurchschlagContext durchschlag_prepare(size_t slice_len,
+ const std::vector<size_t>& sample_sizes, const uint8_t* sample_data);
+ /* Generator selection. NOTE(review): "Durchschalg" is a misspelling of "Durchschlag"; kept, it is public API. */
+ typedef enum DurchschalgResourceStrategy {
+ // Faster
+ DURCHSCHLAG_EXCLUSIVE = 0,
+ // Uses much less memory
+ DURCHSCHLAG_COLLABORATIVE = 1
+ } DurchschalgResourceStrategy;
+ /* Generates a dictionary from a prepared context; may return "" on failure (e.g. block_len too large). */
+ std::string durchschlag_generate(DurchschalgResourceStrategy strategy,
+ size_t dictionary_size_limit, size_t block_len,
+ const DurchschlagContext& context, const uint8_t* sample_data);
+
+//------------------------------------------------------------------------------
+// Suffix Array based preparation.
+//------------------------------------------------------------------------------
+ /* Index over the text; `sa` / `lcp` are presumably a suffix array and its LCP array - TODO confirm. */
+ typedef struct DurchschlagIndex {
+ std::vector<DurchschlagTextIdx> lcp;
+ std::vector<DurchschlagTextIdx> sa;
+ } DurchschlagIndex;
+ /* Builds the index for @p data. */
+ DurchschlagIndex durchschlag_index(const std::vector<uint8_t>& data);
+ /* Variant of durchschlag_prepare that takes a prebuilt index instead of the raw sample bytes. */
+ DurchschlagContext durchschlag_prepare(size_t slice_len,
+ const std::vector<size_t>& sample_sizes, const DurchschlagIndex& index);
+
+//------------------------------------------------------------------------------
+// Data preparation.
+//------------------------------------------------------------------------------
+
+/**
+ * Cut out unique slices.
+ *
+ * Both @p sample_sizes and @p sample_data are modified in-place. Number of
+ * samples remains unchanged, but some samples become shorter.
+ *
+ * @param slice_len (unique) slice size
+ * @param minimum_population minimum non-unique slice occurrence
+ * @param sample_sizes [in / out] vector with sample sizes
+ * @param sample_data [in / out] concatenated samples
+ */
+void durchschlag_distill(size_t slice_len, size_t minimum_population,
+ std::vector<size_t>* sample_sizes, uint8_t* sample_data);
+
+/**
+ * Replace unique slices with zeroes.
+ *
+ * @p sample_data is modified in-place. Number of samples and their length
+ * remain unchanged.
+ *
+ * @param slice_len (unique) slice size
+ * @param minimum_population minimum non-unique slice occurrence
+ * @param sample_sizes vector with sample sizes
+ * @param sample_data [in / out] concatenated samples
+ */
+void durchschlag_purify(size_t slice_len, size_t minimum_population,
+ const std::vector<size_t>& sample_sizes, uint8_t* sample_data);
+
+#endif // BROTLI_RESEARCH_DURCHSCHLAG_H_
diff --git a/research/libdivsufsort b/research/libdivsufsort
new file mode 160000
+Subproject 5f60d6f026c30fb4ac296f696b3c8b0eb71bd42
diff --git a/research/sieve.cc b/research/sieve.cc
index fbc1dbf..4d147e1 100755
--- a/research/sieve.cc
+++ b/research/sieve.cc
@@ -1,19 +1,27 @@
#include "./sieve.h"
+/* Pointer to position in (combined corpus) text. */
+typedef uint32_t TextIdx;
+
+/* Index of sample / generation. */
+typedef uint16_t SampleIdx;
+
typedef struct Slot {
- uint32_t next;
- uint32_t offset;
- uint16_t presence;
- uint16_t mark;
+ TextIdx next;
+ TextIdx offset;
+ SampleIdx presence;
+ SampleIdx mark;
} Slot;
-static size_t dryRun(size_t sliceLen, Slot* map, uint32_t* shortcut, size_t end,
- size_t middle, uint16_t minPresence, uint16_t iteration) {
- int from = -2;
- int to = -1;
- size_t result = 0;
- uint16_t targetPresence = minPresence;
- for (uint32_t i = 0; i < end; ++i) {
+static const TextIdx kNowhere = static_cast<TextIdx>(-1);
+
+static TextIdx dryRun(TextIdx sliceLen, Slot* map, TextIdx* shortcut,
+ TextIdx end, TextIdx middle, SampleIdx minPresence, SampleIdx iteration) {
+ TextIdx from = kNowhere;
+ TextIdx to = kNowhere;
+ TextIdx result = 0;
+ SampleIdx targetPresence = minPresence;
+ for (TextIdx i = 0; i < end; ++i) {
if (i == middle) {
targetPresence++;
}
@@ -21,8 +29,8 @@ static size_t dryRun(size_t sliceLen, Slot* map, uint32_t* shortcut, size_t end,
if (item.mark != iteration) {
item.mark = iteration;
if (item.presence >= targetPresence) {
- if (to < i) {
- if (from > 0) {
+ if ((to == kNowhere) || (to < i)) {
+ if (from != kNowhere) {
result += to - from;
}
from = i;
@@ -31,20 +39,20 @@ static size_t dryRun(size_t sliceLen, Slot* map, uint32_t* shortcut, size_t end,
}
}
}
- if (from > 0) {
+ if (from != kNowhere) {
result += to - from;
}
return result;
}
-static std::string createDictionary(const uint8_t* data, size_t sliceLen,
- Slot* map, uint32_t* shortcut, size_t end, size_t middle,
- uint16_t minPresence, uint16_t iteration) {
+static std::string createDictionary(const uint8_t* data, TextIdx sliceLen,
+ Slot* map, TextIdx* shortcut, TextIdx end, TextIdx middle,
+ SampleIdx minPresence, SampleIdx iteration) {
std::string output;
- int from = -2;
- int to = -1;
- uint16_t targetPresence = minPresence;
- for (uint32_t i = 0; i < end; ++i) {
+ TextIdx from = kNowhere;
+ TextIdx to = kNowhere;
+ SampleIdx targetPresence = minPresence;
+ for (TextIdx i = 0; i < end; ++i) {
if (i == middle) {
targetPresence++;
}
@@ -52,8 +60,8 @@ static std::string createDictionary(const uint8_t* data, size_t sliceLen,
if (item.mark != iteration) {
item.mark = iteration;
if (item.presence >= targetPresence) {
- if (to < i) {
- if (from > 0) {
+ if ((to == kNowhere) || (to < i)) {
+ if (from != kNowhere) {
output.insert(output.end(), &data[from], &data[to]);
}
from = i;
@@ -62,7 +70,7 @@ static std::string createDictionary(const uint8_t* data, size_t sliceLen,
}
}
}
- if (from > 0) {
+ if (from != kNowhere) {
output.insert(output.end(), &data[from], &data[to]);
}
return output;
@@ -71,55 +79,95 @@ static std::string createDictionary(const uint8_t* data, size_t sliceLen,
std::string sieve_generate(size_t dictionary_size_limit, size_t slice_len,
const std::vector<size_t>& sample_sizes, const uint8_t* sample_data) {
/* Parameters aliasing */
- size_t targetSize = dictionary_size_limit;
- size_t sliceLen = slice_len;
+ TextIdx targetSize = static_cast<TextIdx>(dictionary_size_limit);
+ if (targetSize != dictionary_size_limit) {
+ fprintf(stderr, "dictionary_size_limit is too large\n");
+ return "";
+ }
+ TextIdx sliceLen = static_cast<TextIdx>(slice_len);
+ if (sliceLen != slice_len) {
+ fprintf(stderr, "slice_len is too large\n");
+ return "";
+ }
+ if (sliceLen < 1) {
+ fprintf(stderr, "slice_len is too small\n");
+ return "";
+ }
+ SampleIdx numSamples = static_cast<SampleIdx>(sample_sizes.size());
+ if ((numSamples != sample_sizes.size()) || (numSamples * 2 < numSamples)) {
+ fprintf(stderr, "too many samples\n");
+ return "";
+ }
const uint8_t* data = sample_data;
- size_t total = 0;
- std::vector<size_t> offsets;
- for (size_t i = 0; i < sample_sizes.size(); ++i) {
- total += sample_sizes[i];
+ TextIdx total = 0;
+ std::vector<TextIdx> offsets;
+ for (SampleIdx i = 0; i < numSamples; ++i) {
+ TextIdx delta = static_cast<TextIdx>(sample_sizes[i]);
+ if (delta != sample_sizes[i]) {
+ fprintf(stderr, "sample is too large\n");
+ return "";
+ }
+ if (delta == 0) {
+ fprintf(stderr, "empty samples are prohibited\n");
+ return "";
+ }
+ if (total + delta <= total) {
+ fprintf(stderr, "corpus is too large\n");
+ return "";
+ }
+ total += delta;
offsets.push_back(total);
}
+ if (total * 2 < total) {
+ fprintf(stderr, "corpus is too large\n");
+ return "";
+ }
+
+ if (total < sliceLen) {
+ fprintf(stderr, "slice_len is larger than corpus size\n");
+ return "";
+ }
+
/*****************************************************************************
* Build coverage map.
****************************************************************************/
std::vector<Slot> map;
- std::vector<uint32_t> shortcut;
+ std::vector<TextIdx> shortcut;
map.push_back({0, 0, 0, 0});
- size_t end = total - sliceLen;
- int hashLen = 8;
- while ((1 << hashLen) < end) {
+ TextIdx end = total - sliceLen;
+ TextIdx hashLen = 11;
+ while (hashLen < 29 && ((1u << hashLen) < end)) {
hashLen += 3;
}
hashLen -= 3;
- uint32_t hashMask = (1u << hashLen) - 1u;
- std::vector<uint32_t> hashHead(1 << hashLen);
- uint32_t hashSlot = 1;
- uint16_t piece = 0;
- uint32_t hash = 0;
- int lShift = 3;
- int rShift = hashLen - lShift;
- for (int i = 0; i < sliceLen - 1; ++i) {
- uint32_t v = data[i];
+ TextIdx hashMask = (1u << hashLen) - 1u;
+ std::vector<TextIdx> hashHead(1 << hashLen);
+ TextIdx hashSlot = 1;
+ SampleIdx piece = 0;
+ TextIdx hash = 0;
+ TextIdx lShift = 3;
+ TextIdx rShift = hashLen - lShift;
+ for (TextIdx i = 0; i < sliceLen - 1; ++i) {
+ TextIdx v = data[i];
hash = (((hash << lShift) | (hash >> rShift)) & hashMask) ^ v;
}
- int lShiftX = (lShift * (sliceLen - 1)) % hashLen;
- int rShiftX = hashLen - lShiftX;
- for (uint32_t i = 0; i < end; ++i) {
- uint32_t v = data[i + sliceLen - 1];
+ TextIdx lShiftX = (lShift * (sliceLen - 1)) % hashLen;
+ TextIdx rShiftX = hashLen - lShiftX;
+ for (TextIdx i = 0; i < end; ++i) {
+ TextIdx v = data[i + sliceLen - 1];
hash = (((hash << lShift) | (hash >> rShift)) & hashMask) ^ v;
if (offsets[piece] == i) {
piece++;
}
- uint32_t slot = hashHead[hash];
+ TextIdx slot = hashHead[hash];
while (slot != 0) {
Slot& item = map[slot];
- int start = item.offset;
+ TextIdx start = item.offset;
bool miss = false;
- for (size_t j = 0; j < sliceLen; ++j) {
+ for (TextIdx j = 0; j < sliceLen; ++j) {
if (data[i + j] != data[start + j]) {
miss = true;
break;
@@ -148,8 +196,8 @@ std::string sieve_generate(size_t dictionary_size_limit, size_t slice_len,
/*****************************************************************************
* Build dictionary of specified size.
****************************************************************************/
- size_t a = 1;
- size_t size = dryRun(
+ SampleIdx a = 1;
+ TextIdx size = dryRun(
sliceLen, map.data(), shortcut.data(), end, end, a, ++piece);
/* Maximal output is smaller than target. */
if (size <= targetSize) {
@@ -157,7 +205,7 @@ std::string sieve_generate(size_t dictionary_size_limit, size_t slice_len,
data, sliceLen, map.data(), shortcut.data(), end, end, a, ++piece);
}
- size_t b = offsets.size();
+ SampleIdx b = numSamples;
size = dryRun(sliceLen, map.data(), shortcut.data(), end, end, b, ++piece);
if (size == targetSize) {
return createDictionary(
@@ -167,7 +215,7 @@ std::string sieve_generate(size_t dictionary_size_limit, size_t slice_len,
if (size < targetSize) {
/* size(a) > targetSize > size(b) && a < m < b */
while (a + 1 < b) {
- size_t m = (a + b) / 2;
+ SampleIdx m = static_cast<SampleIdx>((a + b) / 2);
size = dryRun(
sliceLen, map.data(), shortcut.data(), end, end, m, ++piece);
if (size < targetSize) {
@@ -183,18 +231,18 @@ std::string sieve_generate(size_t dictionary_size_limit, size_t slice_len,
a = b;
}
/* size(minPresence) > targetSize > size(minPresence + 1) */
- size_t minPresence = a;
- a = 0;
- b = end;
+ SampleIdx minPresence = a;
+ TextIdx c = 0;
+ TextIdx d = end;
/* size(a) < targetSize < size(b) && a < m < b */
- while (a + 1 < b) {
- size_t m = (a + b) / 2;
+ while (c + 1 < d) {
+ TextIdx m = (c + d) / 2;
size = dryRun(
sliceLen, map.data(), shortcut.data(), end, m, minPresence, ++piece);
if (size < targetSize) {
- a = m;
+ c = m;
} else if (size > targetSize) {
- b = m;
+ d = m;
} else {
return createDictionary(data, sliceLen, map.data(), shortcut.data(), end,
m, minPresence, ++piece);
@@ -204,8 +252,8 @@ std::string sieve_generate(size_t dictionary_size_limit, size_t slice_len,
bool unrestricted = false;
if (minPresence <= 2 && !unrestricted) {
minPresence = 2;
- a = end;
+ c = end;
}
- return createDictionary(data, sliceLen, map.data(), shortcut.data(), end, a,
+ return createDictionary(data, sliceLen, map.data(), shortcut.data(), end, c,
minPresence, ++piece);
}
diff --git a/research/sieve.h b/research/sieve.h
index 2aae669..6c65dc8 100755
--- a/research/sieve.h
+++ b/research/sieve.h
@@ -1,9 +1,8 @@
#ifndef BROTLI_RESEARCH_SIEVE_H_
#define BROTLI_RESEARCH_SIEVE_H_
-#include <stddef.h>
-#include <stdint.h>
-
+#include <cstddef>
+#include <cstdint>
#include <string>
#include <vector>
diff --git a/scripts/sources.lst b/scripts/sources.lst
index cd61a7f..cdddb37 100644
--- a/scripts/sources.lst
+++ b/scripts/sources.lst
@@ -5,11 +5,15 @@ BROTLI_CLI_C = \
c/tools/brotli.c
BROTLI_COMMON_C = \
- c/common/dictionary.c
+ c/common/dictionary.c \
+ c/common/transform.c
BROTLI_COMMON_H = \
c/common/constants.h \
+ c/common/context.h \
c/common/dictionary.h \
+ c/common/platform.h \
+ c/common/transform.h \
c/common/version.h
BROTLI_DEC_C = \
@@ -20,12 +24,9 @@ BROTLI_DEC_C = \
BROTLI_DEC_H = \
c/dec/bit_reader.h \
- c/dec/context.h \
c/dec/huffman.h \
- c/dec/port.h \
c/dec/prefix.h \
- c/dec/state.h \
- c/dec/transform.h
+ c/dec/state.h
BROTLI_ENC_C = \
c/enc/backward_references.c \
@@ -38,6 +39,7 @@ BROTLI_ENC_C = \
c/enc/compress_fragment_two_pass.c \
c/enc/dictionary_hash.c \
c/enc/encode.c \
+ c/enc/encoder_dict.c \
c/enc/entropy_encode.c \
c/enc/histogram.c \
c/enc/literal_cost.c \
@@ -61,14 +63,14 @@ BROTLI_ENC_H = \
c/enc/command.h \
c/enc/compress_fragment.h \
c/enc/compress_fragment_two_pass.h \
- c/enc/context.h \
c/enc/dictionary_hash.h \
+ c/enc/encoder_dict.h \
c/enc/entropy_encode.h \
c/enc/entropy_encode_static.h \
c/enc/fast_log.h \
c/enc/find_match_length.h \
- c/enc/hash_forgetful_chain_inc.h \
c/enc/hash.h \
+ c/enc/hash_forgetful_chain_inc.h \
c/enc/hash_longest_match64_inc.h \
c/enc/hash_longest_match_inc.h \
c/enc/hash_longest_match_quickly_inc.h \
@@ -79,7 +81,6 @@ BROTLI_ENC_H = \
c/enc/memory.h \
c/enc/metablock.h \
c/enc/metablock_inc.h \
- c/enc/port.h \
c/enc/prefix.h \
c/enc/quality.h \
c/enc/ringbuffer.h \
diff --git a/setup.py b/setup.py
index a8a2ebe..d8478b3 100644
--- a/setup.py
+++ b/setup.py
@@ -182,6 +182,7 @@ EXT_MODULES = [
sources=[
'python/_brotli.cc',
'c/common/dictionary.c',
+ 'c/common/transform.c',
'c/dec/bit_reader.c',
'c/dec/decode.c',
'c/dec/huffman.c',
@@ -196,6 +197,7 @@ EXT_MODULES = [
'c/enc/compress_fragment_two_pass.c',
'c/enc/dictionary_hash.c',
'c/enc/encode.c',
+ 'c/enc/encoder_dict.c',
'c/enc/entropy_encode.c',
'c/enc/histogram.c',
'c/enc/literal_cost.c',
@@ -206,15 +208,15 @@ EXT_MODULES = [
],
depends=[
'c/common/constants.h',
+ 'c/common/context.h',
'c/common/dictionary.h',
+ 'c/common/platform.h',
+ 'c/common/transform.h',
'c/common/version.h',
'c/dec/bit_reader.h',
- 'c/dec/context.h',
'c/dec/huffman.h',
- 'c/dec/port.h',
'c/dec/prefix.h',
'c/dec/state.h',
- 'c/dec/transform.h',
'c/enc/backward_references.h',
'c/enc/backward_references_hq.h',
'c/enc/backward_references_inc.h',
@@ -229,8 +231,8 @@ EXT_MODULES = [
'c/enc/command.h',
'c/enc/compress_fragment.h',
'c/enc/compress_fragment_two_pass.h',
- 'c/enc/context.h',
'c/enc/dictionary_hash.h',
+ 'c/enc/encoder_dict.h',
'c/enc/entropy_encode.h',
'c/enc/entropy_encode_static.h',
'c/enc/fast_log.h',
@@ -247,7 +249,6 @@ EXT_MODULES = [
'c/enc/memory.h',
'c/enc/metablock.h',
'c/enc/metablock_inc.h',
- 'c/enc/port.h',
'c/enc/prefix.h',
'c/enc/quality.h',
'c/enc/ringbuffer.h',