diff options
Diffstat (limited to 'c')
61 files changed, 1778 insertions, 1523 deletions
diff --git a/c/common/constants.h b/c/common/constants.h index 416ec55..26edcd5 100644 --- a/c/common/constants.h +++ b/c/common/constants.h @@ -28,18 +28,25 @@ /* "code length of 8 is repeated" */ #define BROTLI_INITIAL_REPEATED_CODE_LENGTH 8 +/* "Large Window Brotli" */ +#define BROTLI_LARGE_MAX_DISTANCE_BITS 62U +#define BROTLI_LARGE_MIN_WBITS 10 +#define BROTLI_LARGE_MAX_WBITS 30 + /* Specification: 4. Encoding of distances */ #define BROTLI_NUM_DISTANCE_SHORT_CODES 16 #define BROTLI_MAX_NPOSTFIX 3 #define BROTLI_MAX_NDIRECT 120 #define BROTLI_MAX_DISTANCE_BITS 24U -/* BROTLI_NUM_DISTANCE_SYMBOLS == 520 */ -#define BROTLI_NUM_DISTANCE_SYMBOLS (BROTLI_NUM_DISTANCE_SHORT_CODES + \ - BROTLI_MAX_NDIRECT + \ - (BROTLI_MAX_DISTANCE_BITS << \ - (BROTLI_MAX_NPOSTFIX + 1))) -/* Distance that is guaranteed to be representable in any stream. */ +#define BROTLI_DISTANCE_ALPHABET_SIZE(NDIRECT, NPOSTFIX, MAXNBITS) ( \ + BROTLI_NUM_DISTANCE_SHORT_CODES + (NDIRECT) + \ + ((MAXNBITS) << ((NPOSTFIX) + 1))) +/* BROTLI_NUM_DISTANCE_SYMBOLS == 1128 */ +#define BROTLI_NUM_DISTANCE_SYMBOLS \ + BROTLI_DISTANCE_ALPHABET_SIZE( \ + BROTLI_MAX_NDIRECT, BROTLI_MAX_NPOSTFIX, BROTLI_LARGE_MAX_DISTANCE_BITS) #define BROTLI_MAX_DISTANCE 0x3FFFFFC +#define BROTLI_MAX_ALLOWED_DISTANCE 0x7FFFFFFC /* 7.1. Context modes and context ID lookup for literals */ /* "context IDs for literals are in the range of 0..63" */ diff --git a/c/dec/context.h b/c/common/context.h index 9402cbe..24b3eb4 100644..100755 --- a/c/dec/context.h +++ b/c/common/context.h @@ -6,110 +6,171 @@ /* Lookup table to map the previous two bytes to a context id. - There are four different context modeling modes defined here: - CONTEXT_LSB6: context id is the least significant 6 bits of the last byte, - CONTEXT_MSB6: context id is the most significant 6 bits of the last byte, - CONTEXT_UTF8: second-order context model tuned for UTF8-encoded text, - CONTEXT_SIGNED: second-order context model tuned for signed integers. - - The context id for the UTF8 context model is calculated as follows. If p1 - and p2 are the previous two bytes, we calculate the context as - - context = kContextLookup[p1] | kContextLookup[p2 + 256]. - - If the previous two bytes are ASCII characters (i.e. < 128), this will be - equivalent to - - context = 4 * context1(p1) + context2(p2), - - where context1 is based on the previous byte in the following way: - - 0 : non-ASCII control - 1 : \t, \n, \r - 2 : space - 3 : other punctuation - 4 : " ' - 5 : % - 6 : ( < [ { - 7 : ) > ] } - 8 : , ; : - 9 : . - 10 : = - 11 : number - 12 : upper-case vowel - 13 : upper-case consonant - 14 : lower-case vowel - 15 : lower-case consonant - - and context2 is based on the second last byte: - - 0 : control, space - 1 : punctuation - 2 : upper-case letter, number - 3 : lower-case letter - - If the last byte is ASCII, and the second last byte is not (in a valid UTF8 - stream it will be a continuation byte, value between 128 and 191), the - context is the same as if the second last byte was an ASCII control or space. - - If the last byte is a UTF8 lead byte (value >= 192), then the next byte will - be a continuation byte and the context id is 2 or 3 depending on the LSB of - the last byte and to a lesser extent on the second last byte if it is ASCII. - - If the last byte is a UTF8 continuation byte, the second last byte can be: - - continuation byte: the next byte is probably ASCII or lead byte (assuming - 4-byte UTF8 characters are rare) and the context id is 0 or 1. - - lead byte (192 - 207): next byte is ASCII or lead byte, context is 0 or 1 - - lead byte (208 - 255): next byte is continuation byte, context is 2 or 3 - - The possible value combinations of the previous two bytes, the range of - context ids and the type of the next byte is summarized in the table below: - - |--------\-----------------------------------------------------------------| - | \ Last byte | - | Second \---------------------------------------------------------------| - | last byte \ ASCII | cont. byte | lead byte | - | \ (0-127) | (128-191) | (192-) | - |=============|===================|=====================|==================| - | ASCII | next: ASCII/lead | not valid | next: cont. | - | (0-127) | context: 4 - 63 | | context: 2 - 3 | - |-------------|-------------------|---------------------|------------------| - | cont. byte | next: ASCII/lead | next: ASCII/lead | next: cont. | - | (128-191) | context: 4 - 63 | context: 0 - 1 | context: 2 - 3 | - |-------------|-------------------|---------------------|------------------| - | lead byte | not valid | next: ASCII/lead | not valid | - | (192-207) | | context: 0 - 1 | | - |-------------|-------------------|---------------------|------------------| - | lead byte | not valid | next: cont. | not valid | - | (208-) | | context: 2 - 3 | | - |-------------|-------------------|---------------------|------------------| - - The context id for the signed context mode is calculated as: - - context = (kContextLookup[512 + p1] << 3) | kContextLookup[512 + p2]. - - For any context modeling modes, the context ids can be calculated by |-ing - together two lookups from one table using context model dependent offsets: - - context = kContextLookup[offset1 + p1] | kContextLookup[offset2 + p2]. - - where offset1 and offset2 are dependent on the context mode. + There are four different context modeling modes defined here: + CONTEXT_LSB6: context id is the least significant 6 bits of the last byte, + CONTEXT_MSB6: context id is the most significant 6 bits of the last byte, + CONTEXT_UTF8: second-order context model tuned for UTF8-encoded text, + CONTEXT_SIGNED: second-order context model tuned for signed integers. + + If |p1| and |p2| are the previous two bytes, and |mode| is current context + mode, we calculate the context as: + + context = ContextLut(mode)[p1] | ContextLut(mode)[p2 + 256]. + + For CONTEXT_UTF8 mode, if the previous two bytes are ASCII characters + (i.e. < 128), this will be equivalent to + + context = 4 * context1(p1) + context2(p2), + + where context1 is based on the previous byte in the following way: + + 0 : non-ASCII control + 1 : \t, \n, \r + 2 : space + 3 : other punctuation + 4 : " ' + 5 : % + 6 : ( < [ { + 7 : ) > ] } + 8 : , ; : + 9 : . + 10 : = + 11 : number + 12 : upper-case vowel + 13 : upper-case consonant + 14 : lower-case vowel + 15 : lower-case consonant + + and context2 is based on the second last byte: + + 0 : control, space + 1 : punctuation + 2 : upper-case letter, number + 3 : lower-case letter + + If the last byte is ASCII, and the second last byte is not (in a valid UTF8 + stream it will be a continuation byte, value between 128 and 191), the + context is the same as if the second last byte was an ASCII control or space. + + If the last byte is a UTF8 lead byte (value >= 192), then the next byte will + be a continuation byte and the context id is 2 or 3 depending on the LSB of + the last byte and to a lesser extent on the second last byte if it is ASCII. + + If the last byte is a UTF8 continuation byte, the second last byte can be: + - continuation byte: the next byte is probably ASCII or lead byte (assuming + 4-byte UTF8 characters are rare) and the context id is 0 or 1. + - lead byte (192 - 207): next byte is ASCII or lead byte, context is 0 or 1 + - lead byte (208 - 255): next byte is continuation byte, context is 2 or 3 + + The possible value combinations of the previous two bytes, the range of + context ids and the type of the next byte is summarized in the table below: + + |--------\-----------------------------------------------------------------| + | \ Last byte | + | Second \---------------------------------------------------------------| + | last byte \ ASCII | cont. byte | lead byte | + | \ (0-127) | (128-191) | (192-) | + |=============|===================|=====================|==================| + | ASCII | next: ASCII/lead | not valid | next: cont. | + | (0-127) | context: 4 - 63 | | context: 2 - 3 | + |-------------|-------------------|---------------------|------------------| + | cont. byte | next: ASCII/lead | next: ASCII/lead | next: cont. | + | (128-191) | context: 4 - 63 | context: 0 - 1 | context: 2 - 3 | + |-------------|-------------------|---------------------|------------------| + | lead byte | not valid | next: ASCII/lead | not valid | + | (192-207) | | context: 0 - 1 | | + |-------------|-------------------|---------------------|------------------| + | lead byte | not valid | next: cont. | not valid | + | (208-) | | context: 2 - 3 | | + |-------------|-------------------|---------------------|------------------| */ -#ifndef BROTLI_DEC_CONTEXT_H_ -#define BROTLI_DEC_CONTEXT_H_ +#ifndef BROTLI_COMMON_CONTEXT_H_ +#define BROTLI_COMMON_CONTEXT_H_ #include <brotli/types.h> -enum ContextType { +typedef enum ContextType { CONTEXT_LSB6 = 0, CONTEXT_MSB6 = 1, CONTEXT_UTF8 = 2, CONTEXT_SIGNED = 3 -}; +} ContextType; /* Common context lookup table for all context modes. */ -static const uint8_t kContextLookup[1792] = { +static const uint8_t kContextLookup[2048] = { + /* CONTEXT_LSB6, last byte. */ + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, + 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, + 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, + 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, + 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, + 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, + 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, + 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, + 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, + + /* CONTEXT_LSB6, second last byte, */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + + /* CONTEXT_MSB6, last byte. */ + 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, + 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, + 8, 8, 8, 8, 9, 9, 9, 9, 10, 10, 10, 10, 11, 11, 11, 11, + 12, 12, 12, 12, 13, 13, 13, 13, 14, 14, 14, 14, 15, 15, 15, 15, + 16, 16, 16, 16, 17, 17, 17, 17, 18, 18, 18, 18, 19, 19, 19, 19, + 20, 20, 20, 20, 21, 21, 21, 21, 22, 22, 22, 22, 23, 23, 23, 23, + 24, 24, 24, 24, 25, 25, 25, 25, 26, 26, 26, 26, 27, 27, 27, 27, + 28, 28, 28, 28, 29, 29, 29, 29, 30, 30, 30, 30, 31, 31, 31, 31, + 32, 32, 32, 32, 33, 33, 33, 33, 34, 34, 34, 34, 35, 35, 35, 35, + 36, 36, 36, 36, 37, 37, 37, 37, 38, 38, 38, 38, 39, 39, 39, 39, + 40, 40, 40, 40, 41, 41, 41, 41, 42, 42, 42, 42, 43, 43, 43, 43, + 44, 44, 44, 44, 45, 45, 45, 45, 46, 46, 46, 46, 47, 47, 47, 47, + 48, 48, 48, 48, 49, 49, 49, 49, 50, 50, 50, 50, 51, 51, 51, 51, + 52, 52, 52, 52, 53, 53, 53, 53, 54, 54, 54, 54, 55, 55, 55, 55, + 56, 56, 56, 56, 57, 57, 57, 57, 58, 58, 58, 58, 59, 59, 59, 59, + 60, 60, 60, 60, 61, 61, 61, 61, 62, 62, 62, 62, 63, 63, 63, 63, + + /* CONTEXT_MSB6, second last byte, */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* CONTEXT_UTF8, last byte. */ /* ASCII range. */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 0, 0, 4, 0, 0, @@ -130,6 +191,7 @@ static const uint8_t kContextLookup[1792] = { 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, + /* CONTEXT_UTF8 second last byte. */ /* ASCII range. */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, @@ -150,23 +212,7 @@ static const uint8_t kContextLookup[1792] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - /* CONTEXT_SIGNED, second last byte. */ - 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, - 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, - 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, - 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7, + /* CONTEXT_SIGNED, last byte, same as the above values shifted by 3 bits. */ 0, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, @@ -184,68 +230,32 @@ static const uint8_t kContextLookup[1792] = { 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 56, - /* CONTEXT_LSB6, last byte. */ - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, - 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, - 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, - 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, - 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, - 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, - 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, - 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, - 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, - 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, - 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, - 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, - 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, - /* CONTEXT_MSB6, last byte. */ - 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, - 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, - 8, 8, 8, 8, 9, 9, 9, 9, 10, 10, 10, 10, 11, 11, 11, 11, - 12, 12, 12, 12, 13, 13, 13, 13, 14, 14, 14, 14, 15, 15, 15, 15, - 16, 16, 16, 16, 17, 17, 17, 17, 18, 18, 18, 18, 19, 19, 19, 19, - 20, 20, 20, 20, 21, 21, 21, 21, 22, 22, 22, 22, 23, 23, 23, 23, - 24, 24, 24, 24, 25, 25, 25, 25, 26, 26, 26, 26, 27, 27, 27, 27, - 28, 28, 28, 28, 29, 29, 29, 29, 30, 30, 30, 30, 31, 31, 31, 31, - 32, 32, 32, 32, 33, 33, 33, 33, 34, 34, 34, 34, 35, 35, 35, 35, - 36, 36, 36, 36, 37, 37, 37, 37, 38, 38, 38, 38, 39, 39, 39, 39, - 40, 40, 40, 40, 41, 41, 41, 41, 42, 42, 42, 42, 43, 43, 43, 43, - 44, 44, 44, 44, 45, 45, 45, 45, 46, 46, 46, 46, 47, 47, 47, 47, - 48, 48, 48, 48, 49, 49, 49, 49, 50, 50, 50, 50, 51, 51, 51, 51, - 52, 52, 52, 52, 53, 53, 53, 53, 54, 54, 54, 54, 55, 55, 55, 55, - 56, 56, 56, 56, 57, 57, 57, 57, 58, 58, 58, 58, 59, 59, 59, 59, - 60, 60, 60, 60, 61, 61, 61, 61, 62, 62, 62, 62, 63, 63, 63, 63, - /* CONTEXT_{M,L}SB6, second last byte, */ - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -}; -static const int kContextLookupOffsets[8] = { - /* CONTEXT_LSB6 */ - 1024, 1536, - /* CONTEXT_MSB6 */ - 1280, 1536, - /* CONTEXT_UTF8 */ - 0, 256, - /* CONTEXT_SIGNED */ - 768, 512, + /* CONTEXT_SIGNED, second last byte. */ + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7, }; -#endif /* BROTLI_DEC_CONTEXT_H_ */ +typedef const uint8_t* ContextLut; + +/* typeof(MODE) == ContextType; returns ContextLut */ +#define BROTLI_CONTEXT_LUT(MODE) (&kContextLookup[(MODE) << 9]) + +/* typeof(LUT) == ContextLut */ +#define BROTLI_CONTEXT(P1, P2, LUT) ((LUT)[P1] | ((LUT) + 256)[P2]) + +#endif /* BROTLI_COMMON_CONTEXT_H_ */ diff --git a/c/common/dictionary.bin.br b/c/common/dictionary.bin.br Binary files differnew file mode 100755 index 0000000..6a55d42 --- /dev/null +++ b/c/common/dictionary.bin.br diff --git a/c/common/platform.h b/c/common/platform.h index 804fd25..d6fd3ee 100755 --- a/c/common/platform.h +++ b/c/common/platform.h @@ -10,6 +10,7 @@ #define BROTLI_COMMON_PLATFORM_H_ #include <string.h> /* memcpy */ +#include <stdlib.h> /* malloc, free */ #include <brotli/port.h> #include <brotli/types.h> @@ -204,7 +205,7 @@ static BROTLI_INLINE uint64_t BrotliUnalignedRead64(const void* p) { static BROTLI_INLINE void BrotliUnalignedWrite64(void* p, uint64_t v) { memcpy(p, &v, sizeof v); } -#else /* BROTLI_ALIGNED_READ */ +#else /* BROTLI_ALIGNED_READ */ /* Unaligned memory access is allowed: just cast pointer to requested type. */ static BROTLI_INLINE uint16_t BrotliUnalignedRead16(const void* p) { return *(const uint16_t*)p; @@ -218,7 +219,7 @@ static BROTLI_INLINE uint64_t BrotliUnalignedRead64(const void* p) { static BROTLI_INLINE void BrotliUnalignedWrite64(void* p, uint64_t v) { *(uint64_t*)p = v; } -#endif /* BROTLI_ALIGNED_READ */ +#endif /* BROTLI_ALIGNED_READ */ #if BROTLI_LITTLE_ENDIAN /* Straight endianness. Just read / write values. */ @@ -390,6 +391,18 @@ BROTLI_MIN_MAX(size_t) BROTLI_MIN_MAX(uint32_t) BROTLI_MIN_MAX(uint8_t) (A)[(J)] = __brotli_swap_tmp; \ } +/* Default brotli_alloc_func */ +static void* BrotliDefaultAllocFunc(void* opaque, size_t size) { + BROTLI_UNUSED(opaque); + return malloc(size); +} + +/* Default brotli_free_func */ +static void BrotliDefaultFreeFunc(void* opaque, void* address) { + BROTLI_UNUSED(opaque); + free(address); +} + BROTLI_UNUSED_FUNCTION void BrotliSuppressUnusedFunctions(void) { BROTLI_UNUSED(BrotliSuppressUnusedFunctions); BROTLI_UNUSED(BrotliUnalignedRead16); @@ -413,6 +426,11 @@ BROTLI_UNUSED_FUNCTION void BrotliSuppressUnusedFunctions(void) { BROTLI_UNUSED(brotli_max_uint32_t); BROTLI_UNUSED(brotli_min_uint8_t); BROTLI_UNUSED(brotli_max_uint8_t); + BROTLI_UNUSED(BrotliDefaultAllocFunc); + BROTLI_UNUSED(BrotliDefaultFreeFunc); +#if defined(BROTLI_DEBUG) || defined(BROTLI_ENABLE_LOG) + BROTLI_UNUSED(BrotliDump); +#endif } #endif /* BROTLI_COMMON_PLATFORM_H_ */ diff --git a/c/common/transform.c b/c/common/transform.c new file mode 100755 index 0000000..53fe4f6 --- /dev/null +++ b/c/common/transform.c @@ -0,0 +1,236 @@ +/* Copyright 2013 Google Inc. All Rights Reserved. + + Distributed under MIT license. + See file LICENSE for detail or copy at https://opensource.org/licenses/MIT +*/ + +#include "./platform.h" +#include "./transform.h" + +#if defined(__cplusplus) || defined(c_plusplus) +extern "C" { +#endif + +/* RFC 7932 transforms string data */ +static const char kPrefixSuffix[217] = + "\1 \2, \10 of the \4 of \2s \1.\5 and \4 " +/* 0x _0 _2 __5 _E _3 _6 _8 _E */ + "in \1\"\4 to \2\">\1\n\2. \1]\5 for \3 a \6 " +/* 2x _3_ _5 _A_ _D_ _F _2 _4 _A _E */ + "that \1\'\6 with \6 from \4 by \1(\6. T" +/* 4x _5_ _7 _E _5 _A _C */ + "he \4 on \4 as \4 is \4ing \2\n\t\1:\3ed " +/* 6x _3 _8 _D _2 _7_ _ _A _C */ + "\2=\"\4 at \3ly \1,\2=\'\5.com/\7. This \5" +/* 8x _0 _ _3 _8 _C _E _ _1 _7 _F */ + " not \3er \3al \4ful \4ive \5less \4es" +/* Ax _5 _9 _D _2 _7 _D */ + "t \4ize \2\xc2\xa0\4ous \5 the \2e \0"; +/* Cx _2 _7___ ___ _A _F _5 _8 */ + +static const uint16_t kPrefixSuffixMap[50] = { + 0x00, 0x02, 0x05, 0x0E, 0x13, 0x16, 0x18, 0x1E, 0x23, 0x25, + 0x2A, 0x2D, 0x2F, 0x32, 0x34, 0x3A, 0x3E, 0x45, 0x47, 0x4E, + 0x55, 0x5A, 0x5C, 0x63, 0x68, 0x6D, 0x72, 0x77, 0x7A, 0x7C, + 0x80, 0x83, 0x88, 0x8C, 0x8E, 0x91, 0x97, 0x9F, 0xA5, 0xA9, + 0xAD, 0xB2, 0xB7, 0xBD, 0xC2, 0xC7, 0xCA, 0xCF, 0xD5, 0xD8 +}; + +/* RFC 7932 transforms */ +static const uint8_t kTransformsData[] = { + 49, BROTLI_TRANSFORM_IDENTITY, 49, + 49, BROTLI_TRANSFORM_IDENTITY, 0, + 0, BROTLI_TRANSFORM_IDENTITY, 0, + 49, BROTLI_TRANSFORM_OMIT_FIRST_1, 49, + 49, BROTLI_TRANSFORM_UPPERCASE_FIRST, 0, + 49, BROTLI_TRANSFORM_IDENTITY, 47, + 0, BROTLI_TRANSFORM_IDENTITY, 49, + 4, BROTLI_TRANSFORM_IDENTITY, 0, + 49, BROTLI_TRANSFORM_IDENTITY, 3, + 49, BROTLI_TRANSFORM_UPPERCASE_FIRST, 49, + 49, BROTLI_TRANSFORM_IDENTITY, 6, + 49, BROTLI_TRANSFORM_OMIT_FIRST_2, 49, + 49, BROTLI_TRANSFORM_OMIT_LAST_1, 49, + 1, BROTLI_TRANSFORM_IDENTITY, 0, + 49, BROTLI_TRANSFORM_IDENTITY, 1, + 0, BROTLI_TRANSFORM_UPPERCASE_FIRST, 0, + 49, BROTLI_TRANSFORM_IDENTITY, 7, + 49, BROTLI_TRANSFORM_IDENTITY, 9, + 48, BROTLI_TRANSFORM_IDENTITY, 0, + 49, BROTLI_TRANSFORM_IDENTITY, 8, + 49, BROTLI_TRANSFORM_IDENTITY, 5, + 49, BROTLI_TRANSFORM_IDENTITY, 10, + 49, BROTLI_TRANSFORM_IDENTITY, 11, + 49, BROTLI_TRANSFORM_OMIT_LAST_3, 49, + 49, BROTLI_TRANSFORM_IDENTITY, 13, + 49, BROTLI_TRANSFORM_IDENTITY, 14, + 49, BROTLI_TRANSFORM_OMIT_FIRST_3, 49, + 49, BROTLI_TRANSFORM_OMIT_LAST_2, 49, + 49, BROTLI_TRANSFORM_IDENTITY, 15, + 49, BROTLI_TRANSFORM_IDENTITY, 16, + 0, BROTLI_TRANSFORM_UPPERCASE_FIRST, 49, + 49, BROTLI_TRANSFORM_IDENTITY, 12, + 5, BROTLI_TRANSFORM_IDENTITY, 49, + 0, BROTLI_TRANSFORM_IDENTITY, 1, + 49, BROTLI_TRANSFORM_OMIT_FIRST_4, 49, + 49, BROTLI_TRANSFORM_IDENTITY, 18, + 49, BROTLI_TRANSFORM_IDENTITY, 17, + 49, BROTLI_TRANSFORM_IDENTITY, 19, + 49, BROTLI_TRANSFORM_IDENTITY, 20, + 49, BROTLI_TRANSFORM_OMIT_FIRST_5, 49, + 49, BROTLI_TRANSFORM_OMIT_FIRST_6, 49, + 47, BROTLI_TRANSFORM_IDENTITY, 49, + 49, BROTLI_TRANSFORM_OMIT_LAST_4, 49, + 49, BROTLI_TRANSFORM_IDENTITY, 22, + 49, BROTLI_TRANSFORM_UPPERCASE_ALL, 49, + 49, BROTLI_TRANSFORM_IDENTITY, 23, + 49, BROTLI_TRANSFORM_IDENTITY, 24, + 49, BROTLI_TRANSFORM_IDENTITY, 25, + 49, BROTLI_TRANSFORM_OMIT_LAST_7, 49, + 49, BROTLI_TRANSFORM_OMIT_LAST_1, 26, + 49, BROTLI_TRANSFORM_IDENTITY, 27, + 49, BROTLI_TRANSFORM_IDENTITY, 28, + 0, BROTLI_TRANSFORM_IDENTITY, 12, + 49, BROTLI_TRANSFORM_IDENTITY, 29, + 49, BROTLI_TRANSFORM_OMIT_FIRST_9, 49, + 49, BROTLI_TRANSFORM_OMIT_FIRST_7, 49, + 49, BROTLI_TRANSFORM_OMIT_LAST_6, 49, + 49, BROTLI_TRANSFORM_IDENTITY, 21, + 49, BROTLI_TRANSFORM_UPPERCASE_FIRST, 1, + 49, BROTLI_TRANSFORM_OMIT_LAST_8, 49, + 49, BROTLI_TRANSFORM_IDENTITY, 31, + 49, BROTLI_TRANSFORM_IDENTITY, 32, + 47, BROTLI_TRANSFORM_IDENTITY, 3, + 49, BROTLI_TRANSFORM_OMIT_LAST_5, 49, + 49, BROTLI_TRANSFORM_OMIT_LAST_9, 49, + 0, BROTLI_TRANSFORM_UPPERCASE_FIRST, 1, + 49, BROTLI_TRANSFORM_UPPERCASE_FIRST, 8, + 5, BROTLI_TRANSFORM_IDENTITY, 21, + 49, BROTLI_TRANSFORM_UPPERCASE_ALL, 0, + 49, BROTLI_TRANSFORM_UPPERCASE_FIRST, 10, + 49, BROTLI_TRANSFORM_IDENTITY, 30, + 0, BROTLI_TRANSFORM_IDENTITY, 5, + 35, BROTLI_TRANSFORM_IDENTITY, 49, + 47, BROTLI_TRANSFORM_IDENTITY, 2, + 49, BROTLI_TRANSFORM_UPPERCASE_FIRST, 17, + 49, BROTLI_TRANSFORM_IDENTITY, 36, + 49, BROTLI_TRANSFORM_IDENTITY, 33, + 5, BROTLI_TRANSFORM_IDENTITY, 0, + 49, BROTLI_TRANSFORM_UPPERCASE_FIRST, 21, + 49, BROTLI_TRANSFORM_UPPERCASE_FIRST, 5, + 49, BROTLI_TRANSFORM_IDENTITY, 37, + 0, BROTLI_TRANSFORM_IDENTITY, 30, + 49, BROTLI_TRANSFORM_IDENTITY, 38, + 0, BROTLI_TRANSFORM_UPPERCASE_ALL, 0, + 49, BROTLI_TRANSFORM_IDENTITY, 39, + 0, BROTLI_TRANSFORM_UPPERCASE_ALL, 49, + 49, BROTLI_TRANSFORM_IDENTITY, 34, + 49, BROTLI_TRANSFORM_UPPERCASE_ALL, 8, + 49, BROTLI_TRANSFORM_UPPERCASE_FIRST, 12, + 0, BROTLI_TRANSFORM_IDENTITY, 21, + 49, BROTLI_TRANSFORM_IDENTITY, 40, + 0, BROTLI_TRANSFORM_UPPERCASE_FIRST, 12, + 49, BROTLI_TRANSFORM_IDENTITY, 41, + 49, BROTLI_TRANSFORM_IDENTITY, 42, + 49, BROTLI_TRANSFORM_UPPERCASE_ALL, 17, + 49, BROTLI_TRANSFORM_IDENTITY, 43, + 0, BROTLI_TRANSFORM_UPPERCASE_FIRST, 5, + 49, BROTLI_TRANSFORM_UPPERCASE_ALL, 10, + 0, BROTLI_TRANSFORM_IDENTITY, 34, + 49, BROTLI_TRANSFORM_UPPERCASE_FIRST, 33, + 49, BROTLI_TRANSFORM_IDENTITY, 44, + 49, BROTLI_TRANSFORM_UPPERCASE_ALL, 5, + 45, BROTLI_TRANSFORM_IDENTITY, 49, + 0, BROTLI_TRANSFORM_IDENTITY, 33, + 49, BROTLI_TRANSFORM_UPPERCASE_FIRST, 30, + 49, BROTLI_TRANSFORM_UPPERCASE_ALL, 30, + 49, BROTLI_TRANSFORM_IDENTITY, 46, + 49, BROTLI_TRANSFORM_UPPERCASE_ALL, 1, + 49, BROTLI_TRANSFORM_UPPERCASE_FIRST, 34, + 0, BROTLI_TRANSFORM_UPPERCASE_FIRST, 33, + 0, BROTLI_TRANSFORM_UPPERCASE_ALL, 30, + 0, BROTLI_TRANSFORM_UPPERCASE_ALL, 1, + 49, BROTLI_TRANSFORM_UPPERCASE_ALL, 33, + 49, BROTLI_TRANSFORM_UPPERCASE_ALL, 21, + 49, BROTLI_TRANSFORM_UPPERCASE_ALL, 12, + 0, BROTLI_TRANSFORM_UPPERCASE_ALL, 5, + 49, BROTLI_TRANSFORM_UPPERCASE_ALL, 34, + 0, BROTLI_TRANSFORM_UPPERCASE_ALL, 12, + 0, BROTLI_TRANSFORM_UPPERCASE_FIRST, 30, + 0, BROTLI_TRANSFORM_UPPERCASE_ALL, 34, + 0, BROTLI_TRANSFORM_UPPERCASE_FIRST, 34, +}; + +static BrotliTransforms kBrotliTransforms = { + sizeof(kPrefixSuffix), + (const uint8_t*)kPrefixSuffix, + kPrefixSuffixMap, + sizeof(kTransformsData) / (3 * sizeof(kTransformsData[0])), + kTransformsData, + {0, 12, 27, 23, 42, 63, 56, 48, 59, 64} +}; + +const BrotliTransforms* BrotliGetTransforms(void) { + return &kBrotliTransforms; +} + +static int ToUpperCase(uint8_t* p) { + if (p[0] < 0xC0) { + if (p[0] >= 'a' && p[0] <= 'z') { + p[0] ^= 32; + } + return 1; + } + /* An overly simplified uppercasing model for UTF-8. */ + if (p[0] < 0xE0) { + p[1] ^= 32; + return 2; + } + /* An arbitrary transform for three byte characters. */ + p[2] ^= 5; + return 3; +} + +int BrotliTransformDictionaryWord(uint8_t* dst, const uint8_t* word, int len, + const BrotliTransforms* BROTLI_RESTRICT transforms, int transfom_idx) { + int idx = 0; + const uint8_t* prefix = BROTLI_TRANSFORM_PREFIX(transforms, transfom_idx); + uint8_t type = BROTLI_TRANSFORM_TYPE(transforms, transfom_idx); + const uint8_t* suffix = BROTLI_TRANSFORM_SUFFIX(transforms, transfom_idx); + { + int prefix_len = *prefix++; + while (prefix_len--) { dst[idx++] = *prefix++; } + } + { + const int t = type; + int i = 0; + if (t <= BROTLI_TRANSFORM_OMIT_LAST_9) { + len -= t; + } else if (t >= BROTLI_TRANSFORM_OMIT_FIRST_1 + && t <= BROTLI_TRANSFORM_OMIT_FIRST_9) { + int skip = t - (BROTLI_TRANSFORM_OMIT_FIRST_1 - 1); + word += skip; + len -= skip; + } + while (i < len) { dst[idx++] = word[i++]; } + if (t == BROTLI_TRANSFORM_UPPERCASE_FIRST) { + ToUpperCase(&dst[idx - len]); + } else if (t == BROTLI_TRANSFORM_UPPERCASE_ALL) { + uint8_t* uppercase = &dst[idx - len]; + while (len > 0) { + int step = ToUpperCase(uppercase); + uppercase += step; + len -= step; + } + } + } + { + int suffix_len = *suffix++; + while (suffix_len--) { dst[idx++] = *suffix++; } + return idx; + } +} + +#if defined(__cplusplus) || defined(c_plusplus) +} /* extern "C" */ +#endif diff --git a/c/common/transform.h b/c/common/transform.h new file mode 100755 index 0000000..b279c04 --- /dev/null +++ b/c/common/transform.h @@ -0,0 +1,80 @@ +/* transforms is a part of ABI, nut not API. + + It means that there are some functions that are supposed to be in "common" + library, but header itself is not placed into include/brotli. This way, + aforementioned functions will be available only to brotli internals. + */ + +#ifndef BROTLI_COMMON_TRANSFORM_H_ +#define BROTLI_COMMON_TRANSFORM_H_ + +#include <brotli/port.h> +#include <brotli/types.h> + +#if defined(__cplusplus) || defined(c_plusplus) +extern "C" { +#endif + +enum BrotliWordTransformType { + BROTLI_TRANSFORM_IDENTITY = 0, + BROTLI_TRANSFORM_OMIT_LAST_1 = 1, + BROTLI_TRANSFORM_OMIT_LAST_2 = 2, + BROTLI_TRANSFORM_OMIT_LAST_3 = 3, + BROTLI_TRANSFORM_OMIT_LAST_4 = 4, + BROTLI_TRANSFORM_OMIT_LAST_5 = 5, + BROTLI_TRANSFORM_OMIT_LAST_6 = 6, + BROTLI_TRANSFORM_OMIT_LAST_7 = 7, + BROTLI_TRANSFORM_OMIT_LAST_8 = 8, + BROTLI_TRANSFORM_OMIT_LAST_9 = 9, + BROTLI_TRANSFORM_UPPERCASE_FIRST = 10, + BROTLI_TRANSFORM_UPPERCASE_ALL = 11, + BROTLI_TRANSFORM_OMIT_FIRST_1 = 12, + BROTLI_TRANSFORM_OMIT_FIRST_2 = 13, + BROTLI_TRANSFORM_OMIT_FIRST_3 = 14, + BROTLI_TRANSFORM_OMIT_FIRST_4 = 15, + BROTLI_TRANSFORM_OMIT_FIRST_5 = 16, + BROTLI_TRANSFORM_OMIT_FIRST_6 = 17, + BROTLI_TRANSFORM_OMIT_FIRST_7 = 18, + BROTLI_TRANSFORM_OMIT_FIRST_8 = 19, + BROTLI_TRANSFORM_OMIT_FIRST_9 = 20, + BROTLI_NUM_TRANSFORM_TYPES /* Counts transforms, not a transform itself. */ +}; + +#define BROTLI_TRANSFORMS_MAX_CUT_OFF BROTLI_TRANSFORM_OMIT_LAST_9 + +typedef struct BrotliTransforms { + uint16_t prefix_suffix_size; + /* Last character must be null, so prefix_suffix_size must be at least 1. */ + const uint8_t* prefix_suffix; + const uint16_t* prefix_suffix_map; + uint32_t num_transforms; + /* Each entry is a [prefix_id, transform, suffix_id] triplet. */ + const uint8_t* transforms; + /* Indices of transforms like ["", BROTLI_TRANSFORM_OMIT_LAST_#, ""]. + 0-th element corresponds to ["", BROTLI_TRANSFORM_IDENTITY, ""]. + -1, if cut-off transform does not exist. */ + int16_t cutOffTransforms[BROTLI_TRANSFORMS_MAX_CUT_OFF + 1]; +} BrotliTransforms; + +/* T is BrotliTransforms*; result is uint8_t. */ +#define BROTLI_TRANSFORM_PREFIX_ID(T, I) ((T)->transforms[((I) * 3) + 0]) +#define BROTLI_TRANSFORM_TYPE(T, I) ((T)->transforms[((I) * 3) + 1]) +#define BROTLI_TRANSFORM_SUFFIX_ID(T, I) ((T)->transforms[((I) * 3) + 2]) + +/* T is BrotliTransforms*; result is const uint8_t*. */ +#define BROTLI_TRANSFORM_PREFIX(T, I) (&(T)->prefix_suffix[ \ + (T)->prefix_suffix_map[BROTLI_TRANSFORM_PREFIX_ID(T, I)]]) +#define BROTLI_TRANSFORM_SUFFIX(T, I) (&(T)->prefix_suffix[ \ + (T)->prefix_suffix_map[BROTLI_TRANSFORM_SUFFIX_ID(T, I)]]) + +BROTLI_COMMON_API const BrotliTransforms* BrotliGetTransforms(void); + +BROTLI_COMMON_API int BrotliTransformDictionaryWord( + uint8_t* dst, const uint8_t* word, int len, + const BrotliTransforms* transforms, int transform_idx); + +#if defined(__cplusplus) || defined(c_plusplus) +} /* extern "C" */ +#endif + +#endif /* BROTLI_COMMON_TRANSFORM_H_ */ diff --git a/c/dec/bit_reader.h b/c/dec/bit_reader.h index 21c59d7..39e4873 100644 --- a/c/dec/bit_reader.h +++ b/c/dec/bit_reader.h @@ -20,7 +20,7 @@ extern "C" { #define BROTLI_SHORT_FILL_BIT_WINDOW_READ (sizeof(brotli_reg_t) >> 1) -static const uint32_t kBitMask[33] = { 0x0000, +static const uint32_t kBitMask[33] = { 0x00000000, 0x00000001, 0x00000003, 0x00000007, 0x0000000F, 0x0000001F, 0x0000003F, 0x0000007F, 0x000000FF, 0x000001FF, 0x000003FF, 0x000007FF, 0x00000FFF, @@ -35,7 +35,7 @@ static BROTLI_INLINE uint32_t BitMask(uint32_t n) { if (BROTLI_IS_CONSTANT(n) || BROTLI_HAS_UBFX) { /* Masking with this expression turns to a single "Unsigned Bit Field Extract" UBFX instruction on ARM. */ - return ~((0xffffffffU) << n); + return ~((0xFFFFFFFFu) << n); } else { return kBitMask[n]; } @@ -58,8 +58,9 @@ typedef struct { /* Initializes the BrotliBitReader fields. */ BROTLI_INTERNAL void BrotliInitBitReader(BrotliBitReader* const br); -/* Ensures that accumulator is not empty. May consume one byte of input. - Returns 0 if data is required but there is no input available. +/* Ensures that accumulator is not empty. + May consume up to sizeof(brotli_reg_t) - 1 bytes of input. + Returns BROTLI_FALSE if data is required but there is no input available. For BROTLI_ALIGNED_READ this function also prepares bit reader for aligned reading. */ BROTLI_INTERNAL BROTLI_BOOL BrotliWarmupBitReader(BrotliBitReader* const br); @@ -98,9 +99,9 @@ static BROTLI_INLINE BROTLI_BOOL BrotliCheckInputAmount( return TO_BROTLI_BOOL(br->avail_in >= num); } -/* Guarantees that there are at least n_bits + 1 bits in accumulator. +/* Guarantees that there are at least |n_bits| + 1 bits in accumulator. Precondition: accumulator contains at least 1 bit. - n_bits should be in the range [1..24] for regular build. For portable + |n_bits| should be in the range [1..24] for regular build. For portable non-64-bit little-endian build only 16 bits are safe to request. */ static BROTLI_INLINE void BrotliFillBitWindow( BrotliBitReader* const br, uint32_t n_bits) { @@ -158,7 +159,8 @@ static BROTLI_INLINE void BrotliFillBitWindow16(BrotliBitReader* const br) { BrotliFillBitWindow(br, 17); } -/* Pulls one byte of input to accumulator. */ +/* Tries to pull one byte of input to accumulator. + Returns BROTLI_FALSE if there is no input available. */ static BROTLI_INLINE BROTLI_BOOL BrotliPullByte(BrotliBitReader* const br) { if (br->avail_in == 0) { return BROTLI_FALSE; @@ -190,15 +192,16 @@ static BROTLI_INLINE uint32_t BrotliGet16BitsUnmasked( return (uint32_t)BrotliGetBitsUnmasked(br); } -/* Returns the specified number of bits from |br| without advancing bit pos. */ +/* Returns the specified number of bits from |br| without advancing bit + position. */ static BROTLI_INLINE uint32_t BrotliGetBits( BrotliBitReader* const br, uint32_t n_bits) { BrotliFillBitWindow(br, n_bits); return (uint32_t)BrotliGetBitsUnmasked(br) & BitMask(n_bits); } -/* Tries to peek the specified amount of bits. Returns 0, if there is not - enough input. */ +/* Tries to peek the specified amount of bits. Returns BROTLI_FALSE, if there + is not enough input. */ static BROTLI_INLINE BROTLI_BOOL BrotliSafeGetBits( BrotliBitReader* const br, uint32_t n_bits, uint32_t* val) { while (BrotliGetAvailableBits(br) < n_bits) { @@ -210,7 +213,7 @@ static BROTLI_INLINE BROTLI_BOOL BrotliSafeGetBits( return BROTLI_TRUE; } -/* Advances the bit pos by n_bits. */ +/* Advances the bit pos by |n_bits|. */ static BROTLI_INLINE void BrotliDropBits( BrotliBitReader* const br, uint32_t n_bits) { br->bit_pos_ += n_bits; @@ -230,7 +233,7 @@ static BROTLI_INLINE void BrotliBitReaderUnload(BrotliBitReader* br) { } /* Reads the specified number of bits from |br| and advances the bit pos. - Precondition: accumulator MUST contain at least n_bits. */ + Precondition: accumulator MUST contain at least |n_bits|. */ static BROTLI_INLINE void BrotliTakeBits( BrotliBitReader* const br, uint32_t n_bits, uint32_t* val) { *val = (uint32_t)BrotliGetBitsUnmasked(br) & BitMask(n_bits); @@ -259,8 +262,8 @@ static BROTLI_INLINE uint32_t BrotliReadBits( } } -/* Tries to read the specified amount of bits. Returns 0, if there is not - enough input. n_bits MUST be positive. */ +/* Tries to read the specified amount of bits. Returns BROTLI_FALSE, if there + is not enough input. |n_bits| MUST be positive. */ static BROTLI_INLINE BROTLI_BOOL BrotliSafeReadBits( BrotliBitReader* const br, uint32_t n_bits, uint32_t* val) { while (BrotliGetAvailableBits(br) < n_bits) { @@ -284,7 +287,7 @@ static BROTLI_INLINE BROTLI_BOOL BrotliJumpToByteBoundary(BrotliBitReader* br) { } /* Copies remaining input bytes stored in the bit reader to the output. Value - num may not be larger than BrotliGetRemainingBytes. The bit reader must be + |num| may not be larger than BrotliGetRemainingBytes. The bit reader must be warmed up again after this. */ static BROTLI_INLINE void BrotliCopyBytes(uint8_t* dest, BrotliBitReader* br, size_t num) { diff --git a/c/dec/decode.c b/c/dec/decode.c index 846557a..630edeb 100644 --- a/c/dec/decode.c +++ b/c/dec/decode.c @@ -14,15 +14,15 @@ #include <string.h> /* memcpy, memset */ #include "../common/constants.h" +#include "../common/context.h" #include "../common/dictionary.h" #include "../common/platform.h" +#include "../common/transform.h" #include "../common/version.h" #include "./bit_reader.h" -#include "./context.h" #include "./huffman.h" #include "./prefix.h" #include "./state.h" -#include "./transform.h" #if defined(__cplusplus) || defined(c_plusplus) extern "C" { @@ -37,7 +37,7 @@ extern "C" { (unsigned long)(idx), (unsigned long)array_name[idx])) #define HUFFMAN_TABLE_BITS 8U -#define HUFFMAN_TABLE_MASK 0xff +#define HUFFMAN_TABLE_MASK 0xFF /* We need the slack region for the following reasons: - doing up to two 16-byte copies for fast backward copying @@ -59,11 +59,16 @@ static const uint8_t kCodeLengthPrefixValue[16] = { BROTLI_BOOL BrotliDecoderSetParameter( BrotliDecoderState* state, BrotliDecoderParameter p, uint32_t value) { + if (state->state != BROTLI_STATE_UNINITED) return BROTLI_FALSE; switch (p) { case BROTLI_DECODER_PARAM_DISABLE_RING_BUFFER_REALLOCATION: state->canny_ringbuffer_allocation = !!value ? 0 : 1; return BROTLI_TRUE; + case BROTLI_DECODER_PARAM_LARGE_WINDOW: + state->large_window = TO_BROTLI_BOOL(!!value); + return BROTLI_TRUE; + default: return BROTLI_FALSE; } } @@ -80,8 +85,15 @@ BrotliDecoderState* BrotliDecoderCreateInstance( BROTLI_DUMP(); return 0; } - BrotliDecoderStateInitWithCustomAllocators( - state, alloc_func, free_func, opaque); + if (!BrotliDecoderStateInit(state, alloc_func, free_func, opaque)) { + BROTLI_DUMP(); + if (!alloc_func && !free_func) { + free(state); + } else if (alloc_func && free_func) { + free_func(opaque, state); + } + return 0; + } return state; } @@ -97,39 +109,61 @@ void BrotliDecoderDestroyInstance(BrotliDecoderState* state) { } } -/* Saves error code and converts it to BrotliDecoderResult */ +/* Saves error code and converts it to BrotliDecoderResult. */ static BROTLI_NOINLINE BrotliDecoderResult SaveErrorCode( BrotliDecoderState* s, BrotliDecoderErrorCode e) { s->error_code = (int)e; switch (e) { case BROTLI_DECODER_SUCCESS: return BROTLI_DECODER_RESULT_SUCCESS; + case BROTLI_DECODER_NEEDS_MORE_INPUT: return BROTLI_DECODER_RESULT_NEEDS_MORE_INPUT; + case BROTLI_DECODER_NEEDS_MORE_OUTPUT: return BROTLI_DECODER_RESULT_NEEDS_MORE_OUTPUT; + default: return BROTLI_DECODER_RESULT_ERROR; } } -/* Decodes a number in the range [9..24], by reading 1 - 7 bits. - Precondition: bit-reader accumulator has at least 7 bits. */ -static uint32_t DecodeWindowBits(BrotliBitReader* br) { +/* Decodes WBITS by reading 1 - 7 bits, or 0x11 for "Large Window Brotli". + Precondition: bit-reader accumulator has at least 8 bits. */ +static BrotliDecoderErrorCode DecodeWindowBits(BrotliDecoderState* s, + BrotliBitReader* br) { uint32_t n; + BROTLI_BOOL large_window = s->large_window; + s->large_window = BROTLI_FALSE; BrotliTakeBits(br, 1, &n); if (n == 0) { - return 16; + s->window_bits = 16; + return BROTLI_DECODER_SUCCESS; } BrotliTakeBits(br, 3, &n); if (n != 0) { - return 17 + n; + s->window_bits = 17 + n; + return BROTLI_DECODER_SUCCESS; } BrotliTakeBits(br, 3, &n); + if (n == 1) { + if (large_window) { + BrotliTakeBits(br, 1, &n); + if (n == 1) { + return BROTLI_FAILURE(BROTLI_DECODER_ERROR_FORMAT_WINDOW_BITS); + } + s->large_window = BROTLI_TRUE; + return BROTLI_DECODER_SUCCESS; + } else { + return BROTLI_FAILURE(BROTLI_DECODER_ERROR_FORMAT_WINDOW_BITS); + } + } if (n != 0) { - return 8 + n; + s->window_bits = 8 + n; + return BROTLI_DECODER_SUCCESS; } - return 17; + s->window_bits = 17; + return BROTLI_DECODER_SUCCESS; } static BROTLI_INLINE void memmove16(uint8_t* dst, uint8_t* src) { @@ -342,7 +376,7 @@ static BROTLI_NOINLINE BROTLI_BOOL SafeDecodeSymbol( *result = table->value; return BROTLI_TRUE; } - return BROTLI_FALSE; /* No valid bits at all. */ + return BROTLI_FALSE; /* No valid bits at all. */ } val = (uint32_t)BrotliGetBitsUnmasked(br); table += val & HUFFMAN_TABLE_MASK; @@ -352,11 +386,11 @@ static BROTLI_NOINLINE BROTLI_BOOL SafeDecodeSymbol( *result = table->value; return BROTLI_TRUE; } else { - return BROTLI_FALSE; /* Not enough bits for the first level. */ + return BROTLI_FALSE; /* Not enough bits for the first level. */ } } if (available_bits <= HUFFMAN_TABLE_BITS) { - return BROTLI_FALSE; /* Not enough bits to move to the second level. */ + return BROTLI_FALSE; /* Not enough bits to move to the second level. */ } /* Speculatively drop HUFFMAN_TABLE_BITS. */ @@ -364,7 +398,7 @@ static BROTLI_NOINLINE BROTLI_BOOL SafeDecodeSymbol( available_bits -= HUFFMAN_TABLE_BITS; table += table->value + val; if (available_bits < table->bits) { - return BROTLI_FALSE; /* Not enough bits for the second level. */ + return BROTLI_FALSE; /* Not enough bits for the second level. */ } BrotliDropBits(br, HUFFMAN_TABLE_BITS + table->bits); @@ -428,12 +462,11 @@ static BROTLI_INLINE uint32_t Log2Floor(uint32_t x) { } /* Reads (s->symbol + 1) symbols. - Totally 1..4 symbols are read, 1..10 bits each. - The list of symbols MUST NOT contain duplicates. - */ + Totally 1..4 symbols are read, 1..11 bits each. + The list of symbols MUST NOT contain duplicates. */ static BrotliDecoderErrorCode ReadSimpleHuffmanSymbols( - uint32_t alphabet_size, BrotliDecoderState* s) { - /* max_bits == 1..10; symbol == 0..3; 1..40 bits will be read. */ + uint32_t alphabet_size, uint32_t max_symbol, BrotliDecoderState* s) { + /* max_bits == 1..11; symbol == 0..3; 1..44 bits will be read. */ BrotliBitReader* br = &s->br; uint32_t max_bits = Log2Floor(alphabet_size - 1); uint32_t i = s->sub_loop_counter; @@ -445,7 +478,7 @@ static BrotliDecoderErrorCode ReadSimpleHuffmanSymbols( s->substate_huffman = BROTLI_STATE_HUFFMAN_SIMPLE_READ; return BROTLI_DECODER_NEEDS_MORE_INPUT; } - if (v >= alphabet_size) { + if (v >= max_symbol) { return BROTLI_FAILURE(BROTLI_DECODER_ERROR_FORMAT_SIMPLE_HUFFMAN_ALPHABET); } @@ -471,14 +504,13 @@ static BrotliDecoderErrorCode ReadSimpleHuffmanSymbols( B) remember code length (if it is not 0) C) extend corresponding index-chain D) reduce the Huffman space - E) update the histogram - */ + E) update the histogram */ static BROTLI_INLINE void ProcessSingleCodeLength(uint32_t code_len, uint32_t* symbol, uint32_t* repeat, uint32_t* space, uint32_t* prev_code_len, uint16_t* symbol_lists, uint16_t* code_length_histo, int* next_symbol) { *repeat = 0; - if (code_len != 0) { /* code_len == 1..15 */ + if (code_len != 0) { /* code_len == 1..15 */ symbol_lists[next_symbol[code_len]] = (uint16_t)(*symbol); next_symbol[code_len] = (int)(*symbol); *prev_code_len = code_len; @@ -498,8 +530,7 @@ static BROTLI_INLINE void ProcessSingleCodeLength(uint32_t code_len, D) For each symbol do the same operations as in ProcessSingleCodeLength PRECONDITION: code_len == BROTLI_REPEAT_PREVIOUS_CODE_LENGTH or - code_len == BROTLI_REPEAT_ZERO_CODE_LENGTH - */ + code_len == BROTLI_REPEAT_ZERO_CODE_LENGTH */ static BROTLI_INLINE void ProcessRepeatedCodeLength(uint32_t code_len, uint32_t repeat_delta, uint32_t alphabet_size, uint32_t* symbol, uint32_t* repeat, uint32_t* space, uint32_t* prev_code_len, @@ -576,12 +607,12 @@ static BrotliDecoderErrorCode ReadSymbolCodeLengths( BrotliFillBitWindow16(br); p += BrotliGetBitsUnmasked(br) & BitMask(BROTLI_HUFFMAN_MAX_CODE_LENGTH_CODE_LENGTH); - BrotliDropBits(br, p->bits); /* Use 1..5 bits */ + BrotliDropBits(br, p->bits); /* Use 1..5 bits. */ code_len = p->value; /* code_len == 0..17 */ if (code_len < BROTLI_REPEAT_PREVIOUS_CODE_LENGTH) { ProcessSingleCodeLength(code_len, &symbol, &repeat, &space, &prev_code_len, symbol_lists, code_length_histo, next_symbol); - } else { /* code_len == 16..17, extra_bits == 2..3 */ + } else { /* code_len == 16..17, extra_bits == 2..3 */ uint32_t extra_bits = (code_len == BROTLI_REPEAT_PREVIOUS_CODE_LENGTH) ? 2 : 3; uint32_t repeat_delta = @@ -616,13 +647,13 @@ static BrotliDecoderErrorCode SafeReadSymbolCodeLengths( get_byte = BROTLI_TRUE; continue; } - code_len = p->value; /* code_len == 0..17 */ + code_len = p->value; /* code_len == 0..17 */ if (code_len < BROTLI_REPEAT_PREVIOUS_CODE_LENGTH) { BrotliDropBits(br, p->bits); ProcessSingleCodeLength(code_len, &s->symbol, &s->repeat, &s->space, &s->prev_code_len, s->symbol_lists, s->code_length_histo, s->next_symbol); - } else { /* code_len == 16..17, extra_bits == 2..3 */ + } else { /* code_len == 16..17, extra_bits == 2..3 */ uint32_t extra_bits = code_len - 14U; uint32_t repeat_delta = (bits >> p->bits) & BitMask(extra_bits); if (available_bits < p->bits + extra_bits) { @@ -674,7 +705,7 @@ static BrotliDecoderErrorCode ReadCodeLengthCodeLengths(BrotliDecoderState* s) { ++num_codes; ++s->code_length_histo[v]; if (space - 1U >= 32U) { - /* space is 0 or wrapped around */ + /* space is 0 or wrapped around. */ break; } } @@ -689,22 +720,22 @@ static BrotliDecoderErrorCode ReadCodeLengthCodeLengths(BrotliDecoderState* s) { There are 2 scenarios: A) Huffman code contains only few symbols (1..4). Those symbols are read directly; their code lengths are defined by the number of symbols. - For this scenario 4 - 45 bits will be read. + For this scenario 4 - 49 bits will be read. B) 2-phase decoding: B.1) Small Huffman table is decoded; it is specified with code lengths encoded with predefined entropy code. 32 - 74 bits are used. B.2) Decoded table is used to decode code lengths of symbols in resulting - Huffman table. In worst case 3520 bits are read. -*/ + Huffman table. In worst case 3520 bits are read. */ static BrotliDecoderErrorCode ReadHuffmanCode(uint32_t alphabet_size, + uint32_t max_symbol, HuffmanCode* table, uint32_t* opt_table_size, BrotliDecoderState* s) { BrotliBitReader* br = &s->br; /* Unnecessary masking, but might be good for safety. */ - alphabet_size &= 0x3ff; - /* State machine */ + alphabet_size &= 0x7FF; + /* State machine. */ for (;;) { switch (s->substate_huffman) { case BROTLI_STATE_HUFFMAN_NONE: @@ -717,7 +748,7 @@ static BrotliDecoderErrorCode ReadHuffmanCode(uint32_t alphabet_size, 0 for no skipping, 2 skips 2 code lengths, 3 skips 3 code lengths */ if (s->sub_loop_counter != 1) { s->space = 32; - s->repeat = 0; /* num_codes */ + s->repeat = 0; /* num_codes */ memset(&s->code_length_histo[0], 0, sizeof(s->code_length_histo[0]) * (BROTLI_HUFFMAN_MAX_CODE_LENGTH_CODE_LENGTH + 1)); memset(&s->code_length_code_lengths[0], 0, @@ -729,20 +760,22 @@ static BrotliDecoderErrorCode ReadHuffmanCode(uint32_t alphabet_size, case BROTLI_STATE_HUFFMAN_SIMPLE_SIZE: /* Read symbols, codes & code lengths directly. */ - if (!BrotliSafeReadBits(br, 2, &s->symbol)) { /* num_symbols */ + if (!BrotliSafeReadBits(br, 2, &s->symbol)) { /* num_symbols */ s->substate_huffman = BROTLI_STATE_HUFFMAN_SIMPLE_SIZE; return BROTLI_DECODER_NEEDS_MORE_INPUT; } s->sub_loop_counter = 0; /* No break, transit to the next state. */ + case BROTLI_STATE_HUFFMAN_SIMPLE_READ: { BrotliDecoderErrorCode result = - ReadSimpleHuffmanSymbols(alphabet_size, s); + ReadSimpleHuffmanSymbols(alphabet_size, max_symbol, s); if (result != BROTLI_DECODER_SUCCESS) { return result; } /* No break, transit to the next state. */ } + case BROTLI_STATE_HUFFMAN_SIMPLE_BUILD: { uint32_t table_size; if (s->symbol == 3) { @@ -787,11 +820,12 @@ static BrotliDecoderErrorCode ReadHuffmanCode(uint32_t alphabet_size, s->substate_huffman = BROTLI_STATE_HUFFMAN_LENGTH_SYMBOLS; /* No break, transit to the next state. */ } + case BROTLI_STATE_HUFFMAN_LENGTH_SYMBOLS: { uint32_t table_size; - BrotliDecoderErrorCode result = ReadSymbolCodeLengths(alphabet_size, s); + BrotliDecoderErrorCode result = ReadSymbolCodeLengths(max_symbol, s); if (result == BROTLI_DECODER_NEEDS_MORE_INPUT) { - result = SafeReadSymbolCodeLengths(alphabet_size, s); + result = SafeReadSymbolCodeLengths(max_symbol, s); } if (result != BROTLI_DECODER_SUCCESS) { return result; @@ -823,7 +857,7 @@ static BROTLI_INLINE uint32_t ReadBlockLength(const HuffmanCode* table, uint32_t code; uint32_t nbits; code = ReadSymbol(table, br); - nbits = kBlockLengthPrefixCode[code].nbits; /* nbits == 2..24 */ + nbits = kBlockLengthPrefixCode[code].nbits; /* nbits == 2..24 */ return kBlockLengthPrefixCode[code].offset + BrotliReadBits(br, nbits); } @@ -842,7 +876,7 @@ static BROTLI_INLINE BROTLI_BOOL SafeReadBlockLength( } { uint32_t bits; - uint32_t nbits = kBlockLengthPrefixCode[index].nbits; /* nbits == 2..24 */ + uint32_t nbits = kBlockLengthPrefixCode[index].nbits; /* nbits == 2..24 */ if (!BrotliSafeReadBits(br, nbits, &bits)) { s->block_length_index = index; s->substate_read_block_length = BROTLI_STATE_READ_BLOCK_LENGTH_SUFFIX; @@ -867,8 +901,7 @@ static BROTLI_INLINE BROTLI_BOOL SafeReadBlockLength( of Y values, and reinitialize only first elements in L. Most of input values are 0 and 1. To reduce number of branches, we replace - inner for loop with do-while. - */ + inner for loop with do-while. */ static BROTLI_NOINLINE void InverseMoveToFrontTransform( uint8_t* v, uint32_t v_len, BrotliDecoderState* state) { /* Reinitialize elements that could have been changed. */ @@ -884,7 +917,7 @@ static BROTLI_NOINLINE void InverseMoveToFrontTransform( /* Initialize list using 4 consequent values pattern. */ mtf[0] = pattern; do { - pattern += 0x04040404; /* Advance all 4 values by 4. */ + pattern += 0x04040404; /* Advance all 4 values by 4. */ mtf[i] = pattern; i++; } while (i <= upper_bound); @@ -917,7 +950,8 @@ static BrotliDecoderErrorCode HuffmanTreeGroupDecode( while (s->htree_index < group->num_htrees) { uint32_t table_size; BrotliDecoderErrorCode result = - ReadHuffmanCode(group->alphabet_size, s->next, &table_size, s); + ReadHuffmanCode(group->alphabet_size, group->max_symbol, + s->next, &table_size, s); if (result != BROTLI_DECODER_SUCCESS) return result; group->htrees[s->htree_index] = s->next; s->next += table_size; @@ -934,8 +968,7 @@ static BrotliDecoderErrorCode HuffmanTreeGroupDecode( 2) Decode Huffman table using ReadHuffmanCode function. This table will be used for reading context map items. 3) Read context map items; "0" values could be run-length encoded. - 4) Optionally, apply InverseMoveToFront transform to the resulting map. - */ + 4) Optionally, apply InverseMoveToFront transform to the resulting map. */ static BrotliDecoderErrorCode DecodeContextMap(uint32_t context_map_size, uint32_t* num_htrees, uint8_t** context_map_arg, @@ -964,6 +997,7 @@ static BrotliDecoderErrorCode DecodeContextMap(uint32_t context_map_size, } s->substate_context_map = BROTLI_STATE_CONTEXT_MAP_READ_PREFIX; /* No break, continue to next state. */ + case BROTLI_STATE_CONTEXT_MAP_READ_PREFIX: { uint32_t bits; /* In next stage ReadHuffmanCode uses at least 4 bits, so it is safe @@ -982,13 +1016,17 @@ static BrotliDecoderErrorCode DecodeContextMap(uint32_t context_map_size, s->substate_context_map = BROTLI_STATE_CONTEXT_MAP_HUFFMAN; /* No break, continue to next state. */ } - case BROTLI_STATE_CONTEXT_MAP_HUFFMAN: - result = ReadHuffmanCode(*num_htrees + s->max_run_length_prefix, + + case BROTLI_STATE_CONTEXT_MAP_HUFFMAN: { + uint32_t alphabet_size = *num_htrees + s->max_run_length_prefix; + result = ReadHuffmanCode(alphabet_size, alphabet_size, s->context_map_table, NULL, s); if (result != BROTLI_DECODER_SUCCESS) return result; s->code = 0xFFFF; s->substate_context_map = BROTLI_STATE_CONTEXT_MAP_DECODE; /* No break, continue to next state. */ + } + case BROTLI_STATE_CONTEXT_MAP_DECODE: { uint32_t context_index = s->context_index; uint32_t max_run_length_prefix = s->max_run_length_prefix; @@ -1037,6 +1075,7 @@ static BrotliDecoderErrorCode DecodeContextMap(uint32_t context_map_size, } /* No break, continue to next state. */ } + case BROTLI_STATE_CONTEXT_MAP_TRANSFORM: { uint32_t bits; if (!BrotliSafeReadBits(br, 1, &bits)) { @@ -1049,6 +1088,7 @@ static BrotliDecoderErrorCode DecodeContextMap(uint32_t context_map_size, s->substate_context_map = BROTLI_STATE_CONTEXT_MAP_NONE; return BROTLI_DECODER_SUCCESS; } + default: return BROTLI_FAILURE(BROTLI_DECODER_ERROR_UNREACHABLE); @@ -1067,8 +1107,11 @@ static BROTLI_INLINE BROTLI_BOOL DecodeBlockTypeAndLength( BrotliBitReader* br = &s->br; uint32_t* ringbuffer = &s->block_type_rb[tree_type * 2]; uint32_t block_type; + if (max_block_type <= 1) { + return BROTLI_FALSE; + } - /* Read 0..15 + 3..39 bits */ + /* Read 0..15 + 3..39 bits. */ if (!safe) { block_type = ReadSymbol(type_tree, br); s->block_length[tree_type] = ReadBlockLength(len_tree, br); @@ -1125,9 +1168,8 @@ static BROTLI_INLINE void PrepareLiteralDecoding(BrotliDecoderState* s) { trivial = s->trivial_literal_contexts[block_type >> 5]; s->trivial_literal_context = (trivial >> (block_type & 31)) & 1; s->literal_htree = s->literal_hgroup.htrees[s->context_map_slice[0]]; - context_mode = s->context_modes[block_type]; - s->context_lookup1 = &kContextLookup[kContextLookupOffsets[context_mode]]; - s->context_lookup2 = &kContextLookup[kContextLookupOffsets[context_mode + 1]]; + context_mode = s->context_modes[block_type] & 3; + s->context_lookup = BROTLI_CONTEXT_LUT(context_mode); } /* Decodes the block type and updates the state for literal context. @@ -1164,6 +1206,7 @@ static BROTLI_INLINE BROTLI_BOOL DecodeCommandBlockSwitchInternal( static void BROTLI_NOINLINE DecodeCommandBlockSwitch(BrotliDecoderState* s) { DecodeCommandBlockSwitchInternal(0, s); } + static BROTLI_BOOL BROTLI_NOINLINE SafeDecodeCommandBlockSwitch( BrotliDecoderState* s) { return DecodeCommandBlockSwitchInternal(1, s); @@ -1200,8 +1243,7 @@ static size_t UnwrittenBytes(const BrotliDecoderState* s, BROTLI_BOOL wrap) { /* Dumps output. Returns BROTLI_DECODER_NEEDS_MORE_OUTPUT only if there is more output to push - and either ring-buffer is as big as window size, or |force| is true. - */ + and either ring-buffer is as big as window size, or |force| is true. */ static BrotliDecoderErrorCode BROTLI_NOINLINE WriteRingBuffer( BrotliDecoderState* s, size_t* available_out, uint8_t** next_out, size_t* total_out, BROTLI_BOOL force) { @@ -1260,8 +1302,7 @@ static void BROTLI_NOINLINE WrapRingBuffer(BrotliDecoderState* s) { this function is called. Last two bytes of ring-buffer are initialized to 0, so context calculation - could be done uniformly for the first two and all other positions. -*/ + could be done uniformly for the first two and all other positions. */ static BROTLI_BOOL BROTLI_NOINLINE BrotliEnsureRingBuffer( BrotliDecoderState* s) { uint8_t* old_ringbuffer = s->ringbuffer; @@ -1321,8 +1362,9 @@ static BrotliDecoderErrorCode BROTLI_NOINLINE CopyUncompressedBlockToOutput( return BROTLI_DECODER_NEEDS_MORE_INPUT; } s->substate_uncompressed = BROTLI_STATE_UNCOMPRESSED_WRITE; - /* No break, continue to next state */ + /* No break, continue to next state. */ } + case BROTLI_STATE_UNCOMPRESSED_WRITE: { BrotliDecoderErrorCode result; result = WriteRingBuffer( @@ -1346,8 +1388,7 @@ static BrotliDecoderErrorCode BROTLI_NOINLINE CopyUncompressedBlockToOutput( If we know the data size is small, do not allocate more ring buffer size than needed to reduce memory usage. - When this method is called, metablock size and flags MUST be decoded. -*/ + When this method is called, metablock size and flags MUST be decoded. */ static void BROTLI_NOINLINE BrotliCalculateRingBufferSize( BrotliDecoderState* s) { int window_size = 1 << s->window_bits; @@ -1378,7 +1419,7 @@ static void BROTLI_NOINLINE BrotliCalculateRingBufferSize( if (!!s->canny_ringbuffer_allocation) { /* Reduce ring buffer size to save memory when server is unscrupulous. In worst case memory usage might be 1.5x bigger for a short period of - ring buffer reallocation.*/ + ring buffer reallocation. */ while ((new_ringbuffer_size >> 1) >= min_size) { new_ringbuffer_size >>= 1; } @@ -1398,7 +1439,7 @@ static BrotliDecoderErrorCode ReadContextModes(BrotliDecoderState* s) { s->loop_counter = i; return BROTLI_DECODER_NEEDS_MORE_INPUT; } - s->context_modes[i] = (uint8_t)(bits << 1); + s->context_modes[i] = (uint8_t)bits; BROTLI_LOG_ARRAY_INDEX(s->context_modes, i); i++; } @@ -1413,12 +1454,12 @@ static BROTLI_INLINE void TakeDistanceFromRingBuffer(BrotliDecoderState* s) { s->distance_context = 1; } else { int distance_code = s->distance_code << 1; - /* kDistanceShortCodeIndexOffset has 2-bit values from LSB: */ - /* 3, 2, 1, 0, 3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2 */ - const uint32_t kDistanceShortCodeIndexOffset = 0xaaafff1b; - /* kDistanceShortCodeValueOffset has 2-bit values from LSB: */ - /*-0, 0,-0, 0,-1, 1,-2, 2,-3, 3,-1, 1,-2, 2,-3, 3 */ - const uint32_t kDistanceShortCodeValueOffset = 0xfa5fa500; + /* kDistanceShortCodeIndexOffset has 2-bit values from LSB: + 3, 2, 1, 0, 3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2 */ + const uint32_t kDistanceShortCodeIndexOffset = 0xAAAFFF1B; + /* kDistanceShortCodeValueOffset has 2-bit values from LSB: + -0, 0,-0, 0,-1, 1,-2, 2,-3, 3,-1, 1,-2, 2,-3, 3 */ + const uint32_t kDistanceShortCodeValueOffset = 0xFA5FA500; int v = (s->dist_rb_idx + (int)(kDistanceShortCodeIndexOffset >> distance_code)) & 0x3; s->distance_code = s->dist_rb[v]; @@ -1428,9 +1469,9 @@ static BROTLI_INLINE void TakeDistanceFromRingBuffer(BrotliDecoderState* s) { } else { s->distance_code -= v; if (s->distance_code <= 0) { - /* A huge distance will cause a BROTLI_FAILURE() soon. */ - /* This is a little faster than failing here. */ - s->distance_code = 0x0fffffff; + /* A huge distance will cause a BROTLI_FAILURE() soon. + This is a little faster than failing here. */ + s->distance_code = 0x7FFFFFFF; } } } @@ -1446,7 +1487,7 @@ static BROTLI_INLINE BROTLI_BOOL SafeReadBits( } } -/* Precondition: s->distance_code < 0 */ +/* Precondition: s->distance_code < 0. */ static BROTLI_INLINE BROTLI_BOOL ReadDistanceInternal( int safe, BrotliDecoderState* s, BrotliBitReader* br) { int distval; @@ -1462,10 +1503,10 @@ static BROTLI_INLINE BROTLI_BOOL ReadDistanceInternal( } s->distance_code = (int)code; } - /* Convert the distance code to the actual distance by possibly */ - /* looking up past distances from the s->ringbuffer. */ + /* Convert the distance code to the actual distance by possibly + looking up past distances from the s->ringbuffer. */ s->distance_context = 0; - if ((s->distance_code & ~0xf) == 0) { + if ((s->distance_code & ~0xF) == 0) { TakeDistanceFromRingBuffer(s); --s->block_length[2]; return BROTLI_TRUE; @@ -1481,14 +1522,14 @@ static BROTLI_INLINE BROTLI_BOOL ReadDistanceInternal( s->distance_code = (int)s->num_direct_distance_codes + offset + (int)BrotliReadBits(br, nbits); } else { - /* This branch also works well when s->distance_postfix_bits == 0 */ + /* This branch also works well when s->distance_postfix_bits == 0. */ uint32_t bits; postfix = distval & s->distance_postfix_mask; distval >>= s->distance_postfix_bits; nbits = ((uint32_t)distval >> 1) + 1; if (safe) { if (!SafeReadBits(br, nbits, &bits)) { - s->distance_code = -1; /* Restore precondition. */ + s->distance_code = -1; /* Restore precondition. */ BrotliBitReaderRestoreState(br, &memento); return BROTLI_FALSE; } @@ -1615,7 +1656,7 @@ CommandBegin: if (safe) { s->state = BROTLI_STATE_COMMAND_BEGIN; } - if (!CheckInputAmount(safe, br, 28)) { /* 156 bits + 7 bytes */ + if (!CheckInputAmount(safe, br, 28)) { /* 156 bits + 7 bytes */ s->state = BROTLI_STATE_COMMAND_BEGIN; result = BROTLI_DECODER_NEEDS_MORE_INPUT; goto saveStateAndReturn; @@ -1624,7 +1665,7 @@ CommandBegin: BROTLI_SAFE(DecodeCommandBlockSwitch(s)); goto CommandBegin; } - /* Read the insert/copy length in the command */ + /* Read the insert/copy length in the command. */ BROTLI_SAFE(ReadCommand(s, br, &i)); BROTLI_LOG(("[ProcessCommandsInternal] pos = %d insert = %d copy = %d\n", pos, i, s->copy_length)); @@ -1637,13 +1678,13 @@ CommandInner: if (safe) { s->state = BROTLI_STATE_COMMAND_INNER; } - /* Read the literals in the command */ + /* Read the literals in the command. */ if (s->trivial_literal_context) { uint32_t bits; uint32_t value; PreloadSymbol(safe, s->literal_htree, br, &bits, &value); do { - if (!CheckInputAmount(safe, br, 28)) { /* 162 bits + 7 bytes */ + if (!CheckInputAmount(safe, br, 28)) { /* 162 bits + 7 bytes */ s->state = BROTLI_STATE_COMMAND_INNER; result = BROTLI_DECODER_NEEDS_MORE_INPUT; goto saveStateAndReturn; @@ -1679,7 +1720,7 @@ CommandInner: do { const HuffmanCode* hc; uint8_t context; - if (!CheckInputAmount(safe, br, 28)) { /* 162 bits + 7 bytes */ + if (!CheckInputAmount(safe, br, 28)) { /* 162 bits + 7 bytes */ s->state = BROTLI_STATE_COMMAND_INNER; result = BROTLI_DECODER_NEEDS_MORE_INPUT; goto saveStateAndReturn; @@ -1688,7 +1729,7 @@ CommandInner: BROTLI_SAFE(DecodeLiteralBlockSwitch(s)); if (s->trivial_literal_context) goto CommandInner; } - context = s->context_lookup1[p1] | s->context_lookup2[p2]; + context = BROTLI_CONTEXT(p1, p2, s->context_lookup); BROTLI_LOG_UINT(context); hc = s->literal_hgroup.htrees[s->context_map_slice[context]]; p2 = p1; @@ -1744,14 +1785,25 @@ CommandPostDecodeLiterals: } i = s->copy_length; /* Apply copy of LZ77 back-reference, or static dictionary reference if - the distance is larger than the max LZ77 distance */ + the distance is larger than the max LZ77 distance */ if (s->distance_code > s->max_distance) { - int address = s->distance_code - s->max_distance - 1; + /* The maximum allowed distance is BROTLI_MAX_ALLOWED_DISTANCE = 0x7FFFFFFC. + With this choice, no signed overflow can occur after decoding + a special distance code (e.g., after adding 3 to the last distance). */ + if (s->distance_code > BROTLI_MAX_ALLOWED_DISTANCE) { + BROTLI_LOG(("Invalid backward reference. pos: %d distance: %d " + "len: %d bytes left: %d\n", + pos, s->distance_code, i, s->meta_block_remaining_len)); + return BROTLI_FAILURE(BROTLI_DECODER_ERROR_FORMAT_DISTANCE); + } if (i >= BROTLI_MIN_DICTIONARY_WORD_LENGTH && i <= BROTLI_MAX_DICTIONARY_WORD_LENGTH) { + int address = s->distance_code - s->max_distance - 1; const BrotliDictionary* words = s->dictionary; + const BrotliTransforms* transforms = s->transforms; int offset = (int)s->dictionary->offsets_by_length[i]; uint32_t shift = s->dictionary->size_bits_by_length[i]; + int mask = (int)BitMask(shift); int word_idx = address & mask; int transform_idx = address >> shift; @@ -1761,16 +1813,16 @@ CommandPostDecodeLiterals: if (BROTLI_PREDICT_FALSE(!words->data)) { return BROTLI_FAILURE(BROTLI_DECODER_ERROR_DICTIONARY_NOT_SET); } - if (transform_idx < kNumTransforms) { + if (transform_idx < (int)transforms->num_transforms) { const uint8_t* word = &words->data[offset]; int len = i; - if (transform_idx == 0) { + if (transform_idx == transforms->cutOffTransforms[0]) { memcpy(&s->ringbuffer[pos], word, (size_t)len); BROTLI_LOG(("[ProcessCommandsInternal] dictionary word: [%.*s]\n", len, word)); } else { len = BrotliTransformDictionaryWord(&s->ringbuffer[pos], word, len, - transform_idx); + transforms, transform_idx); BROTLI_LOG(("[ProcessCommandsInternal] dictionary word: [%.*s]," " transform_idx = %d, transformed: [%.*s]\n", i, word, transform_idx, len, &s->ringbuffer[pos])); @@ -1778,7 +1830,6 @@ CommandPostDecodeLiterals: pos += len; s->meta_block_remaining_len -= len; if (pos >= s->ringbuffer_size) { - /*s->partial_pos_rb += (size_t)s->ringbuffer_size;*/ s->state = BROTLI_STATE_COMMAND_POST_WRITE_1; goto saveStateAndReturn; } @@ -1800,14 +1851,13 @@ CommandPostDecodeLiterals: uint8_t* copy_src = &s->ringbuffer[src_start]; int dst_end = pos + i; int src_end = src_start + i; - /* update the recent distances cache */ + /* Update the recent distances cache. */ s->dist_rb[s->dist_rb_idx & 3] = s->distance_code; ++s->dist_rb_idx; s->meta_block_remaining_len -= i; /* There are 32+ bytes of slack in the ring-buffer allocation. Also, we have 16 short codes, that make these 16 bytes irrelevant - in the ring-buffer. Let's copy over them as a first guess. - */ + in the ring-buffer. Let's copy over them as a first guess. */ memmove16(copy_dst, copy_src); if (src_end > pos && dst_end > src_start) { /* Regions intersect. */ @@ -1830,7 +1880,7 @@ CommandPostDecodeLiterals: } BROTLI_LOG_UINT(s->meta_block_remaining_len); if (s->meta_block_remaining_len <= 0) { - /* Next metablock, if any */ + /* Next metablock, if any. */ s->state = BROTLI_STATE_METABLOCK_DONE; goto saveStateAndReturn; } else { @@ -1850,7 +1900,7 @@ CommandPostWrapCopy: } } if (s->meta_block_remaining_len <= 0) { - /* Next metablock, if any */ + /* Next metablock, if any. */ s->state = BROTLI_STATE_METABLOCK_DONE; goto saveStateAndReturn; } else { @@ -1875,6 +1925,21 @@ static BROTLI_NOINLINE BrotliDecoderErrorCode SafeProcessCommands( return ProcessCommandsInternal(1, s); } +/* Returns the maximum number of distance symbols which can only represent + distances not exceeding BROTLI_MAX_ALLOWED_DISTANCE. */ +static uint32_t BrotliMaxDistanceSymbol(uint32_t ndirect, uint32_t npostfix) { + static const uint32_t bound[BROTLI_MAX_NPOSTFIX + 1] = {0, 4, 12, 28}; + static const uint32_t diff[BROTLI_MAX_NPOSTFIX + 1] = {73, 126, 228, 424}; + uint32_t postfix = 1U << npostfix; + if (ndirect < bound[npostfix]) { + return ndirect + diff[npostfix] + postfix; + } else if (ndirect > bound[npostfix] + postfix) { + return ndirect + diff[npostfix]; + } else { + return bound[npostfix] + diff[npostfix] + postfix; + } +} + BrotliDecoderResult BrotliDecoderDecompress( size_t encoded_size, const uint8_t* encoded_buffer, size_t* decoded_size, uint8_t* decoded_buffer) { @@ -1885,7 +1950,9 @@ BrotliDecoderResult BrotliDecoderDecompress( const uint8_t* next_in = encoded_buffer; size_t available_out = *decoded_size; uint8_t* next_out = decoded_buffer; - BrotliDecoderStateInit(&s); + if (!BrotliDecoderStateInit(&s, 0, 0, 0)) { + return BROTLI_DECODER_RESULT_ERROR; + } result = BrotliDecoderDecompressStream( &s, &available_in, &next_in, &available_out, &next_out, &total_out); *decoded_size = total_out; @@ -1897,23 +1964,22 @@ BrotliDecoderResult BrotliDecoderDecompress( } /* Invariant: input stream is never overconsumed: - * invalid input implies that the whole stream is invalid -> any amount of + - invalid input implies that the whole stream is invalid -> any amount of input could be read and discarded - * when result is "needs more input", then at least one more byte is REQUIRED + - when result is "needs more input", then at least one more byte is REQUIRED to complete decoding; all input data MUST be consumed by decoder, so client could swap the input buffer - * when result is "needs more output" decoder MUST ensure that it doesn't + - when result is "needs more output" decoder MUST ensure that it doesn't hold more than 7 bits in bit reader; this saves client from swapping input buffer ahead of time - * when result is "success" decoder MUST return all unused data back to input - buffer; this is possible because the invariant is hold on enter -*/ + - when result is "success" decoder MUST return all unused data back to input + buffer; this is possible because the invariant is held on enter */ BrotliDecoderResult BrotliDecoderDecompressStream( BrotliDecoderState* s, size_t* available_in, const uint8_t** next_in, size_t* available_out, uint8_t** next_out, size_t* total_out) { BrotliDecoderErrorCode result = BROTLI_DECODER_SUCCESS; BrotliBitReader* br = &s->br; - /* Ensure that *total_out is set, even if no data will ever be pushed out. */ + /* Ensure that |total_out| is set, even if no data will ever be pushed out. */ if (total_out) { *total_out = s->partial_pos_out; } @@ -1926,7 +1992,7 @@ BrotliDecoderResult BrotliDecoderDecompressStream( s, BROTLI_FAILURE(BROTLI_DECODER_ERROR_INVALID_ARGUMENTS)); } if (!*available_out) next_out = 0; - if (s->buffer_length == 0) { /* Just connect bit reader to input stream. */ + if (s->buffer_length == 0) { /* Just connect bit reader to input stream. */ br->avail_in = *available_in; br->next_in = *next_in; } else { @@ -1938,9 +2004,10 @@ BrotliDecoderResult BrotliDecoderDecompressStream( } /* State machine */ for (;;) { - if (result != BROTLI_DECODER_SUCCESS) { /* Error, needs more input/output */ + if (result != BROTLI_DECODER_SUCCESS) { + /* Error, needs more input/output. */ if (result == BROTLI_DECODER_NEEDS_MORE_INPUT) { - if (s->ringbuffer != 0) { /* Pro-actively push output. */ + if (s->ringbuffer != 0) { /* Pro-actively push output. */ BrotliDecoderErrorCode intermediate_result = WriteRingBuffer(s, available_out, next_out, total_out, BROTLI_TRUE); /* WriteRingBuffer checks s->meta_block_remaining_len validity. */ @@ -1949,9 +2016,10 @@ BrotliDecoderResult BrotliDecoderDecompressStream( break; } } - if (s->buffer_length != 0) { /* Used with internal buffer. */ - if (br->avail_in == 0) { /* Successfully finished read transaction. */ - /* Accumulator contains less than 8 bits, because internal buffer + if (s->buffer_length != 0) { /* Used with internal buffer. */ + if (br->avail_in == 0) { + /* Successfully finished read transaction. + Accumulator contains less than 8 bits, because internal buffer is expanded byte-by-byte until it is enough to complete read. */ s->buffer_length = 0; /* Switch to input stream and restart. */ @@ -1971,9 +2039,9 @@ BrotliDecoderResult BrotliDecoderDecompressStream( /* Retry with more data in buffer. */ continue; } - /* Can't finish reading and no more input.*/ + /* Can't finish reading and no more input. */ break; - } else { /* Input stream doesn't contain enough input. */ + } else { /* Input stream doesn't contain enough input. */ /* Copy tail to internal buffer and return. */ *next_in = br->next_in; *available_in = br->avail_in; @@ -1992,7 +2060,7 @@ BrotliDecoderResult BrotliDecoderDecompressStream( if (s->buffer_length != 0) { /* Just consumed the buffered input and produced some output. Otherwise - it would result in "needs more input". Reset internal buffer.*/ + it would result in "needs more input". Reset internal buffer. */ s->buffer_length = 0; } else { /* Using input stream in last iteration. When decoder switches to input @@ -2012,13 +2080,32 @@ BrotliDecoderResult BrotliDecoderDecompressStream( break; } /* Decode window size. */ - s->window_bits = DecodeWindowBits(br); /* Reads 1..7 bits. */ - BROTLI_LOG_UINT(s->window_bits); - if (s->window_bits == 9) { - /* Value 9 is reserved for future use. */ + result = DecodeWindowBits(s, br); /* Reads 1..8 bits. */ + if (result != BROTLI_DECODER_SUCCESS) { + break; + } + if (s->large_window) { + s->state = BROTLI_STATE_LARGE_WINDOW_BITS; + break; + } + s->state = BROTLI_STATE_INITIALIZE; + break; + + case BROTLI_STATE_LARGE_WINDOW_BITS: + if (!BrotliSafeReadBits(br, 6, &s->window_bits)) { + result = BROTLI_DECODER_NEEDS_MORE_INPUT; + break; + } + if (s->window_bits < BROTLI_LARGE_MIN_WBITS || + s->window_bits > BROTLI_LARGE_MAX_WBITS) { result = BROTLI_FAILURE(BROTLI_DECODER_ERROR_FORMAT_WINDOW_BITS); break; } + s->state = BROTLI_STATE_INITIALIZE; + /* No break, continue to next state */ + + case BROTLI_STATE_INITIALIZE: + BROTLI_LOG_UINT(s->window_bits); /* Maximum distance, see section 9.1. of the spec. */ s->max_backward_distance = (1 << s->window_bits) - BROTLI_WINDOW_GAP; @@ -2034,14 +2121,16 @@ BrotliDecoderResult BrotliDecoderDecompressStream( s->block_type_trees + 3 * BROTLI_HUFFMAN_MAX_SIZE_258; s->state = BROTLI_STATE_METABLOCK_BEGIN; - /* No break, continue to next state */ + /* No break, continue to next state. */ + case BROTLI_STATE_METABLOCK_BEGIN: BrotliDecoderStateMetablockBegin(s); BROTLI_LOG_UINT(s->pos); s->state = BROTLI_STATE_METABLOCK_HEADER; - /* No break, continue to next state */ + /* No break, continue to next state. */ + case BROTLI_STATE_METABLOCK_HEADER: - result = DecodeMetaBlockLength(s, br); /* Reads 2 - 31 bits. */ + result = DecodeMetaBlockLength(s, br); /* Reads 2 - 31 bits. */ if (result != BROTLI_DECODER_SUCCESS) { break; } @@ -2071,6 +2160,7 @@ BrotliDecoderResult BrotliDecoderDecompressStream( s->loop_counter = 0; s->state = BROTLI_STATE_HUFFMAN_CODE_0; break; + case BROTLI_STATE_UNCOMPRESSED: { result = CopyUncompressedBlockToOutput( available_out, next_out, total_out, s); @@ -2080,6 +2170,7 @@ BrotliDecoderResult BrotliDecoderDecompressStream( s->state = BROTLI_STATE_METABLOCK_DONE; break; } + case BROTLI_STATE_METADATA: for (; s->meta_block_remaining_len > 0; --s->meta_block_remaining_len) { uint32_t bits; @@ -2093,6 +2184,7 @@ BrotliDecoderResult BrotliDecoderDecompressStream( s->state = BROTLI_STATE_METABLOCK_DONE; } break; + case BROTLI_STATE_HUFFMAN_CODE_0: if (s->loop_counter >= 3) { s->state = BROTLI_STATE_METABLOCK_HEADER_2; @@ -2110,23 +2202,28 @@ BrotliDecoderResult BrotliDecoderDecompressStream( break; } s->state = BROTLI_STATE_HUFFMAN_CODE_1; - /* No break, continue to next state */ + /* No break, continue to next state. */ + case BROTLI_STATE_HUFFMAN_CODE_1: { + uint32_t alphabet_size = s->num_block_types[s->loop_counter] + 2; int tree_offset = s->loop_counter * BROTLI_HUFFMAN_MAX_SIZE_258; - result = ReadHuffmanCode(s->num_block_types[s->loop_counter] + 2, + result = ReadHuffmanCode(alphabet_size, alphabet_size, &s->block_type_trees[tree_offset], NULL, s); if (result != BROTLI_DECODER_SUCCESS) break; s->state = BROTLI_STATE_HUFFMAN_CODE_2; - /* No break, continue to next state */ + /* No break, continue to next state. */ } + case BROTLI_STATE_HUFFMAN_CODE_2: { + uint32_t alphabet_size = BROTLI_NUM_BLOCK_LEN_SYMBOLS; int tree_offset = s->loop_counter * BROTLI_HUFFMAN_MAX_SIZE_26; - result = ReadHuffmanCode(BROTLI_NUM_BLOCK_LEN_SYMBOLS, + result = ReadHuffmanCode(alphabet_size, alphabet_size, &s->block_len_trees[tree_offset], NULL, s); if (result != BROTLI_DECODER_SUCCESS) break; s->state = BROTLI_STATE_HUFFMAN_CODE_3; - /* No break, continue to next state */ + /* No break, continue to next state. */ } + case BROTLI_STATE_HUFFMAN_CODE_3: { int tree_offset = s->loop_counter * BROTLI_HUFFMAN_MAX_SIZE_26; if (!SafeReadBlockLength(s, &s->block_length[s->loop_counter], @@ -2139,6 +2236,7 @@ BrotliDecoderResult BrotliDecoderDecompressStream( s->state = BROTLI_STATE_HUFFMAN_CODE_0; break; } + case BROTLI_STATE_METABLOCK_HEADER_2: { uint32_t bits; if (!BrotliSafeReadBits(br, 6, &bits)) { @@ -2160,15 +2258,17 @@ BrotliDecoderResult BrotliDecoderDecompressStream( } s->loop_counter = 0; s->state = BROTLI_STATE_CONTEXT_MODES; - /* No break, continue to next state */ + /* No break, continue to next state. */ } + case BROTLI_STATE_CONTEXT_MODES: result = ReadContextModes(s); if (result != BROTLI_DECODER_SUCCESS) { break; } s->state = BROTLI_STATE_CONTEXT_MAP_1; - /* No break, continue to next state */ + /* No break, continue to next state. */ + case BROTLI_STATE_CONTEXT_MAP_1: result = DecodeContextMap( s->num_block_types[0] << BROTLI_LITERAL_CONTEXT_BITS, @@ -2178,54 +2278,54 @@ BrotliDecoderResult BrotliDecoderDecompressStream( } DetectTrivialLiteralBlockTypes(s); s->state = BROTLI_STATE_CONTEXT_MAP_2; - /* No break, continue to next state */ - case BROTLI_STATE_CONTEXT_MAP_2: - { - uint32_t num_distance_codes = s->num_direct_distance_codes + - ((2 * BROTLI_MAX_DISTANCE_BITS) << s->distance_postfix_bits); - BROTLI_BOOL allocation_success = BROTLI_TRUE; - result = DecodeContextMap( - s->num_block_types[2] << BROTLI_DISTANCE_CONTEXT_BITS, - &s->num_dist_htrees, &s->dist_context_map, s); - if (result != BROTLI_DECODER_SUCCESS) { - break; - } - allocation_success &= BrotliDecoderHuffmanTreeGroupInit( - s, &s->literal_hgroup, BROTLI_NUM_LITERAL_SYMBOLS, - s->num_literal_htrees); - allocation_success &= BrotliDecoderHuffmanTreeGroupInit( - s, &s->insert_copy_hgroup, BROTLI_NUM_COMMAND_SYMBOLS, - s->num_block_types[1]); - allocation_success &= BrotliDecoderHuffmanTreeGroupInit( - s, &s->distance_hgroup, num_distance_codes, - s->num_dist_htrees); - if (!allocation_success) { - return SaveErrorCode(s, - BROTLI_FAILURE(BROTLI_DECODER_ERROR_ALLOC_TREE_GROUPS)); - } + /* No break, continue to next state. */ + + case BROTLI_STATE_CONTEXT_MAP_2: { + uint32_t num_direct_codes = + s->num_direct_distance_codes - BROTLI_NUM_DISTANCE_SHORT_CODES; + uint32_t num_distance_codes = BROTLI_DISTANCE_ALPHABET_SIZE( + num_direct_codes, s->distance_postfix_bits, + (s->large_window ? BROTLI_LARGE_MAX_DISTANCE_BITS : + BROTLI_MAX_DISTANCE_BITS)); + uint32_t max_distance_symbol = (s->large_window ? + BrotliMaxDistanceSymbol( + num_direct_codes, s->distance_postfix_bits) : + num_distance_codes); + BROTLI_BOOL allocation_success = BROTLI_TRUE; + result = DecodeContextMap( + s->num_block_types[2] << BROTLI_DISTANCE_CONTEXT_BITS, + &s->num_dist_htrees, &s->dist_context_map, s); + if (result != BROTLI_DECODER_SUCCESS) { + break; + } + allocation_success &= BrotliDecoderHuffmanTreeGroupInit( + s, &s->literal_hgroup, BROTLI_NUM_LITERAL_SYMBOLS, + BROTLI_NUM_LITERAL_SYMBOLS, s->num_literal_htrees); + allocation_success &= BrotliDecoderHuffmanTreeGroupInit( + s, &s->insert_copy_hgroup, BROTLI_NUM_COMMAND_SYMBOLS, + BROTLI_NUM_COMMAND_SYMBOLS, s->num_block_types[1]); + allocation_success &= BrotliDecoderHuffmanTreeGroupInit( + s, &s->distance_hgroup, num_distance_codes, + max_distance_symbol, s->num_dist_htrees); + if (!allocation_success) { + return SaveErrorCode(s, + BROTLI_FAILURE(BROTLI_DECODER_ERROR_ALLOC_TREE_GROUPS)); } s->loop_counter = 0; s->state = BROTLI_STATE_TREE_GROUP; - /* No break, continue to next state */ - case BROTLI_STATE_TREE_GROUP: - { - HuffmanTreeGroup* hgroup = NULL; - switch (s->loop_counter) { - case 0: - hgroup = &s->literal_hgroup; - break; - case 1: - hgroup = &s->insert_copy_hgroup; - break; - case 2: - hgroup = &s->distance_hgroup; - break; - default: - return SaveErrorCode(s, BROTLI_FAILURE( - BROTLI_DECODER_ERROR_UNREACHABLE)); - } - result = HuffmanTreeGroupDecode(hgroup, s); + /* No break, continue to next state. */ + } + + case BROTLI_STATE_TREE_GROUP: { + HuffmanTreeGroup* hgroup = NULL; + switch (s->loop_counter) { + case 0: hgroup = &s->literal_hgroup; break; + case 1: hgroup = &s->insert_copy_hgroup; break; + case 2: hgroup = &s->distance_hgroup; break; + default: return SaveErrorCode(s, BROTLI_FAILURE( + BROTLI_DECODER_ERROR_UNREACHABLE)); } + result = HuffmanTreeGroupDecode(hgroup, s); if (result != BROTLI_DECODER_SUCCESS) break; s->loop_counter++; if (s->loop_counter >= 3) { @@ -2239,6 +2339,8 @@ BrotliDecoderResult BrotliDecoderDecompressStream( s->state = BROTLI_STATE_COMMAND_BEGIN; } break; + } + case BROTLI_STATE_COMMAND_BEGIN: case BROTLI_STATE_COMMAND_INNER: case BROTLI_STATE_COMMAND_POST_DECODE_LITERALS: @@ -2248,6 +2350,7 @@ BrotliDecoderResult BrotliDecoderDecompressStream( result = SafeProcessCommands(s); } break; + case BROTLI_STATE_COMMAND_INNER_WRITE: case BROTLI_STATE_COMMAND_POST_WRITE_1: case BROTLI_STATE_COMMAND_POST_WRITE_2: @@ -2262,7 +2365,7 @@ BrotliDecoderResult BrotliDecoderDecompressStream( } if (s->state == BROTLI_STATE_COMMAND_POST_WRITE_1) { if (s->meta_block_remaining_len == 0) { - /* Next metablock, if any */ + /* Next metablock, if any. */ s->state = BROTLI_STATE_METABLOCK_DONE; } else { s->state = BROTLI_STATE_COMMAND_BEGIN; @@ -2282,6 +2385,7 @@ BrotliDecoderResult BrotliDecoderDecompressStream( s->state = BROTLI_STATE_COMMAND_INNER; } break; + case BROTLI_STATE_METABLOCK_DONE: if (s->meta_block_remaining_len < 0) { result = BROTLI_FAILURE(BROTLI_DECODER_ERROR_FORMAT_BLOCK_LENGTH_2); @@ -2302,7 +2406,8 @@ BrotliDecoderResult BrotliDecoderDecompressStream( *next_in = br->next_in; } s->state = BROTLI_STATE_DONE; - /* No break, continue to next state */ + /* No break, continue to next state. */ + case BROTLI_STATE_DONE: if (s->ringbuffer != 0) { result = WriteRingBuffer( diff --git a/c/dec/huffman.c b/c/dec/huffman.c index 4fe7bfa..f142442 100644 --- a/c/dec/huffman.c +++ b/c/dec/huffman.c @@ -86,9 +86,9 @@ static BROTLI_INLINE void ReplicateValue(HuffmanCode* table, } while (end > 0); } -/* Returns the table width of the next 2nd level table. count is the histogram - of bit lengths for the remaining symbols, len is the code length of the next - processed symbol */ +/* Returns the table width of the next 2nd level table. |count| is the histogram + of bit lengths for the remaining symbols, |len| is the code length of the + next processed symbol. */ static BROTLI_INLINE int NextTableBitSize(const uint16_t* const count, int len, int root_bits) { int left = 1 << (len - root_bits); @@ -118,7 +118,7 @@ void BrotliBuildCodeLengthsHuffmanTable(HuffmanCode* table, BROTLI_DCHECK(BROTLI_HUFFMAN_MAX_CODE_LENGTH_CODE_LENGTH <= BROTLI_REVERSE_BITS_MAX); - /* generate offsets into sorted symbol table by code length */ + /* Generate offsets into sorted symbol table by code length. */ symbol = -1; bits = 1; BROTLI_REPEAT(BROTLI_HUFFMAN_MAX_CODE_LENGTH_CODE_LENGTH, { @@ -129,7 +129,7 @@ void BrotliBuildCodeLengthsHuffmanTable(HuffmanCode* table, /* Symbols with code length 0 are placed after all other symbols. */ offset[0] = BROTLI_CODE_LENGTH_CODES - 1; - /* sort symbols by length, by symbol order within each length */ + /* Sort symbols by length, by symbol order within each length. */ symbol = BROTLI_CODE_LENGTH_CODES; do { BROTLI_REPEAT(6, { @@ -150,7 +150,7 @@ void BrotliBuildCodeLengthsHuffmanTable(HuffmanCode* table, return; } - /* fill in table */ + /* Fill in table. */ key = 0; key_step = BROTLI_REVERSE_BITS_LOWEST; symbol = 0; @@ -200,9 +200,8 @@ uint32_t BrotliBuildHuffmanTable(HuffmanCode* root_table, table_size = 1 << table_bits; total_size = table_size; - /* fill in root table */ - /* let's reduce the table size to a smaller size if possible, and */ - /* create the repetitions by memcpy if possible in the coming loop */ + /* Fill in the root table. Reduce the table size to if possible, + and create the repetitions by memcpy. */ if (table_bits > max_length) { table_bits = max_length; table_size = 1 << table_bits; @@ -224,15 +223,14 @@ uint32_t BrotliBuildHuffmanTable(HuffmanCode* root_table, key_step >>= 1; } while (++bits <= table_bits); - /* if root_bits != table_bits we only created one fraction of the */ - /* table, and we need to replicate it now. */ + /* If root_bits != table_bits then replicate to fill the remaining slots. */ while (total_size != table_size) { memcpy(&table[table_size], &table[0], (size_t)table_size * sizeof(table[0])); table_size <<= 1; } - /* fill in 2nd level tables and add pointers to root table */ + /* Fill in 2nd level tables and add pointers to root table. */ key_step = BROTLI_REVERSE_BITS_LOWEST >> (root_bits - 1); sub_key = (BROTLI_REVERSE_BITS_LOWEST << 1); sub_key_step = BROTLI_REVERSE_BITS_LOWEST; diff --git a/c/dec/huffman.h b/c/dec/huffman.h index 730af88..521ec6e 100644 --- a/c/dec/huffman.h +++ b/c/dec/huffman.h @@ -19,10 +19,11 @@ extern "C" { #define BROTLI_HUFFMAN_MAX_CODE_LENGTH 15 /* Maximum possible Huffman table size for an alphabet size of (index * 32), - * max code length 15 and root table bits 8. */ + max code length 15 and root table bits 8. */ static const uint16_t kMaxHuffmanTableSize[] = { 256, 402, 436, 468, 500, 534, 566, 598, 630, 662, 694, 726, 758, 790, 822, - 854, 886, 920, 952, 984, 1016, 1048, 1080}; + 854, 886, 920, 952, 984, 1016, 1048, 1080, 1112, 1144, 1176, 1208, 1240, 1272, + 1304, 1336, 1368, 1400, 1432, 1464, 1496, 1528}; /* BROTLI_NUM_BLOCK_LEN_SYMBOLS == 26 */ #define BROTLI_HUFFMAN_MAX_SIZE_26 396 /* BROTLI_MAX_BLOCK_TYPE_SYMBOLS == 258 */ @@ -41,23 +42,26 @@ typedef struct { BROTLI_INTERNAL void BrotliBuildCodeLengthsHuffmanTable(HuffmanCode* root_table, const uint8_t* const code_lengths, uint16_t* count); -/* Builds Huffman lookup table assuming code lengths are in symbol order. */ -/* Returns size of resulting table. */ +/* Builds Huffman lookup table assuming code lengths are in symbol order. + Returns size of resulting table. */ BROTLI_INTERNAL uint32_t BrotliBuildHuffmanTable(HuffmanCode* root_table, int root_bits, const uint16_t* const symbol_lists, uint16_t* count_arg); -/* Builds a simple Huffman table. The num_symbols parameter is to be */ -/* interpreted as follows: 0 means 1 symbol, 1 means 2 symbols, 2 means 3 */ -/* symbols, 3 means 4 symbols with lengths 2,2,2,2, 4 means 4 symbols with */ -/* lengths 1,2,3,3. */ +/* Builds a simple Huffman table. The |num_symbols| parameter is to be + interpreted as follows: 0 means 1 symbol, 1 means 2 symbols, + 2 means 3 symbols, 3 means 4 symbols with lengths [2, 2, 2, 2], + 4 means 4 symbols with lengths [1, 2, 3, 3]. */ BROTLI_INTERNAL uint32_t BrotliBuildSimpleHuffmanTable(HuffmanCode* table, int root_bits, uint16_t* symbols, uint32_t num_symbols); /* Contains a collection of Huffman trees with the same alphabet size. */ +/* max_symbol is needed due to simple codes since log2(alphabet_size) could be + greater than log2(max_symbol). */ typedef struct { HuffmanCode** htrees; HuffmanCode* codes; uint16_t alphabet_size; + uint16_t max_symbol; uint16_t num_htrees; } HuffmanTreeGroup; diff --git a/c/dec/prefix.h b/c/dec/prefix.h index aa776c7..3ea062d 100644 --- a/c/dec/prefix.h +++ b/c/dec/prefix.h @@ -5,8 +5,7 @@ */ /* Lookup tables to map prefix codes to value ranges. This is used during - decoding of the block lengths, literal insertion lengths and copy lengths. -*/ + decoding of the block lengths, literal insertion lengths and copy lengths. */ #ifndef BROTLI_DEC_PREFIX_H_ #define BROTLI_DEC_PREFIX_H_ @@ -14,8 +13,8 @@ #include "../common/constants.h" #include <brotli/types.h> -/* Represents the range of values belonging to a prefix code: */ -/* [offset, offset + 2^nbits) */ +/* Represents the range of values belonging to a prefix code: + [offset, offset + 2^nbits) */ struct PrefixCodeRange { uint16_t offset; uint8_t nbits; diff --git a/c/dec/state.c b/c/dec/state.c index eaec823..e0b37c2 100644 --- a/c/dec/state.c +++ b/c/dec/state.c @@ -15,25 +15,11 @@ extern "C" { #endif -static void* DefaultAllocFunc(void* opaque, size_t size) { - BROTLI_UNUSED(opaque); - return malloc(size); -} - -static void DefaultFreeFunc(void* opaque, void* address) { - BROTLI_UNUSED(opaque); - free(address); -} - -void BrotliDecoderStateInit(BrotliDecoderState* s) { - BrotliDecoderStateInitWithCustomAllocators(s, 0, 0, 0); -} - -void BrotliDecoderStateInitWithCustomAllocators(BrotliDecoderState* s, +BROTLI_BOOL BrotliDecoderStateInit(BrotliDecoderState* s, brotli_alloc_func alloc_func, brotli_free_func free_func, void* opaque) { if (!alloc_func) { - s->alloc_func = DefaultAllocFunc; - s->free_func = DefaultFreeFunc; + s->alloc_func = BrotliDefaultAllocFunc; + s->free_func = BrotliDefaultFreeFunc; s->memory_manager_opaque = 0; } else { s->alloc_func = alloc_func; @@ -45,6 +31,7 @@ void BrotliDecoderStateInitWithCustomAllocators(BrotliDecoderState* s, BrotliInitBitReader(&s->br); s->state = BROTLI_STATE_UNINITED; + s->large_window = 0; s->substate_metablock_header = BROTLI_STATE_METABLOCK_HEADER_NONE; s->substate_tree_group = BROTLI_STATE_TREE_GROUP_NONE; s->substate_context_map = BROTLI_STATE_CONTEXT_MAP_NONE; @@ -103,13 +90,16 @@ void BrotliDecoderStateInitWithCustomAllocators(BrotliDecoderState* s, s->mtf_upper_bound = 63; s->dictionary = BrotliGetDictionary(); + s->transforms = BrotliGetTransforms(); + + return BROTLI_TRUE; } void BrotliDecoderStateMetablockBegin(BrotliDecoderState* s) { s->meta_block_remaining_len = 0; - s->block_length[0] = 1U << 28; - s->block_length[1] = 1U << 28; - s->block_length[2] = 1U << 28; + s->block_length[0] = 1U << 24; + s->block_length[1] = 1U << 24; + s->block_length[2] = 1U << 24; s->num_block_types[0] = 1; s->num_block_types[1] = 1; s->num_block_types[2] = 1; @@ -126,8 +116,7 @@ void BrotliDecoderStateMetablockBegin(BrotliDecoderState* s) { s->literal_htree = NULL; s->dist_context_map_slice = NULL; s->dist_htree_index = 0; - s->context_lookup1 = NULL; - s->context_lookup2 = NULL; + s->context_lookup = NULL; s->literal_hgroup.codes = NULL; s->literal_hgroup.htrees = NULL; s->insert_copy_hgroup.codes = NULL; @@ -153,7 +142,8 @@ void BrotliDecoderStateCleanup(BrotliDecoderState* s) { } BROTLI_BOOL BrotliDecoderHuffmanTreeGroupInit(BrotliDecoderState* s, - HuffmanTreeGroup* group, uint32_t alphabet_size, uint32_t ntrees) { + HuffmanTreeGroup* group, uint32_t alphabet_size, uint32_t max_symbol, + uint32_t ntrees) { /* Pack two allocations into one */ const size_t max_table_size = kMaxHuffmanTableSize[(alphabet_size + 31) >> 5]; const size_t code_size = sizeof(HuffmanCode) * ntrees * max_table_size; @@ -162,6 +152,7 @@ BROTLI_BOOL BrotliDecoderHuffmanTreeGroupInit(BrotliDecoderState* s, HuffmanCode** p = (HuffmanCode**)BROTLI_DECODER_ALLOC(s, code_size + htree_size); group->alphabet_size = (uint16_t)alphabet_size; + group->max_symbol = (uint16_t)max_symbol; group->num_htrees = (uint16_t)ntrees; group->htrees = p; group->codes = (HuffmanCode*)(&p[ntrees]); diff --git a/c/dec/state.h b/c/dec/state.h index 069beca..d28b639 100644 --- a/c/dec/state.h +++ b/c/dec/state.h @@ -12,6 +12,7 @@ #include "../common/constants.h" #include "../common/dictionary.h" #include "../common/platform.h" +#include "../common/transform.h" #include <brotli/types.h> #include "./bit_reader.h" #include "./huffman.h" @@ -22,6 +23,8 @@ extern "C" { typedef enum { BROTLI_STATE_UNINITED, + BROTLI_STATE_LARGE_WINDOW_BITS, + BROTLI_STATE_INITIALIZE, BROTLI_STATE_METABLOCK_BEGIN, BROTLI_STATE_METABLOCK_HEADER, BROTLI_STATE_METABLOCK_HEADER_2, @@ -126,23 +129,22 @@ struct BrotliDecoderStateStruct { uint8_t* ringbuffer; uint8_t* ringbuffer_end; HuffmanCode* htree_command; - const uint8_t* context_lookup1; - const uint8_t* context_lookup2; + const uint8_t* context_lookup; uint8_t* context_map_slice; uint8_t* dist_context_map_slice; - /* This ring buffer holds a few past copy distances that will be used by */ - /* some special distance codes. */ + /* This ring buffer holds a few past copy distances that will be used by + some special distance codes. */ HuffmanTreeGroup literal_hgroup; HuffmanTreeGroup insert_copy_hgroup; HuffmanTreeGroup distance_hgroup; HuffmanCode* block_type_trees; HuffmanCode* block_len_trees; /* This is true if the literal context map histogram type always matches the - block type. It is then not needed to keep the context (faster decoding). */ + block type. It is then not needed to keep the context (faster decoding). */ int trivial_literal_context; - /* Distance context is actual after command is decoded and before distance - is computed. After distance computation it is used as a temporary variable. */ + /* Distance context is actual after command is decoded and before distance is + computed. After distance computation it is used as a temporary variable. */ int distance_context; int meta_block_remaining_len; uint32_t block_length_index; @@ -162,11 +164,11 @@ struct BrotliDecoderStateStruct { int copy_length; int distance_code; - /* For partial write operations */ - size_t rb_roundtrips; /* How many times we went around the ring-buffer */ - size_t partial_pos_out; /* How much output to the user in total */ + /* For partial write operations. */ + size_t rb_roundtrips; /* how many times we went around the ring-buffer */ + size_t partial_pos_out; /* how much output to the user in total */ - /* For ReadHuffmanCode */ + /* For ReadHuffmanCode. */ uint32_t symbol; uint32_t repeat; uint32_t space; @@ -180,25 +182,26 @@ struct BrotliDecoderStateStruct { /* Tails of symbol chains. */ int next_symbol[32]; uint8_t code_length_code_lengths[BROTLI_CODE_LENGTH_CODES]; - /* Population counts for the code lengths */ + /* Population counts for the code lengths. */ uint16_t code_length_histo[16]; - /* For HuffmanTreeGroupDecode */ + /* For HuffmanTreeGroupDecode. */ int htree_index; HuffmanCode* next; - /* For DecodeContextMap */ + /* For DecodeContextMap. */ uint32_t context_index; uint32_t max_run_length_prefix; uint32_t code; HuffmanCode context_map_table[BROTLI_HUFFMAN_MAX_SIZE_272]; - /* For InverseMoveToFrontTransform */ + /* For InverseMoveToFrontTransform. */ uint32_t mtf_upper_bound; uint32_t mtf[64 + 1]; - /* less used attributes are in the end of this struct */ - /* States inside function calls */ + /* Less used attributes are at the end of this struct. */ + + /* States inside function calls. */ BrotliRunningMetablockHeaderState substate_metablock_header; BrotliRunningTreeGroupState substate_tree_group; BrotliRunningContextMapState substate_context_map; @@ -212,6 +215,7 @@ struct BrotliDecoderStateStruct { unsigned int is_metadata : 1; unsigned int should_wrap_ringbuffer : 1; unsigned int canny_ringbuffer_allocation : 1; + unsigned int large_window : 1; unsigned int size_nibbles : 8; uint32_t window_bits; @@ -222,6 +226,7 @@ struct BrotliDecoderStateStruct { uint8_t* context_modes; const BrotliDictionary* dictionary; + const BrotliTransforms* transforms; uint32_t trivial_literal_contexts[8]; /* 256 bits */ }; @@ -229,17 +234,15 @@ struct BrotliDecoderStateStruct { typedef struct BrotliDecoderStateStruct BrotliDecoderStateInternal; #define BrotliDecoderState BrotliDecoderStateInternal -BROTLI_INTERNAL void BrotliDecoderStateInit(BrotliDecoderState* s); -BROTLI_INTERNAL void BrotliDecoderStateInitWithCustomAllocators( - BrotliDecoderState* s, brotli_alloc_func alloc_func, - brotli_free_func free_func, void* opaque); +BROTLI_INTERNAL BROTLI_BOOL BrotliDecoderStateInit(BrotliDecoderState* s, + brotli_alloc_func alloc_func, brotli_free_func free_func, void* opaque); BROTLI_INTERNAL void BrotliDecoderStateCleanup(BrotliDecoderState* s); BROTLI_INTERNAL void BrotliDecoderStateMetablockBegin(BrotliDecoderState* s); BROTLI_INTERNAL void BrotliDecoderStateCleanupAfterMetablock( BrotliDecoderState* s); BROTLI_INTERNAL BROTLI_BOOL BrotliDecoderHuffmanTreeGroupInit( BrotliDecoderState* s, HuffmanTreeGroup* group, uint32_t alphabet_size, - uint32_t ntrees); + uint32_t max_symbol, uint32_t ntrees); #define BROTLI_DECODER_ALLOC(S, L) S->alloc_func(S->memory_manager_opaque, L) diff --git a/c/dec/transform.h b/c/dec/transform.h deleted file mode 100644 index e1d96ff..0000000 --- a/c/dec/transform.h +++ /dev/null @@ -1,300 +0,0 @@ -/* Copyright 2013 Google Inc. All Rights Reserved. - - Distributed under MIT license. - See file LICENSE for detail or copy at https://opensource.org/licenses/MIT -*/ - -/* Transformations on dictionary words. */ - -#ifndef BROTLI_DEC_TRANSFORM_H_ -#define BROTLI_DEC_TRANSFORM_H_ - -#include "../common/platform.h" -#include <brotli/types.h> - -#if defined(__cplusplus) || defined(c_plusplus) -extern "C" { -#endif - -enum WordTransformType { - kIdentity = 0, - kOmitLast1 = 1, - kOmitLast2 = 2, - kOmitLast3 = 3, - kOmitLast4 = 4, - kOmitLast5 = 5, - kOmitLast6 = 6, - kOmitLast7 = 7, - kOmitLast8 = 8, - kOmitLast9 = 9, - kUppercaseFirst = 10, - kUppercaseAll = 11, - kOmitFirst1 = 12, - kOmitFirst2 = 13, - kOmitFirst3 = 14, - kOmitFirst4 = 15, - kOmitFirst5 = 16, - kOmitFirst6 = 17, - kOmitFirst7 = 18, - kOmitFirst8 = 19, - kOmitFirst9 = 20 -}; - -typedef struct { - const uint8_t prefix_id; - const uint8_t transform; - const uint8_t suffix_id; -} Transform; - -static const char kPrefixSuffix[208] = - "\0 \0, \0 of the \0 of \0s \0.\0 and \0 in \0\"\0 to \0\">\0\n\0. \0]\0" - " for \0 a \0 that \0\'\0 with \0 from \0 by \0(\0. The \0 on \0 as \0" - " is \0ing \0\n\t\0:\0ed \0=\"\0 at \0ly \0,\0=\'\0.com/\0. This \0" - " not \0er \0al \0ful \0ive \0less \0est \0ize \0\xc2\xa0\0ous "; - -enum { - /* EMPTY = "" - SP = " " - DQUOT = "\"" - SQUOT = "'" - CLOSEBR = "]" - OPEN = "(" - SLASH = "/" - NBSP = non-breaking space "\0xc2\xa0" - */ - kPFix_EMPTY = 0, - kPFix_SP = 1, - kPFix_COMMASP = 3, - kPFix_SPofSPtheSP = 6, - kPFix_SPtheSP = 9, - kPFix_eSP = 12, - kPFix_SPofSP = 15, - kPFix_sSP = 20, - kPFix_DOT = 23, - kPFix_SPandSP = 25, - kPFix_SPinSP = 31, - kPFix_DQUOT = 36, - kPFix_SPtoSP = 38, - kPFix_DQUOTGT = 43, - kPFix_NEWLINE = 46, - kPFix_DOTSP = 48, - kPFix_CLOSEBR = 51, - kPFix_SPforSP = 53, - kPFix_SPaSP = 59, - kPFix_SPthatSP = 63, - kPFix_SQUOT = 70, - kPFix_SPwithSP = 72, - kPFix_SPfromSP = 79, - kPFix_SPbySP = 86, - kPFix_OPEN = 91, - kPFix_DOTSPTheSP = 93, - kPFix_SPonSP = 100, - kPFix_SPasSP = 105, - kPFix_SPisSP = 110, - kPFix_ingSP = 115, - kPFix_NEWLINETAB = 120, - kPFix_COLON = 123, - kPFix_edSP = 125, - kPFix_EQDQUOT = 129, - kPFix_SPatSP = 132, - kPFix_lySP = 137, - kPFix_COMMA = 141, - kPFix_EQSQUOT = 143, - kPFix_DOTcomSLASH = 146, - kPFix_DOTSPThisSP = 152, - kPFix_SPnotSP = 160, - kPFix_erSP = 166, - kPFix_alSP = 170, - kPFix_fulSP = 174, - kPFix_iveSP = 179, - kPFix_lessSP = 184, - kPFix_estSP = 190, - kPFix_izeSP = 195, - kPFix_NBSP = 200, - kPFix_ousSP = 203 -}; - -static const Transform kTransforms[] = { - { kPFix_EMPTY, kIdentity, kPFix_EMPTY }, - { kPFix_EMPTY, kIdentity, kPFix_SP }, - { kPFix_SP, kIdentity, kPFix_SP }, - { kPFix_EMPTY, kOmitFirst1, kPFix_EMPTY }, - { kPFix_EMPTY, kUppercaseFirst, kPFix_SP }, - { kPFix_EMPTY, kIdentity, kPFix_SPtheSP }, - { kPFix_SP, kIdentity, kPFix_EMPTY }, - { kPFix_sSP, kIdentity, kPFix_SP }, - { kPFix_EMPTY, kIdentity, kPFix_SPofSP }, - { kPFix_EMPTY, kUppercaseFirst, kPFix_EMPTY }, - { kPFix_EMPTY, kIdentity, kPFix_SPandSP }, - { kPFix_EMPTY, kOmitFirst2, kPFix_EMPTY }, - { kPFix_EMPTY, kOmitLast1, kPFix_EMPTY }, - { kPFix_COMMASP, kIdentity, kPFix_SP }, - { kPFix_EMPTY, kIdentity, kPFix_COMMASP }, - { kPFix_SP, kUppercaseFirst, kPFix_SP }, - { kPFix_EMPTY, kIdentity, kPFix_SPinSP }, - { kPFix_EMPTY, kIdentity, kPFix_SPtoSP }, - { kPFix_eSP, kIdentity, kPFix_SP }, - { kPFix_EMPTY, kIdentity, kPFix_DQUOT }, - { kPFix_EMPTY, kIdentity, kPFix_DOT }, - { kPFix_EMPTY, kIdentity, kPFix_DQUOTGT }, - { kPFix_EMPTY, kIdentity, kPFix_NEWLINE }, - { kPFix_EMPTY, kOmitLast3, kPFix_EMPTY }, - { kPFix_EMPTY, kIdentity, kPFix_CLOSEBR }, - { kPFix_EMPTY, kIdentity, kPFix_SPforSP }, - { kPFix_EMPTY, kOmitFirst3, kPFix_EMPTY }, - { kPFix_EMPTY, kOmitLast2, kPFix_EMPTY }, - { kPFix_EMPTY, kIdentity, kPFix_SPaSP }, - { kPFix_EMPTY, kIdentity, kPFix_SPthatSP }, - { kPFix_SP, kUppercaseFirst, kPFix_EMPTY }, - { kPFix_EMPTY, kIdentity, kPFix_DOTSP }, - { kPFix_DOT, kIdentity, kPFix_EMPTY }, - { kPFix_SP, kIdentity, kPFix_COMMASP }, - { kPFix_EMPTY, kOmitFirst4, kPFix_EMPTY }, - { kPFix_EMPTY, kIdentity, kPFix_SPwithSP }, - { kPFix_EMPTY, kIdentity, kPFix_SQUOT }, - { kPFix_EMPTY, kIdentity, kPFix_SPfromSP }, - { kPFix_EMPTY, kIdentity, kPFix_SPbySP }, - { kPFix_EMPTY, kOmitFirst5, kPFix_EMPTY }, - { kPFix_EMPTY, kOmitFirst6, kPFix_EMPTY }, - { kPFix_SPtheSP, kIdentity, kPFix_EMPTY }, - { kPFix_EMPTY, kOmitLast4, kPFix_EMPTY }, - { kPFix_EMPTY, kIdentity, kPFix_DOTSPTheSP }, - { kPFix_EMPTY, kUppercaseAll, kPFix_EMPTY }, - { kPFix_EMPTY, kIdentity, kPFix_SPonSP }, - { kPFix_EMPTY, kIdentity, kPFix_SPasSP }, - { kPFix_EMPTY, kIdentity, kPFix_SPisSP }, - { kPFix_EMPTY, kOmitLast7, kPFix_EMPTY }, - { kPFix_EMPTY, kOmitLast1, kPFix_ingSP }, - { kPFix_EMPTY, kIdentity, kPFix_NEWLINETAB }, - { kPFix_EMPTY, kIdentity, kPFix_COLON }, - { kPFix_SP, kIdentity, kPFix_DOTSP }, - { kPFix_EMPTY, kIdentity, kPFix_edSP }, - { kPFix_EMPTY, kOmitFirst9, kPFix_EMPTY }, - { kPFix_EMPTY, kOmitFirst7, kPFix_EMPTY }, - { kPFix_EMPTY, kOmitLast6, kPFix_EMPTY }, - { kPFix_EMPTY, kIdentity, kPFix_OPEN }, - { kPFix_EMPTY, kUppercaseFirst, kPFix_COMMASP }, - { kPFix_EMPTY, kOmitLast8, kPFix_EMPTY }, - { kPFix_EMPTY, kIdentity, kPFix_SPatSP }, - { kPFix_EMPTY, kIdentity, kPFix_lySP }, - { kPFix_SPtheSP, kIdentity, kPFix_SPofSP }, - { kPFix_EMPTY, kOmitLast5, kPFix_EMPTY }, - { kPFix_EMPTY, kOmitLast9, kPFix_EMPTY }, - { kPFix_SP, kUppercaseFirst, kPFix_COMMASP }, - { kPFix_EMPTY, kUppercaseFirst, kPFix_DQUOT }, - { kPFix_DOT, kIdentity, kPFix_OPEN }, - { kPFix_EMPTY, kUppercaseAll, kPFix_SP }, - { kPFix_EMPTY, kUppercaseFirst, kPFix_DQUOTGT }, - { kPFix_EMPTY, kIdentity, kPFix_EQDQUOT }, - { kPFix_SP, kIdentity, kPFix_DOT }, - { kPFix_DOTcomSLASH, kIdentity, kPFix_EMPTY }, - { kPFix_SPtheSP, kIdentity, kPFix_SPofSPtheSP }, - { kPFix_EMPTY, kUppercaseFirst, kPFix_SQUOT }, - { kPFix_EMPTY, kIdentity, kPFix_DOTSPThisSP }, - { kPFix_EMPTY, kIdentity, kPFix_COMMA }, - { kPFix_DOT, kIdentity, kPFix_SP }, - { kPFix_EMPTY, kUppercaseFirst, kPFix_OPEN }, - { kPFix_EMPTY, kUppercaseFirst, kPFix_DOT }, - { kPFix_EMPTY, kIdentity, kPFix_SPnotSP }, - { kPFix_SP, kIdentity, kPFix_EQDQUOT }, - { kPFix_EMPTY, kIdentity, kPFix_erSP }, - { kPFix_SP, kUppercaseAll, kPFix_SP }, - { kPFix_EMPTY, kIdentity, kPFix_alSP }, - { kPFix_SP, kUppercaseAll, kPFix_EMPTY }, - { kPFix_EMPTY, kIdentity, kPFix_EQSQUOT }, - { kPFix_EMPTY, kUppercaseAll, kPFix_DQUOT }, - { kPFix_EMPTY, kUppercaseFirst, kPFix_DOTSP }, - { kPFix_SP, kIdentity, kPFix_OPEN }, - { kPFix_EMPTY, kIdentity, kPFix_fulSP }, - { kPFix_SP, kUppercaseFirst, kPFix_DOTSP }, - { kPFix_EMPTY, kIdentity, kPFix_iveSP }, - { kPFix_EMPTY, kIdentity, kPFix_lessSP }, - { kPFix_EMPTY, kUppercaseAll, kPFix_SQUOT }, - { kPFix_EMPTY, kIdentity, kPFix_estSP }, - { kPFix_SP, kUppercaseFirst, kPFix_DOT }, - { kPFix_EMPTY, kUppercaseAll, kPFix_DQUOTGT }, - { kPFix_SP, kIdentity, kPFix_EQSQUOT }, - { kPFix_EMPTY, kUppercaseFirst, kPFix_COMMA }, - { kPFix_EMPTY, kIdentity, kPFix_izeSP }, - { kPFix_EMPTY, kUppercaseAll, kPFix_DOT }, - { kPFix_NBSP, kIdentity, kPFix_EMPTY }, - { kPFix_SP, kIdentity, kPFix_COMMA }, - { kPFix_EMPTY, kUppercaseFirst, kPFix_EQDQUOT }, - { kPFix_EMPTY, kUppercaseAll, kPFix_EQDQUOT }, - { kPFix_EMPTY, kIdentity, kPFix_ousSP }, - { kPFix_EMPTY, kUppercaseAll, kPFix_COMMASP }, - { kPFix_EMPTY, kUppercaseFirst, kPFix_EQSQUOT }, - { kPFix_SP, kUppercaseFirst, kPFix_COMMA }, - { kPFix_SP, kUppercaseAll, kPFix_EQDQUOT }, - { kPFix_SP, kUppercaseAll, kPFix_COMMASP }, - { kPFix_EMPTY, kUppercaseAll, kPFix_COMMA }, - { kPFix_EMPTY, kUppercaseAll, kPFix_OPEN }, - { kPFix_EMPTY, kUppercaseAll, kPFix_DOTSP }, - { kPFix_SP, kUppercaseAll, kPFix_DOT }, - { kPFix_EMPTY, kUppercaseAll, kPFix_EQSQUOT }, - { kPFix_SP, kUppercaseAll, kPFix_DOTSP }, - { kPFix_SP, kUppercaseFirst, kPFix_EQDQUOT }, - { kPFix_SP, kUppercaseAll, kPFix_EQSQUOT }, - { kPFix_SP, kUppercaseFirst, kPFix_EQSQUOT }, -}; - -static const int kNumTransforms = sizeof(kTransforms) / sizeof(kTransforms[0]); - -static int ToUpperCase(uint8_t* p) { - if (p[0] < 0xc0) { - if (p[0] >= 'a' && p[0] <= 'z') { - p[0] ^= 32; - } - return 1; - } - /* An overly simplified uppercasing model for UTF-8. */ - if (p[0] < 0xe0) { - p[1] ^= 32; - return 2; - } - /* An arbitrary transform for three byte characters. */ - p[2] ^= 5; - return 3; -} - -static BROTLI_NOINLINE int BrotliTransformDictionaryWord( - uint8_t* dst, const uint8_t* word, int len, int transform) { - int idx = 0; - { - const char* prefix = &kPrefixSuffix[kTransforms[transform].prefix_id]; - while (*prefix) { dst[idx++] = (uint8_t)*prefix++; } - } - { - const int t = kTransforms[transform].transform; - int i = 0; - int skip = t - (kOmitFirst1 - 1); - if (skip > 0) { - word += skip; - len -= skip; - } else if (t <= kOmitLast9) { - len -= t; - } - while (i < len) { dst[idx++] = word[i++]; } - if (t == kUppercaseFirst) { - ToUpperCase(&dst[idx - len]); - } else if (t == kUppercaseAll) { - uint8_t* uppercase = &dst[idx - len]; - while (len > 0) { - int step = ToUpperCase(uppercase); - uppercase += step; - len -= step; - } - } - } - { - const char* suffix = &kPrefixSuffix[kTransforms[transform].suffix_id]; - while (*suffix) { dst[idx++] = (uint8_t)*suffix++; } - return idx; - } -} - -#if defined(__cplusplus) || defined(c_plusplus) -} /* extern "C" */ -#endif - -#endif /* BROTLI_DEC_TRANSFORM_H_ */ diff --git a/c/enc/backward_references.c b/c/enc/backward_references.c index cce0cd4..62ecea7 100644 --- a/c/enc/backward_references.c +++ b/c/enc/backward_references.c @@ -102,23 +102,16 @@ static BROTLI_INLINE size_t ComputeDistanceCode(size_t distance, #undef CAT #undef EXPAND_CAT -void BrotliCreateBackwardReferences(const BrotliDictionary* dictionary, - size_t num_bytes, - size_t position, - const uint8_t* ringbuffer, - size_t ringbuffer_mask, - const BrotliEncoderParams* params, - HasherHandle hasher, - int* dist_cache, - size_t* last_insert_len, - Command* commands, - size_t* num_commands, - size_t* num_literals) { +void BrotliCreateBackwardReferences( + size_t num_bytes, size_t position, const uint8_t* ringbuffer, + size_t ringbuffer_mask, const BrotliEncoderParams* params, + HasherHandle hasher, int* dist_cache, size_t* last_insert_len, + Command* commands, size_t* num_commands, size_t* num_literals) { switch (params->hasher.type) { #define CASE_(N) \ case N: \ - CreateBackwardReferencesNH ## N(dictionary, \ - kStaticDictionaryHash, num_bytes, position, ringbuffer, \ + CreateBackwardReferencesNH ## N( \ + num_bytes, position, ringbuffer, \ ringbuffer_mask, params, hasher, dist_cache, \ last_insert_len, commands, num_commands, num_literals); \ return; diff --git a/c/enc/backward_references.h b/c/enc/backward_references.h index 631c2f6..3a41466 100644 --- a/c/enc/backward_references.h +++ b/c/enc/backward_references.h @@ -26,7 +26,6 @@ extern "C" { CreateBackwardReferences calls, and must be incremented by the amount written by this call. */ BROTLI_INTERNAL void BrotliCreateBackwardReferences( - const BrotliDictionary* dictionary, size_t num_bytes, size_t position, const uint8_t* ringbuffer, size_t ringbuffer_mask, const BrotliEncoderParams* params, HasherHandle hasher, int* dist_cache, size_t* last_insert_len, diff --git a/c/enc/backward_references_hq.c b/c/enc/backward_references_hq.c index b05cb4f..f2f9918 100644 --- a/c/enc/backward_references_hq.c +++ b/c/enc/backward_references_hq.c @@ -25,6 +25,10 @@ extern "C" { #endif +#define BROTLI_SIMPLE_DISTANCE_ALPHABET_SIZE ( \ + BROTLI_NUM_DISTANCE_SHORT_CODES + (2 * BROTLI_LARGE_MAX_DISTANCE_BITS)) +/* BROTLI_SIMPLE_DISTANCE_ALPHABET_SIZE == 74 */ + static const float kInfinity = 1.7e38f; /* ~= 2 ^ 127 */ static const uint32_t kDistanceCacheIndex[] = { @@ -39,40 +43,40 @@ void BrotliInitZopfliNodes(ZopfliNode* array, size_t length) { size_t i; stub.length = 1; stub.distance = 0; - stub.insert_length = 0; + stub.dcode_insert_length = 0; stub.u.cost = kInfinity; for (i = 0; i < length; ++i) array[i] = stub; } static BROTLI_INLINE uint32_t ZopfliNodeCopyLength(const ZopfliNode* self) { - return self->length & 0xffffff; + return self->length & 0x1FFFFFF; } static BROTLI_INLINE uint32_t ZopfliNodeLengthCode(const ZopfliNode* self) { - const uint32_t modifier = self->length >> 24; + const uint32_t modifier = self->length >> 25; return ZopfliNodeCopyLength(self) + 9u - modifier; } static BROTLI_INLINE uint32_t ZopfliNodeCopyDistance(const ZopfliNode* self) { - return self->distance & 0x7ffffff; + return self->distance; } static BROTLI_INLINE uint32_t ZopfliNodeDistanceCode(const ZopfliNode* self) { - const uint32_t short_code = self->distance >> 27; + const uint32_t short_code = self->dcode_insert_length >> 27; return short_code == 0 ? ZopfliNodeCopyDistance(self) + BROTLI_NUM_DISTANCE_SHORT_CODES - 1 : short_code - 1; } static BROTLI_INLINE uint32_t ZopfliNodeCommandLength(const ZopfliNode* self) { - return ZopfliNodeCopyLength(self) + self->insert_length; + return ZopfliNodeCopyLength(self) + (self->dcode_insert_length & 0x7FFFFFF); } /* Histogram based cost model for zopflification. */ typedef struct ZopfliCostModel { /* The insert and copy length symbols. */ float cost_cmd_[BROTLI_NUM_COMMAND_SYMBOLS]; - float cost_dist_[BROTLI_NUM_DISTANCE_SYMBOLS]; + float cost_dist_[BROTLI_SIMPLE_DISTANCE_ALPHABET_SIZE]; /* Cumulative costs of literals per position in the stream. */ float* literal_costs_; float min_cost_cmd_; @@ -91,17 +95,26 @@ static void CleanupZopfliCostModel(MemoryManager* m, ZopfliCostModel* self) { } static void SetCost(const uint32_t* histogram, size_t histogram_size, - float* cost) { + BROTLI_BOOL literal_histogram, float* cost) { size_t sum = 0; + size_t missing_symbol_sum; float log2sum; + float missing_symbol_cost; size_t i; for (i = 0; i < histogram_size; i++) { sum += histogram[i]; } log2sum = (float)FastLog2(sum); + missing_symbol_sum = sum; + if (!literal_histogram) { + for (i = 0; i < histogram_size; i++) { + if (histogram[i] == 0) missing_symbol_sum++; + } + } + missing_symbol_cost = (float)FastLog2(missing_symbol_sum) + 2; for (i = 0; i < histogram_size; i++) { if (histogram[i] == 0) { - cost[i] = log2sum + 2; + cost[i] = missing_symbol_cost; continue; } @@ -122,7 +135,7 @@ static void ZopfliCostModelSetFromCommands(ZopfliCostModel* self, size_t last_insert_len) { uint32_t histogram_literal[BROTLI_NUM_LITERAL_SYMBOLS]; uint32_t histogram_cmd[BROTLI_NUM_COMMAND_SYMBOLS]; - uint32_t histogram_dist[BROTLI_NUM_DISTANCE_SYMBOLS]; + uint32_t histogram_dist[BROTLI_SIMPLE_DISTANCE_ALPHABET_SIZE]; float cost_literal[BROTLI_NUM_LITERAL_SYMBOLS]; size_t pos = position - last_insert_len; float min_cost_cmd = kInfinity; @@ -136,7 +149,7 @@ static void ZopfliCostModelSetFromCommands(ZopfliCostModel* self, for (i = 0; i < num_commands; i++) { size_t inslength = commands[i].insert_len_; size_t copylength = CommandCopyLen(&commands[i]); - size_t distcode = commands[i].dist_prefix_; + size_t distcode = commands[i].dist_prefix_ & 0x3FF; size_t cmdcode = commands[i].cmd_prefix_; size_t j; @@ -150,9 +163,12 @@ static void ZopfliCostModelSetFromCommands(ZopfliCostModel* self, pos += inslength + copylength; } - SetCost(histogram_literal, BROTLI_NUM_LITERAL_SYMBOLS, cost_literal); - SetCost(histogram_cmd, BROTLI_NUM_COMMAND_SYMBOLS, cost_cmd); - SetCost(histogram_dist, BROTLI_NUM_DISTANCE_SYMBOLS, self->cost_dist_); + SetCost(histogram_literal, BROTLI_NUM_LITERAL_SYMBOLS, BROTLI_TRUE, + cost_literal); + SetCost(histogram_cmd, BROTLI_NUM_COMMAND_SYMBOLS, BROTLI_FALSE, + cost_cmd); + SetCost(histogram_dist, BROTLI_SIMPLE_DISTANCE_ALPHABET_SIZE, BROTLI_FALSE, + self->cost_dist_); for (i = 0; i < BROTLI_NUM_COMMAND_SYMBOLS; ++i) { min_cost_cmd = BROTLI_MIN(float, min_cost_cmd, cost_cmd[i]); @@ -161,11 +177,14 @@ static void ZopfliCostModelSetFromCommands(ZopfliCostModel* self, { float* literal_costs = self->literal_costs_; + float literal_carry = 0.0; size_t num_bytes = self->num_bytes_; literal_costs[0] = 0.0; for (i = 0; i < num_bytes; ++i) { - literal_costs[i + 1] = literal_costs[i] + + literal_carry += cost_literal[ringbuffer[(position + i) & ringbuffer_mask]]; + literal_costs[i + 1] = literal_costs[i] + literal_carry; + literal_carry -= literal_costs[i + 1] - literal_costs[i]; } } } @@ -175,6 +194,7 @@ static void ZopfliCostModelSetFromLiteralCosts(ZopfliCostModel* self, const uint8_t* ringbuffer, size_t ringbuffer_mask) { float* literal_costs = self->literal_costs_; + float literal_carry = 0.0; float* cost_dist = self->cost_dist_; float* cost_cmd = self->cost_cmd_; size_t num_bytes = self->num_bytes_; @@ -183,12 +203,14 @@ static void ZopfliCostModelSetFromLiteralCosts(ZopfliCostModel* self, ringbuffer, &literal_costs[1]); literal_costs[0] = 0.0; for (i = 0; i < num_bytes; ++i) { - literal_costs[i + 1] += literal_costs[i]; + literal_carry += literal_costs[i + 1]; + literal_costs[i + 1] = literal_costs[i] + literal_carry; + literal_carry -= literal_costs[i + 1] - literal_costs[i]; } for (i = 0; i < BROTLI_NUM_COMMAND_SYMBOLS; ++i) { cost_cmd[i] = (float)FastLog2(11 + (uint32_t)i); } - for (i = 0; i < BROTLI_NUM_DISTANCE_SYMBOLS; ++i) { + for (i = 0; i < BROTLI_SIMPLE_DISTANCE_ALPHABET_SIZE; ++i) { cost_dist[i] = (float)FastLog2(20 + (uint32_t)i); } self->min_cost_cmd_ = (float)FastLog2(11); @@ -221,9 +243,10 @@ static BROTLI_INLINE void UpdateZopfliNode(ZopfliNode* nodes, size_t pos, size_t start_pos, size_t len, size_t len_code, size_t dist, size_t short_code, float cost) { ZopfliNode* next = &nodes[pos + len]; - next->length = (uint32_t)(len | ((len + 9u - len_code) << 24)); - next->distance = (uint32_t)(dist | (short_code << 27)); - next->insert_length = (uint32_t)(pos - start_pos); + next->length = (uint32_t)(len | ((len + 9u - len_code) << 25)); + next->distance = (uint32_t)dist; + next->dcode_insert_length = (uint32_t)( + (short_code << 27) | (pos - start_pos)); next->u.cost = cost; } @@ -303,7 +326,7 @@ static uint32_t ComputeDistanceShortcut(const size_t block_start, const size_t gap, const ZopfliNode* nodes) { const size_t clen = ZopfliNodeCopyLength(&nodes[pos]); - const size_t ilen = nodes[pos].insert_length; + const size_t ilen = nodes[pos].dcode_insert_length & 0x7FFFFFF; const size_t dist = ZopfliNodeCopyDistance(&nodes[pos]); /* Since |block_start + pos| is the end position of the command, the copy part starts from |block_start + pos - clen|. Distances that are greater than @@ -335,7 +358,7 @@ static void ComputeDistanceCache(const size_t pos, int idx = 0; size_t p = nodes[pos].u.shortcut; while (idx < 4 && p > 0) { - const size_t ilen = nodes[p].insert_length; + const size_t ilen = nodes[p].dcode_insert_length & 0x7FFFFFF; const size_t clen = ZopfliNodeCopyLength(&nodes[p]); const size_t dist = ZopfliNodeCopyDistance(&nodes[p]); dist_cache[idx++] = (int)dist; @@ -483,9 +506,9 @@ static size_t UpdateNodes( float dist_cost; size_t max_match_len; PrefixEncodeCopyDistance(dist_code, 0, 0, &dist_symbol, &distextra); - distnumextra = distextra >> 24; + distnumextra = dist_symbol >> 10; dist_cost = base_cost + (float)distnumextra + - ZopfliCostModelGetDistanceCost(model, dist_symbol); + ZopfliCostModelGetDistanceCost(model, dist_symbol & 0x3FF); /* Try all copy lengths up until the maximum copy length corresponding to this distance. If the distance refers to the static dictionary, or @@ -517,7 +540,8 @@ static size_t ComputeShortestPathFromNodes(size_t num_bytes, ZopfliNode* nodes) { size_t index = num_bytes; size_t num_commands = 0; - while (nodes[index].insert_length == 0 && nodes[index].length == 1) --index; + while ((nodes[index].dcode_insert_length & 0x7FFFFFF) == 0 && + nodes[index].length == 1) --index; nodes[index].u.next = BROTLI_UINT32_MAX; while (index != 0) { size_t len = ZopfliNodeCommandLength(&nodes[index]); @@ -546,7 +570,7 @@ void BrotliZopfliCreateCommands(const size_t num_bytes, for (i = 0; offset != BROTLI_UINT32_MAX; i++) { const ZopfliNode* next = &nodes[pos + offset]; size_t copy_length = ZopfliNodeCopyLength(next); - size_t insert_length = next->insert_length; + size_t insert_length = next->dcode_insert_length & 0x7FFFFFF; pos += insert_length; offset = next->u.next; if (i == 0) { @@ -624,7 +648,6 @@ static size_t ZopfliIterate(size_t num_bytes, /* REQUIRES: nodes != NULL and len(nodes) >= num_bytes + 1 */ size_t BrotliZopfliComputeShortestPath(MemoryManager* m, - const BrotliDictionary* dictionary, size_t num_bytes, size_t position, const uint8_t* ringbuffer, size_t ringbuffer_mask, const BrotliEncoderParams* params, const size_t max_backward_limit, const int* dist_cache, HasherHandle hasher, @@ -649,9 +672,9 @@ size_t BrotliZopfliComputeShortestPath(MemoryManager* m, const size_t pos = position + i; const size_t max_distance = BROTLI_MIN(size_t, pos, max_backward_limit); size_t skip; - size_t num_matches = FindAllMatchesH10(hasher, dictionary, ringbuffer, - ringbuffer_mask, pos, num_bytes - i, max_distance, gap, params, - &matches[lz_matches_offset]); + size_t num_matches = FindAllMatchesH10(hasher, ¶ms->dictionary, + ringbuffer, ringbuffer_mask, pos, num_bytes - i, max_distance, gap, + params, &matches[lz_matches_offset]); if (num_matches > 0 && BackwardMatchLength(&matches[num_matches - 1]) > max_zopfli_len) { matches[0] = matches[num_matches - 1]; @@ -683,7 +706,6 @@ size_t BrotliZopfliComputeShortestPath(MemoryManager* m, } void BrotliCreateZopfliBackwardReferences(MemoryManager* m, - const BrotliDictionary* dictionary, size_t num_bytes, size_t position, const uint8_t* ringbuffer, size_t ringbuffer_mask, const BrotliEncoderParams* params, HasherHandle hasher, int* dist_cache, size_t* last_insert_len, @@ -693,7 +715,7 @@ void BrotliCreateZopfliBackwardReferences(MemoryManager* m, nodes = BROTLI_ALLOC(m, ZopfliNode, num_bytes + 1); if (BROTLI_IS_OOM(m)) return; BrotliInitZopfliNodes(nodes, num_bytes + 1); - *num_commands += BrotliZopfliComputeShortestPath(m, dictionary, + *num_commands += BrotliZopfliComputeShortestPath(m, num_bytes, position, ringbuffer, ringbuffer_mask, params, max_backward_limit, dist_cache, hasher, nodes); if (BROTLI_IS_OOM(m)) return; @@ -703,7 +725,6 @@ void BrotliCreateZopfliBackwardReferences(MemoryManager* m, } void BrotliCreateHqZopfliBackwardReferences(MemoryManager* m, - const BrotliDictionary* dictionary, size_t num_bytes, size_t position, const uint8_t* ringbuffer, size_t ringbuffer_mask, const BrotliEncoderParams* params, HasherHandle hasher, int* dist_cache, size_t* last_insert_len, @@ -736,8 +757,8 @@ void BrotliCreateHqZopfliBackwardReferences(MemoryManager* m, BROTLI_ENSURE_CAPACITY(m, BackwardMatch, matches, matches_size, cur_match_pos + MAX_NUM_MATCHES_H10 + shadow_matches); if (BROTLI_IS_OOM(m)) return; - num_found_matches = FindAllMatchesH10(hasher, dictionary, - ringbuffer, ringbuffer_mask, pos, max_length, + num_found_matches = FindAllMatchesH10(hasher, + ¶ms->dictionary, ringbuffer, ringbuffer_mask, pos, max_length, max_distance, gap, params, &matches[cur_match_pos + shadow_matches]); cur_match_end = cur_match_pos + num_found_matches; for (j = cur_match_pos; j + 1 < cur_match_end; ++j) { diff --git a/c/enc/backward_references_hq.h b/c/enc/backward_references_hq.h index cc19544..7c38bd6 100644 --- a/c/enc/backward_references_hq.h +++ b/c/enc/backward_references_hq.h @@ -23,29 +23,26 @@ extern "C" { #endif BROTLI_INTERNAL void BrotliCreateZopfliBackwardReferences(MemoryManager* m, - const BrotliDictionary* dictionary, size_t num_bytes, size_t position, const uint8_t* ringbuffer, size_t ringbuffer_mask, const BrotliEncoderParams* params, HasherHandle hasher, int* dist_cache, size_t* last_insert_len, Command* commands, size_t* num_commands, size_t* num_literals); BROTLI_INTERNAL void BrotliCreateHqZopfliBackwardReferences(MemoryManager* m, - const BrotliDictionary* dictionary, size_t num_bytes, size_t position, const uint8_t* ringbuffer, size_t ringbuffer_mask, const BrotliEncoderParams* params, HasherHandle hasher, int* dist_cache, size_t* last_insert_len, Command* commands, size_t* num_commands, size_t* num_literals); typedef struct ZopfliNode { - /* best length to get up to this byte (not including this byte itself) - highest 8 bit is used to reconstruct the length code */ + /* Best length to get up to this byte (not including this byte itself) + highest 7 bit is used to reconstruct the length code. */ uint32_t length; - /* distance associated with the length; highest 5 bits contain distance - short code + 1 (or zero if no short code); this way only distances shorter - than 128MiB are allowed here */ + /* Distance associated with the length. */ uint32_t distance; - /* number of literal inserts before this copy */ - uint32_t insert_length; + /* Number of literal inserts before this copy; highest 5 bits contain + distance short code + 1 (or zero if no short code). */ + uint32_t dcode_insert_length; /* This union holds information used by dynamic-programming. During forward pass |cost| it used to store the goal function. When node is processed its @@ -78,7 +75,6 @@ BROTLI_INTERNAL void BrotliInitZopfliNodes(ZopfliNode* array, size_t length); (2) nodes[i].command_length() <= i and (3) nodes[i - nodes[i].command_length()].cost < kInfinity */ BROTLI_INTERNAL size_t BrotliZopfliComputeShortestPath(MemoryManager* m, - const BrotliDictionary* dictionary, size_t num_bytes, size_t position, const uint8_t* ringbuffer, size_t ringbuffer_mask, const BrotliEncoderParams* params, const size_t max_backward_limit, const int* dist_cache, HasherHandle hasher, diff --git a/c/enc/backward_references_inc.h b/c/enc/backward_references_inc.h index 0a715b2..967545d 100644 --- a/c/enc/backward_references_inc.h +++ b/c/enc/backward_references_inc.h @@ -8,8 +8,6 @@ /* template parameters: EXPORT_FN, FN */ static BROTLI_NOINLINE void EXPORT_FN(CreateBackwardReferences)( - const BrotliDictionary* dictionary, - const uint16_t* dictionary_hash, size_t num_bytes, size_t position, const uint8_t* ringbuffer, size_t ringbuffer_mask, const BrotliEncoderParams* params, HasherHandle hasher, int* dist_cache, @@ -43,9 +41,10 @@ static BROTLI_NOINLINE void EXPORT_FN(CreateBackwardReferences)( sr.len_code_delta = 0; sr.distance = 0; sr.score = kMinScore; - FN(FindLongestMatch)(hasher, dictionary, dictionary_hash, ringbuffer, - ringbuffer_mask, dist_cache, position, - max_length, max_distance, gap, &sr); + FN(FindLongestMatch)(hasher, ¶ms->dictionary, + ringbuffer, ringbuffer_mask, dist_cache, position, + max_length, max_distance, gap, + params->dist.max_distance, &sr); if (sr.score > kMinScore) { /* Found a match. Let's look for something even better ahead. */ int delayed_backward_references_in_row = 0; @@ -59,9 +58,9 @@ static BROTLI_NOINLINE void EXPORT_FN(CreateBackwardReferences)( sr2.distance = 0; sr2.score = kMinScore; max_distance = BROTLI_MIN(size_t, position + 1, max_backward_limit); - FN(FindLongestMatch)(hasher, dictionary, dictionary_hash, + FN(FindLongestMatch)(hasher, ¶ms->dictionary, ringbuffer, ringbuffer_mask, dist_cache, position + 1, max_length, - max_distance, gap, &sr2); + max_distance, gap, params->dist.max_distance, &sr2); if (sr2.score >= sr.score + cost_diff_lazy) { /* Ok, let's just write one byte for now and start a match from the next byte. */ diff --git a/c/enc/bit_cost.h b/c/enc/bit_cost.h index e8b7013..6586469 100644 --- a/c/enc/bit_cost.h +++ b/c/enc/bit_cost.h @@ -18,11 +18,11 @@ extern "C" { #endif -static BROTLI_INLINE double ShannonEntropy(const uint32_t *population, - size_t size, size_t *total) { +static BROTLI_INLINE double ShannonEntropy( + const uint32_t* population, size_t size, size_t* total) { size_t sum = 0; double retval = 0; - const uint32_t *population_end = population + size; + const uint32_t* population_end = population + size; size_t p; if (size & 1) { goto odd_number_of_elements_left; @@ -42,7 +42,7 @@ static BROTLI_INLINE double ShannonEntropy(const uint32_t *population, } static BROTLI_INLINE double BitsEntropy( - const uint32_t *population, size_t size) { + const uint32_t* population, size_t size) { size_t sum; double retval = ShannonEntropy(population, size, &sum); if (retval < sum) { diff --git a/c/enc/block_encoder_inc.h b/c/enc/block_encoder_inc.h index 2a08f90..8cbd5ea 100644 --- a/c/enc/block_encoder_inc.h +++ b/c/enc/block_encoder_inc.h @@ -13,9 +13,9 @@ stream. */ static void FN(BuildAndStoreEntropyCodes)(MemoryManager* m, BlockEncoder* self, const HistogramType* histograms, const size_t histograms_size, - HuffmanTree* tree, size_t* storage_ix, uint8_t* storage) { - const size_t alphabet_size = self->alphabet_size_; - const size_t table_size = histograms_size * alphabet_size; + const size_t alphabet_size, HuffmanTree* tree, + size_t* storage_ix, uint8_t* storage) { + const size_t table_size = histograms_size * self->histogram_length_; self->depths_ = BROTLI_ALLOC(m, uint8_t, table_size); self->bits_ = BROTLI_ALLOC(m, uint16_t, table_size); if (BROTLI_IS_OOM(m)) return; @@ -23,9 +23,10 @@ static void FN(BuildAndStoreEntropyCodes)(MemoryManager* m, BlockEncoder* self, { size_t i; for (i = 0; i < histograms_size; ++i) { - size_t ix = i * alphabet_size; - BuildAndStoreHuffmanTree(&histograms[i].data_[0], alphabet_size, tree, - &self->depths_[ix], &self->bits_[ix], storage_ix, storage); + size_t ix = i * self->histogram_length_; + BuildAndStoreHuffmanTree(&histograms[i].data_[0], self->histogram_length_, + alphabet_size, tree, &self->depths_[ix], &self->bits_[ix], + storage_ix, storage); } } } diff --git a/c/enc/block_splitter.c b/c/enc/block_splitter.c index 6362211..d308eca 100644 --- a/c/enc/block_splitter.c +++ b/c/enc/block_splitter.c @@ -174,7 +174,7 @@ void BrotliSplitBlock(MemoryManager* m, for (i = 0; i < num_commands; ++i) { const Command* cmd = &cmds[i]; if (CommandCopyLen(cmd) && cmd->cmd_prefix_ >= 128) { - distance_prefixes[j++] = cmd->dist_prefix_; + distance_prefixes[j++] = cmd->dist_prefix_ & 0x3FF; } } /* Create the block split on the array of distance prefixes. */ diff --git a/c/enc/block_splitter_inc.h b/c/enc/block_splitter_inc.h index 5712572..023712b 100644 --- a/c/enc/block_splitter_inc.h +++ b/c/enc/block_splitter_inc.h @@ -70,7 +70,7 @@ static size_t FN(FindBlocks)(const DataType* data, const size_t length, double* insert_cost, double* cost, uint8_t* switch_signal, - uint8_t *block_id) { + uint8_t* block_id) { const size_t data_size = FN(HistogramDataSize)(); const size_t bitmaplen = (num_histograms + 7) >> 3; size_t num_blocks = 1; diff --git a/c/enc/brotli_bit_stream.c b/c/enc/brotli_bit_stream.c index cd9c594..aaf2dad 100644 --- a/c/enc/brotli_bit_stream.c +++ b/c/enc/brotli_bit_stream.c @@ -13,12 +13,13 @@ #include <string.h> /* memcpy, memset */ #include "../common/constants.h" +#include "../common/context.h" #include "../common/platform.h" #include <brotli/types.h> -#include "./context.h" #include "./entropy_encode.h" #include "./entropy_encode_static.h" #include "./fast_log.h" +#include "./histogram.h" #include "./memory.h" #include "./write_bits.h" @@ -27,12 +28,11 @@ extern "C" { #endif #define MAX_HUFFMAN_TREE_SIZE (2 * BROTLI_NUM_COMMAND_SYMBOLS + 1) -/* The size of Huffman dictionary for distances assuming that NPOSTFIX = 0 and - NDIRECT = 0. */ -#define SIMPLE_DISTANCE_ALPHABET_SIZE (BROTLI_NUM_DISTANCE_SHORT_CODES + \ - (2 * BROTLI_MAX_DISTANCE_BITS)) -/* SIMPLE_DISTANCE_ALPHABET_SIZE == 64 */ -#define SIMPLE_DISTANCE_ALPHABET_BITS 6 +/* The maximum size of Huffman dictionary for distances assuming that + NPOSTFIX = 0 and NDIRECT = 0. */ +#define MAX_SIMPLE_DISTANCE_ALPHABET_SIZE \ + BROTLI_DISTANCE_ALPHABET_SIZE(0, 0, BROTLI_LARGE_MAX_DISTANCE_BITS) +/* MAX_SIMPLE_DISTANCE_ALPHABET_SIZE == 140 */ /* Represents the range of values belonging to a prefix code: [offset, offset + 2^nbits) */ @@ -258,7 +258,7 @@ static void StoreSimpleHuffmanTree(const uint8_t* depths, size_t symbols[4], size_t num_symbols, size_t max_bits, - size_t *storage_ix, uint8_t *storage) { + size_t* storage_ix, uint8_t* storage) { /* value of 1 indicates a simple Huffman code */ BrotliWriteBits(2, 1, storage_ix, storage); BrotliWriteBits(2, num_symbols - 1, storage_ix, storage); /* NSYM - 1 */ @@ -297,7 +297,7 @@ static void StoreSimpleHuffmanTree(const uint8_t* depths, depths = symbol depths */ void BrotliStoreHuffmanTree(const uint8_t* depths, size_t num, HuffmanTree* tree, - size_t *storage_ix, uint8_t *storage) { + size_t* storage_ix, uint8_t* storage) { /* Write the Huffman tree into the brotli-representation. The command alphabet is the largest, so this allocation will fit all alphabets. */ @@ -360,8 +360,9 @@ void BrotliStoreHuffmanTree(const uint8_t* depths, size_t num, /* Builds a Huffman tree from histogram[0:length] into depth[0:length] and bits[0:length] and stores the encoded tree to the bit stream. */ -static void BuildAndStoreHuffmanTree(const uint32_t *histogram, - const size_t length, +static void BuildAndStoreHuffmanTree(const uint32_t* histogram, + const size_t histogram_length, + const size_t alphabet_size, HuffmanTree* tree, uint8_t* depth, uint16_t* bits, @@ -371,7 +372,7 @@ static void BuildAndStoreHuffmanTree(const uint32_t *histogram, size_t s4[4] = { 0 }; size_t i; size_t max_bits = 0; - for (i = 0; i < length; i++) { + for (i = 0; i < histogram_length; i++) { if (histogram[i]) { if (count < 4) { s4[count] = i; @@ -383,7 +384,7 @@ static void BuildAndStoreHuffmanTree(const uint32_t *histogram, } { - size_t max_bits_counter = length - 1; + size_t max_bits_counter = alphabet_size - 1; while (max_bits_counter) { max_bits_counter >>= 1; ++max_bits; @@ -398,14 +399,14 @@ static void BuildAndStoreHuffmanTree(const uint32_t *histogram, return; } - memset(depth, 0, length * sizeof(depth[0])); - BrotliCreateHuffmanTree(histogram, length, 15, tree, depth); - BrotliConvertBitDepthsToSymbols(depth, length, bits); + memset(depth, 0, histogram_length * sizeof(depth[0])); + BrotliCreateHuffmanTree(histogram, histogram_length, 15, tree, depth); + BrotliConvertBitDepthsToSymbols(depth, histogram_length, bits); if (count <= 4) { StoreSimpleHuffmanTree(depth, s4, count, max_bits, storage_ix, storage); } else { - BrotliStoreHuffmanTree(depth, length, tree, storage_ix, storage); + BrotliStoreHuffmanTree(depth, histogram_length, tree, storage_ix, storage); } } @@ -729,6 +730,7 @@ static void EncodeContextMap(MemoryManager* m, } } BuildAndStoreHuffmanTree(histogram, num_clusters + max_run_length_prefix, + num_clusters + max_run_length_prefix, tree, depths, bits, storage_ix, storage); for (i = 0; i < num_rle_symbols; ++i) { const uint32_t rle_symbol = rle_symbols[i] & kSymbolMask; @@ -788,10 +790,11 @@ static void BuildAndStoreBlockSplitCode(const uint8_t* types, } StoreVarLenUint8(num_types - 1, storage_ix, storage); if (num_types > 1) { /* TODO: else? could StoreBlockSwitch occur? */ - BuildAndStoreHuffmanTree(&type_histo[0], num_types + 2, tree, + BuildAndStoreHuffmanTree(&type_histo[0], num_types + 2, num_types + 2, tree, &code->type_depths[0], &code->type_bits[0], storage_ix, storage); BuildAndStoreHuffmanTree(&length_histo[0], BROTLI_NUM_BLOCK_LEN_SYMBOLS, + BROTLI_NUM_BLOCK_LEN_SYMBOLS, tree, &code->length_depths[0], &code->length_bits[0], storage_ix, storage); StoreBlockSwitch(code, lengths[0], types[0], 1, storage_ix, storage); @@ -822,8 +825,8 @@ static void StoreTrivialContextMap(size_t num_types, for (i = context_bits; i < alphabet_size; ++i) { histogram[i] = 1; } - BuildAndStoreHuffmanTree(histogram, alphabet_size, tree, - depths, bits, storage_ix, storage); + BuildAndStoreHuffmanTree(histogram, alphabet_size, alphabet_size, + tree, depths, bits, storage_ix, storage); for (i = 0; i < num_types; ++i) { size_t code = (i == 0 ? 0 : i + context_bits - 1); BrotliWriteBits(depths[code], bits[code], storage_ix, storage); @@ -838,7 +841,7 @@ static void StoreTrivialContextMap(size_t num_types, /* Manages the encoding of one block category (literal, command or distance). */ typedef struct BlockEncoder { - size_t alphabet_size_; + size_t histogram_length_; size_t num_block_types_; const uint8_t* block_types_; /* Not owned. */ const uint32_t* block_lengths_; /* Not owned. */ @@ -851,10 +854,10 @@ typedef struct BlockEncoder { uint16_t* bits_; } BlockEncoder; -static void InitBlockEncoder(BlockEncoder* self, size_t alphabet_size, +static void InitBlockEncoder(BlockEncoder* self, size_t histogram_length, size_t num_block_types, const uint8_t* block_types, const uint32_t* block_lengths, const size_t num_blocks) { - self->alphabet_size_ = alphabet_size; + self->histogram_length_ = histogram_length; self->num_block_types_ = num_block_types; self->block_types_ = block_types; self->block_lengths_ = block_lengths; @@ -890,7 +893,7 @@ static void StoreSymbol(BlockEncoder* self, size_t symbol, size_t* storage_ix, uint32_t block_len = self->block_lengths_[block_ix]; uint8_t block_type = self->block_types_[block_ix]; self->block_len_ = block_len; - self->entropy_ix_ = block_type * self->alphabet_size_; + self->entropy_ix_ = block_type * self->histogram_length_; StoreBlockSwitch(&self->block_split_code_, block_len, block_type, 0, storage_ix, storage); } @@ -919,7 +922,7 @@ static void StoreSymbolWithContext(BlockEncoder* self, size_t symbol, --self->block_len_; { size_t histo_ix = context_map[self->entropy_ix_ + context]; - size_t ix = histo_ix * self->alphabet_size_ + symbol; + size_t ix = histo_ix * self->histogram_length_ + symbol; BrotliWriteBits(self->depths_[ix], self->bits_[ix], storage_ix, storage); } } @@ -945,42 +948,38 @@ static void JumpToByteBoundary(size_t* storage_ix, uint8_t* storage) { } void BrotliStoreMetaBlock(MemoryManager* m, - const uint8_t* input, - size_t start_pos, - size_t length, - size_t mask, - uint8_t prev_byte, - uint8_t prev_byte2, - BROTLI_BOOL is_last, - uint32_t num_direct_distance_codes, - uint32_t distance_postfix_bits, - ContextType literal_context_mode, - const Command *commands, - size_t n_commands, - const MetaBlockSplit* mb, - size_t *storage_ix, - uint8_t *storage) { + const uint8_t* input, size_t start_pos, size_t length, size_t mask, + uint8_t prev_byte, uint8_t prev_byte2, BROTLI_BOOL is_last, + const BrotliEncoderParams* params, ContextType literal_context_mode, + const Command* commands, size_t n_commands, const MetaBlockSplit* mb, + size_t* storage_ix, uint8_t* storage) { + size_t pos = start_pos; size_t i; - size_t num_distance_codes = - BROTLI_NUM_DISTANCE_SHORT_CODES + num_direct_distance_codes + - (48u << distance_postfix_bits); + uint32_t num_distance_symbols = params->dist.alphabet_size; + uint32_t num_effective_distance_symbols = num_distance_symbols; HuffmanTree* tree; + ContextLut literal_context_lut = BROTLI_CONTEXT_LUT(literal_context_mode); BlockEncoder literal_enc; BlockEncoder command_enc; BlockEncoder distance_enc; + const BrotliDistanceParams* dist = ¶ms->dist; + if (params->large_window && + num_effective_distance_symbols > BROTLI_NUM_HISTOGRAM_DISTANCE_SYMBOLS) { + num_effective_distance_symbols = BROTLI_NUM_HISTOGRAM_DISTANCE_SYMBOLS; + } StoreCompressedMetaBlockHeader(is_last, length, storage_ix, storage); tree = BROTLI_ALLOC(m, HuffmanTree, MAX_HUFFMAN_TREE_SIZE); if (BROTLI_IS_OOM(m)) return; - InitBlockEncoder(&literal_enc, 256, mb->literal_split.num_types, - mb->literal_split.types, mb->literal_split.lengths, - mb->literal_split.num_blocks); + InitBlockEncoder(&literal_enc, BROTLI_NUM_LITERAL_SYMBOLS, + mb->literal_split.num_types, mb->literal_split.types, + mb->literal_split.lengths, mb->literal_split.num_blocks); InitBlockEncoder(&command_enc, BROTLI_NUM_COMMAND_SYMBOLS, mb->command_split.num_types, mb->command_split.types, mb->command_split.lengths, mb->command_split.num_blocks); - InitBlockEncoder(&distance_enc, num_distance_codes, + InitBlockEncoder(&distance_enc, num_effective_distance_symbols, mb->distance_split.num_types, mb->distance_split.types, mb->distance_split.lengths, mb->distance_split.num_blocks); @@ -989,9 +988,10 @@ void BrotliStoreMetaBlock(MemoryManager* m, BuildAndStoreBlockSwitchEntropyCodes( &distance_enc, tree, storage_ix, storage); - BrotliWriteBits(2, distance_postfix_bits, storage_ix, storage); - BrotliWriteBits(4, num_direct_distance_codes >> distance_postfix_bits, - storage_ix, storage); + BrotliWriteBits(2, dist->distance_postfix_bits, storage_ix, storage); + BrotliWriteBits( + 4, dist->num_direct_distance_codes >> dist->distance_postfix_bits, + storage_ix, storage); for (i = 0; i < mb->literal_split.num_types; ++i) { BrotliWriteBits(2, literal_context_mode, storage_ix, storage); } @@ -1017,13 +1017,16 @@ void BrotliStoreMetaBlock(MemoryManager* m, } BuildAndStoreEntropyCodesLiteral(m, &literal_enc, mb->literal_histograms, - mb->literal_histograms_size, tree, storage_ix, storage); + mb->literal_histograms_size, BROTLI_NUM_LITERAL_SYMBOLS, tree, + storage_ix, storage); if (BROTLI_IS_OOM(m)) return; BuildAndStoreEntropyCodesCommand(m, &command_enc, mb->command_histograms, - mb->command_histograms_size, tree, storage_ix, storage); + mb->command_histograms_size, BROTLI_NUM_COMMAND_SYMBOLS, tree, + storage_ix, storage); if (BROTLI_IS_OOM(m)) return; BuildAndStoreEntropyCodesDistance(m, &distance_enc, mb->distance_histograms, - mb->distance_histograms_size, tree, storage_ix, storage); + mb->distance_histograms_size, num_distance_symbols, tree, + storage_ix, storage); if (BROTLI_IS_OOM(m)) return; BROTLI_FREE(m, tree); @@ -1041,7 +1044,8 @@ void BrotliStoreMetaBlock(MemoryManager* m, } else { size_t j; for (j = cmd.insert_len_; j != 0; --j) { - size_t context = Context(prev_byte, prev_byte2, literal_context_mode); + size_t context = + BROTLI_CONTEXT(prev_byte, prev_byte2, literal_context_lut); uint8_t literal = input[pos & mask]; StoreSymbolWithContext(&literal_enc, literal, context, mb->literal_context_map, storage_ix, storage, @@ -1056,9 +1060,9 @@ void BrotliStoreMetaBlock(MemoryManager* m, prev_byte2 = input[(pos - 2) & mask]; prev_byte = input[(pos - 1) & mask]; if (cmd.cmd_prefix_ >= 128) { - size_t dist_code = cmd.dist_prefix_; - uint32_t distnumextra = cmd.dist_extra_ >> 24; - uint64_t distextra = cmd.dist_extra_ & 0xffffff; + size_t dist_code = cmd.dist_prefix_ & 0x3FF; + uint32_t distnumextra = cmd.dist_prefix_ >> 10; + uint64_t distextra = cmd.dist_extra_; if (mb->distance_context_map_size == 0) { StoreSymbol(&distance_enc, dist_code, storage_ix, storage); } else { @@ -1082,7 +1086,7 @@ void BrotliStoreMetaBlock(MemoryManager* m, static void BuildHistograms(const uint8_t* input, size_t start_pos, size_t mask, - const Command *commands, + const Command* commands, size_t n_commands, HistogramLiteral* lit_histo, HistogramCommand* cmd_histo, @@ -1099,7 +1103,7 @@ static void BuildHistograms(const uint8_t* input, } pos += CommandCopyLen(&cmd); if (CommandCopyLen(&cmd) && cmd.cmd_prefix_ >= 128) { - HistogramAddDistance(dist_histo, cmd.dist_prefix_); + HistogramAddDistance(dist_histo, cmd.dist_prefix_ & 0x3FF); } } } @@ -1107,7 +1111,7 @@ static void BuildHistograms(const uint8_t* input, static void StoreDataWithHuffmanCodes(const uint8_t* input, size_t start_pos, size_t mask, - const Command *commands, + const Command* commands, size_t n_commands, const uint8_t* lit_depth, const uint16_t* lit_bits, @@ -1134,9 +1138,9 @@ static void StoreDataWithHuffmanCodes(const uint8_t* input, } pos += CommandCopyLen(&cmd); if (CommandCopyLen(&cmd) && cmd.cmd_prefix_ >= 128) { - const size_t dist_code = cmd.dist_prefix_; - const uint32_t distnumextra = cmd.dist_extra_ >> 24; - const uint32_t distextra = cmd.dist_extra_ & 0xffffff; + const size_t dist_code = cmd.dist_prefix_ & 0x3FF; + const uint32_t distnumextra = cmd.dist_prefix_ >> 10; + const uint32_t distextra = cmd.dist_extra_; BrotliWriteBits(dist_depth[dist_code], dist_bits[dist_code], storage_ix, storage); BrotliWriteBits(distnumextra, distextra, storage_ix, storage); @@ -1145,15 +1149,10 @@ static void StoreDataWithHuffmanCodes(const uint8_t* input, } void BrotliStoreMetaBlockTrivial(MemoryManager* m, - const uint8_t* input, - size_t start_pos, - size_t length, - size_t mask, - BROTLI_BOOL is_last, - const Command *commands, - size_t n_commands, - size_t *storage_ix, - uint8_t *storage) { + const uint8_t* input, size_t start_pos, size_t length, size_t mask, + BROTLI_BOOL is_last, const BrotliEncoderParams* params, + const Command* commands, size_t n_commands, + size_t* storage_ix, uint8_t* storage) { HistogramLiteral lit_histo; HistogramCommand cmd_histo; HistogramDistance dist_histo; @@ -1161,9 +1160,10 @@ void BrotliStoreMetaBlockTrivial(MemoryManager* m, uint16_t lit_bits[BROTLI_NUM_LITERAL_SYMBOLS]; uint8_t cmd_depth[BROTLI_NUM_COMMAND_SYMBOLS]; uint16_t cmd_bits[BROTLI_NUM_COMMAND_SYMBOLS]; - uint8_t dist_depth[SIMPLE_DISTANCE_ALPHABET_SIZE]; - uint16_t dist_bits[SIMPLE_DISTANCE_ALPHABET_SIZE]; + uint8_t dist_depth[MAX_SIMPLE_DISTANCE_ALPHABET_SIZE]; + uint16_t dist_bits[MAX_SIMPLE_DISTANCE_ALPHABET_SIZE]; HuffmanTree* tree; + uint32_t num_distance_symbols = params->dist.alphabet_size; StoreCompressedMetaBlockHeader(is_last, length, storage_ix, storage); @@ -1178,14 +1178,16 @@ void BrotliStoreMetaBlockTrivial(MemoryManager* m, tree = BROTLI_ALLOC(m, HuffmanTree, MAX_HUFFMAN_TREE_SIZE); if (BROTLI_IS_OOM(m)) return; - BuildAndStoreHuffmanTree(lit_histo.data_, BROTLI_NUM_LITERAL_SYMBOLS, tree, + BuildAndStoreHuffmanTree(lit_histo.data_, BROTLI_NUM_LITERAL_SYMBOLS, + BROTLI_NUM_LITERAL_SYMBOLS, tree, lit_depth, lit_bits, storage_ix, storage); - BuildAndStoreHuffmanTree(cmd_histo.data_, BROTLI_NUM_COMMAND_SYMBOLS, tree, + BuildAndStoreHuffmanTree(cmd_histo.data_, BROTLI_NUM_COMMAND_SYMBOLS, + BROTLI_NUM_COMMAND_SYMBOLS, tree, cmd_depth, cmd_bits, storage_ix, storage); - BuildAndStoreHuffmanTree(dist_histo.data_, SIMPLE_DISTANCE_ALPHABET_SIZE, - tree, + BuildAndStoreHuffmanTree(dist_histo.data_, MAX_SIMPLE_DISTANCE_ALPHABET_SIZE, + num_distance_symbols, tree, dist_depth, dist_bits, storage_ix, storage); BROTLI_FREE(m, tree); @@ -1200,15 +1202,14 @@ void BrotliStoreMetaBlockTrivial(MemoryManager* m, } void BrotliStoreMetaBlockFast(MemoryManager* m, - const uint8_t* input, - size_t start_pos, - size_t length, - size_t mask, - BROTLI_BOOL is_last, - const Command *commands, - size_t n_commands, - size_t *storage_ix, - uint8_t *storage) { + const uint8_t* input, size_t start_pos, size_t length, size_t mask, + BROTLI_BOOL is_last, const BrotliEncoderParams* params, + const Command* commands, size_t n_commands, + size_t* storage_ix, uint8_t* storage) { + uint32_t num_distance_symbols = params->dist.alphabet_size; + uint32_t distance_alphabet_bits = + Log2FloorNonZero(num_distance_symbols - 1) + 1; + StoreCompressedMetaBlockHeader(is_last, length, storage_ix, storage); BrotliWriteBits(13, 0, storage_ix, storage); @@ -1252,8 +1253,8 @@ void BrotliStoreMetaBlockFast(MemoryManager* m, uint16_t lit_bits[BROTLI_NUM_LITERAL_SYMBOLS]; uint8_t cmd_depth[BROTLI_NUM_COMMAND_SYMBOLS]; uint16_t cmd_bits[BROTLI_NUM_COMMAND_SYMBOLS]; - uint8_t dist_depth[SIMPLE_DISTANCE_ALPHABET_SIZE]; - uint16_t dist_bits[SIMPLE_DISTANCE_ALPHABET_SIZE]; + uint8_t dist_depth[MAX_SIMPLE_DISTANCE_ALPHABET_SIZE]; + uint16_t dist_bits[MAX_SIMPLE_DISTANCE_ALPHABET_SIZE]; HistogramClearLiteral(&lit_histo); HistogramClearCommand(&cmd_histo); HistogramClearDistance(&dist_histo); @@ -1274,7 +1275,7 @@ void BrotliStoreMetaBlockFast(MemoryManager* m, BrotliBuildAndStoreHuffmanTreeFast(m, dist_histo.data_, dist_histo.total_count_, /* max_bits = */ - SIMPLE_DISTANCE_ALPHABET_BITS, + distance_alphabet_bits, dist_depth, dist_bits, storage_ix, storage); if (BROTLI_IS_OOM(m)) return; @@ -1293,11 +1294,11 @@ void BrotliStoreMetaBlockFast(MemoryManager* m, /* This is for storing uncompressed blocks (simple raw storage of bytes-as-bytes). */ void BrotliStoreUncompressedMetaBlock(BROTLI_BOOL is_final_block, - const uint8_t * BROTLI_RESTRICT input, + const uint8_t* BROTLI_RESTRICT input, size_t position, size_t mask, size_t len, - size_t * BROTLI_RESTRICT storage_ix, - uint8_t * BROTLI_RESTRICT storage) { + size_t* BROTLI_RESTRICT storage_ix, + uint8_t* BROTLI_RESTRICT storage) { size_t masked_pos = position & mask; BrotliStoreUncompressedMetaBlockHeader(len, storage_ix, storage); JumpToByteBoundary(storage_ix, storage); diff --git a/c/enc/brotli_bit_stream.h b/c/enc/brotli_bit_stream.h index 1324b18..9089b1d 100644 --- a/c/enc/brotli_bit_stream.h +++ b/c/enc/brotli_bit_stream.h @@ -16,10 +16,10 @@ #ifndef BROTLI_ENC_BROTLI_BIT_STREAM_H_ #define BROTLI_ENC_BROTLI_BIT_STREAM_H_ +#include "../common/context.h" #include "../common/platform.h" #include <brotli/types.h> #include "./command.h" -#include "./context.h" #include "./entropy_encode.h" #include "./memory.h" #include "./metablock.h" @@ -32,7 +32,7 @@ extern "C" { position for the current storage. */ BROTLI_INTERNAL void BrotliStoreHuffmanTree(const uint8_t* depths, size_t num, - HuffmanTree* tree, size_t *storage_ix, uint8_t *storage); + HuffmanTree* tree, size_t* storage_ix, uint8_t* storage); BROTLI_INTERNAL void BrotliBuildAndStoreHuffmanTreeFast( MemoryManager* m, const uint32_t* histogram, const size_t histogram_total, @@ -42,51 +42,31 @@ BROTLI_INTERNAL void BrotliBuildAndStoreHuffmanTreeFast( /* REQUIRES: length > 0 */ /* REQUIRES: length <= (1 << 24) */ BROTLI_INTERNAL void BrotliStoreMetaBlock(MemoryManager* m, - const uint8_t* input, - size_t start_pos, - size_t length, - size_t mask, - uint8_t prev_byte, - uint8_t prev_byte2, - BROTLI_BOOL is_final_block, - uint32_t num_direct_distance_codes, - uint32_t distance_postfix_bits, - ContextType literal_context_mode, - const Command* commands, - size_t n_commands, - const MetaBlockSplit* mb, - size_t* storage_ix, - uint8_t* storage); + const uint8_t* input, size_t start_pos, size_t length, size_t mask, + uint8_t prev_byte, uint8_t prev_byte2, BROTLI_BOOL is_last, + const BrotliEncoderParams* params, ContextType literal_context_mode, + const Command* commands, size_t n_commands, const MetaBlockSplit* mb, + size_t* storage_ix, uint8_t* storage); /* Stores the meta-block without doing any block splitting, just collects one histogram per block category and uses that for entropy coding. REQUIRES: length > 0 REQUIRES: length <= (1 << 24) */ BROTLI_INTERNAL void BrotliStoreMetaBlockTrivial(MemoryManager* m, - const uint8_t* input, - size_t start_pos, - size_t length, - size_t mask, - BROTLI_BOOL is_last, - const Command *commands, - size_t n_commands, - size_t* storage_ix, - uint8_t* storage); + const uint8_t* input, size_t start_pos, size_t length, size_t mask, + BROTLI_BOOL is_last, const BrotliEncoderParams* params, + const Command* commands, size_t n_commands, + size_t* storage_ix, uint8_t* storage); /* Same as above, but uses static prefix codes for histograms with a only a few symbols, and uses static code length prefix codes for all other histograms. REQUIRES: length > 0 REQUIRES: length <= (1 << 24) */ BROTLI_INTERNAL void BrotliStoreMetaBlockFast(MemoryManager* m, - const uint8_t* input, - size_t start_pos, - size_t length, - size_t mask, - BROTLI_BOOL is_last, - const Command *commands, - size_t n_commands, - size_t* storage_ix, - uint8_t* storage); + const uint8_t* input, size_t start_pos, size_t length, size_t mask, + BROTLI_BOOL is_last, const BrotliEncoderParams* params, + const Command* commands, size_t n_commands, + size_t* storage_ix, uint8_t* storage); /* This is for storing uncompressed blocks (simple raw storage of bytes-as-bytes). diff --git a/c/enc/command.h b/c/enc/command.h index 3bf0cf7..0526815 100644 --- a/c/enc/command.h +++ b/c/enc/command.h @@ -105,10 +105,13 @@ static BROTLI_INLINE uint32_t GetCopyExtra(uint16_t copycode) { typedef struct Command { uint32_t insert_len_; - /* Stores copy_len in low 24 bits and copy_len XOR copy_code in high 8 bit. */ + /* Stores copy_len in low 25 bits and copy_code - copy_len in high 7 bit. */ uint32_t copy_len_; + /* Stores distance extra bits. */ uint32_t dist_extra_; uint16_t cmd_prefix_; + /* Stores distance code in low 10 bits + and number of extra bits in high 6 bits. */ uint16_t dist_prefix_; } Command; @@ -118,7 +121,7 @@ static BROTLI_INLINE void InitCommand(Command* self, size_t insertlen, /* Don't rely on signed int representation, use honest casts. */ uint32_t delta = (uint8_t)((int8_t)copylen_code_delta); self->insert_len_ = (uint32_t)insertlen; - self->copy_len_ = (uint32_t)(copylen | (delta << 24)); + self->copy_len_ = (uint32_t)(copylen | (delta << 25)); /* The distance prefix and extra bits are stored in this Command as if npostfix and ndirect were 0, they are only recomputed later after the clustering if needed. */ @@ -126,29 +129,29 @@ static BROTLI_INLINE void InitCommand(Command* self, size_t insertlen, distance_code, 0, 0, &self->dist_prefix_, &self->dist_extra_); GetLengthCode( insertlen, (size_t)((int)copylen + copylen_code_delta), - TO_BROTLI_BOOL(self->dist_prefix_ == 0), &self->cmd_prefix_); + TO_BROTLI_BOOL((self->dist_prefix_ & 0x3FF) == 0), &self->cmd_prefix_); } static BROTLI_INLINE void InitInsertCommand(Command* self, size_t insertlen) { self->insert_len_ = (uint32_t)insertlen; - self->copy_len_ = 4 << 24; + self->copy_len_ = 4 << 25; self->dist_extra_ = 0; self->dist_prefix_ = BROTLI_NUM_DISTANCE_SHORT_CODES; GetLengthCode(insertlen, 4, BROTLI_FALSE, &self->cmd_prefix_); } static BROTLI_INLINE uint32_t CommandRestoreDistanceCode(const Command* self) { - if (self->dist_prefix_ < BROTLI_NUM_DISTANCE_SHORT_CODES) { - return self->dist_prefix_; + if ((self->dist_prefix_ & 0x3FF) < BROTLI_NUM_DISTANCE_SHORT_CODES) { + return self->dist_prefix_ & 0x3FF; } else { - uint32_t nbits = self->dist_extra_ >> 24; - uint32_t extra = self->dist_extra_ & 0xffffff; + uint32_t nbits = self->dist_prefix_ >> 10; + uint32_t extra = self->dist_extra_; /* It is assumed that the distance was first encoded with NPOSTFIX = 0 and NDIRECT = 0, so the code itself is of this form: BROTLI_NUM_DISTANCE_SHORT_CODES + 2 * (nbits - 1) + prefix_bit Therefore, the following expression results in (2 + prefix_bit). */ - uint32_t prefix = - self->dist_prefix_ + 4u - BROTLI_NUM_DISTANCE_SHORT_CODES - 2u * nbits; + uint32_t prefix = (self->dist_prefix_ & 0x3FF) + 4u - + BROTLI_NUM_DISTANCE_SHORT_CODES - 2u * nbits; /* Subtract 4 for offset (Chapter 4.) and increase by BROTLI_NUM_DISTANCE_SHORT_CODES - 1 */ return (prefix << nbits) + extra + BROTLI_NUM_DISTANCE_SHORT_CODES - 4u; @@ -165,12 +168,13 @@ static BROTLI_INLINE uint32_t CommandDistanceContext(const Command* self) { } static BROTLI_INLINE uint32_t CommandCopyLen(const Command* self) { - return self->copy_len_ & 0xFFFFFF; + return self->copy_len_ & 0x1FFFFFF; } static BROTLI_INLINE uint32_t CommandCopyLenCode(const Command* self) { - int32_t delta = (int8_t)((uint8_t)(self->copy_len_ >> 24)); - return (uint32_t)((int32_t)(self->copy_len_ & 0xFFFFFF) + delta); + uint32_t modifier = self->copy_len_ >> 25; + int32_t delta = (int8_t)((uint8_t)(modifier | ((modifier & 0x40) << 1))); + return (uint32_t)((int32_t)(self->copy_len_ & 0x1FFFFFF) + delta); } #if defined(__cplusplus) || defined(c_plusplus) diff --git a/c/enc/compress_fragment.c b/c/enc/compress_fragment.c index 40dce3e..f75069b 100644 --- a/c/enc/compress_fragment.c +++ b/c/enc/compress_fragment.c @@ -38,7 +38,7 @@ extern "C" { * There is no effort to ensure that it is a prime, the oddity is enough for this use. * The number has been tuned heuristically against compression benchmarks. */ -static const uint32_t kHashMul32 = 0x1e35a7bd; +static const uint32_t kHashMul32 = 0x1E35A7BD; static BROTLI_INLINE uint32_t Hash(const uint8_t* p, size_t shift) { const uint64_t h = (BROTLI_UNALIGNED_LOAD64LE(p) << 24) * kHashMul32; @@ -343,7 +343,7 @@ static void BrotliStoreMetaBlockHeader( } static void UpdateBits(size_t n_bits, uint32_t bits, size_t pos, - uint8_t *array) { + uint8_t* array) { while (n_bits > 0) { size_t byte_pos = pos >> 3; size_t n_unchanged_bits = pos & 7; diff --git a/c/enc/compress_fragment_two_pass.c b/c/enc/compress_fragment_two_pass.c index 8259817..b8bd6e8 100644 --- a/c/enc/compress_fragment_two_pass.c +++ b/c/enc/compress_fragment_two_pass.c @@ -37,7 +37,7 @@ extern "C" { * There is no effort to ensure that it is a prime, the oddity is enough for this use. * The number has been tuned heuristically against compression benchmarks. */ -static const uint32_t kHashMul32 = 0x1e35a7bd; +static const uint32_t kHashMul32 = 0x1E35A7BD; static BROTLI_INLINE uint32_t Hash(const uint8_t* p, size_t shift) { const uint64_t h = (BROTLI_UNALIGNED_LOAD64LE(p) << 16) * kHashMul32; diff --git a/c/enc/context.h b/c/enc/context.h deleted file mode 100644 index caa4230..0000000 --- a/c/enc/context.h +++ /dev/null @@ -1,184 +0,0 @@ -/* Copyright 2013 Google Inc. All Rights Reserved. - - Distributed under MIT license. - See file LICENSE for detail or copy at https://opensource.org/licenses/MIT -*/ - -/* Functions to map previous bytes into a context id. */ - -#ifndef BROTLI_ENC_CONTEXT_H_ -#define BROTLI_ENC_CONTEXT_H_ - -#include "../common/platform.h" -#include <brotli/types.h> - -#if defined(__cplusplus) || defined(c_plusplus) -extern "C" { -#endif - -/* Second-order context lookup table for UTF8 byte streams. - - If p1 and p2 are the previous two bytes, we calculate the context as - - context = kUTF8ContextLookup[p1] | kUTF8ContextLookup[p2 + 256]. - - If the previous two bytes are ASCII characters (i.e. < 128), this will be - equivalent to - - context = 4 * context1(p1) + context2(p2), - - where context1 is based on the previous byte in the following way: - - 0 : non-ASCII control - 1 : \t, \n, \r - 2 : space - 3 : other punctuation - 4 : " ' - 5 : % - 6 : ( < [ { - 7 : ) > ] } - 8 : , ; : - 9 : . - 10 : = - 11 : number - 12 : upper-case vowel - 13 : upper-case consonant - 14 : lower-case vowel - 15 : lower-case consonant - - and context2 is based on the second last byte: - - 0 : control, space - 1 : punctuation - 2 : upper-case letter, number - 3 : lower-case letter - - If the last byte is ASCII, and the second last byte is not (in a valid UTF8 - stream it will be a continuation byte, value between 128 and 191), the - context is the same as if the second last byte was an ASCII control or space. - - If the last byte is a UTF8 lead byte (value >= 192), then the next byte will - be a continuation byte and the context id is 2 or 3 depending on the LSB of - the last byte and to a lesser extent on the second last byte if it is ASCII. - - If the last byte is a UTF8 continuation byte, the second last byte can be: - - continuation byte: the next byte is probably ASCII or lead byte (assuming - 4-byte UTF8 characters are rare) and the context id is 0 or 1. - - lead byte (192 - 207): next byte is ASCII or lead byte, context is 0 or 1 - - lead byte (208 - 255): next byte is continuation byte, context is 2 or 3 - - The possible value combinations of the previous two bytes, the range of - context ids and the type of the next byte is summarized in the table below: - - |--------\-----------------------------------------------------------------| - | \ Last byte | - | Second \---------------------------------------------------------------| - | last byte \ ASCII | cont. byte | lead byte | - | \ (0-127) | (128-191) | (192-) | - |=============|===================|=====================|==================| - | ASCII | next: ASCII/lead | not valid | next: cont. | - | (0-127) | context: 4 - 63 | | context: 2 - 3 | - |-------------|-------------------|---------------------|------------------| - | cont. byte | next: ASCII/lead | next: ASCII/lead | next: cont. | - | (128-191) | context: 4 - 63 | context: 0 - 1 | context: 2 - 3 | - |-------------|-------------------|---------------------|------------------| - | lead byte | not valid | next: ASCII/lead | not valid | - | (192-207) | | context: 0 - 1 | | - |-------------|-------------------|---------------------|------------------| - | lead byte | not valid | next: cont. | not valid | - | (208-) | | context: 2 - 3 | | - |-------------|-------------------|---------------------|------------------| -*/ -static const uint8_t kUTF8ContextLookup[512] = { - /* Last byte. */ - /* */ - /* ASCII range. */ - 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 0, 0, 4, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 8, 12, 16, 12, 12, 20, 12, 16, 24, 28, 12, 12, 32, 12, 36, 12, - 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 32, 32, 24, 40, 28, 12, - 12, 48, 52, 52, 52, 48, 52, 52, 52, 48, 52, 52, 52, 52, 52, 48, - 52, 52, 52, 52, 52, 48, 52, 52, 52, 52, 52, 24, 12, 28, 12, 12, - 12, 56, 60, 60, 60, 56, 60, 60, 60, 56, 60, 60, 60, 60, 60, 56, - 60, 60, 60, 60, 60, 56, 60, 60, 60, 60, 60, 24, 12, 28, 12, 0, - /* UTF8 continuation byte range. */ - 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, - 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, - 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, - 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, - /* UTF8 lead byte range. */ - 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, - 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, - 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, - 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, - /* Second last byte. */ - /* */ - /* ASCII range. */ - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, - 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, - 1, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, - 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 1, 1, 1, 1, 0, - /* UTF8 continuation byte range. */ - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - /* UTF8 lead byte range. */ - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, -}; - -/* Context lookup table for small signed integers. */ -static const uint8_t kSigned3BitContextLookup[] = { - 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, - 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, - 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, - 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7, -}; - -typedef enum ContextType { - CONTEXT_LSB6 = 0, - CONTEXT_MSB6 = 1, - CONTEXT_UTF8 = 2, - CONTEXT_SIGNED = 3 -} ContextType; - -static BROTLI_INLINE uint8_t Context(uint8_t p1, uint8_t p2, ContextType mode) { - switch (mode) { - case CONTEXT_LSB6: - return p1 & 0x3f; - case CONTEXT_MSB6: - return (uint8_t)(p1 >> 2); - case CONTEXT_UTF8: - return kUTF8ContextLookup[p1] | kUTF8ContextLookup[p2 + 256]; - case CONTEXT_SIGNED: - return (uint8_t)((kSigned3BitContextLookup[p1] << 3) + - kSigned3BitContextLookup[p2]); - default: - return 0; - } -} - -#if defined(__cplusplus) || defined(c_plusplus) -} /* extern "C" */ -#endif - -#endif /* BROTLI_ENC_CONTEXT_H_ */ diff --git a/c/enc/encode.c b/c/enc/encode.c index 794a409..4fd28d0 100644 --- a/c/enc/encode.c +++ b/c/enc/encode.c @@ -11,6 +11,8 @@ #include <stdlib.h> /* free, malloc */ #include <string.h> /* memcpy, memset */ +#include "../common/constants.h" +#include "../common/context.h" #include "../common/platform.h" #include "../common/version.h" #include "./backward_references.h" @@ -19,7 +21,7 @@ #include "./brotli_bit_stream.h" #include "./compress_fragment.h" #include "./compress_fragment_two_pass.h" -#include "./context.h" +#include "./encoder_dict.h" #include "./entropy_encode.h" #include "./fast_log.h" #include "./hash.h" @@ -69,8 +71,8 @@ typedef struct BrotliEncoderStateStruct { uint64_t last_processed_pos_; int dist_cache_[BROTLI_NUM_DISTANCE_SHORT_CODES]; int saved_dist_cache_[4]; - uint8_t last_byte_; - uint8_t last_byte_bits_; + uint16_t last_bytes_; + uint8_t last_bytes_bits_; uint8_t prev_byte_; uint8_t prev_byte2_; size_t storage_size_; @@ -161,6 +163,10 @@ BROTLI_BOOL BrotliEncoderSetParameter( state->params.size_hint = value; return BROTLI_TRUE; + case BROTLI_PARAM_LARGE_WINDOW: + state->params.large_window = TO_BROTLI_BOOL(!!value); + return BROTLI_TRUE; + default: return BROTLI_FALSE; } } @@ -251,20 +257,25 @@ static int* GetHashTable(BrotliEncoderState* s, int quality, return table; } -static void EncodeWindowBits(int lgwin, uint8_t* last_byte, - uint8_t* last_byte_bits) { - if (lgwin == 16) { - *last_byte = 0; - *last_byte_bits = 1; - } else if (lgwin == 17) { - *last_byte = 1; - *last_byte_bits = 7; - } else if (lgwin > 17) { - *last_byte = (uint8_t)(((lgwin - 17) << 1) | 1); - *last_byte_bits = 4; +static void EncodeWindowBits(int lgwin, BROTLI_BOOL large_window, + uint16_t* last_bytes, uint8_t* last_bytes_bits) { + if (large_window) { + *last_bytes = (uint16_t)(((lgwin & 0x3F) << 8) | 0x11); + *last_bytes_bits = 14; } else { - *last_byte = (uint8_t)(((lgwin - 8) << 4) | 1); - *last_byte_bits = 7; + if (lgwin == 16) { + *last_bytes = 0; + *last_bytes_bits = 1; + } else if (lgwin == 17) { + *last_bytes = 1; + *last_bytes_bits = 7; + } else if (lgwin > 17) { + *last_bytes = (uint16_t)(((lgwin - 17) << 1) | 0x01); + *last_bytes_bits = 4; + } else { + *last_bytes = (uint16_t)(((lgwin - 8) << 4) | 0x01); + *last_bytes_bits = 7; + } } } @@ -420,6 +431,7 @@ static BROTLI_BOOL ShouldUseComplexStaticContextMap(const uint8_t* input, double entropy[3]; size_t dummy; size_t i; + ContextLut utf8_lut = BROTLI_CONTEXT_LUT(CONTEXT_UTF8); for (; start_pos + 64 <= end_pos; start_pos += 4096) { const size_t stride_end_pos = start_pos + 64; uint8_t prev2 = input[start_pos & mask]; @@ -430,7 +442,7 @@ static BROTLI_BOOL ShouldUseComplexStaticContextMap(const uint8_t* input, for (pos = start_pos + 2; pos < stride_end_pos; ++pos) { const uint8_t literal = input[pos & mask]; const uint8_t context = (uint8_t)kStaticContextMapComplexUTF8[ - Context(prev1, prev2, CONTEXT_UTF8)]; + BROTLI_CONTEXT(prev1, prev2, utf8_lut)]; ++total; ++combined_histo[literal >> 3]; ++context_histo[context][literal >> 3]; @@ -519,12 +531,26 @@ static BROTLI_BOOL ShouldCompress( return BROTLI_TRUE; } +/* Chooses the literal context mode for a metablock */ +static ContextType ChooseContextMode(const BrotliEncoderParams* params, + const uint8_t* data, const size_t pos, const size_t mask, + const size_t length) { + /* We only do the computation for the option of something else than + CONTEXT_UTF8 for the highest qualities */ + if (params->quality >= MIN_QUALITY_FOR_HQ_BLOCK_SPLITTING && + !BrotliIsMostlyUTF8(data, pos, mask, length, kMinUTF8Ratio)) { + return CONTEXT_SIGNED; + } + return CONTEXT_UTF8; +} + static void WriteMetaBlockInternal(MemoryManager* m, const uint8_t* data, const size_t mask, const uint64_t last_flush_pos, const size_t bytes, const BROTLI_BOOL is_last, + ContextType literal_context_mode, const BrotliEncoderParams* params, const uint8_t prev_byte, const uint8_t prev_byte2, @@ -536,10 +562,9 @@ static void WriteMetaBlockInternal(MemoryManager* m, size_t* storage_ix, uint8_t* storage) { const uint32_t wrapped_last_flush_pos = WrapPosition(last_flush_pos); - uint8_t last_byte; - uint8_t last_byte_bits; - uint32_t num_direct_distance_codes = 0; - uint32_t distance_postfix_bits = 0; + uint16_t last_bytes; + uint8_t last_bytes_bits; + ContextLut literal_context_lut = BROTLI_CONTEXT_LUT(literal_context_mode); if (bytes == 0) { /* Write the ISLAST and ISEMPTY bits. */ @@ -559,31 +584,29 @@ static void WriteMetaBlockInternal(MemoryManager* m, return; } - last_byte = storage[0]; - last_byte_bits = (uint8_t)(*storage_ix & 0xff); - if (params->quality >= MIN_QUALITY_FOR_RECOMPUTE_DISTANCE_PREFIXES && - params->mode == BROTLI_MODE_FONT) { - num_direct_distance_codes = 12; - distance_postfix_bits = 1; + BROTLI_DCHECK(*storage_ix <= 14); + last_bytes = (uint16_t)((storage[1] << 8) | storage[0]); + last_bytes_bits = (uint8_t)(*storage_ix); + if (params->dist.num_direct_distance_codes != 0 || + params->dist.distance_postfix_bits != 0) { RecomputeDistancePrefixes(commands, num_commands, - num_direct_distance_codes, - distance_postfix_bits); + params->dist.num_direct_distance_codes, + params->dist.distance_postfix_bits); } if (params->quality <= MAX_QUALITY_FOR_STATIC_ENTROPY_CODES) { BrotliStoreMetaBlockFast(m, data, wrapped_last_flush_pos, - bytes, mask, is_last, + bytes, mask, is_last, params, commands, num_commands, storage_ix, storage); if (BROTLI_IS_OOM(m)) return; } else if (params->quality < MIN_QUALITY_FOR_BLOCK_SPLIT) { BrotliStoreMetaBlockTrivial(m, data, wrapped_last_flush_pos, - bytes, mask, is_last, + bytes, mask, is_last, params, commands, num_commands, storage_ix, storage); if (BROTLI_IS_OOM(m)) return; } else { - ContextType literal_context_mode = CONTEXT_UTF8; MetaBlockSplit mb; InitMetaBlockSplit(&mb); if (params->quality < MIN_QUALITY_FOR_HQ_BLOCK_SPLITTING) { @@ -596,14 +619,10 @@ static void WriteMetaBlockInternal(MemoryManager* m, &literal_context_map); } BrotliBuildMetaBlockGreedy(m, data, wrapped_last_flush_pos, mask, - prev_byte, prev_byte2, literal_context_mode, num_literal_contexts, + prev_byte, prev_byte2, literal_context_lut, num_literal_contexts, literal_context_map, commands, num_commands, &mb); if (BROTLI_IS_OOM(m)) return; } else { - if (!BrotliIsMostlyUTF8(data, wrapped_last_flush_pos, mask, bytes, - kMinUTF8Ratio)) { - literal_context_mode = CONTEXT_SIGNED; - } BrotliBuildMetaBlock(m, data, wrapped_last_flush_pos, mask, params, prev_byte, prev_byte2, commands, num_commands, @@ -612,15 +631,18 @@ static void WriteMetaBlockInternal(MemoryManager* m, if (BROTLI_IS_OOM(m)) return; } if (params->quality >= MIN_QUALITY_FOR_OPTIMIZE_HISTOGRAMS) { - BrotliOptimizeHistograms(num_direct_distance_codes, - distance_postfix_bits, - &mb); + /* The number of distance symbols effectively used by + "Large Window Brotli" (32-bit). */ + uint32_t num_effective_dist_codes = params->dist.alphabet_size; + if (num_effective_dist_codes > BROTLI_NUM_HISTOGRAM_DISTANCE_SYMBOLS) { + num_effective_dist_codes = BROTLI_NUM_HISTOGRAM_DISTANCE_SYMBOLS; + } + BrotliOptimizeHistograms(num_effective_dist_codes, &mb); } BrotliStoreMetaBlock(m, data, wrapped_last_flush_pos, bytes, mask, prev_byte, prev_byte2, is_last, - num_direct_distance_codes, - distance_postfix_bits, + params, literal_context_mode, commands, num_commands, &mb, @@ -631,20 +653,54 @@ static void WriteMetaBlockInternal(MemoryManager* m, if (bytes + 4 < (*storage_ix >> 3)) { /* Restore the distance cache and last byte. */ memcpy(dist_cache, saved_dist_cache, 4 * sizeof(dist_cache[0])); - storage[0] = last_byte; - *storage_ix = last_byte_bits; + storage[0] = (uint8_t)last_bytes; + storage[1] = (uint8_t)(last_bytes >> 8); + *storage_ix = last_bytes_bits; BrotliStoreUncompressedMetaBlock(is_last, data, wrapped_last_flush_pos, mask, bytes, storage_ix, storage); } } +static void ChooseDistanceParams(BrotliEncoderParams* params) { + uint32_t num_direct_distance_codes = 0; + uint32_t distance_postfix_bits = 0; + uint32_t alphabet_size; + size_t max_distance = BROTLI_MAX_DISTANCE; + + if (params->quality >= MIN_QUALITY_FOR_RECOMPUTE_DISTANCE_PREFIXES && + params->mode == BROTLI_MODE_FONT) { + num_direct_distance_codes = 12; + distance_postfix_bits = 1; + max_distance = (1U << 27) + 4; + } + + alphabet_size = BROTLI_DISTANCE_ALPHABET_SIZE( + num_direct_distance_codes, distance_postfix_bits, + BROTLI_MAX_DISTANCE_BITS); + if (params->large_window) { + max_distance = BROTLI_MAX_ALLOWED_DISTANCE; + if (num_direct_distance_codes != 0 || distance_postfix_bits != 0) { + max_distance = (3U << 29) - 4; + } + alphabet_size = BROTLI_DISTANCE_ALPHABET_SIZE( + num_direct_distance_codes, distance_postfix_bits, + BROTLI_LARGE_MAX_DISTANCE_BITS); + } + + params->dist.num_direct_distance_codes = num_direct_distance_codes; + params->dist.distance_postfix_bits = distance_postfix_bits; + params->dist.alphabet_size = alphabet_size; + params->dist.max_distance = max_distance; +} + static BROTLI_BOOL EnsureInitialized(BrotliEncoderState* s) { if (BROTLI_IS_OOM(&s->memory_manager_)) return BROTLI_FALSE; if (s->is_initialized_) return BROTLI_TRUE; SanitizeParams(&s->params); s->params.lgblock = ComputeLgBlock(&s->params); + ChooseDistanceParams(&s->params); s->remaining_metadata_bytes_ = BROTLI_UINT32_MAX; @@ -657,7 +713,8 @@ static BROTLI_BOOL EnsureInitialized(BrotliEncoderState* s) { s->params.quality == FAST_TWO_PASS_COMPRESSION_QUALITY) { lgwin = BROTLI_MAX(int, lgwin, 18); } - EncodeWindowBits(lgwin, &s->last_byte_, &s->last_byte_bits_); + EncodeWindowBits(lgwin, s->params.large_window, + &s->last_bytes_, &s->last_bytes_bits_); } if (s->params.quality == FAST_ONE_PASS_COMPRESSION_QUALITY) { @@ -671,11 +728,18 @@ static BROTLI_BOOL EnsureInitialized(BrotliEncoderState* s) { static void BrotliEncoderInitParams(BrotliEncoderParams* params) { params->mode = BROTLI_DEFAULT_MODE; + params->large_window = BROTLI_FALSE; params->quality = BROTLI_DEFAULT_QUALITY; params->lgwin = BROTLI_DEFAULT_WINDOW; params->lgblock = 0; params->size_hint = 0; params->disable_literal_context_modeling = BROTLI_FALSE; + BrotliInitEncoderDictionary(¶ms->dictionary); + params->dist.num_direct_distance_codes = 0; + params->dist.distance_postfix_bits = 0; + params->dist.alphabet_size = + BROTLI_DISTANCE_ALPHABET_SIZE(0, 0, BROTLI_MAX_DISTANCE_BITS); + params->dist.max_distance = BROTLI_MAX_DISTANCE; } static void BrotliEncoderInitState(BrotliEncoderState* s) { @@ -837,6 +901,37 @@ static BROTLI_BOOL UpdateLastProcessedPos(BrotliEncoderState* s) { return TO_BROTLI_BOOL(wrapped_input_pos < wrapped_last_processed_pos); } +static void ExtendLastCommand(BrotliEncoderState* s, uint32_t* bytes, + uint32_t* wrapped_last_processed_pos) { + Command* last_command = &s->commands_[s->num_commands_ - 1]; + const uint8_t* data = s->ringbuffer_.buffer_; + const uint32_t mask = s->ringbuffer_.mask_; + uint64_t max_backward_distance = (1u << s->params.lgwin) - BROTLI_WINDOW_GAP; + uint64_t last_copy_len = last_command->copy_len_ & 0x1FFFFFF; + uint64_t last_processed_pos = s->last_processed_pos_ - last_copy_len; + uint64_t max_distance = last_processed_pos < max_backward_distance ? + last_processed_pos : max_backward_distance; + uint64_t cmd_dist = (uint64_t)s->dist_cache_[0]; + uint32_t distance_code = CommandRestoreDistanceCode(last_command); + if (distance_code < BROTLI_NUM_DISTANCE_SHORT_CODES || + distance_code - (BROTLI_NUM_DISTANCE_SHORT_CODES - 1) == cmd_dist) { + if (cmd_dist <= max_distance) { + while (*bytes != 0 && data[*wrapped_last_processed_pos & mask] == + data[(*wrapped_last_processed_pos - cmd_dist) & mask]) { + last_command->copy_len_++; + (*bytes)--; + (*wrapped_last_processed_pos)++; + } + } + /* The copy length is at most the metablock size, and thus expressible. */ + GetLengthCode(last_command->insert_len_, + (size_t)((int)(last_command->copy_len_ & 0x1FFFFFF) + + (int)(last_command->copy_len_ >> 25)), + TO_BROTLI_BOOL((last_command->dist_prefix_ & 0x3FF) == 0), + &last_command->cmd_prefix_); + } +} + /* Processes the accumulated input data and sets |*out_size| to the length of the new output meta-block, or to zero if no new output meta-block has been @@ -853,13 +948,12 @@ static BROTLI_BOOL EncodeData( BrotliEncoderState* s, const BROTLI_BOOL is_last, const BROTLI_BOOL force_flush, size_t* out_size, uint8_t** output) { const uint64_t delta = UnprocessedInputSize(s); - const uint32_t bytes = (uint32_t)delta; - const uint32_t wrapped_last_processed_pos = - WrapPosition(s->last_processed_pos_); + uint32_t bytes = (uint32_t)delta; + uint32_t wrapped_last_processed_pos = WrapPosition(s->last_processed_pos_); uint8_t* data; uint32_t mask; MemoryManager* m = &s->memory_manager_; - const BrotliDictionary* dictionary = BrotliGetDictionary(); + ContextType literal_context_mode; if (!EnsureInitialized(s)) return BROTLI_FALSE; data = s->ringbuffer_.buffer_; @@ -884,7 +978,7 @@ static BROTLI_BOOL EncodeData( if (s->params.quality == FAST_ONE_PASS_COMPRESSION_QUALITY || s->params.quality == FAST_TWO_PASS_COMPRESSION_QUALITY) { uint8_t* storage; - size_t storage_ix = s->last_byte_bits_; + size_t storage_ix = s->last_bytes_bits_; size_t table_size; int* table; @@ -894,9 +988,10 @@ static BROTLI_BOOL EncodeData( *out_size = 0; return BROTLI_TRUE; } - storage = GetBrotliStorage(s, 2 * bytes + 502); + storage = GetBrotliStorage(s, 2 * bytes + 503); if (BROTLI_IS_OOM(m)) return BROTLI_FALSE; - storage[0] = s->last_byte_; + storage[0] = (uint8_t)s->last_bytes_; + storage[1] = (uint8_t)(s->last_bytes_ >> 8); table = GetHashTable(s, s->params.quality, bytes, &table_size); if (BROTLI_IS_OOM(m)) return BROTLI_FALSE; if (s->params.quality == FAST_ONE_PASS_COMPRESSION_QUALITY) { @@ -917,8 +1012,8 @@ static BROTLI_BOOL EncodeData( &storage_ix, storage); if (BROTLI_IS_OOM(m)) return BROTLI_FALSE; } - s->last_byte_ = storage[storage_ix >> 3]; - s->last_byte_bits_ = storage_ix & 7u; + s->last_bytes_ = (uint16_t)(storage[storage_ix >> 3]); + s->last_bytes_bits_ = storage_ix & 7u; UpdateLastProcessedPos(s); *output = &storage[0]; *out_size = storage_ix >> 3; @@ -946,27 +1041,36 @@ static BROTLI_BOOL EncodeData( InitOrStitchToPreviousBlock(m, &s->hasher_, data, mask, &s->params, wrapped_last_processed_pos, bytes, is_last); + + literal_context_mode = ChooseContextMode( + &s->params, data, WrapPosition(s->last_flush_pos_), + mask, (size_t)(s->input_pos_ - s->last_flush_pos_)); + if (BROTLI_IS_OOM(m)) return BROTLI_FALSE; + if (s->num_commands_ && s->last_insert_len_ == 0) { + ExtendLastCommand(s, &bytes, &wrapped_last_processed_pos); + } + if (s->params.quality == ZOPFLIFICATION_QUALITY) { BROTLI_DCHECK(s->params.hasher.type == 10); - BrotliCreateZopfliBackwardReferences( - m, dictionary, bytes, wrapped_last_processed_pos, + BrotliCreateZopfliBackwardReferences(m, + bytes, wrapped_last_processed_pos, data, mask, &s->params, s->hasher_, s->dist_cache_, &s->last_insert_len_, &s->commands_[s->num_commands_], &s->num_commands_, &s->num_literals_); if (BROTLI_IS_OOM(m)) return BROTLI_FALSE; } else if (s->params.quality == HQ_ZOPFLIFICATION_QUALITY) { BROTLI_DCHECK(s->params.hasher.type == 10); - BrotliCreateHqZopfliBackwardReferences( - m, dictionary, bytes, wrapped_last_processed_pos, + BrotliCreateHqZopfliBackwardReferences(m, + bytes, wrapped_last_processed_pos, data, mask, &s->params, s->hasher_, s->dist_cache_, &s->last_insert_len_, &s->commands_[s->num_commands_], &s->num_commands_, &s->num_literals_); if (BROTLI_IS_OOM(m)) return BROTLI_FALSE; } else { BrotliCreateBackwardReferences( - dictionary, bytes, wrapped_last_processed_pos, + bytes, wrapped_last_processed_pos, data, mask, &s->params, s->hasher_, s->dist_cache_, &s->last_insert_len_, &s->commands_[s->num_commands_], &s->num_commands_, &s->num_literals_); @@ -1018,18 +1122,19 @@ static BROTLI_BOOL EncodeData( { const uint32_t metablock_size = (uint32_t)(s->input_pos_ - s->last_flush_pos_); - uint8_t* storage = GetBrotliStorage(s, 2 * metablock_size + 502); - size_t storage_ix = s->last_byte_bits_; + uint8_t* storage = GetBrotliStorage(s, 2 * metablock_size + 503); + size_t storage_ix = s->last_bytes_bits_; if (BROTLI_IS_OOM(m)) return BROTLI_FALSE; - storage[0] = s->last_byte_; + storage[0] = (uint8_t)s->last_bytes_; + storage[1] = (uint8_t)(s->last_bytes_ >> 8); WriteMetaBlockInternal( m, data, mask, s->last_flush_pos_, metablock_size, is_last, - &s->params, s->prev_byte_, s->prev_byte2_, + literal_context_mode, &s->params, s->prev_byte_, s->prev_byte2_, s->num_literals_, s->num_commands_, s->commands_, s->saved_dist_cache_, s->dist_cache_, &storage_ix, storage); if (BROTLI_IS_OOM(m)) return BROTLI_FALSE; - s->last_byte_ = storage[storage_ix >> 3]; - s->last_byte_bits_ = storage_ix & 7u; + s->last_bytes_ = (uint16_t)(storage[storage_ix >> 3]); + s->last_bytes_bits_ = storage_ix & 7u; s->last_flush_pos_ = s->input_pos_; if (UpdateLastProcessedPos(s)) { HasherReset(s->hasher_); @@ -1058,10 +1163,11 @@ static BROTLI_BOOL EncodeData( static size_t WriteMetadataHeader( BrotliEncoderState* s, const size_t block_size, uint8_t* header) { size_t storage_ix; - storage_ix = s->last_byte_bits_; - header[0] = s->last_byte_; - s->last_byte_ = 0; - s->last_byte_bits_ = 0; + storage_ix = s->last_bytes_bits_; + header[0] = (uint8_t)s->last_bytes_; + header[1] = (uint8_t)(s->last_bytes_ >> 8); + s->last_bytes_ = 0; + s->last_bytes_bits_ = 0; BrotliWriteBits(1, 0, &storage_ix, header); BrotliWriteBits(2, 3, &storage_ix, header); @@ -1091,15 +1197,14 @@ static BROTLI_BOOL BrotliCompressBufferQuality10( BROTLI_BOOL ok = BROTLI_TRUE; const size_t max_out_size = *encoded_size; size_t total_out_size = 0; - uint8_t last_byte; - uint8_t last_byte_bits; + uint16_t last_bytes; + uint8_t last_bytes_bits; HasherHandle hasher = NULL; const size_t hasher_eff_size = BROTLI_MIN(size_t, input_size, max_backward_limit + BROTLI_WINDOW_GAP); BrotliEncoderParams params; - const BrotliDictionary* dictionary = BrotliGetDictionary(); const int lgmetablock = BROTLI_MIN(int, 24, lgwin + 1); size_t max_block_size; @@ -1113,14 +1218,18 @@ static BROTLI_BOOL BrotliCompressBufferQuality10( BrotliEncoderInitParams(¶ms); params.quality = 10; params.lgwin = lgwin; + if (lgwin > BROTLI_MAX_WINDOW_BITS) { + params.large_window = BROTLI_TRUE; + } SanitizeParams(¶ms); params.lgblock = ComputeLgBlock(¶ms); + ChooseDistanceParams(¶ms); max_block_size = (size_t)1 << params.lgblock; BrotliInitMemoryManager(m, 0, 0, 0); BROTLI_DCHECK(input_size <= mask + 1); - EncodeWindowBits(lgwin, &last_byte, &last_byte_bits); + EncodeWindowBits(lgwin, params.large_window, &last_bytes, &last_bytes_bits); InitOrStitchToPreviousBlock(m, &hasher, input_buffer, mask, ¶ms, 0, hasher_eff_size, BROTLI_TRUE); if (BROTLI_IS_OOM(m)) goto oom; @@ -1140,6 +1249,9 @@ static BROTLI_BOOL BrotliCompressBufferQuality10( uint8_t* storage; size_t storage_ix; + ContextType literal_context_mode = ChooseContextMode(¶ms, + input_buffer, metablock_start, mask, metablock_end - metablock_start); + size_t block_start; for (block_start = metablock_start; block_start < metablock_end; ) { size_t block_size = @@ -1151,10 +1263,9 @@ static BROTLI_BOOL BrotliCompressBufferQuality10( BrotliInitZopfliNodes(nodes, block_size + 1); StitchToPreviousBlockH10(hasher, block_size, block_start, input_buffer, mask); - path_size = BrotliZopfliComputeShortestPath( - m, dictionary, block_size, block_start, - input_buffer, mask, ¶ms, max_backward_limit, dist_cache, hasher, - nodes); + path_size = BrotliZopfliComputeShortestPath(m, + block_size, block_start, input_buffer, mask, ¶ms, + max_backward_limit, dist_cache, hasher, nodes); if (BROTLI_IS_OOM(m)) goto oom; /* We allocate a command buffer in the first iteration of this loop that will be likely big enough for the whole metablock, so that for most @@ -1197,13 +1308,14 @@ static BROTLI_BOOL BrotliCompressBufferQuality10( is_last = TO_BROTLI_BOOL(metablock_start + metablock_size == input_size); storage = NULL; - storage_ix = last_byte_bits; + storage_ix = last_bytes_bits; if (metablock_size == 0) { /* Write the ISLAST and ISEMPTY bits. */ storage = BROTLI_ALLOC(m, uint8_t, 16); if (BROTLI_IS_OOM(m)) goto oom; - storage[0] = last_byte; + storage[0] = (uint8_t)last_bytes; + storage[1] = (uint8_t)(last_bytes >> 8); BrotliWriteBits(2, 3, &storage_ix, storage); storage_ix = (storage_ix + 7u) & ~7u; } else if (!ShouldCompress(input_buffer, mask, metablock_start, @@ -1213,37 +1325,35 @@ static BROTLI_BOOL BrotliCompressBufferQuality10( memcpy(dist_cache, saved_dist_cache, 4 * sizeof(dist_cache[0])); storage = BROTLI_ALLOC(m, uint8_t, metablock_size + 16); if (BROTLI_IS_OOM(m)) goto oom; - storage[0] = last_byte; + storage[0] = (uint8_t)last_bytes; + storage[1] = (uint8_t)(last_bytes >> 8); BrotliStoreUncompressedMetaBlock(is_last, input_buffer, metablock_start, mask, metablock_size, &storage_ix, storage); } else { - uint32_t num_direct_distance_codes = 0; - uint32_t distance_postfix_bits = 0; - ContextType literal_context_mode = CONTEXT_UTF8; MetaBlockSplit mb; - InitMetaBlockSplit(&mb); - if (!BrotliIsMostlyUTF8(input_buffer, metablock_start, mask, - metablock_size, kMinUTF8Ratio)) { - literal_context_mode = CONTEXT_SIGNED; + /* The number of distance symbols effectively used by + "Large Window Brotli" (32-bit). */ + uint32_t num_effective_dist_codes = params.dist.alphabet_size; + if (num_effective_dist_codes > BROTLI_NUM_HISTOGRAM_DISTANCE_SYMBOLS) { + num_effective_dist_codes = BROTLI_NUM_HISTOGRAM_DISTANCE_SYMBOLS; } + InitMetaBlockSplit(&mb); BrotliBuildMetaBlock(m, input_buffer, metablock_start, mask, ¶ms, prev_byte, prev_byte2, commands, num_commands, literal_context_mode, &mb); if (BROTLI_IS_OOM(m)) goto oom; - BrotliOptimizeHistograms(num_direct_distance_codes, - distance_postfix_bits, - &mb); - storage = BROTLI_ALLOC(m, uint8_t, 2 * metablock_size + 502); + BrotliOptimizeHistograms(num_effective_dist_codes, &mb); + storage = BROTLI_ALLOC(m, uint8_t, 2 * metablock_size + 503); if (BROTLI_IS_OOM(m)) goto oom; - storage[0] = last_byte; + storage[0] = (uint8_t)last_bytes; + storage[1] = (uint8_t)(last_bytes >> 8); BrotliStoreMetaBlock(m, input_buffer, metablock_start, metablock_size, mask, prev_byte, prev_byte2, is_last, - num_direct_distance_codes, - distance_postfix_bits, + ¶ms, literal_context_mode, commands, num_commands, &mb, @@ -1252,16 +1362,17 @@ static BROTLI_BOOL BrotliCompressBufferQuality10( if (metablock_size + 4 < (storage_ix >> 3)) { /* Restore the distance cache and last byte. */ memcpy(dist_cache, saved_dist_cache, 4 * sizeof(dist_cache[0])); - storage[0] = last_byte; - storage_ix = last_byte_bits; + storage[0] = (uint8_t)last_bytes; + storage[1] = (uint8_t)(last_bytes >> 8); + storage_ix = last_bytes_bits; BrotliStoreUncompressedMetaBlock(is_last, input_buffer, metablock_start, mask, metablock_size, &storage_ix, storage); } DestroyMetaBlockSplit(m, &mb); } - last_byte = storage[storage_ix >> 3]; - last_byte_bits = storage_ix & 7u; + last_bytes = (uint16_t)(storage[storage_ix >> 3]); + last_bytes_bits = storage_ix & 7u; metablock_start += metablock_size; if (metablock_start < input_size) { prev_byte = input_buffer[metablock_start - 1]; @@ -1296,8 +1407,8 @@ oom: size_t BrotliEncoderMaxCompressedSize(size_t input_size) { /* [window bits / empty metadata] + N * [uncompressed] + [last empty] */ - size_t num_small_blocks = input_size >> 14; - size_t overhead = 2 + (4 * num_small_blocks) + 3 + 1; + size_t num_large_blocks = input_size >> 14; + size_t overhead = 2 + (4 * num_large_blocks) + 3 + 1; size_t result = input_size + overhead; if (input_size == 0) return 2; return (result < input_size) ? 0 : result; @@ -1360,7 +1471,7 @@ BROTLI_BOOL BrotliEncoderCompress( } if (quality == 10) { /* TODO: Implement this direct path for all quality levels. */ - const int lg_win = BROTLI_MIN(int, BROTLI_MAX_WINDOW_BITS, + const int lg_win = BROTLI_MIN(int, BROTLI_LARGE_MAX_WINDOW_BITS, BROTLI_MAX(int, 16, lgwin)); int ok = BrotliCompressBufferQuality10(lg_win, input_size, input_buffer, encoded_size, encoded_buffer); @@ -1384,6 +1495,9 @@ BROTLI_BOOL BrotliEncoderCompress( BrotliEncoderSetParameter(s, BROTLI_PARAM_LGWIN, (uint32_t)lgwin); BrotliEncoderSetParameter(s, BROTLI_PARAM_MODE, (uint32_t)mode); BrotliEncoderSetParameter(s, BROTLI_PARAM_SIZE_HINT, (uint32_t)input_size); + if (lgwin > BROTLI_MAX_WINDOW_BITS) { + BrotliEncoderSetParameter(s, BROTLI_PARAM_LARGE_WINDOW, BROTLI_TRUE); + } result = BrotliEncoderCompressStream(s, BROTLI_OPERATION_FINISH, &available_in, &next_in, &available_out, &next_out, &total_out); if (!BrotliEncoderIsFinished(s)) result = 0; @@ -1406,11 +1520,11 @@ fallback: } static void InjectBytePaddingBlock(BrotliEncoderState* s) { - uint32_t seal = s->last_byte_; - size_t seal_bits = s->last_byte_bits_; + uint32_t seal = s->last_bytes_; + size_t seal_bits = s->last_bytes_bits_; uint8_t* destination; - s->last_byte_ = 0; - s->last_byte_bits_ = 0; + s->last_bytes_ = 0; + s->last_bytes_bits_ = 0; /* is_last = 0, data_nibbles = 11, reserved = 0, meta_nibbles = 00 */ seal |= 0x6u << seal_bits; seal_bits += 6; @@ -1424,6 +1538,7 @@ static void InjectBytePaddingBlock(BrotliEncoderState* s) { } destination[0] = (uint8_t)seal; if (seal_bits > 8) destination[1] = (uint8_t)(seal >> 8); + if (seal_bits > 16) destination[2] = (uint8_t)(seal >> 16); s->available_out_ += (seal_bits + 7) >> 3; } @@ -1432,7 +1547,7 @@ static void InjectBytePaddingBlock(BrotliEncoderState* s) { static BROTLI_BOOL InjectFlushOrPushOutput(BrotliEncoderState* s, size_t* available_out, uint8_t** next_out, size_t* total_out) { if (s->stream_state_ == BROTLI_STREAM_FLUSH_REQUESTED && - s->last_byte_bits_ != 0) { + s->last_bytes_bits_ != 0) { InjectBytePaddingBlock(s); return BROTLI_TRUE; } @@ -1513,10 +1628,10 @@ static BROTLI_BOOL BrotliEncoderCompressStreamFast( (*available_in == block_size) && (op == BROTLI_OPERATION_FINISH); BROTLI_BOOL force_flush = (*available_in == block_size) && (op == BROTLI_OPERATION_FLUSH); - size_t max_out_size = 2 * block_size + 502; + size_t max_out_size = 2 * block_size + 503; BROTLI_BOOL inplace = BROTLI_TRUE; uint8_t* storage = NULL; - size_t storage_ix = s->last_byte_bits_; + size_t storage_ix = s->last_bytes_bits_; size_t table_size; int* table; @@ -1531,7 +1646,8 @@ static BROTLI_BOOL BrotliEncoderCompressStreamFast( storage = GetBrotliStorage(s, max_out_size); if (BROTLI_IS_OOM(m)) return BROTLI_FALSE; } - storage[0] = s->last_byte_; + storage[0] = (uint8_t)s->last_bytes_; + storage[1] = (uint8_t)(s->last_bytes_ >> 8); table = GetHashTable(s, s->params.quality, block_size, &table_size); if (BROTLI_IS_OOM(m)) return BROTLI_FALSE; @@ -1561,8 +1677,8 @@ static BROTLI_BOOL BrotliEncoderCompressStreamFast( s->next_out_ = storage; s->available_out_ = out_bytes; } - s->last_byte_ = storage[storage_ix >> 3]; - s->last_byte_bits_ = storage_ix & 7u; + s->last_bytes_ = (uint16_t)(storage[storage_ix >> 3]); + s->last_bytes_bits_ = storage_ix & 7u; if (force_flush) s->stream_state_ = BROTLI_STREAM_FLUSH_REQUESTED; if (is_last) s->stream_state_ = BROTLI_STREAM_FINISHED; diff --git a/c/enc/encoder_dict.c b/c/enc/encoder_dict.c new file mode 100755 index 0000000..8b2f6ad --- /dev/null +++ b/c/enc/encoder_dict.c @@ -0,0 +1,32 @@ +/* Copyright 2017 Google Inc. All Rights Reserved. + + Distributed under MIT license. + See file LICENSE for detail or copy at https://opensource.org/licenses/MIT +*/ + +#include "./encoder_dict.h" + +#include "../common/dictionary.h" +#include "../common/transform.h" +#include "./dictionary_hash.h" +#include "./hash.h" + +#if defined(__cplusplus) || defined(c_plusplus) +extern "C" { +#endif + +void BrotliInitEncoderDictionary(BrotliEncoderDictionary* dict) { + dict->words = BrotliGetDictionary(); + + dict->hash_table = kStaticDictionaryHash; + dict->buckets = kStaticDictionaryBuckets; + dict->dict_words = kStaticDictionaryWords; + + dict->cutoffTransformsCount = kCutoffTransformsCount; + dict->cutoffTransforms = kCutoffTransforms; + +} + +#if defined(__cplusplus) || defined(c_plusplus) +} /* extern "C" */ +#endif diff --git a/c/enc/encoder_dict.h b/c/enc/encoder_dict.h new file mode 100755 index 0000000..9ac8b4a --- /dev/null +++ b/c/enc/encoder_dict.h @@ -0,0 +1,42 @@ +/* Copyright 2017 Google Inc. All Rights Reserved. + + Distributed under MIT license. + See file LICENSE for detail or copy at https://opensource.org/licenses/MIT +*/ + +#ifndef BROTLI_ENC_ENCODER_DICT_H_ +#define BROTLI_ENC_ENCODER_DICT_H_ + +#include "../common/dictionary.h" +#include "../common/platform.h" +#include <brotli/types.h> +#include "./static_dict_lut.h" + +#if defined(__cplusplus) || defined(c_plusplus) +extern "C" { +#endif + +/* Dictionary data (words and transforms) for 1 possible context */ +typedef struct BrotliEncoderDictionary { + const BrotliDictionary* words; + + /* cut off for fast encoder */ + uint32_t cutoffTransformsCount; + uint64_t cutoffTransforms; + + /* from dictionary_hash.h, for fast encoder */ + const uint16_t* hash_table; + + /* from static_dict_lut.h, for slow encoder */ + const uint16_t* buckets; + const DictWord* dict_words; +} BrotliEncoderDictionary; + +BROTLI_INTERNAL void BrotliInitEncoderDictionary(BrotliEncoderDictionary* dict); + + +#if defined(__cplusplus) || defined(c_plusplus) +} /* extern "C" */ +#endif + +#endif /* BROTLI_ENC_ENCODER_DICT_H_ */ diff --git a/c/enc/entropy_encode.c b/c/enc/entropy_encode.c index 9e0ea11..97f9dfb 100644 --- a/c/enc/entropy_encode.c +++ b/c/enc/entropy_encode.c @@ -66,11 +66,11 @@ static BROTLI_INLINE BROTLI_BOOL SortHuffmanTree( we are not planning to use this with extremely long blocks. See http://en.wikipedia.org/wiki/Huffman_coding */ -void BrotliCreateHuffmanTree(const uint32_t *data, +void BrotliCreateHuffmanTree(const uint32_t* data, const size_t length, const int tree_limit, HuffmanTree* tree, - uint8_t *depth) { + uint8_t* depth) { uint32_t count_limit; HuffmanTree sentinel; InitHuffmanTree(&sentinel, BROTLI_UINT32_MAX, -1, -1); @@ -371,8 +371,8 @@ void BrotliOptimizeHuffmanCountsForRle(size_t length, uint32_t* counts, } static void DecideOverRleUse(const uint8_t* depth, const size_t length, - BROTLI_BOOL *use_rle_for_non_zero, - BROTLI_BOOL *use_rle_for_zero) { + BROTLI_BOOL* use_rle_for_non_zero, + BROTLI_BOOL* use_rle_for_zero) { size_t total_reps_zero = 0; size_t total_reps_non_zero = 0; size_t count_reps_zero = 1; @@ -454,26 +454,26 @@ void BrotliWriteHuffmanTree(const uint8_t* depth, static uint16_t BrotliReverseBits(size_t num_bits, uint16_t bits) { static const size_t kLut[16] = { /* Pre-reversed 4-bit values. */ - 0x0, 0x8, 0x4, 0xc, 0x2, 0xa, 0x6, 0xe, - 0x1, 0x9, 0x5, 0xd, 0x3, 0xb, 0x7, 0xf + 0x00, 0x08, 0x04, 0x0C, 0x02, 0x0A, 0x06, 0x0E, + 0x01, 0x09, 0x05, 0x0D, 0x03, 0x0B, 0x07, 0x0F }; - size_t retval = kLut[bits & 0xf]; + size_t retval = kLut[bits & 0x0F]; size_t i; for (i = 4; i < num_bits; i += 4) { retval <<= 4; bits = (uint16_t)(bits >> 4); - retval |= kLut[bits & 0xf]; + retval |= kLut[bits & 0x0F]; } - retval >>= ((0 - num_bits) & 0x3); + retval >>= ((0 - num_bits) & 0x03); return (uint16_t)retval; } /* 0..15 are values for bits */ #define MAX_HUFFMAN_BITS 16 -void BrotliConvertBitDepthsToSymbols(const uint8_t *depth, +void BrotliConvertBitDepthsToSymbols(const uint8_t* depth, size_t len, - uint16_t *bits) { + uint16_t* bits) { /* In Brotli, all bit depths are [1..15] 0 bit depth means that the symbol does not exist. */ uint16_t bl_count[MAX_HUFFMAN_BITS] = { 0 }; diff --git a/c/enc/entropy_encode.h b/c/enc/entropy_encode.h index ef7c216..f23d9c3 100644 --- a/c/enc/entropy_encode.h +++ b/c/enc/entropy_encode.h @@ -46,11 +46,11 @@ BROTLI_INTERNAL BROTLI_BOOL BrotliSetDepth( be at least 2 * length + 1 long. See http://en.wikipedia.org/wiki/Huffman_coding */ -BROTLI_INTERNAL void BrotliCreateHuffmanTree(const uint32_t *data, +BROTLI_INTERNAL void BrotliCreateHuffmanTree(const uint32_t* data, const size_t length, const int tree_limit, HuffmanTree* tree, - uint8_t *depth); + uint8_t* depth); /* Change the population counts in a way that the consequent Huffman tree compression, especially its RLE-part will be more @@ -72,9 +72,9 @@ BROTLI_INTERNAL void BrotliWriteHuffmanTree(const uint8_t* depth, uint8_t* extra_bits_data); /* Get the actual bit values for a tree of bit depths. */ -BROTLI_INTERNAL void BrotliConvertBitDepthsToSymbols(const uint8_t *depth, +BROTLI_INTERNAL void BrotliConvertBitDepthsToSymbols(const uint8_t* depth, size_t len, - uint16_t *bits); + uint16_t* bits); /* Input size optimized Shell sort. */ typedef BROTLI_BOOL (*HuffmanTreeComparator)( diff --git a/c/enc/entropy_encode_static.h b/c/enc/entropy_encode_static.h index b2c1fbb..62b99a9 100644 --- a/c/enc/entropy_encode_static.h +++ b/c/enc/entropy_encode_static.h @@ -83,7 +83,7 @@ static const uint32_t kCodeLengthBits[18] = { static BROTLI_INLINE void StoreStaticCodeLengthCode( size_t* storage_ix, uint8_t* storage) { BrotliWriteBits( - 40, BROTLI_MAKE_UINT64_T(0x0000ffU, 0x55555554U), storage_ix, storage); + 40, BROTLI_MAKE_UINT64_T(0x0000FFu, 0x55555554u), storage_ix, storage); } static const uint64_t kZeroRepsBits[BROTLI_NUM_COMMAND_SYMBOLS] = { @@ -529,7 +529,7 @@ static const uint16_t kStaticDistanceCodeBits[64] = { static BROTLI_INLINE void StoreStaticDistanceHuffmanTree( size_t* storage_ix, uint8_t* storage) { - BrotliWriteBits(28, 0x0369dc03U, storage_ix, storage); + BrotliWriteBits(28, 0x0369DC03u, storage_ix, storage); } #if defined(__cplusplus) || defined(c_plusplus) diff --git a/c/enc/hash.h b/c/enc/hash.h index 2a1634d..1827ce6 100644 --- a/c/enc/hash.h +++ b/c/enc/hash.h @@ -16,6 +16,7 @@ #include "../common/dictionary.h" #include "../common/platform.h" #include <brotli/types.h> +#include "./encoder_dict.h" #include "./fast_log.h" #include "./find_match_length.h" #include "./memory.h" @@ -73,10 +74,10 @@ typedef struct HasherSearchResult { * There is no effort to ensure that it is a prime, the oddity is enough for this use. * The number has been tuned heuristically against compression benchmarks. */ -static const uint32_t kHashMul32 = 0x1e35a7bd; -static const uint64_t kHashMul64 = BROTLI_MAKE_UINT64_T(0x1e35a7bd, 0x1e35a7bd); +static const uint32_t kHashMul32 = 0x1E35A7BD; +static const uint64_t kHashMul64 = BROTLI_MAKE_UINT64_T(0x1E35A7BD, 0x1E35A7BD); static const uint64_t kHashMul64Long = - BROTLI_MAKE_UINT64_T(0x1fe35a7bU, 0xd3579bd3U); + BROTLI_MAKE_UINT64_T(0x1FE35A7Bu, 0xD3579BD3u); static BROTLI_INLINE uint32_t Hash14(const uint8_t* data) { uint32_t h = BROTLI_UNALIGNED_LOAD32LE(data) * kHashMul32; @@ -146,8 +147,9 @@ static BROTLI_INLINE score_t BackwardReferencePenaltyUsingLastDistance( } static BROTLI_INLINE BROTLI_BOOL TestStaticDictionaryItem( - const BrotliDictionary* dictionary, size_t item, const uint8_t* data, - size_t max_length, size_t max_backward, HasherSearchResult* out) { + const BrotliEncoderDictionary* dictionary, size_t item, const uint8_t* data, + size_t max_length, size_t max_backward, size_t max_distance, + HasherSearchResult* out) { size_t len; size_t dist; size_t offset; @@ -156,24 +158,24 @@ static BROTLI_INLINE BROTLI_BOOL TestStaticDictionaryItem( score_t score; len = item & 0x1F; dist = item >> 5; - offset = dictionary->offsets_by_length[len] + len * dist; + offset = dictionary->words->offsets_by_length[len] + len * dist; if (len > max_length) { return BROTLI_FALSE; } matchlen = - FindMatchLengthWithLimit(data, &dictionary->data[offset], len); - if (matchlen + kCutoffTransformsCount <= len || matchlen == 0) { + FindMatchLengthWithLimit(data, &dictionary->words->data[offset], len); + if (matchlen + dictionary->cutoffTransformsCount <= len || matchlen == 0) { return BROTLI_FALSE; } { size_t cut = len - matchlen; - size_t transform_id = - (cut << 2) + (size_t)((kCutoffTransforms >> (cut * 6)) & 0x3F); + size_t transform_id = (cut << 2) + + (size_t)((dictionary->cutoffTransforms >> (cut * 6)) & 0x3F); backward = max_backward + dist + 1 + - (transform_id << dictionary->size_bits_by_length[len]); + (transform_id << dictionary->words->size_bits_by_length[len]); } - if (backward >= BROTLI_MAX_DISTANCE) { + if (backward > max_distance) { return BROTLI_FALSE; } score = BackwardReferenceScore(matchlen, backward); @@ -188,9 +190,10 @@ static BROTLI_INLINE BROTLI_BOOL TestStaticDictionaryItem( } static BROTLI_INLINE void SearchInStaticDictionary( - const BrotliDictionary* dictionary, const uint16_t* dictionary_hash, + const BrotliEncoderDictionary* dictionary, HasherHandle handle, const uint8_t* data, size_t max_length, - size_t max_backward, HasherSearchResult* out, BROTLI_BOOL shallow) { + size_t max_backward, size_t max_distance, + HasherSearchResult* out, BROTLI_BOOL shallow) { size_t key; size_t i; HasherCommon* self = GetHasherCommon(handle); @@ -199,11 +202,11 @@ static BROTLI_INLINE void SearchInStaticDictionary( } key = Hash14(data) << 1; for (i = 0; i < (shallow ? 1u : 2u); ++i, ++key) { - size_t item = dictionary_hash[key]; + size_t item = dictionary->hash_table[key]; self->dict_num_lookups++; if (item != 0) { BROTLI_BOOL item_matches = TestStaticDictionaryItem( - dictionary, item, data, max_length, max_backward, out); + dictionary, item, data, max_length, max_backward, max_distance, out); if (item_matches) { self->dict_num_matches++; } diff --git a/c/enc/hash_forgetful_chain_inc.h b/c/enc/hash_forgetful_chain_inc.h index 46d363c..41cb3ff 100644 --- a/c/enc/hash_forgetful_chain_inc.h +++ b/c/enc/hash_forgetful_chain_inc.h @@ -28,7 +28,7 @@ static BROTLI_INLINE size_t FN(HashTypeLength)(void) { return 4; } static BROTLI_INLINE size_t FN(StoreLookahead)(void) { return 4; } /* HashBytes is the function that chooses the bucket to place the address in.*/ -static BROTLI_INLINE size_t FN(HashBytes)(const uint8_t *data) { +static BROTLI_INLINE size_t FN(HashBytes)(const uint8_t* data) { const uint32_t h = BROTLI_UNALIGNED_LOAD32LE(data) * kHashMul32; /* The higher bits contain more mixture from the multiplication, so we take our results from there. */ @@ -115,7 +115,7 @@ static BROTLI_INLINE void FN(Store)(HasherHandle BROTLI_RESTRICT handle, } static BROTLI_INLINE void FN(StoreRange)(HasherHandle handle, - const uint8_t *data, const size_t mask, const size_t ix_start, + const uint8_t* data, const size_t mask, const size_t ix_start, const size_t ix_end) { size_t i; for (i = ix_start; i < ix_end; ++i) { @@ -154,11 +154,12 @@ static BROTLI_INLINE void FN(PrepareDistanceCache)( Writes the best match into |out|. |out|->score is updated only if a better match is found. */ static BROTLI_INLINE void FN(FindLongestMatch)(HasherHandle handle, - const BrotliDictionary* dictionary, const uint16_t* dictionary_hash, + const BrotliEncoderDictionary* dictionary, const uint8_t* BROTLI_RESTRICT data, const size_t ring_buffer_mask, const int* BROTLI_RESTRICT distance_cache, const size_t cur_ix, const size_t max_length, const size_t max_backward, - const size_t gap, HasherSearchResult* BROTLI_RESTRICT out) { + const size_t gap, const size_t max_distance, + HasherSearchResult* BROTLI_RESTRICT out) { HashForgetfulChain* self = FN(Self)(handle); const size_t cur_ix_masked = cur_ix & ring_buffer_mask; /* Don't accept a short copy from far away. */ @@ -240,9 +241,9 @@ static BROTLI_INLINE void FN(FindLongestMatch)(HasherHandle handle, FN(Store)(handle, data, ring_buffer_mask, cur_ix); } if (out->score == min_score) { - SearchInStaticDictionary(dictionary, dictionary_hash, - handle, &data[cur_ix_masked], max_length, max_backward + gap, out, - BROTLI_FALSE); + SearchInStaticDictionary(dictionary, + handle, &data[cur_ix_masked], max_length, max_backward + gap, + max_distance, out, BROTLI_FALSE); } } diff --git a/c/enc/hash_longest_match64_inc.h b/c/enc/hash_longest_match64_inc.h index 6b0697b..e099edf 100644 --- a/c/enc/hash_longest_match64_inc.h +++ b/c/enc/hash_longest_match64_inc.h @@ -20,7 +20,7 @@ static BROTLI_INLINE size_t FN(HashTypeLength)(void) { return 8; } static BROTLI_INLINE size_t FN(StoreLookahead)(void) { return 8; } /* HashBytes is the function that chooses the bucket to place the address in. */ -static BROTLI_INLINE uint32_t FN(HashBytes)(const uint8_t *data, +static BROTLI_INLINE uint32_t FN(HashBytes)(const uint8_t* data, const uint64_t mask, const int shift) { const uint64_t h = (BROTLI_UNALIGNED_LOAD64LE(data) & mask) * kHashMul64Long; @@ -105,7 +105,7 @@ static BROTLI_INLINE size_t FN(HashMemAllocInBytes)( /* Look at 4 bytes at &data[ix & mask]. Compute a hash from these, and store the value of ix at that position. */ -static BROTLI_INLINE void FN(Store)(HasherHandle handle, const uint8_t *data, +static BROTLI_INLINE void FN(Store)(HasherHandle handle, const uint8_t* data, const size_t mask, const size_t ix) { HashLongestMatch* self = FN(Self)(handle); uint16_t* num = FN(Num)(self); @@ -119,7 +119,7 @@ static BROTLI_INLINE void FN(Store)(HasherHandle handle, const uint8_t *data, } static BROTLI_INLINE void FN(StoreRange)(HasherHandle handle, - const uint8_t *data, const size_t mask, const size_t ix_start, + const uint8_t* data, const size_t mask, const size_t ix_start, const size_t ix_end) { size_t i; for (i = ix_start; i < ix_end; ++i) { @@ -158,11 +158,11 @@ static BROTLI_INLINE void FN(PrepareDistanceCache)( Writes the best match into |out|. |out|->score is updated only if a better match is found. */ static BROTLI_INLINE void FN(FindLongestMatch)(HasherHandle handle, - const BrotliDictionary* dictionary, const uint16_t* dictionary_hash, + const BrotliEncoderDictionary* dictionary, const uint8_t* BROTLI_RESTRICT data, const size_t ring_buffer_mask, const int* BROTLI_RESTRICT distance_cache, const size_t cur_ix, const size_t max_length, const size_t max_backward, const size_t gap, - HasherSearchResult* BROTLI_RESTRICT out) { + const size_t max_distance, HasherSearchResult* BROTLI_RESTRICT out) { HasherCommon* common = GetHasherCommon(handle); HashLongestMatch* self = FN(Self)(handle); uint16_t* num = FN(Num)(self); @@ -257,9 +257,9 @@ static BROTLI_INLINE void FN(FindLongestMatch)(HasherHandle handle, ++num[key]; } if (min_score == out->score) { - SearchInStaticDictionary(dictionary, dictionary_hash, - handle, &data[cur_ix_masked], max_length, max_backward + gap, out, - BROTLI_FALSE); + SearchInStaticDictionary(dictionary, + handle, &data[cur_ix_masked], max_length, max_backward + gap, + max_distance, out, BROTLI_FALSE); } } diff --git a/c/enc/hash_longest_match_inc.h b/c/enc/hash_longest_match_inc.h index d24576d..951d7a4 100644 --- a/c/enc/hash_longest_match_inc.h +++ b/c/enc/hash_longest_match_inc.h @@ -20,7 +20,7 @@ static BROTLI_INLINE size_t FN(HashTypeLength)(void) { return 4; } static BROTLI_INLINE size_t FN(StoreLookahead)(void) { return 4; } /* HashBytes is the function that chooses the bucket to place the address in. */ -static uint32_t FN(HashBytes)(const uint8_t *data, const int shift) { +static uint32_t FN(HashBytes)(const uint8_t* data, const int shift) { uint32_t h = BROTLI_UNALIGNED_LOAD32LE(data) * kHashMul32; /* The higher bits contain more mixture from the multiplication, so we take our results from there. */ @@ -112,7 +112,7 @@ static BROTLI_INLINE void FN(Store)(HasherHandle handle, const uint8_t* data, } static BROTLI_INLINE void FN(StoreRange)(HasherHandle handle, - const uint8_t *data, const size_t mask, const size_t ix_start, + const uint8_t* data, const size_t mask, const size_t ix_start, const size_t ix_end) { size_t i; for (i = ix_start; i < ix_end; ++i) { @@ -151,11 +151,11 @@ static BROTLI_INLINE void FN(PrepareDistanceCache)( Writes the best match into |out|. |out|->score is updated only if a better match is found. */ static BROTLI_INLINE void FN(FindLongestMatch)(HasherHandle handle, - const BrotliDictionary* dictionary, const uint16_t* dictionary_hash, + const BrotliEncoderDictionary* dictionary, const uint8_t* BROTLI_RESTRICT data, const size_t ring_buffer_mask, const int* BROTLI_RESTRICT distance_cache, const size_t cur_ix, const size_t max_length, const size_t max_backward, const size_t gap, - HasherSearchResult* BROTLI_RESTRICT out) { + const size_t max_distance, HasherSearchResult* BROTLI_RESTRICT out) { HasherCommon* common = GetHasherCommon(handle); HashLongestMatch* self = FN(Self)(handle); uint16_t* num = FN(Num)(self); @@ -249,9 +249,9 @@ static BROTLI_INLINE void FN(FindLongestMatch)(HasherHandle handle, ++num[key]; } if (min_score == out->score) { - SearchInStaticDictionary(dictionary, dictionary_hash, - handle, &data[cur_ix_masked], max_length, max_backward + gap, out, - BROTLI_FALSE); + SearchInStaticDictionary(dictionary, + handle, &data[cur_ix_masked], max_length, max_backward + gap, + max_distance, out, BROTLI_FALSE); } } diff --git a/c/enc/hash_longest_match_quickly_inc.h b/c/enc/hash_longest_match_quickly_inc.h index 2c78351..a7b9639 100644 --- a/c/enc/hash_longest_match_quickly_inc.h +++ b/c/enc/hash_longest_match_quickly_inc.h @@ -81,7 +81,7 @@ static BROTLI_INLINE size_t FN(HashMemAllocInBytes)( Compute a hash from these, and store the value somewhere within [ix .. ix+3]. */ static BROTLI_INLINE void FN(Store)(HasherHandle handle, - const uint8_t *data, const size_t mask, const size_t ix) { + const uint8_t* data, const size_t mask, const size_t ix) { const uint32_t key = FN(HashBytes)(&data[ix & mask]); /* Wiggle the value with the bucket sweep range. */ const uint32_t off = (ix >> 3) % BUCKET_SWEEP; @@ -89,7 +89,7 @@ static BROTLI_INLINE void FN(Store)(HasherHandle handle, } static BROTLI_INLINE void FN(StoreRange)(HasherHandle handle, - const uint8_t *data, const size_t mask, const size_t ix_start, + const uint8_t* data, const size_t mask, const size_t ix_start, const size_t ix_end) { size_t i; for (i = ix_start; i < ix_end; ++i) { @@ -125,11 +125,12 @@ static BROTLI_INLINE void FN(PrepareDistanceCache)( Writes the best match into |out|. |out|->score is updated only if a better match is found. */ static BROTLI_INLINE void FN(FindLongestMatch)( - HasherHandle handle, const BrotliDictionary* dictionary, - const uint16_t* dictionary_hash, const uint8_t* BROTLI_RESTRICT data, + HasherHandle handle, const BrotliEncoderDictionary* dictionary, + const uint8_t* BROTLI_RESTRICT data, const size_t ring_buffer_mask, const int* BROTLI_RESTRICT distance_cache, const size_t cur_ix, const size_t max_length, const size_t max_backward, - const size_t gap, HasherSearchResult* BROTLI_RESTRICT out) { + const size_t gap, const size_t max_distance, + HasherSearchResult* BROTLI_RESTRICT out) { HashLongestMatchQuickly* self = FN(Self)(handle); const size_t best_len_in = out->len; const size_t cur_ix_masked = cur_ix & ring_buffer_mask; @@ -191,7 +192,7 @@ static BROTLI_INLINE void FN(FindLongestMatch)( } } } else { - uint32_t *bucket = self->buckets_ + key; + uint32_t* bucket = self->buckets_ + key; int i; prev_ix = *bucket++; for (i = 0; i < BUCKET_SWEEP; ++i, prev_ix = *bucket++) { @@ -221,9 +222,9 @@ static BROTLI_INLINE void FN(FindLongestMatch)( } } if (USE_DICTIONARY && min_score == out->score) { - SearchInStaticDictionary(dictionary, dictionary_hash, - handle, &data[cur_ix_masked], max_length, max_backward + gap, out, - BROTLI_TRUE); + SearchInStaticDictionary(dictionary, + handle, &data[cur_ix_masked], max_length, max_backward + gap, + max_distance, out, BROTLI_TRUE); } self->buckets_[key + ((cur_ix >> 3) % BUCKET_SWEEP)] = (uint32_t)cur_ix; } diff --git a/c/enc/hash_to_binary_tree_inc.h b/c/enc/hash_to_binary_tree_inc.h index 73774b2..48097b1 100644 --- a/c/enc/hash_to_binary_tree_inc.h +++ b/c/enc/hash_to_binary_tree_inc.h @@ -24,7 +24,7 @@ static BROTLI_INLINE size_t FN(StoreLookahead)(void) { return MAX_TREE_COMP_LENGTH; } -static uint32_t FN(HashBytes)(const uint8_t *data) { +static uint32_t FN(HashBytes)(const uint8_t* data) { uint32_t h = BROTLI_UNALIGNED_LOAD32LE(data) * kHashMul32; /* The higher bits contain more mixture from the multiplication, so we take our results from there. */ @@ -200,7 +200,7 @@ static BROTLI_INLINE BackwardMatch* FN(StoreAndFindMatches)( sorted by strictly increasing length and (non-strictly) increasing distance. */ static BROTLI_INLINE size_t FN(FindAllMatches)(HasherHandle handle, - const BrotliDictionary* dictionary, const uint8_t* data, + const BrotliEncoderDictionary* dictionary, const uint8_t* data, const size_t ring_buffer_mask, const size_t cur_ix, const size_t max_length, const size_t max_backward, const size_t gap, const BrotliEncoderParams* params, BackwardMatch* matches) { @@ -252,7 +252,7 @@ static BROTLI_INLINE size_t FN(FindAllMatches)(HasherHandle handle, uint32_t dict_id = dict_matches[l]; if (dict_id < kInvalidMatch) { size_t distance = max_backward + gap + (dict_id >> 5) + 1; - if (distance < BROTLI_MAX_DISTANCE) { + if (distance <= params->dist.max_distance) { InitDictionaryBackwardMatch(matches++, distance, l, dict_id & 31); } } @@ -265,7 +265,7 @@ static BROTLI_INLINE size_t FN(FindAllMatches)(HasherHandle handle, /* Stores the hash of the next 4 bytes and re-roots the binary tree at the current sequence, without returning any matches. REQUIRES: ix + MAX_TREE_COMP_LENGTH <= end-of-current-block */ -static BROTLI_INLINE void FN(Store)(HasherHandle handle, const uint8_t *data, +static BROTLI_INLINE void FN(Store)(HasherHandle handle, const uint8_t* data, const size_t mask, const size_t ix) { HashToBinaryTree* self = FN(Self)(handle); /* Maximum distance is window size - 16, see section 9.1. of the spec. */ @@ -275,7 +275,7 @@ static BROTLI_INLINE void FN(Store)(HasherHandle handle, const uint8_t *data, } static BROTLI_INLINE void FN(StoreRange)(HasherHandle handle, - const uint8_t *data, const size_t mask, const size_t ix_start, + const uint8_t* data, const size_t mask, const size_t ix_start, const size_t ix_end) { size_t i = ix_start; size_t j = ix_start; diff --git a/c/enc/histogram.c b/c/enc/histogram.c index bb7b4c5..6da2ff6 100644 --- a/c/enc/histogram.c +++ b/c/enc/histogram.c @@ -8,9 +8,9 @@ #include "./histogram.h" +#include "../common/context.h" #include "./block_splitter.h" #include "./command.h" -#include "./context.h" #if defined(__cplusplus) || defined(c_plusplus) extern "C" { @@ -63,13 +63,16 @@ void BrotliBuildHistogramsWithContext( BlockSplitIteratorNext(&insert_and_copy_it); HistogramAddCommand(&insert_and_copy_histograms[insert_and_copy_it.type_], cmd->cmd_prefix_); + /* TODO: unwrap iterator blocks. */ for (j = cmd->insert_len_; j != 0; --j) { size_t context; BlockSplitIteratorNext(&literal_it); - context = context_modes ? - ((literal_it.type_ << BROTLI_LITERAL_CONTEXT_BITS) + - Context(prev_byte, prev_byte2, context_modes[literal_it.type_])) : - literal_it.type_; + context = literal_it.type_; + if (context_modes) { + ContextLut lut = BROTLI_CONTEXT_LUT(context_modes[context]); + context = (context << BROTLI_LITERAL_CONTEXT_BITS) + + BROTLI_CONTEXT(prev_byte, prev_byte2, lut); + } HistogramAddLiteral(&literal_histograms[context], ringbuffer[pos & mask]); prev_byte2 = prev_byte; @@ -86,7 +89,7 @@ void BrotliBuildHistogramsWithContext( context = (dist_it.type_ << BROTLI_DISTANCE_CONTEXT_BITS) + CommandDistanceContext(cmd); HistogramAddDistance(©_dist_histograms[context], - cmd->dist_prefix_); + cmd->dist_prefix_ & 0x3FF); } } } diff --git a/c/enc/histogram.h b/c/enc/histogram.h index b1b8d11..42af3c3 100644 --- a/c/enc/histogram.h +++ b/c/enc/histogram.h @@ -12,16 +12,19 @@ #include <string.h> /* memset */ #include "../common/constants.h" +#include "../common/context.h" #include "../common/platform.h" #include <brotli/types.h> #include "./block_splitter.h" #include "./command.h" -#include "./context.h" #if defined(__cplusplus) || defined(c_plusplus) extern "C" { #endif +/* The distance symbols effectively used by "Large Window Brotli" (32-bit). */ +#define BROTLI_NUM_HISTOGRAM_DISTANCE_SYMBOLS 544 + #define FN(X) X ## Literal #define DATA_SIZE BROTLI_NUM_LITERAL_SYMBOLS #define DataType uint8_t @@ -38,7 +41,7 @@ extern "C" { #undef FN #define FN(X) X ## Distance -#define DATA_SIZE BROTLI_NUM_DISTANCE_SYMBOLS +#define DATA_SIZE BROTLI_NUM_HISTOGRAM_DISTANCE_SYMBOLS #include "./histogram_inc.h" /* NOLINT(build/include) */ #undef DataType #undef DATA_SIZE diff --git a/c/enc/histogram_inc.h b/c/enc/histogram_inc.h index 7807036..50eaf74 100644 --- a/c/enc/histogram_inc.h +++ b/c/enc/histogram_inc.h @@ -33,7 +33,7 @@ static BROTLI_INLINE void FN(HistogramAdd)(FN(Histogram)* self, size_t val) { } static BROTLI_INLINE void FN(HistogramAddVector)(FN(Histogram)* self, - const DataType *p, size_t n) { + const DataType* p, size_t n) { self->total_count_ += n; n += 1; while (--n) ++self->data_[*p++]; diff --git a/c/enc/literal_cost.c b/c/enc/literal_cost.c index 9bcb680..c231100 100644 --- a/c/enc/literal_cost.c +++ b/c/enc/literal_cost.c @@ -25,7 +25,7 @@ static size_t UTF8Position(size_t last, size_t c, size_t clamp) { return BROTLI_MIN(size_t, 1, clamp); } else { /* Let's decide over the last byte if this ends the sequence. */ - if (last < 0xe0) { + if (last < 0xE0) { return 0; /* Completed two or three byte coding. */ } else { /* Next one is the 'Byte 3' of utf-8 encoding. */ return BROTLI_MIN(size_t, 2, clamp); @@ -34,7 +34,7 @@ static size_t UTF8Position(size_t last, size_t c, size_t clamp) { } static size_t DecideMultiByteStatsLevel(size_t pos, size_t len, size_t mask, - const uint8_t *data) { + const uint8_t* data) { size_t counts[3] = { 0 }; size_t max_utf8 = 1; /* should be 2, but 1 compresses better. */ size_t last_c = 0; @@ -54,7 +54,7 @@ static size_t DecideMultiByteStatsLevel(size_t pos, size_t len, size_t mask, } static void EstimateBitCostsForLiteralsUTF8(size_t pos, size_t len, size_t mask, - const uint8_t *data, float *cost) { + const uint8_t* data, float* cost) { /* max_utf8 is 0 (normal ASCII single byte modeling), 1 (for 2-byte UTF-8 modeling), or 2 (for 3-byte UTF-8 modeling). */ const size_t max_utf8 = DecideMultiByteStatsLevel(pos, len, mask, data); @@ -125,7 +125,7 @@ static void EstimateBitCostsForLiteralsUTF8(size_t pos, size_t len, size_t mask, } void BrotliEstimateBitCostsForLiterals(size_t pos, size_t len, size_t mask, - const uint8_t *data, float *cost) { + const uint8_t* data, float* cost) { if (BrotliIsMostlyUTF8(data, pos, mask, len, kMinUTF8Ratio)) { EstimateBitCostsForLiteralsUTF8(pos, len, mask, data, cost); return; diff --git a/c/enc/literal_cost.h b/c/enc/literal_cost.h index d2f430c..8f53f39 100644 --- a/c/enc/literal_cost.h +++ b/c/enc/literal_cost.h @@ -21,7 +21,7 @@ extern "C" { ring-buffer (data, mask) will take entropy coded and writes these estimates to the cost[0..len) array. */ BROTLI_INTERNAL void BrotliEstimateBitCostsForLiterals( - size_t pos, size_t len, size_t mask, const uint8_t *data, float *cost); + size_t pos, size_t len, size_t mask, const uint8_t* data, float* cost); #if defined(__cplusplus) || defined(c_plusplus) } /* extern "C" */ diff --git a/c/enc/memory.c b/c/enc/memory.c index 3716b98..f6ed7e3 100644 --- a/c/enc/memory.c +++ b/c/enc/memory.c @@ -27,22 +27,12 @@ extern "C" { #define NEW_ALLOCATED_OFFSET MAX_PERM_ALLOCATED #define NEW_FREED_OFFSET (MAX_PERM_ALLOCATED + MAX_NEW_ALLOCATED) -static void* DefaultAllocFunc(void* opaque, size_t size) { - BROTLI_UNUSED(opaque); - return malloc(size); -} - -static void DefaultFreeFunc(void* opaque, void* address) { - BROTLI_UNUSED(opaque); - free(address); -} - void BrotliInitMemoryManager( MemoryManager* m, brotli_alloc_func alloc_func, brotli_free_func free_func, void* opaque) { if (!alloc_func) { - m->alloc_func = DefaultAllocFunc; - m->free_func = DefaultFreeFunc; + m->alloc_func = BrotliDefaultAllocFunc; + m->free_func = BrotliDefaultFreeFunc; m->opaque = 0; } else { m->alloc_func = alloc_func; diff --git a/c/enc/metablock.c b/c/enc/metablock.c index 50f2ea2..6219292 100644 --- a/c/enc/metablock.c +++ b/c/enc/metablock.c @@ -10,12 +10,12 @@ #include "./metablock.h" #include "../common/constants.h" +#include "../common/context.h" #include "../common/platform.h" #include <brotli/types.h> #include "./bit_cost.h" #include "./block_splitter.h" #include "./cluster.h" -#include "./context.h" #include "./entropy_encode.h" #include "./histogram.h" #include "./memory.h" @@ -398,9 +398,9 @@ static void MapStaticContexts(MemoryManager* m, static BROTLI_INLINE void BrotliBuildMetaBlockGreedyInternal( MemoryManager* m, const uint8_t* ringbuffer, size_t pos, size_t mask, - uint8_t prev_byte, uint8_t prev_byte2, ContextType literal_context_mode, + uint8_t prev_byte, uint8_t prev_byte2, ContextLut literal_context_lut, const size_t num_contexts, const uint32_t* static_context_map, - const Command *commands, size_t n_commands, MetaBlockSplit* mb) { + const Command* commands, size_t n_commands, MetaBlockSplit* mb) { union { BlockSplitterLiteral plain; ContextBlockSplitter ctx; @@ -441,7 +441,8 @@ static BROTLI_INLINE void BrotliBuildMetaBlockGreedyInternal( if (num_contexts == 1) { BlockSplitterAddSymbolLiteral(&lit_blocks.plain, literal); } else { - size_t context = Context(prev_byte, prev_byte2, literal_context_mode); + size_t context = + BROTLI_CONTEXT(prev_byte, prev_byte2, literal_context_lut); ContextBlockSplitterAddSymbol(&lit_blocks.ctx, m, literal, static_context_map[context]); if (BROTLI_IS_OOM(m)) return; @@ -455,7 +456,7 @@ static BROTLI_INLINE void BrotliBuildMetaBlockGreedyInternal( prev_byte2 = ringbuffer[(pos - 2) & mask]; prev_byte = ringbuffer[(pos - 1) & mask]; if (cmd.cmd_prefix_ >= 128) { - BlockSplitterAddSymbolDistance(&dist_blocks, cmd.dist_prefix_); + BlockSplitterAddSymbolDistance(&dist_blocks, cmd.dist_prefix_ & 0x3FF); } } } @@ -482,7 +483,7 @@ void BrotliBuildMetaBlockGreedy(MemoryManager* m, size_t mask, uint8_t prev_byte, uint8_t prev_byte2, - ContextType literal_context_mode, + ContextLut literal_context_lut, size_t num_contexts, const uint32_t* static_context_map, const Command* commands, @@ -490,19 +491,17 @@ void BrotliBuildMetaBlockGreedy(MemoryManager* m, MetaBlockSplit* mb) { if (num_contexts == 1) { BrotliBuildMetaBlockGreedyInternal(m, ringbuffer, pos, mask, prev_byte, - prev_byte2, literal_context_mode, 1, NULL, commands, n_commands, mb); + prev_byte2, literal_context_lut, 1, NULL, commands, n_commands, mb); } else { BrotliBuildMetaBlockGreedyInternal(m, ringbuffer, pos, mask, prev_byte, - prev_byte2, literal_context_mode, num_contexts, static_context_map, + prev_byte2, literal_context_lut, num_contexts, static_context_map, commands, n_commands, mb); } } -void BrotliOptimizeHistograms(size_t num_direct_distance_codes, - size_t distance_postfix_bits, +void BrotliOptimizeHistograms(uint32_t num_distance_codes, MetaBlockSplit* mb) { uint8_t good_for_rle[BROTLI_NUM_COMMAND_SYMBOLS]; - size_t num_distance_codes; size_t i; for (i = 0; i < mb->literal_histograms_size; ++i) { BrotliOptimizeHuffmanCountsForRle(256, mb->literal_histograms[i].data_, @@ -513,9 +512,6 @@ void BrotliOptimizeHistograms(size_t num_direct_distance_codes, mb->command_histograms[i].data_, good_for_rle); } - num_distance_codes = BROTLI_NUM_DISTANCE_SHORT_CODES + - num_direct_distance_codes + - ((2 * BROTLI_MAX_DISTANCE_BITS) << distance_postfix_bits); for (i = 0; i < mb->distance_histograms_size; ++i) { BrotliOptimizeHuffmanCountsForRle(num_distance_codes, mb->distance_histograms[i].data_, diff --git a/c/enc/metablock.h b/c/enc/metablock.h index 3fa6d65..76a6594 100644 --- a/c/enc/metablock.h +++ b/c/enc/metablock.h @@ -10,11 +10,11 @@ #ifndef BROTLI_ENC_METABLOCK_H_ #define BROTLI_ENC_METABLOCK_H_ +#include "../common/context.h" #include "../common/platform.h" #include <brotli/types.h> #include "./block_splitter.h" #include "./command.h" -#include "./context.h" #include "./histogram.h" #include "./memory.h" #include "./quality.h" @@ -85,12 +85,11 @@ BROTLI_INTERNAL void BrotliBuildMetaBlock(MemoryManager* m, is the same for all block types. */ BROTLI_INTERNAL void BrotliBuildMetaBlockGreedy( MemoryManager* m, const uint8_t* ringbuffer, size_t pos, size_t mask, - uint8_t prev_byte, uint8_t prev_byte2, ContextType literal_context_mode, + uint8_t prev_byte, uint8_t prev_byte2, ContextLut literal_context_lut, size_t num_contexts, const uint32_t* static_context_map, const Command* commands, size_t n_commands, MetaBlockSplit* mb); -BROTLI_INTERNAL void BrotliOptimizeHistograms(size_t num_direct_distance_codes, - size_t distance_postfix_bits, +BROTLI_INTERNAL void BrotliOptimizeHistograms(uint32_t num_distance_codes, MetaBlockSplit* mb); #if defined(__cplusplus) || defined(c_plusplus) diff --git a/c/enc/params.h b/c/enc/params.h index acb3668..9bcf236 100755 --- a/c/enc/params.h +++ b/c/enc/params.h @@ -10,6 +10,7 @@ #define BROTLI_ENC_PARAMS_H_ #include <brotli/encode.h> +#include "./encoder_dict.h" typedef struct BrotliHasherParams { int type; @@ -19,6 +20,13 @@ typedef struct BrotliHasherParams { int num_last_distances_to_check; } BrotliHasherParams; +typedef struct BrotliDistanceParams { + uint32_t num_direct_distance_codes; + uint32_t distance_postfix_bits; + uint32_t alphabet_size; + size_t max_distance; +} BrotliDistanceParams; + /* Encoding parameters */ typedef struct BrotliEncoderParams { BrotliEncoderMode mode; @@ -27,7 +35,10 @@ typedef struct BrotliEncoderParams { int lgblock; size_t size_hint; BROTLI_BOOL disable_literal_context_modeling; + BROTLI_BOOL large_window; BrotliHasherParams hasher; + BrotliDistanceParams dist; + BrotliEncoderDictionary dictionary; } BrotliEncoderParams; #endif /* BROTLI_ENC_PARAMS_H_ */ diff --git a/c/enc/prefix.h b/c/enc/prefix.h index 0168d4e..fd359a4 100644 --- a/c/enc/prefix.h +++ b/c/enc/prefix.h @@ -39,11 +39,10 @@ static BROTLI_INLINE void PrefixEncodeCopyDistance(size_t distance_code, size_t prefix = (dist >> bucket) & 1; size_t offset = (2 + prefix) << bucket; size_t nbits = bucket - postfix_bits; - *code = (uint16_t)( + *code = (uint16_t)((nbits << 10) | (BROTLI_NUM_DISTANCE_SHORT_CODES + num_direct_codes + ((2 * (nbits - 1) + prefix) << postfix_bits) + postfix)); - *extra_bits = (uint32_t)( - (nbits << 24) | ((dist - offset) >> postfix_bits)); + *extra_bits = (uint32_t)((dist - offset) >> postfix_bits); } } diff --git a/c/enc/quality.h b/c/enc/quality.h index 80b7051..f9b1111 100644 --- a/c/enc/quality.h +++ b/c/enc/quality.h @@ -31,7 +31,7 @@ /* For quality below MIN_QUALITY_FOR_BLOCK_SPLIT there is no block splitting, so we buffer at most this much literals and commands. */ -#define MAX_NUM_DELAYED_SYMBOLS 0x2fff +#define MAX_NUM_DELAYED_SYMBOLS 0x2FFF /* Returns hash-table size for quality levels 0 and 1. */ static BROTLI_INLINE size_t MaxHashTableSize(int quality) { @@ -60,10 +60,15 @@ static BROTLI_INLINE size_t MaxZopfliCandidates( static BROTLI_INLINE void SanitizeParams(BrotliEncoderParams* params) { params->quality = BROTLI_MIN(int, BROTLI_MAX_QUALITY, BROTLI_MAX(int, BROTLI_MIN_QUALITY, params->quality)); + if (params->quality <= MAX_QUALITY_FOR_STATIC_ENTROPY_CODES) { + params->large_window = BROTLI_FALSE; + } if (params->lgwin < BROTLI_MIN_WINDOW_BITS) { params->lgwin = BROTLI_MIN_WINDOW_BITS; - } else if (params->lgwin > BROTLI_MAX_WINDOW_BITS) { - params->lgwin = BROTLI_MAX_WINDOW_BITS; + } else { + int max_lgwin = params->large_window ? BROTLI_LARGE_MAX_WINDOW_BITS : + BROTLI_MAX_WINDOW_BITS; + if (params->lgwin > max_lgwin) params->lgwin = max_lgwin; } } diff --git a/c/enc/ringbuffer.h b/c/enc/ringbuffer.h index 4e58749..86079a8 100644 --- a/c/enc/ringbuffer.h +++ b/c/enc/ringbuffer.h @@ -41,9 +41,9 @@ typedef struct RingBuffer { uint32_t pos_; /* The actual ring buffer containing the copy of the last two bytes, the data, and the copy of the beginning as a tail. */ - uint8_t *data_; + uint8_t* data_; /* The start of the ring-buffer. */ - uint8_t *buffer_; + uint8_t* buffer_; } RingBuffer; static BROTLI_INLINE void RingBufferInit(RingBuffer* rb) { @@ -91,7 +91,7 @@ static BROTLI_INLINE void RingBufferInitBuffer( } static BROTLI_INLINE void RingBufferWriteTail( - const uint8_t *bytes, size_t n, RingBuffer* rb) { + const uint8_t* bytes, size_t n, RingBuffer* rb) { const size_t masked_pos = rb->pos_ & rb->mask_; if (BROTLI_PREDICT_FALSE(masked_pos < rb->tail_size_)) { /* Just fill the tail buffer with the beginning data. */ @@ -103,7 +103,7 @@ static BROTLI_INLINE void RingBufferWriteTail( /* Push bytes into the ring buffer. */ static BROTLI_INLINE void RingBufferWrite( - MemoryManager* m, const uint8_t *bytes, size_t n, RingBuffer* rb) { + MemoryManager* m, const uint8_t* bytes, size_t n, RingBuffer* rb) { if (rb->pos_ == 0 && n < rb->tail_size_) { /* Special case for the first write: to process the first block, we don't need to allocate the whole ring-buffer and we don't need the tail @@ -144,12 +144,16 @@ static BROTLI_INLINE void RingBufferWrite( n - (rb->size_ - masked_pos)); } } - rb->buffer_[-2] = rb->buffer_[rb->size_ - 2]; - rb->buffer_[-1] = rb->buffer_[rb->size_ - 1]; - rb->pos_ += (uint32_t)n; - if (rb->pos_ > (1u << 30)) { - /* Wrap, but preserve not-a-first-lap feature. */ - rb->pos_ = (rb->pos_ & ((1u << 30) - 1)) | (1u << 30); + { + BROTLI_BOOL not_first_lap = (rb->pos_ & (1u << 31)) != 0; + uint32_t rb_pos_mask = (1u << 31) - 1; + rb->buffer_[-2] = rb->buffer_[rb->size_ - 2]; + rb->buffer_[-1] = rb->buffer_[rb->size_ - 1]; + rb->pos_ = (rb->pos_ & rb_pos_mask) + (uint32_t)(n & rb_pos_mask); + if (not_first_lap) { + /* Wrap, but preserve not-a-first-lap feature. */ + rb->pos_ |= 1u << 31; + } } } diff --git a/c/enc/static_dict.c b/c/enc/static_dict.c index 36caa61..758ef80 100644 --- a/c/enc/static_dict.c +++ b/c/enc/static_dict.c @@ -8,19 +8,20 @@ #include "../common/dictionary.h" #include "../common/platform.h" +#include "../common/transform.h" +#include "./encoder_dict.h" #include "./find_match_length.h" -#include "./static_dict_lut.h" #if defined(__cplusplus) || defined(c_plusplus) extern "C" { #endif -static const uint8_t kUppercaseFirst = 10; +/* TODO: use BrotliTransforms.cutOffTransforms instead. */ static const uint8_t kOmitLastNTransforms[10] = { 0, 12, 27, 23, 42, 63, 56, 48, 59, 64, }; -static BROTLI_INLINE uint32_t Hash(const uint8_t *data) { +static BROTLI_INLINE uint32_t Hash(const uint8_t* data) { uint32_t h = BROTLI_UNALIGNED_LOAD32LE(data) * kDictHashMul32; /* The higher bits contain more mixture from the multiplication, so we take our results from there. */ @@ -79,32 +80,33 @@ static BROTLI_INLINE BROTLI_BOOL IsMatch(const BrotliDictionary* dictionary, } BROTLI_BOOL BrotliFindAllStaticDictionaryMatches( - const BrotliDictionary* dictionary, const uint8_t* data, + const BrotliEncoderDictionary* dictionary, const uint8_t* data, size_t min_length, size_t max_length, uint32_t* matches) { BROTLI_BOOL has_found_match = BROTLI_FALSE; { - size_t offset = kStaticDictionaryBuckets[Hash(data)]; + size_t offset = dictionary->buckets[Hash(data)]; BROTLI_BOOL end = !offset; while (!end) { - DictWord w = kStaticDictionaryWords[offset++]; + DictWord w = dictionary->dict_words[offset++]; const size_t l = w.len & 0x1F; - const size_t n = (size_t)1 << dictionary->size_bits_by_length[l]; + const size_t n = (size_t)1 << dictionary->words->size_bits_by_length[l]; const size_t id = w.idx; end = !!(w.len & 0x80); w.len = (uint8_t)l; if (w.transform == 0) { const size_t matchlen = - DictMatchLength(dictionary, data, id, l, max_length); + DictMatchLength(dictionary->words, data, id, l, max_length); const uint8_t* s; size_t minlen; size_t maxlen; size_t len; - /* Transform "" + kIdentity + "" */ + /* Transform "" + BROTLI_TRANSFORM_IDENTITY + "" */ if (matchlen == l) { AddMatch(id, l, l, matches); has_found_match = BROTLI_TRUE; } - /* Transforms "" + kOmitLast1 + "" and "" + kOmitLast1 + "ing " */ + /* Transforms "" + BROTLI_TRANSFORM_OMIT_LAST_1 + "" and + "" + BROTLI_TRANSFORM_OMIT_LAST_1 + "ing " */ if (matchlen >= l - 1) { AddMatch(id + 12 * n, l - 1, l, matches); if (l + 2 < max_length && @@ -114,7 +116,7 @@ BROTLI_BOOL BrotliFindAllStaticDictionaryMatches( } has_found_match = BROTLI_TRUE; } - /* Transform "" + kOmitLastN + "" (N = 2 .. 9) */ + /* Transform "" + BROTLI_TRANSFORM_OMIT_LAST_# + "" (# = 2 .. 9) */ minlen = min_length; if (l > 9) minlen = BROTLI_MAX(size_t, minlen, l - 9); maxlen = BROTLI_MIN(size_t, matchlen, l - 2); @@ -126,7 +128,7 @@ BROTLI_BOOL BrotliFindAllStaticDictionaryMatches( continue; } s = &data[l]; - /* Transforms "" + kIdentity + <suffix> */ + /* Transforms "" + BROTLI_TRANSFORM_IDENTITY + <suffix> */ if (s[0] == ' ') { AddMatch(id + n, l + 1, l, matches); if (s[1] == 'a') { @@ -273,12 +275,13 @@ BROTLI_BOOL BrotliFindAllStaticDictionaryMatches( } } } else { - /* Set is_all_caps=0 for kUppercaseFirst and - is_all_caps=1 otherwise (kUppercaseAll) transform. */ + /* Set is_all_caps=0 for BROTLI_TRANSFORM_UPPERCASE_FIRST and + is_all_caps=1 otherwise (BROTLI_TRANSFORM_UPPERCASE_ALL) + transform. */ const BROTLI_BOOL is_all_caps = - TO_BROTLI_BOOL(w.transform != kUppercaseFirst); + TO_BROTLI_BOOL(w.transform != BROTLI_TRANSFORM_UPPERCASE_FIRST); const uint8_t* s; - if (!IsMatch(dictionary, w, data, max_length)) { + if (!IsMatch(dictionary->words, w, data, max_length)) { continue; } /* Transform "" + kUppercase{First,All} + "" */ @@ -323,27 +326,29 @@ BROTLI_BOOL BrotliFindAllStaticDictionaryMatches( /* Transforms with prefixes " " and "." */ if (max_length >= 5 && (data[0] == ' ' || data[0] == '.')) { BROTLI_BOOL is_space = TO_BROTLI_BOOL(data[0] == ' '); - size_t offset = kStaticDictionaryBuckets[Hash(&data[1])]; + size_t offset = dictionary->buckets[Hash(&data[1])]; BROTLI_BOOL end = !offset; while (!end) { - DictWord w = kStaticDictionaryWords[offset++]; + DictWord w = dictionary->dict_words[offset++]; const size_t l = w.len & 0x1F; - const size_t n = (size_t)1 << dictionary->size_bits_by_length[l]; + const size_t n = (size_t)1 << dictionary->words->size_bits_by_length[l]; const size_t id = w.idx; end = !!(w.len & 0x80); w.len = (uint8_t)l; if (w.transform == 0) { const uint8_t* s; - if (!IsMatch(dictionary, w, &data[1], max_length - 1)) { + if (!IsMatch(dictionary->words, w, &data[1], max_length - 1)) { continue; } - /* Transforms " " + kIdentity + "" and "." + kIdentity + "" */ + /* Transforms " " + BROTLI_TRANSFORM_IDENTITY + "" and + "." + BROTLI_TRANSFORM_IDENTITY + "" */ AddMatch(id + (is_space ? 6 : 32) * n, l + 1, l, matches); has_found_match = BROTLI_TRUE; if (l + 2 >= max_length) { continue; } - /* Transforms " " + kIdentity + <suffix> and "." + kIdentity + <suffix> + /* Transforms " " + BROTLI_TRANSFORM_IDENTITY + <suffix> and + "." + BROTLI_TRANSFORM_IDENTITY + <suffix> */ s = &data[l + 1]; if (s[0] == ' ') { @@ -370,12 +375,13 @@ BROTLI_BOOL BrotliFindAllStaticDictionaryMatches( } } } else if (is_space) { - /* Set is_all_caps=0 for kUppercaseFirst and - is_all_caps=1 otherwise (kUppercaseAll) transform. */ + /* Set is_all_caps=0 for BROTLI_TRANSFORM_UPPERCASE_FIRST and + is_all_caps=1 otherwise (BROTLI_TRANSFORM_UPPERCASE_ALL) + transform. */ const BROTLI_BOOL is_all_caps = - TO_BROTLI_BOOL(w.transform != kUppercaseFirst); + TO_BROTLI_BOOL(w.transform != BROTLI_TRANSFORM_UPPERCASE_FIRST); const uint8_t* s; - if (!IsMatch(dictionary, w, &data[1], max_length - 1)) { + if (!IsMatch(dictionary->words, w, &data[1], max_length - 1)) { continue; } /* Transforms " " + kUppercase{First,All} + "" */ @@ -411,22 +417,22 @@ BROTLI_BOOL BrotliFindAllStaticDictionaryMatches( } } if (max_length >= 6) { - /* Transforms with prefixes "e ", "s ", ", " and "\xc2\xa0" */ + /* Transforms with prefixes "e ", "s ", ", " and "\xC2\xA0" */ if ((data[1] == ' ' && (data[0] == 'e' || data[0] == 's' || data[0] == ',')) || - (data[0] == 0xc2 && data[1] == 0xa0)) { - size_t offset = kStaticDictionaryBuckets[Hash(&data[2])]; + (data[0] == 0xC2 && data[1] == 0xA0)) { + size_t offset = dictionary->buckets[Hash(&data[2])]; BROTLI_BOOL end = !offset; while (!end) { - DictWord w = kStaticDictionaryWords[offset++]; + DictWord w = dictionary->dict_words[offset++]; const size_t l = w.len & 0x1F; - const size_t n = (size_t)1 << dictionary->size_bits_by_length[l]; + const size_t n = (size_t)1 << dictionary->words->size_bits_by_length[l]; const size_t id = w.idx; end = !!(w.len & 0x80); w.len = (uint8_t)l; if (w.transform == 0 && - IsMatch(dictionary, w, &data[2], max_length - 2)) { - if (data[0] == 0xc2) { + IsMatch(dictionary->words, w, &data[2], max_length - 2)) { + if (data[0] == 0xC2) { AddMatch(id + 102 * n, l + 2, l, matches); has_found_match = BROTLI_TRUE; } else if (l + 2 < max_length && data[l + 2] == ' ') { @@ -444,17 +450,17 @@ BROTLI_BOOL BrotliFindAllStaticDictionaryMatches( data[3] == 'e' && data[4] == ' ') || (data[0] == '.' && data[1] == 'c' && data[2] == 'o' && data[3] == 'm' && data[4] == '/')) { - size_t offset = kStaticDictionaryBuckets[Hash(&data[5])]; + size_t offset = dictionary->buckets[Hash(&data[5])]; BROTLI_BOOL end = !offset; while (!end) { - DictWord w = kStaticDictionaryWords[offset++]; + DictWord w = dictionary->dict_words[offset++]; const size_t l = w.len & 0x1F; - const size_t n = (size_t)1 << dictionary->size_bits_by_length[l]; + const size_t n = (size_t)1 << dictionary->words->size_bits_by_length[l]; const size_t id = w.idx; end = !!(w.len & 0x80); w.len = (uint8_t)l; if (w.transform == 0 && - IsMatch(dictionary, w, &data[5], max_length - 5)) { + IsMatch(dictionary->words, w, &data[5], max_length - 5)) { AddMatch(id + (data[0] == ' ' ? 41 : 72) * n, l + 5, l, matches); has_found_match = BROTLI_TRUE; if (l + 5 < max_length) { diff --git a/c/enc/static_dict.h b/c/enc/static_dict.h index 0b3f6b3..6b5d4eb 100644 --- a/c/enc/static_dict.h +++ b/c/enc/static_dict.h @@ -12,13 +12,14 @@ #include "../common/dictionary.h" #include "../common/platform.h" #include <brotli/types.h> +#include "./encoder_dict.h" #if defined(__cplusplus) || defined(c_plusplus) extern "C" { #endif #define BROTLI_MAX_STATIC_DICTIONARY_MATCH_LEN 37 -static const uint32_t kInvalidMatch = 0xfffffff; +static const uint32_t kInvalidMatch = 0xFFFFFFF; /* Matches data against static dictionary words, and for each length l, for which a match is found, updates matches[l] to be the minimum possible @@ -28,7 +29,7 @@ static const uint32_t kInvalidMatch = 0xfffffff; matches array is at least BROTLI_MAX_STATIC_DICTIONARY_MATCH_LEN + 1 long all elements are initialized to kInvalidMatch */ BROTLI_INTERNAL BROTLI_BOOL BrotliFindAllStaticDictionaryMatches( - const BrotliDictionary* dictionary, + const BrotliEncoderDictionary* dictionary, const uint8_t* data, size_t min_length, size_t max_length, uint32_t* matches); diff --git a/c/enc/static_dict_lut.h b/c/enc/static_dict_lut.h index ba94f76..e299cda 100644 --- a/c/enc/static_dict_lut.h +++ b/c/enc/static_dict_lut.h @@ -23,7 +23,7 @@ typedef struct DictWord { } DictWord; static const int kDictNumBits = 15; -static const uint32_t kDictHashMul32 = 0x1e35a7bd; +static const uint32_t kDictHashMul32 = 0x1E35A7BD; static const uint16_t kStaticDictionaryBuckets[32768] = { 1,0,0,0,0,0,0,0,0,3,6,0,0,0,0,0,20,0,0,0,21,0,22,0,0,0,0,0,0,0,0,23,0,0,25,0,29, diff --git a/c/enc/utf8_util.c b/c/enc/utf8_util.c index a334927..04a7805 100644 --- a/c/enc/utf8_util.c +++ b/c/enc/utf8_util.c @@ -25,37 +25,37 @@ static size_t BrotliParseAsUTF8( } /* 2-byte UTF8 */ if (size > 1u && - (input[0] & 0xe0) == 0xc0 && - (input[1] & 0xc0) == 0x80) { - *symbol = (((input[0] & 0x1f) << 6) | - (input[1] & 0x3f)); - if (*symbol > 0x7f) { + (input[0] & 0xE0) == 0xC0 && + (input[1] & 0xC0) == 0x80) { + *symbol = (((input[0] & 0x1F) << 6) | + (input[1] & 0x3F)); + if (*symbol > 0x7F) { return 2; } } /* 3-byte UFT8 */ if (size > 2u && - (input[0] & 0xf0) == 0xe0 && - (input[1] & 0xc0) == 0x80 && - (input[2] & 0xc0) == 0x80) { - *symbol = (((input[0] & 0x0f) << 12) | - ((input[1] & 0x3f) << 6) | - (input[2] & 0x3f)); - if (*symbol > 0x7ff) { + (input[0] & 0xF0) == 0xE0 && + (input[1] & 0xC0) == 0x80 && + (input[2] & 0xC0) == 0x80) { + *symbol = (((input[0] & 0x0F) << 12) | + ((input[1] & 0x3F) << 6) | + (input[2] & 0x3F)); + if (*symbol > 0x7FF) { return 3; } } /* 4-byte UFT8 */ if (size > 3u && - (input[0] & 0xf8) == 0xf0 && - (input[1] & 0xc0) == 0x80 && - (input[2] & 0xc0) == 0x80 && - (input[3] & 0xc0) == 0x80) { + (input[0] & 0xF8) == 0xF0 && + (input[1] & 0xC0) == 0x80 && + (input[2] & 0xC0) == 0x80 && + (input[3] & 0xC0) == 0x80) { *symbol = (((input[0] & 0x07) << 18) | - ((input[1] & 0x3f) << 12) | - ((input[2] & 0x3f) << 6) | - (input[3] & 0x3f)); - if (*symbol > 0xffff && *symbol <= 0x10ffff) { + ((input[1] & 0x3F) << 12) | + ((input[2] & 0x3F) << 6) | + (input[3] & 0x3F)); + if (*symbol > 0xFFFF && *symbol <= 0x10FFFF) { return 4; } } diff --git a/c/enc/write_bits.h b/c/enc/write_bits.h index efa66f8..7733d92 100644 --- a/c/enc/write_bits.h +++ b/c/enc/write_bits.h @@ -35,25 +35,27 @@ extern "C" { and locate the rest in BYTE+1, BYTE+2, etc. */ static BROTLI_INLINE void BrotliWriteBits(size_t n_bits, uint64_t bits, - size_t * BROTLI_RESTRICT pos, - uint8_t * BROTLI_RESTRICT array) { + size_t* BROTLI_RESTRICT pos, + uint8_t* BROTLI_RESTRICT array) { #ifdef BROTLI_LITTLE_ENDIAN /* This branch of the code can write up to 56 bits at a time, 7 bits are lost by being perhaps already in *p and at least 1 bit is needed to initialize the bit-stream ahead (i.e. if 7 bits are in *p and we write 57 bits, then the next write will access a byte that was never initialized). */ - uint8_t *p = &array[*pos >> 3]; + uint8_t* p = &array[*pos >> 3]; uint64_t v = *p; - BROTLI_LOG(("WriteBits %2d 0x%016llx %10d\n", n_bits, bits, *pos)); + BROTLI_LOG(("WriteBits %2d 0x%08x%08x %10d\n", (int)n_bits, + (uint32_t)(bits >> 32), (uint32_t)(bits & 0xFFFFFFFF), + (int)*pos)); BROTLI_DCHECK((bits >> n_bits) == 0); BROTLI_DCHECK(n_bits <= 56); v |= bits << (*pos & 7); BROTLI_UNALIGNED_STORE64LE(p, v); /* Set some bits. */ *pos += n_bits; #else - /* implicit & 0xff is assumed for uint8_t arithmetics */ - uint8_t *array_pos = &array[*pos >> 3]; + /* implicit & 0xFF is assumed for uint8_t arithmetics */ + uint8_t* array_pos = &array[*pos >> 3]; const size_t bits_reserved_in_first_byte = (*pos & 7); size_t bits_left_to_write; bits <<= bits_reserved_in_first_byte; @@ -70,8 +72,8 @@ static BROTLI_INLINE void BrotliWriteBits(size_t n_bits, } static BROTLI_INLINE void BrotliWriteBitsPrepareStorage( - size_t pos, uint8_t *array) { - BROTLI_LOG(("WriteBitsPrepareStorage %10d\n", pos)); + size_t pos, uint8_t* array) { + BROTLI_LOG(("WriteBitsPrepareStorage %10d\n", (int)pos)); BROTLI_DCHECK((pos & 7) == 0); array[pos >> 3] = 0; } diff --git a/c/include/brotli/decode.h b/c/include/brotli/decode.h index 1acf605..61a4326 100644 --- a/c/include/brotli/decode.h +++ b/c/include/brotli/decode.h @@ -34,11 +34,11 @@ typedef struct BrotliDecoderStateStruct BrotliDecoderState; typedef enum { /** Decoding error, e.g. corrupted input or memory allocation problem. */ BROTLI_DECODER_RESULT_ERROR = 0, - /** Decoding successfully completed */ + /** Decoding successfully completed. */ BROTLI_DECODER_RESULT_SUCCESS = 1, - /** Partially done; should be called again with more input */ + /** Partially done; should be called again with more input. */ BROTLI_DECODER_RESULT_NEEDS_MORE_INPUT = 2, - /** Partially done; should be called again with more output */ + /** Partially done; should be called again with more output. */ BROTLI_DECODER_RESULT_NEEDS_MORE_OUTPUT = 3 } BrotliDecoderResult; @@ -83,8 +83,9 @@ typedef enum { BROTLI_ERROR_CODE(_ERROR_FORMAT_, WINDOW_BITS, -13) SEPARATOR \ BROTLI_ERROR_CODE(_ERROR_FORMAT_, PADDING_1, -14) SEPARATOR \ BROTLI_ERROR_CODE(_ERROR_FORMAT_, PADDING_2, -15) SEPARATOR \ + BROTLI_ERROR_CODE(_ERROR_FORMAT_, DISTANCE, -16) SEPARATOR \ \ - /* -16..-17 codes are reserved */ \ + /* -17 code is reserved */ \ \ BROTLI_ERROR_CODE(_ERROR_, COMPOUND_DICTIONARY, -18) SEPARATOR \ BROTLI_ERROR_CODE(_ERROR_, DICTIONARY_NOT_SET, -19) SEPARATOR \ @@ -135,7 +136,11 @@ typedef enum BrotliDecoderParameter { * Ring buffer is allocated according to window size, despite the real size of * the content. */ - BROTLI_DECODER_PARAM_DISABLE_RING_BUFFER_REALLOCATION = 0 + BROTLI_DECODER_PARAM_DISABLE_RING_BUFFER_REALLOCATION = 0, + /** + * Flag that determines if "Large Window Brotli" is used. + */ + BROTLI_DECODER_PARAM_LARGE_WINDOW = 1 } BrotliDecoderParameter; /** diff --git a/c/include/brotli/encode.h b/c/include/brotli/encode.h index 1fa85cc..0b5c8c7 100644 --- a/c/include/brotli/encode.h +++ b/c/include/brotli/encode.h @@ -27,6 +27,11 @@ extern "C" { * @note equal to @c BROTLI_MAX_DISTANCE_BITS constant. */ #define BROTLI_MAX_WINDOW_BITS 24 +/** + * Maximal value for ::BROTLI_PARAM_LGWIN parameter + * in "Large Window Brotli" (32-bit). + */ +#define BROTLI_LARGE_MAX_WINDOW_BITS 30 /** Minimal value for ::BROTLI_PARAM_LGBLOCK parameter. */ #define BROTLI_MIN_INPUT_BLOCK_BITS 16 /** Maximal value for ::BROTLI_PARAM_LGBLOCK parameter. */ @@ -176,7 +181,11 @@ typedef enum BrotliEncoderParameter { * * The default value is 0, which means that the total input size is unknown. */ - BROTLI_PARAM_SIZE_HINT = 5 + BROTLI_PARAM_SIZE_HINT = 5, + /** + * Flag that determines if "Large Window Brotli" is used. + */ + BROTLI_PARAM_LARGE_WINDOW = 6 } BrotliEncoderParameter; /** diff --git a/c/tools/brotli.c b/c/tools/brotli.c index 497ae65..2abfc27 100644 --- a/c/tools/brotli.c +++ b/c/tools/brotli.c @@ -111,8 +111,15 @@ typedef struct { uint8_t* output; const char* current_input_path; const char* current_output_path; + int64_t input_file_length; /* -1, if impossible to calculate */ FILE* fin; FILE* fout; + + /* I/O buffers */ + size_t available_in; + const uint8_t* next_in; + size_t available_out; + uint8_t* next_out; } Context; /* Parse up to 5 decimal digits. */ @@ -185,8 +192,8 @@ static Command ParseParams(Context* params) { /* Too many options. The expected longest option list is: "-q 0 -w 10 -o f -D d -S b -d -f -k -n -v --", i.e. 16 items in total. - This check is an additinal guard that is never triggered, but provides an - additional guard for future changes. */ + This check is an additional guard that is never triggered, but provides + a guard for future changes. */ if (next_option_index > (MAX_OPTIONS - 2)) { return COMMAND_INVALID; } @@ -414,8 +421,8 @@ static void PrintHelp(const char* name) { " -t, --test test compressed file integrity\n" " -v, --verbose verbose mode\n"); fprintf(stdout, -" -w NUM, --lgwin=NUM set LZ77 window size (0, %d-%d) (default:%d)\n", - BROTLI_MIN_WINDOW_BITS, BROTLI_MAX_WINDOW_BITS, DEFAULT_LGWIN); +" -w NUM, --lgwin=NUM set LZ77 window size (0, %d-%d)\n", + BROTLI_MIN_WINDOW_BITS, BROTLI_MAX_WINDOW_BITS); fprintf(stdout, " window size = 2**NUM - 16\n" " 0 lets compressor choose the optimal value\n"); @@ -473,6 +480,23 @@ static BROTLI_BOOL OpenOutputFile(const char* output_path, FILE** f, return BROTLI_TRUE; } +static int64_t FileSize(const char* path) { + FILE* f = fopen(path, "rb"); + int64_t retval; + if (f == NULL) { + return -1; + } + if (fseek(f, 0L, SEEK_END) != 0) { + fclose(f); + return -1; + } + retval = ftell(f); + if (fclose(f) != 0) { + return -1; + } + return retval; +} + /* Copy file times and permissions. TODO: this is a "best effort" implementation; honest cross-platform fully featured implementation is way too hacky; add more hacks by request. */ @@ -513,6 +537,8 @@ static BROTLI_BOOL NextFile(Context* context) { /* Iterator points to last used arg; increment to search for the next one. */ context->iterator++; + context->input_file_length = -1; + /* No input path; read from console. */ if (context->input_count == 0) { if (context->iterator > 1) return BROTLI_FALSE; @@ -542,6 +568,7 @@ static BROTLI_BOOL NextFile(Context* context) { } context->current_input_path = arg; + context->input_file_length = FileSize(arg); context->current_output_path = context->output_path; if (context->output_path) return BROTLI_TRUE; @@ -626,44 +653,73 @@ static BROTLI_BOOL CloseFiles(Context* context, BROTLI_BOOL success) { static const size_t kFileBufferSize = 1 << 16; +static void InitializeBuffers(Context* context) { + context->available_in = 0; + context->next_in = NULL; + context->available_out = kFileBufferSize; + context->next_out = context->output; +} + +static BROTLI_BOOL HasMoreInput(Context* context) { + return feof(context->fin) ? BROTLI_FALSE : BROTLI_TRUE; +} + +static BROTLI_BOOL ProvideInput(Context* context) { + context->available_in = + fread(context->input, 1, kFileBufferSize, context->fin); + context->next_in = context->input; + if (ferror(context->fin)) { + fprintf(stderr, "failed to read input [%s]: %s\n", + PrintablePath(context->current_input_path), strerror(errno)); + return BROTLI_FALSE; + } + return BROTLI_TRUE; +} + +/* Internal: should be used only in Provide-/Flush-Output. */ +static BROTLI_BOOL WriteOutput(Context* context) { + size_t out_size = (size_t)(context->next_out - context->output); + if (out_size == 0) return BROTLI_TRUE; + if (context->test_integrity) return BROTLI_TRUE; + + fwrite(context->output, 1, out_size, context->fout); + if (ferror(context->fout)) { + fprintf(stderr, "failed to write output [%s]: %s\n", + PrintablePath(context->current_output_path), strerror(errno)); + return BROTLI_FALSE; + } + return BROTLI_TRUE; +} + +static BROTLI_BOOL ProvideOutput(Context* context) { + if (!WriteOutput(context)) return BROTLI_FALSE; + context->available_out = kFileBufferSize; + context->next_out = context->output; + return BROTLI_TRUE; +} + +static BROTLI_BOOL FlushOutput(Context* context) { + if (!WriteOutput(context)) return BROTLI_FALSE; + context->available_out = 0; + return BROTLI_TRUE; +} + static BROTLI_BOOL DecompressFile(Context* context, BrotliDecoderState* s) { - size_t available_in = 0; - const uint8_t* next_in = NULL; - size_t available_out = kFileBufferSize; - uint8_t* next_out = context->output; BrotliDecoderResult result = BROTLI_DECODER_RESULT_NEEDS_MORE_INPUT; + InitializeBuffers(context); for (;;) { - if (next_out != context->output) { - if (!context->test_integrity) { - size_t out_size = (size_t)(next_out - context->output); - fwrite(context->output, 1, out_size, context->fout); - if (ferror(context->fout)) { - fprintf(stderr, "failed to write output [%s]: %s\n", - PrintablePath(context->current_output_path), strerror(errno)); - return BROTLI_FALSE; - } - } - available_out = kFileBufferSize; - next_out = context->output; - } - if (result == BROTLI_DECODER_RESULT_NEEDS_MORE_INPUT) { - if (feof(context->fin)) { + if (!HasMoreInput(context)) { fprintf(stderr, "corrupt input [%s]\n", PrintablePath(context->current_input_path)); return BROTLI_FALSE; } - available_in = fread(context->input, 1, kFileBufferSize, context->fin); - next_in = context->input; - if (ferror(context->fin)) { - fprintf(stderr, "failed to read input [%s]: %s\n", - PrintablePath(context->current_input_path), strerror(errno)); - return BROTLI_FALSE; - } + if (!ProvideInput(context)) return BROTLI_FALSE; } else if (result == BROTLI_DECODER_RESULT_NEEDS_MORE_OUTPUT) { - /* Nothing to do - output is already written. */ + if (!ProvideOutput(context)) return BROTLI_FALSE; } else if (result == BROTLI_DECODER_RESULT_SUCCESS) { - if (available_in != 0 || !feof(context->fin)) { + if (!FlushOutput(context)) return BROTLI_FALSE; + if (context->available_in != 0 || HasMoreInput(context)) { fprintf(stderr, "corrupt input [%s]\n", PrintablePath(context->current_input_path)); return BROTLI_FALSE; @@ -675,8 +731,8 @@ static BROTLI_BOOL DecompressFile(Context* context, BrotliDecoderState* s) { return BROTLI_FALSE; } - result = BrotliDecoderDecompressStream( - s, &available_in, &next_in, &available_out, &next_out, 0); + result = BrotliDecoderDecompressStream(s, &context->available_in, + &context->next_in, &context->available_out, &context->next_out, 0); } } @@ -703,46 +759,31 @@ static BROTLI_BOOL DecompressFiles(Context* context) { } static BROTLI_BOOL CompressFile(Context* context, BrotliEncoderState* s) { - size_t available_in = 0; - const uint8_t* next_in = NULL; - size_t available_out = kFileBufferSize; - uint8_t* next_out = context->output; BROTLI_BOOL is_eof = BROTLI_FALSE; - + InitializeBuffers(context); for (;;) { - if (available_in == 0 && !is_eof) { - available_in = fread(context->input, 1, kFileBufferSize, context->fin); - next_in = context->input; - if (ferror(context->fin)) { - fprintf(stderr, "failed to read input [%s]: %s\n", - PrintablePath(context->current_input_path), strerror(errno)); - return BROTLI_FALSE; - } - is_eof = feof(context->fin) ? BROTLI_TRUE : BROTLI_FALSE; + if (context->available_in == 0 && !is_eof) { + if (!ProvideInput(context)) return BROTLI_FALSE; + is_eof = !HasMoreInput(context); } if (!BrotliEncoderCompressStream(s, is_eof ? BROTLI_OPERATION_FINISH : BROTLI_OPERATION_PROCESS, - &available_in, &next_in, &available_out, &next_out, NULL)) { + &context->available_in, &context->next_in, + &context->available_out, &context->next_out, NULL)) { /* Should detect OOM? */ fprintf(stderr, "failed to compress data [%s]\n", PrintablePath(context->current_input_path)); return BROTLI_FALSE; } - if (available_out != kFileBufferSize) { - size_t out_size = kFileBufferSize - available_out; - fwrite(context->output, 1, out_size, context->fout); - if (ferror(context->fout)) { - fprintf(stderr, "failed to write output [%s]: %s\n", - PrintablePath(context->current_output_path), strerror(errno)); - return BROTLI_FALSE; - } - available_out = kFileBufferSize; - next_out = context->output; + if (context->available_out == 0) { + if (!ProvideOutput(context)) return BROTLI_FALSE; } - if (BrotliEncoderIsFinished(s)) return BROTLI_TRUE; + if (BrotliEncoderIsFinished(s)) { + return FlushOutput(context); + } } } @@ -756,8 +797,30 @@ static BROTLI_BOOL CompressFiles(Context* context) { } BrotliEncoderSetParameter(s, BROTLI_PARAM_QUALITY, (uint32_t)context->quality); - BrotliEncoderSetParameter(s, - BROTLI_PARAM_LGWIN, (uint32_t)context->lgwin); + if (context->lgwin > 0) { + /* Specified by user. */ + BrotliEncoderSetParameter(s, + BROTLI_PARAM_LGWIN, (uint32_t)context->lgwin); + } else { + /* 0, or not specified by user; could be chosen by compressor. */ + uint32_t lgwin = DEFAULT_LGWIN; + /* Use file size to limit lgwin. */ + if (context->input_file_length >= 0) { + int32_t size = 1 << BROTLI_MIN_WINDOW_BITS; + lgwin = BROTLI_MIN_WINDOW_BITS; + while (size < context->input_file_length) { + size <<= 1; + lgwin++; + if (lgwin == BROTLI_MAX_WINDOW_BITS) break; + } + } + BrotliEncoderSetParameter(s, BROTLI_PARAM_LGWIN, lgwin); + } + if (context->input_file_length > 0) { + uint32_t size_hint = context->input_file_length < (1 << 30) ? + (uint32_t)context->input_file_length : (1u << 30); + BrotliEncoderSetParameter(s, BROTLI_PARAM_SIZE_HINT, size_hint); + } is_ok = OpenFiles(context); if (is_ok && !context->current_output_path && !context->force_overwrite && isatty(STDOUT_FILENO)) { @@ -779,7 +842,7 @@ int main(int argc, char** argv) { int i; context.quality = 11; - context.lgwin = DEFAULT_LGWIN; + context.lgwin = -1; context.force_overwrite = BROTLI_FALSE; context.junk_source = BROTLI_FALSE; context.copy_stat = BROTLI_TRUE; |