diff options
Diffstat (limited to 'c/enc')
44 files changed, 739 insertions, 715 deletions
diff --git a/c/enc/backward_references.c b/c/enc/backward_references.c index cce0cd4..62ecea7 100644 --- a/c/enc/backward_references.c +++ b/c/enc/backward_references.c @@ -102,23 +102,16 @@ static BROTLI_INLINE size_t ComputeDistanceCode(size_t distance, #undef CAT #undef EXPAND_CAT -void BrotliCreateBackwardReferences(const BrotliDictionary* dictionary, - size_t num_bytes, - size_t position, - const uint8_t* ringbuffer, - size_t ringbuffer_mask, - const BrotliEncoderParams* params, - HasherHandle hasher, - int* dist_cache, - size_t* last_insert_len, - Command* commands, - size_t* num_commands, - size_t* num_literals) { +void BrotliCreateBackwardReferences( + size_t num_bytes, size_t position, const uint8_t* ringbuffer, + size_t ringbuffer_mask, const BrotliEncoderParams* params, + HasherHandle hasher, int* dist_cache, size_t* last_insert_len, + Command* commands, size_t* num_commands, size_t* num_literals) { switch (params->hasher.type) { #define CASE_(N) \ case N: \ - CreateBackwardReferencesNH ## N(dictionary, \ - kStaticDictionaryHash, num_bytes, position, ringbuffer, \ + CreateBackwardReferencesNH ## N( \ + num_bytes, position, ringbuffer, \ ringbuffer_mask, params, hasher, dist_cache, \ last_insert_len, commands, num_commands, num_literals); \ return; diff --git a/c/enc/backward_references.h b/c/enc/backward_references.h index 631c2f6..3a41466 100644 --- a/c/enc/backward_references.h +++ b/c/enc/backward_references.h @@ -26,7 +26,6 @@ extern "C" { CreateBackwardReferences calls, and must be incremented by the amount written by this call. */ BROTLI_INTERNAL void BrotliCreateBackwardReferences( - const BrotliDictionary* dictionary, size_t num_bytes, size_t position, const uint8_t* ringbuffer, size_t ringbuffer_mask, const BrotliEncoderParams* params, HasherHandle hasher, int* dist_cache, size_t* last_insert_len, diff --git a/c/enc/backward_references_hq.c b/c/enc/backward_references_hq.c index b05cb4f..f2f9918 100644 --- a/c/enc/backward_references_hq.c +++ b/c/enc/backward_references_hq.c @@ -25,6 +25,10 @@ extern "C" { #endif +#define BROTLI_SIMPLE_DISTANCE_ALPHABET_SIZE ( \ + BROTLI_NUM_DISTANCE_SHORT_CODES + (2 * BROTLI_LARGE_MAX_DISTANCE_BITS)) +/* BROTLI_SIMPLE_DISTANCE_ALPHABET_SIZE == 74 */ + static const float kInfinity = 1.7e38f; /* ~= 2 ^ 127 */ static const uint32_t kDistanceCacheIndex[] = { @@ -39,40 +43,40 @@ void BrotliInitZopfliNodes(ZopfliNode* array, size_t length) { size_t i; stub.length = 1; stub.distance = 0; - stub.insert_length = 0; + stub.dcode_insert_length = 0; stub.u.cost = kInfinity; for (i = 0; i < length; ++i) array[i] = stub; } static BROTLI_INLINE uint32_t ZopfliNodeCopyLength(const ZopfliNode* self) { - return self->length & 0xffffff; + return self->length & 0x1FFFFFF; } static BROTLI_INLINE uint32_t ZopfliNodeLengthCode(const ZopfliNode* self) { - const uint32_t modifier = self->length >> 24; + const uint32_t modifier = self->length >> 25; return ZopfliNodeCopyLength(self) + 9u - modifier; } static BROTLI_INLINE uint32_t ZopfliNodeCopyDistance(const ZopfliNode* self) { - return self->distance & 0x7ffffff; + return self->distance; } static BROTLI_INLINE uint32_t ZopfliNodeDistanceCode(const ZopfliNode* self) { - const uint32_t short_code = self->distance >> 27; + const uint32_t short_code = self->dcode_insert_length >> 27; return short_code == 0 ? ZopfliNodeCopyDistance(self) + BROTLI_NUM_DISTANCE_SHORT_CODES - 1 : short_code - 1; } static BROTLI_INLINE uint32_t ZopfliNodeCommandLength(const ZopfliNode* self) { - return ZopfliNodeCopyLength(self) + self->insert_length; + return ZopfliNodeCopyLength(self) + (self->dcode_insert_length & 0x7FFFFFF); } /* Histogram based cost model for zopflification. */ typedef struct ZopfliCostModel { /* The insert and copy length symbols. */ float cost_cmd_[BROTLI_NUM_COMMAND_SYMBOLS]; - float cost_dist_[BROTLI_NUM_DISTANCE_SYMBOLS]; + float cost_dist_[BROTLI_SIMPLE_DISTANCE_ALPHABET_SIZE]; /* Cumulative costs of literals per position in the stream. */ float* literal_costs_; float min_cost_cmd_; @@ -91,17 +95,26 @@ static void CleanupZopfliCostModel(MemoryManager* m, ZopfliCostModel* self) { } static void SetCost(const uint32_t* histogram, size_t histogram_size, - float* cost) { + BROTLI_BOOL literal_histogram, float* cost) { size_t sum = 0; + size_t missing_symbol_sum; float log2sum; + float missing_symbol_cost; size_t i; for (i = 0; i < histogram_size; i++) { sum += histogram[i]; } log2sum = (float)FastLog2(sum); + missing_symbol_sum = sum; + if (!literal_histogram) { + for (i = 0; i < histogram_size; i++) { + if (histogram[i] == 0) missing_symbol_sum++; + } + } + missing_symbol_cost = (float)FastLog2(missing_symbol_sum) + 2; for (i = 0; i < histogram_size; i++) { if (histogram[i] == 0) { - cost[i] = log2sum + 2; + cost[i] = missing_symbol_cost; continue; } @@ -122,7 +135,7 @@ static void ZopfliCostModelSetFromCommands(ZopfliCostModel* self, size_t last_insert_len) { uint32_t histogram_literal[BROTLI_NUM_LITERAL_SYMBOLS]; uint32_t histogram_cmd[BROTLI_NUM_COMMAND_SYMBOLS]; - uint32_t histogram_dist[BROTLI_NUM_DISTANCE_SYMBOLS]; + uint32_t histogram_dist[BROTLI_SIMPLE_DISTANCE_ALPHABET_SIZE]; float cost_literal[BROTLI_NUM_LITERAL_SYMBOLS]; size_t pos = position - last_insert_len; float min_cost_cmd = kInfinity; @@ -136,7 +149,7 @@ static void ZopfliCostModelSetFromCommands(ZopfliCostModel* self, for (i = 0; i < num_commands; i++) { size_t inslength = commands[i].insert_len_; size_t copylength = CommandCopyLen(&commands[i]); - size_t distcode = commands[i].dist_prefix_; + size_t distcode = commands[i].dist_prefix_ & 0x3FF; size_t cmdcode = commands[i].cmd_prefix_; size_t j; @@ -150,9 +163,12 @@ static void ZopfliCostModelSetFromCommands(ZopfliCostModel* self, pos += inslength + copylength; } - SetCost(histogram_literal, BROTLI_NUM_LITERAL_SYMBOLS, cost_literal); - SetCost(histogram_cmd, BROTLI_NUM_COMMAND_SYMBOLS, cost_cmd); - SetCost(histogram_dist, BROTLI_NUM_DISTANCE_SYMBOLS, self->cost_dist_); + SetCost(histogram_literal, BROTLI_NUM_LITERAL_SYMBOLS, BROTLI_TRUE, + cost_literal); + SetCost(histogram_cmd, BROTLI_NUM_COMMAND_SYMBOLS, BROTLI_FALSE, + cost_cmd); + SetCost(histogram_dist, BROTLI_SIMPLE_DISTANCE_ALPHABET_SIZE, BROTLI_FALSE, + self->cost_dist_); for (i = 0; i < BROTLI_NUM_COMMAND_SYMBOLS; ++i) { min_cost_cmd = BROTLI_MIN(float, min_cost_cmd, cost_cmd[i]); @@ -161,11 +177,14 @@ static void ZopfliCostModelSetFromCommands(ZopfliCostModel* self, { float* literal_costs = self->literal_costs_; + float literal_carry = 0.0; size_t num_bytes = self->num_bytes_; literal_costs[0] = 0.0; for (i = 0; i < num_bytes; ++i) { - literal_costs[i + 1] = literal_costs[i] + + literal_carry += cost_literal[ringbuffer[(position + i) & ringbuffer_mask]]; + literal_costs[i + 1] = literal_costs[i] + literal_carry; + literal_carry -= literal_costs[i + 1] - literal_costs[i]; } } } @@ -175,6 +194,7 @@ static void ZopfliCostModelSetFromLiteralCosts(ZopfliCostModel* self, const uint8_t* ringbuffer, size_t ringbuffer_mask) { float* literal_costs = self->literal_costs_; + float literal_carry = 0.0; float* cost_dist = self->cost_dist_; float* cost_cmd = self->cost_cmd_; size_t num_bytes = self->num_bytes_; @@ -183,12 +203,14 @@ static void ZopfliCostModelSetFromLiteralCosts(ZopfliCostModel* self, ringbuffer, &literal_costs[1]); literal_costs[0] = 0.0; for (i = 0; i < num_bytes; ++i) { - literal_costs[i + 1] += literal_costs[i]; + literal_carry += literal_costs[i + 1]; + literal_costs[i + 1] = literal_costs[i] + literal_carry; + literal_carry -= literal_costs[i + 1] - literal_costs[i]; } for (i = 0; i < BROTLI_NUM_COMMAND_SYMBOLS; ++i) { cost_cmd[i] = (float)FastLog2(11 + (uint32_t)i); } - for (i = 0; i < BROTLI_NUM_DISTANCE_SYMBOLS; ++i) { + for (i = 0; i < BROTLI_SIMPLE_DISTANCE_ALPHABET_SIZE; ++i) { cost_dist[i] = (float)FastLog2(20 + (uint32_t)i); } self->min_cost_cmd_ = (float)FastLog2(11); @@ -221,9 +243,10 @@ static BROTLI_INLINE void UpdateZopfliNode(ZopfliNode* nodes, size_t pos, size_t start_pos, size_t len, size_t len_code, size_t dist, size_t short_code, float cost) { ZopfliNode* next = &nodes[pos + len]; - next->length = (uint32_t)(len | ((len + 9u - len_code) << 24)); - next->distance = (uint32_t)(dist | (short_code << 27)); - next->insert_length = (uint32_t)(pos - start_pos); + next->length = (uint32_t)(len | ((len + 9u - len_code) << 25)); + next->distance = (uint32_t)dist; + next->dcode_insert_length = (uint32_t)( + (short_code << 27) | (pos - start_pos)); next->u.cost = cost; } @@ -303,7 +326,7 @@ static uint32_t ComputeDistanceShortcut(const size_t block_start, const size_t gap, const ZopfliNode* nodes) { const size_t clen = ZopfliNodeCopyLength(&nodes[pos]); - const size_t ilen = nodes[pos].insert_length; + const size_t ilen = nodes[pos].dcode_insert_length & 0x7FFFFFF; const size_t dist = ZopfliNodeCopyDistance(&nodes[pos]); /* Since |block_start + pos| is the end position of the command, the copy part starts from |block_start + pos - clen|. Distances that are greater than @@ -335,7 +358,7 @@ static void ComputeDistanceCache(const size_t pos, int idx = 0; size_t p = nodes[pos].u.shortcut; while (idx < 4 && p > 0) { - const size_t ilen = nodes[p].insert_length; + const size_t ilen = nodes[p].dcode_insert_length & 0x7FFFFFF; const size_t clen = ZopfliNodeCopyLength(&nodes[p]); const size_t dist = ZopfliNodeCopyDistance(&nodes[p]); dist_cache[idx++] = (int)dist; @@ -483,9 +506,9 @@ static size_t UpdateNodes( float dist_cost; size_t max_match_len; PrefixEncodeCopyDistance(dist_code, 0, 0, &dist_symbol, &distextra); - distnumextra = distextra >> 24; + distnumextra = dist_symbol >> 10; dist_cost = base_cost + (float)distnumextra + - ZopfliCostModelGetDistanceCost(model, dist_symbol); + ZopfliCostModelGetDistanceCost(model, dist_symbol & 0x3FF); /* Try all copy lengths up until the maximum copy length corresponding to this distance. If the distance refers to the static dictionary, or @@ -517,7 +540,8 @@ static size_t ComputeShortestPathFromNodes(size_t num_bytes, ZopfliNode* nodes) { size_t index = num_bytes; size_t num_commands = 0; - while (nodes[index].insert_length == 0 && nodes[index].length == 1) --index; + while ((nodes[index].dcode_insert_length & 0x7FFFFFF) == 0 && + nodes[index].length == 1) --index; nodes[index].u.next = BROTLI_UINT32_MAX; while (index != 0) { size_t len = ZopfliNodeCommandLength(&nodes[index]); @@ -546,7 +570,7 @@ void BrotliZopfliCreateCommands(const size_t num_bytes, for (i = 0; offset != BROTLI_UINT32_MAX; i++) { const ZopfliNode* next = &nodes[pos + offset]; size_t copy_length = ZopfliNodeCopyLength(next); - size_t insert_length = next->insert_length; + size_t insert_length = next->dcode_insert_length & 0x7FFFFFF; pos += insert_length; offset = next->u.next; if (i == 0) { @@ -624,7 +648,6 @@ static size_t ZopfliIterate(size_t num_bytes, /* REQUIRES: nodes != NULL and len(nodes) >= num_bytes + 1 */ size_t BrotliZopfliComputeShortestPath(MemoryManager* m, - const BrotliDictionary* dictionary, size_t num_bytes, size_t position, const uint8_t* ringbuffer, size_t ringbuffer_mask, const BrotliEncoderParams* params, const size_t max_backward_limit, const int* dist_cache, HasherHandle hasher, @@ -649,9 +672,9 @@ size_t BrotliZopfliComputeShortestPath(MemoryManager* m, const size_t pos = position + i; const size_t max_distance = BROTLI_MIN(size_t, pos, max_backward_limit); size_t skip; - size_t num_matches = FindAllMatchesH10(hasher, dictionary, ringbuffer, - ringbuffer_mask, pos, num_bytes - i, max_distance, gap, params, - &matches[lz_matches_offset]); + size_t num_matches = FindAllMatchesH10(hasher, ¶ms->dictionary, + ringbuffer, ringbuffer_mask, pos, num_bytes - i, max_distance, gap, + params, &matches[lz_matches_offset]); if (num_matches > 0 && BackwardMatchLength(&matches[num_matches - 1]) > max_zopfli_len) { matches[0] = matches[num_matches - 1]; @@ -683,7 +706,6 @@ size_t BrotliZopfliComputeShortestPath(MemoryManager* m, } void BrotliCreateZopfliBackwardReferences(MemoryManager* m, - const BrotliDictionary* dictionary, size_t num_bytes, size_t position, const uint8_t* ringbuffer, size_t ringbuffer_mask, const BrotliEncoderParams* params, HasherHandle hasher, int* dist_cache, size_t* last_insert_len, @@ -693,7 +715,7 @@ void BrotliCreateZopfliBackwardReferences(MemoryManager* m, nodes = BROTLI_ALLOC(m, ZopfliNode, num_bytes + 1); if (BROTLI_IS_OOM(m)) return; BrotliInitZopfliNodes(nodes, num_bytes + 1); - *num_commands += BrotliZopfliComputeShortestPath(m, dictionary, + *num_commands += BrotliZopfliComputeShortestPath(m, num_bytes, position, ringbuffer, ringbuffer_mask, params, max_backward_limit, dist_cache, hasher, nodes); if (BROTLI_IS_OOM(m)) return; @@ -703,7 +725,6 @@ void BrotliCreateZopfliBackwardReferences(MemoryManager* m, } void BrotliCreateHqZopfliBackwardReferences(MemoryManager* m, - const BrotliDictionary* dictionary, size_t num_bytes, size_t position, const uint8_t* ringbuffer, size_t ringbuffer_mask, const BrotliEncoderParams* params, HasherHandle hasher, int* dist_cache, size_t* last_insert_len, @@ -736,8 +757,8 @@ void BrotliCreateHqZopfliBackwardReferences(MemoryManager* m, BROTLI_ENSURE_CAPACITY(m, BackwardMatch, matches, matches_size, cur_match_pos + MAX_NUM_MATCHES_H10 + shadow_matches); if (BROTLI_IS_OOM(m)) return; - num_found_matches = FindAllMatchesH10(hasher, dictionary, - ringbuffer, ringbuffer_mask, pos, max_length, + num_found_matches = FindAllMatchesH10(hasher, + ¶ms->dictionary, ringbuffer, ringbuffer_mask, pos, max_length, max_distance, gap, params, &matches[cur_match_pos + shadow_matches]); cur_match_end = cur_match_pos + num_found_matches; for (j = cur_match_pos; j + 1 < cur_match_end; ++j) { diff --git a/c/enc/backward_references_hq.h b/c/enc/backward_references_hq.h index cc19544..7c38bd6 100644 --- a/c/enc/backward_references_hq.h +++ b/c/enc/backward_references_hq.h @@ -23,29 +23,26 @@ extern "C" { #endif BROTLI_INTERNAL void BrotliCreateZopfliBackwardReferences(MemoryManager* m, - const BrotliDictionary* dictionary, size_t num_bytes, size_t position, const uint8_t* ringbuffer, size_t ringbuffer_mask, const BrotliEncoderParams* params, HasherHandle hasher, int* dist_cache, size_t* last_insert_len, Command* commands, size_t* num_commands, size_t* num_literals); BROTLI_INTERNAL void BrotliCreateHqZopfliBackwardReferences(MemoryManager* m, - const BrotliDictionary* dictionary, size_t num_bytes, size_t position, const uint8_t* ringbuffer, size_t ringbuffer_mask, const BrotliEncoderParams* params, HasherHandle hasher, int* dist_cache, size_t* last_insert_len, Command* commands, size_t* num_commands, size_t* num_literals); typedef struct ZopfliNode { - /* best length to get up to this byte (not including this byte itself) - highest 8 bit is used to reconstruct the length code */ + /* Best length to get up to this byte (not including this byte itself) + highest 7 bit is used to reconstruct the length code. */ uint32_t length; - /* distance associated with the length; highest 5 bits contain distance - short code + 1 (or zero if no short code); this way only distances shorter - than 128MiB are allowed here */ + /* Distance associated with the length. */ uint32_t distance; - /* number of literal inserts before this copy */ - uint32_t insert_length; + /* Number of literal inserts before this copy; highest 5 bits contain + distance short code + 1 (or zero if no short code). */ + uint32_t dcode_insert_length; /* This union holds information used by dynamic-programming. During forward pass |cost| it used to store the goal function. When node is processed its @@ -78,7 +75,6 @@ BROTLI_INTERNAL void BrotliInitZopfliNodes(ZopfliNode* array, size_t length); (2) nodes[i].command_length() <= i and (3) nodes[i - nodes[i].command_length()].cost < kInfinity */ BROTLI_INTERNAL size_t BrotliZopfliComputeShortestPath(MemoryManager* m, - const BrotliDictionary* dictionary, size_t num_bytes, size_t position, const uint8_t* ringbuffer, size_t ringbuffer_mask, const BrotliEncoderParams* params, const size_t max_backward_limit, const int* dist_cache, HasherHandle hasher, diff --git a/c/enc/backward_references_inc.h b/c/enc/backward_references_inc.h index 0a715b2..967545d 100644 --- a/c/enc/backward_references_inc.h +++ b/c/enc/backward_references_inc.h @@ -8,8 +8,6 @@ /* template parameters: EXPORT_FN, FN */ static BROTLI_NOINLINE void EXPORT_FN(CreateBackwardReferences)( - const BrotliDictionary* dictionary, - const uint16_t* dictionary_hash, size_t num_bytes, size_t position, const uint8_t* ringbuffer, size_t ringbuffer_mask, const BrotliEncoderParams* params, HasherHandle hasher, int* dist_cache, @@ -43,9 +41,10 @@ static BROTLI_NOINLINE void EXPORT_FN(CreateBackwardReferences)( sr.len_code_delta = 0; sr.distance = 0; sr.score = kMinScore; - FN(FindLongestMatch)(hasher, dictionary, dictionary_hash, ringbuffer, - ringbuffer_mask, dist_cache, position, - max_length, max_distance, gap, &sr); + FN(FindLongestMatch)(hasher, ¶ms->dictionary, + ringbuffer, ringbuffer_mask, dist_cache, position, + max_length, max_distance, gap, + params->dist.max_distance, &sr); if (sr.score > kMinScore) { /* Found a match. Let's look for something even better ahead. */ int delayed_backward_references_in_row = 0; @@ -59,9 +58,9 @@ static BROTLI_NOINLINE void EXPORT_FN(CreateBackwardReferences)( sr2.distance = 0; sr2.score = kMinScore; max_distance = BROTLI_MIN(size_t, position + 1, max_backward_limit); - FN(FindLongestMatch)(hasher, dictionary, dictionary_hash, + FN(FindLongestMatch)(hasher, ¶ms->dictionary, ringbuffer, ringbuffer_mask, dist_cache, position + 1, max_length, - max_distance, gap, &sr2); + max_distance, gap, params->dist.max_distance, &sr2); if (sr2.score >= sr.score + cost_diff_lazy) { /* Ok, let's just write one byte for now and start a match from the next byte. */ diff --git a/c/enc/bit_cost.h b/c/enc/bit_cost.h index e8b7013..6586469 100644 --- a/c/enc/bit_cost.h +++ b/c/enc/bit_cost.h @@ -18,11 +18,11 @@ extern "C" { #endif -static BROTLI_INLINE double ShannonEntropy(const uint32_t *population, - size_t size, size_t *total) { +static BROTLI_INLINE double ShannonEntropy( + const uint32_t* population, size_t size, size_t* total) { size_t sum = 0; double retval = 0; - const uint32_t *population_end = population + size; + const uint32_t* population_end = population + size; size_t p; if (size & 1) { goto odd_number_of_elements_left; @@ -42,7 +42,7 @@ static BROTLI_INLINE double ShannonEntropy(const uint32_t *population, } static BROTLI_INLINE double BitsEntropy( - const uint32_t *population, size_t size) { + const uint32_t* population, size_t size) { size_t sum; double retval = ShannonEntropy(population, size, &sum); if (retval < sum) { diff --git a/c/enc/block_encoder_inc.h b/c/enc/block_encoder_inc.h index 2a08f90..8cbd5ea 100644 --- a/c/enc/block_encoder_inc.h +++ b/c/enc/block_encoder_inc.h @@ -13,9 +13,9 @@ stream. */ static void FN(BuildAndStoreEntropyCodes)(MemoryManager* m, BlockEncoder* self, const HistogramType* histograms, const size_t histograms_size, - HuffmanTree* tree, size_t* storage_ix, uint8_t* storage) { - const size_t alphabet_size = self->alphabet_size_; - const size_t table_size = histograms_size * alphabet_size; + const size_t alphabet_size, HuffmanTree* tree, + size_t* storage_ix, uint8_t* storage) { + const size_t table_size = histograms_size * self->histogram_length_; self->depths_ = BROTLI_ALLOC(m, uint8_t, table_size); self->bits_ = BROTLI_ALLOC(m, uint16_t, table_size); if (BROTLI_IS_OOM(m)) return; @@ -23,9 +23,10 @@ static void FN(BuildAndStoreEntropyCodes)(MemoryManager* m, BlockEncoder* self, { size_t i; for (i = 0; i < histograms_size; ++i) { - size_t ix = i * alphabet_size; - BuildAndStoreHuffmanTree(&histograms[i].data_[0], alphabet_size, tree, - &self->depths_[ix], &self->bits_[ix], storage_ix, storage); + size_t ix = i * self->histogram_length_; + BuildAndStoreHuffmanTree(&histograms[i].data_[0], self->histogram_length_, + alphabet_size, tree, &self->depths_[ix], &self->bits_[ix], + storage_ix, storage); } } } diff --git a/c/enc/block_splitter.c b/c/enc/block_splitter.c index 6362211..d308eca 100644 --- a/c/enc/block_splitter.c +++ b/c/enc/block_splitter.c @@ -174,7 +174,7 @@ void BrotliSplitBlock(MemoryManager* m, for (i = 0; i < num_commands; ++i) { const Command* cmd = &cmds[i]; if (CommandCopyLen(cmd) && cmd->cmd_prefix_ >= 128) { - distance_prefixes[j++] = cmd->dist_prefix_; + distance_prefixes[j++] = cmd->dist_prefix_ & 0x3FF; } } /* Create the block split on the array of distance prefixes. */ diff --git a/c/enc/block_splitter_inc.h b/c/enc/block_splitter_inc.h index 5712572..023712b 100644 --- a/c/enc/block_splitter_inc.h +++ b/c/enc/block_splitter_inc.h @@ -70,7 +70,7 @@ static size_t FN(FindBlocks)(const DataType* data, const size_t length, double* insert_cost, double* cost, uint8_t* switch_signal, - uint8_t *block_id) { + uint8_t* block_id) { const size_t data_size = FN(HistogramDataSize)(); const size_t bitmaplen = (num_histograms + 7) >> 3; size_t num_blocks = 1; diff --git a/c/enc/brotli_bit_stream.c b/c/enc/brotli_bit_stream.c index cd9c594..aaf2dad 100644 --- a/c/enc/brotli_bit_stream.c +++ b/c/enc/brotli_bit_stream.c @@ -13,12 +13,13 @@ #include <string.h> /* memcpy, memset */ #include "../common/constants.h" +#include "../common/context.h" #include "../common/platform.h" #include <brotli/types.h> -#include "./context.h" #include "./entropy_encode.h" #include "./entropy_encode_static.h" #include "./fast_log.h" +#include "./histogram.h" #include "./memory.h" #include "./write_bits.h" @@ -27,12 +28,11 @@ extern "C" { #endif #define MAX_HUFFMAN_TREE_SIZE (2 * BROTLI_NUM_COMMAND_SYMBOLS + 1) -/* The size of Huffman dictionary for distances assuming that NPOSTFIX = 0 and - NDIRECT = 0. */ -#define SIMPLE_DISTANCE_ALPHABET_SIZE (BROTLI_NUM_DISTANCE_SHORT_CODES + \ - (2 * BROTLI_MAX_DISTANCE_BITS)) -/* SIMPLE_DISTANCE_ALPHABET_SIZE == 64 */ -#define SIMPLE_DISTANCE_ALPHABET_BITS 6 +/* The maximum size of Huffman dictionary for distances assuming that + NPOSTFIX = 0 and NDIRECT = 0. */ +#define MAX_SIMPLE_DISTANCE_ALPHABET_SIZE \ + BROTLI_DISTANCE_ALPHABET_SIZE(0, 0, BROTLI_LARGE_MAX_DISTANCE_BITS) +/* MAX_SIMPLE_DISTANCE_ALPHABET_SIZE == 140 */ /* Represents the range of values belonging to a prefix code: [offset, offset + 2^nbits) */ @@ -258,7 +258,7 @@ static void StoreSimpleHuffmanTree(const uint8_t* depths, size_t symbols[4], size_t num_symbols, size_t max_bits, - size_t *storage_ix, uint8_t *storage) { + size_t* storage_ix, uint8_t* storage) { /* value of 1 indicates a simple Huffman code */ BrotliWriteBits(2, 1, storage_ix, storage); BrotliWriteBits(2, num_symbols - 1, storage_ix, storage); /* NSYM - 1 */ @@ -297,7 +297,7 @@ static void StoreSimpleHuffmanTree(const uint8_t* depths, depths = symbol depths */ void BrotliStoreHuffmanTree(const uint8_t* depths, size_t num, HuffmanTree* tree, - size_t *storage_ix, uint8_t *storage) { + size_t* storage_ix, uint8_t* storage) { /* Write the Huffman tree into the brotli-representation. The command alphabet is the largest, so this allocation will fit all alphabets. */ @@ -360,8 +360,9 @@ void BrotliStoreHuffmanTree(const uint8_t* depths, size_t num, /* Builds a Huffman tree from histogram[0:length] into depth[0:length] and bits[0:length] and stores the encoded tree to the bit stream. */ -static void BuildAndStoreHuffmanTree(const uint32_t *histogram, - const size_t length, +static void BuildAndStoreHuffmanTree(const uint32_t* histogram, + const size_t histogram_length, + const size_t alphabet_size, HuffmanTree* tree, uint8_t* depth, uint16_t* bits, @@ -371,7 +372,7 @@ static void BuildAndStoreHuffmanTree(const uint32_t *histogram, size_t s4[4] = { 0 }; size_t i; size_t max_bits = 0; - for (i = 0; i < length; i++) { + for (i = 0; i < histogram_length; i++) { if (histogram[i]) { if (count < 4) { s4[count] = i; @@ -383,7 +384,7 @@ static void BuildAndStoreHuffmanTree(const uint32_t *histogram, } { - size_t max_bits_counter = length - 1; + size_t max_bits_counter = alphabet_size - 1; while (max_bits_counter) { max_bits_counter >>= 1; ++max_bits; @@ -398,14 +399,14 @@ static void BuildAndStoreHuffmanTree(const uint32_t *histogram, return; } - memset(depth, 0, length * sizeof(depth[0])); - BrotliCreateHuffmanTree(histogram, length, 15, tree, depth); - BrotliConvertBitDepthsToSymbols(depth, length, bits); + memset(depth, 0, histogram_length * sizeof(depth[0])); + BrotliCreateHuffmanTree(histogram, histogram_length, 15, tree, depth); + BrotliConvertBitDepthsToSymbols(depth, histogram_length, bits); if (count <= 4) { StoreSimpleHuffmanTree(depth, s4, count, max_bits, storage_ix, storage); } else { - BrotliStoreHuffmanTree(depth, length, tree, storage_ix, storage); + BrotliStoreHuffmanTree(depth, histogram_length, tree, storage_ix, storage); } } @@ -729,6 +730,7 @@ static void EncodeContextMap(MemoryManager* m, } } BuildAndStoreHuffmanTree(histogram, num_clusters + max_run_length_prefix, + num_clusters + max_run_length_prefix, tree, depths, bits, storage_ix, storage); for (i = 0; i < num_rle_symbols; ++i) { const uint32_t rle_symbol = rle_symbols[i] & kSymbolMask; @@ -788,10 +790,11 @@ static void BuildAndStoreBlockSplitCode(const uint8_t* types, } StoreVarLenUint8(num_types - 1, storage_ix, storage); if (num_types > 1) { /* TODO: else? could StoreBlockSwitch occur? */ - BuildAndStoreHuffmanTree(&type_histo[0], num_types + 2, tree, + BuildAndStoreHuffmanTree(&type_histo[0], num_types + 2, num_types + 2, tree, &code->type_depths[0], &code->type_bits[0], storage_ix, storage); BuildAndStoreHuffmanTree(&length_histo[0], BROTLI_NUM_BLOCK_LEN_SYMBOLS, + BROTLI_NUM_BLOCK_LEN_SYMBOLS, tree, &code->length_depths[0], &code->length_bits[0], storage_ix, storage); StoreBlockSwitch(code, lengths[0], types[0], 1, storage_ix, storage); @@ -822,8 +825,8 @@ static void StoreTrivialContextMap(size_t num_types, for (i = context_bits; i < alphabet_size; ++i) { histogram[i] = 1; } - BuildAndStoreHuffmanTree(histogram, alphabet_size, tree, - depths, bits, storage_ix, storage); + BuildAndStoreHuffmanTree(histogram, alphabet_size, alphabet_size, + tree, depths, bits, storage_ix, storage); for (i = 0; i < num_types; ++i) { size_t code = (i == 0 ? 0 : i + context_bits - 1); BrotliWriteBits(depths[code], bits[code], storage_ix, storage); @@ -838,7 +841,7 @@ static void StoreTrivialContextMap(size_t num_types, /* Manages the encoding of one block category (literal, command or distance). */ typedef struct BlockEncoder { - size_t alphabet_size_; + size_t histogram_length_; size_t num_block_types_; const uint8_t* block_types_; /* Not owned. */ const uint32_t* block_lengths_; /* Not owned. */ @@ -851,10 +854,10 @@ typedef struct BlockEncoder { uint16_t* bits_; } BlockEncoder; -static void InitBlockEncoder(BlockEncoder* self, size_t alphabet_size, +static void InitBlockEncoder(BlockEncoder* self, size_t histogram_length, size_t num_block_types, const uint8_t* block_types, const uint32_t* block_lengths, const size_t num_blocks) { - self->alphabet_size_ = alphabet_size; + self->histogram_length_ = histogram_length; self->num_block_types_ = num_block_types; self->block_types_ = block_types; self->block_lengths_ = block_lengths; @@ -890,7 +893,7 @@ static void StoreSymbol(BlockEncoder* self, size_t symbol, size_t* storage_ix, uint32_t block_len = self->block_lengths_[block_ix]; uint8_t block_type = self->block_types_[block_ix]; self->block_len_ = block_len; - self->entropy_ix_ = block_type * self->alphabet_size_; + self->entropy_ix_ = block_type * self->histogram_length_; StoreBlockSwitch(&self->block_split_code_, block_len, block_type, 0, storage_ix, storage); } @@ -919,7 +922,7 @@ static void StoreSymbolWithContext(BlockEncoder* self, size_t symbol, --self->block_len_; { size_t histo_ix = context_map[self->entropy_ix_ + context]; - size_t ix = histo_ix * self->alphabet_size_ + symbol; + size_t ix = histo_ix * self->histogram_length_ + symbol; BrotliWriteBits(self->depths_[ix], self->bits_[ix], storage_ix, storage); } } @@ -945,42 +948,38 @@ static void JumpToByteBoundary(size_t* storage_ix, uint8_t* storage) { } void BrotliStoreMetaBlock(MemoryManager* m, - const uint8_t* input, - size_t start_pos, - size_t length, - size_t mask, - uint8_t prev_byte, - uint8_t prev_byte2, - BROTLI_BOOL is_last, - uint32_t num_direct_distance_codes, - uint32_t distance_postfix_bits, - ContextType literal_context_mode, - const Command *commands, - size_t n_commands, - const MetaBlockSplit* mb, - size_t *storage_ix, - uint8_t *storage) { + const uint8_t* input, size_t start_pos, size_t length, size_t mask, + uint8_t prev_byte, uint8_t prev_byte2, BROTLI_BOOL is_last, + const BrotliEncoderParams* params, ContextType literal_context_mode, + const Command* commands, size_t n_commands, const MetaBlockSplit* mb, + size_t* storage_ix, uint8_t* storage) { + size_t pos = start_pos; size_t i; - size_t num_distance_codes = - BROTLI_NUM_DISTANCE_SHORT_CODES + num_direct_distance_codes + - (48u << distance_postfix_bits); + uint32_t num_distance_symbols = params->dist.alphabet_size; + uint32_t num_effective_distance_symbols = num_distance_symbols; HuffmanTree* tree; + ContextLut literal_context_lut = BROTLI_CONTEXT_LUT(literal_context_mode); BlockEncoder literal_enc; BlockEncoder command_enc; BlockEncoder distance_enc; + const BrotliDistanceParams* dist = ¶ms->dist; + if (params->large_window && + num_effective_distance_symbols > BROTLI_NUM_HISTOGRAM_DISTANCE_SYMBOLS) { + num_effective_distance_symbols = BROTLI_NUM_HISTOGRAM_DISTANCE_SYMBOLS; + } StoreCompressedMetaBlockHeader(is_last, length, storage_ix, storage); tree = BROTLI_ALLOC(m, HuffmanTree, MAX_HUFFMAN_TREE_SIZE); if (BROTLI_IS_OOM(m)) return; - InitBlockEncoder(&literal_enc, 256, mb->literal_split.num_types, - mb->literal_split.types, mb->literal_split.lengths, - mb->literal_split.num_blocks); + InitBlockEncoder(&literal_enc, BROTLI_NUM_LITERAL_SYMBOLS, + mb->literal_split.num_types, mb->literal_split.types, + mb->literal_split.lengths, mb->literal_split.num_blocks); InitBlockEncoder(&command_enc, BROTLI_NUM_COMMAND_SYMBOLS, mb->command_split.num_types, mb->command_split.types, mb->command_split.lengths, mb->command_split.num_blocks); - InitBlockEncoder(&distance_enc, num_distance_codes, + InitBlockEncoder(&distance_enc, num_effective_distance_symbols, mb->distance_split.num_types, mb->distance_split.types, mb->distance_split.lengths, mb->distance_split.num_blocks); @@ -989,9 +988,10 @@ void BrotliStoreMetaBlock(MemoryManager* m, BuildAndStoreBlockSwitchEntropyCodes( &distance_enc, tree, storage_ix, storage); - BrotliWriteBits(2, distance_postfix_bits, storage_ix, storage); - BrotliWriteBits(4, num_direct_distance_codes >> distance_postfix_bits, - storage_ix, storage); + BrotliWriteBits(2, dist->distance_postfix_bits, storage_ix, storage); + BrotliWriteBits( + 4, dist->num_direct_distance_codes >> dist->distance_postfix_bits, + storage_ix, storage); for (i = 0; i < mb->literal_split.num_types; ++i) { BrotliWriteBits(2, literal_context_mode, storage_ix, storage); } @@ -1017,13 +1017,16 @@ void BrotliStoreMetaBlock(MemoryManager* m, } BuildAndStoreEntropyCodesLiteral(m, &literal_enc, mb->literal_histograms, - mb->literal_histograms_size, tree, storage_ix, storage); + mb->literal_histograms_size, BROTLI_NUM_LITERAL_SYMBOLS, tree, + storage_ix, storage); if (BROTLI_IS_OOM(m)) return; BuildAndStoreEntropyCodesCommand(m, &command_enc, mb->command_histograms, - mb->command_histograms_size, tree, storage_ix, storage); + mb->command_histograms_size, BROTLI_NUM_COMMAND_SYMBOLS, tree, + storage_ix, storage); if (BROTLI_IS_OOM(m)) return; BuildAndStoreEntropyCodesDistance(m, &distance_enc, mb->distance_histograms, - mb->distance_histograms_size, tree, storage_ix, storage); + mb->distance_histograms_size, num_distance_symbols, tree, + storage_ix, storage); if (BROTLI_IS_OOM(m)) return; BROTLI_FREE(m, tree); @@ -1041,7 +1044,8 @@ void BrotliStoreMetaBlock(MemoryManager* m, } else { size_t j; for (j = cmd.insert_len_; j != 0; --j) { - size_t context = Context(prev_byte, prev_byte2, literal_context_mode); + size_t context = + BROTLI_CONTEXT(prev_byte, prev_byte2, literal_context_lut); uint8_t literal = input[pos & mask]; StoreSymbolWithContext(&literal_enc, literal, context, mb->literal_context_map, storage_ix, storage, @@ -1056,9 +1060,9 @@ void BrotliStoreMetaBlock(MemoryManager* m, prev_byte2 = input[(pos - 2) & mask]; prev_byte = input[(pos - 1) & mask]; if (cmd.cmd_prefix_ >= 128) { - size_t dist_code = cmd.dist_prefix_; - uint32_t distnumextra = cmd.dist_extra_ >> 24; - uint64_t distextra = cmd.dist_extra_ & 0xffffff; + size_t dist_code = cmd.dist_prefix_ & 0x3FF; + uint32_t distnumextra = cmd.dist_prefix_ >> 10; + uint64_t distextra = cmd.dist_extra_; if (mb->distance_context_map_size == 0) { StoreSymbol(&distance_enc, dist_code, storage_ix, storage); } else { @@ -1082,7 +1086,7 @@ void BrotliStoreMetaBlock(MemoryManager* m, static void BuildHistograms(const uint8_t* input, size_t start_pos, size_t mask, - const Command *commands, + const Command* commands, size_t n_commands, HistogramLiteral* lit_histo, HistogramCommand* cmd_histo, @@ -1099,7 +1103,7 @@ static void BuildHistograms(const uint8_t* input, } pos += CommandCopyLen(&cmd); if (CommandCopyLen(&cmd) && cmd.cmd_prefix_ >= 128) { - HistogramAddDistance(dist_histo, cmd.dist_prefix_); + HistogramAddDistance(dist_histo, cmd.dist_prefix_ & 0x3FF); } } } @@ -1107,7 +1111,7 @@ static void BuildHistograms(const uint8_t* input, static void StoreDataWithHuffmanCodes(const uint8_t* input, size_t start_pos, size_t mask, - const Command *commands, + const Command* commands, size_t n_commands, const uint8_t* lit_depth, const uint16_t* lit_bits, @@ -1134,9 +1138,9 @@ static void StoreDataWithHuffmanCodes(const uint8_t* input, } pos += CommandCopyLen(&cmd); if (CommandCopyLen(&cmd) && cmd.cmd_prefix_ >= 128) { - const size_t dist_code = cmd.dist_prefix_; - const uint32_t distnumextra = cmd.dist_extra_ >> 24; - const uint32_t distextra = cmd.dist_extra_ & 0xffffff; + const size_t dist_code = cmd.dist_prefix_ & 0x3FF; + const uint32_t distnumextra = cmd.dist_prefix_ >> 10; + const uint32_t distextra = cmd.dist_extra_; BrotliWriteBits(dist_depth[dist_code], dist_bits[dist_code], storage_ix, storage); BrotliWriteBits(distnumextra, distextra, storage_ix, storage); @@ -1145,15 +1149,10 @@ static void StoreDataWithHuffmanCodes(const uint8_t* input, } void BrotliStoreMetaBlockTrivial(MemoryManager* m, - const uint8_t* input, - size_t start_pos, - size_t length, - size_t mask, - BROTLI_BOOL is_last, - const Command *commands, - size_t n_commands, - size_t *storage_ix, - uint8_t *storage) { + const uint8_t* input, size_t start_pos, size_t length, size_t mask, + BROTLI_BOOL is_last, const BrotliEncoderParams* params, + const Command* commands, size_t n_commands, + size_t* storage_ix, uint8_t* storage) { HistogramLiteral lit_histo; HistogramCommand cmd_histo; HistogramDistance dist_histo; @@ -1161,9 +1160,10 @@ void BrotliStoreMetaBlockTrivial(MemoryManager* m, uint16_t lit_bits[BROTLI_NUM_LITERAL_SYMBOLS]; uint8_t cmd_depth[BROTLI_NUM_COMMAND_SYMBOLS]; uint16_t cmd_bits[BROTLI_NUM_COMMAND_SYMBOLS]; - uint8_t dist_depth[SIMPLE_DISTANCE_ALPHABET_SIZE]; - uint16_t dist_bits[SIMPLE_DISTANCE_ALPHABET_SIZE]; + uint8_t dist_depth[MAX_SIMPLE_DISTANCE_ALPHABET_SIZE]; + uint16_t dist_bits[MAX_SIMPLE_DISTANCE_ALPHABET_SIZE]; HuffmanTree* tree; + uint32_t num_distance_symbols = params->dist.alphabet_size; StoreCompressedMetaBlockHeader(is_last, length, storage_ix, storage); @@ -1178,14 +1178,16 @@ void BrotliStoreMetaBlockTrivial(MemoryManager* m, tree = BROTLI_ALLOC(m, HuffmanTree, MAX_HUFFMAN_TREE_SIZE); if (BROTLI_IS_OOM(m)) return; - BuildAndStoreHuffmanTree(lit_histo.data_, BROTLI_NUM_LITERAL_SYMBOLS, tree, + BuildAndStoreHuffmanTree(lit_histo.data_, BROTLI_NUM_LITERAL_SYMBOLS, + BROTLI_NUM_LITERAL_SYMBOLS, tree, lit_depth, lit_bits, storage_ix, storage); - BuildAndStoreHuffmanTree(cmd_histo.data_, BROTLI_NUM_COMMAND_SYMBOLS, tree, + BuildAndStoreHuffmanTree(cmd_histo.data_, BROTLI_NUM_COMMAND_SYMBOLS, + BROTLI_NUM_COMMAND_SYMBOLS, tree, cmd_depth, cmd_bits, storage_ix, storage); - BuildAndStoreHuffmanTree(dist_histo.data_, SIMPLE_DISTANCE_ALPHABET_SIZE, - tree, + BuildAndStoreHuffmanTree(dist_histo.data_, MAX_SIMPLE_DISTANCE_ALPHABET_SIZE, + num_distance_symbols, tree, dist_depth, dist_bits, storage_ix, storage); BROTLI_FREE(m, tree); @@ -1200,15 +1202,14 @@ void BrotliStoreMetaBlockTrivial(MemoryManager* m, } void BrotliStoreMetaBlockFast(MemoryManager* m, - const uint8_t* input, - size_t start_pos, - size_t length, - size_t mask, - BROTLI_BOOL is_last, - const Command *commands, - size_t n_commands, - size_t *storage_ix, - uint8_t *storage) { + const uint8_t* input, size_t start_pos, size_t length, size_t mask, + BROTLI_BOOL is_last, const BrotliEncoderParams* params, + const Command* commands, size_t n_commands, + size_t* storage_ix, uint8_t* storage) { + uint32_t num_distance_symbols = params->dist.alphabet_size; + uint32_t distance_alphabet_bits = + Log2FloorNonZero(num_distance_symbols - 1) + 1; + StoreCompressedMetaBlockHeader(is_last, length, storage_ix, storage); BrotliWriteBits(13, 0, storage_ix, storage); @@ -1252,8 +1253,8 @@ void BrotliStoreMetaBlockFast(MemoryManager* m, uint16_t lit_bits[BROTLI_NUM_LITERAL_SYMBOLS]; uint8_t cmd_depth[BROTLI_NUM_COMMAND_SYMBOLS]; uint16_t cmd_bits[BROTLI_NUM_COMMAND_SYMBOLS]; - uint8_t dist_depth[SIMPLE_DISTANCE_ALPHABET_SIZE]; - uint16_t dist_bits[SIMPLE_DISTANCE_ALPHABET_SIZE]; + uint8_t dist_depth[MAX_SIMPLE_DISTANCE_ALPHABET_SIZE]; + uint16_t dist_bits[MAX_SIMPLE_DISTANCE_ALPHABET_SIZE]; HistogramClearLiteral(&lit_histo); HistogramClearCommand(&cmd_histo); HistogramClearDistance(&dist_histo); @@ -1274,7 +1275,7 @@ void BrotliStoreMetaBlockFast(MemoryManager* m, BrotliBuildAndStoreHuffmanTreeFast(m, dist_histo.data_, dist_histo.total_count_, /* max_bits = */ - SIMPLE_DISTANCE_ALPHABET_BITS, + distance_alphabet_bits, dist_depth, dist_bits, storage_ix, storage); if (BROTLI_IS_OOM(m)) return; @@ -1293,11 +1294,11 @@ void BrotliStoreMetaBlockFast(MemoryManager* m, /* This is for storing uncompressed blocks (simple raw storage of bytes-as-bytes). */ void BrotliStoreUncompressedMetaBlock(BROTLI_BOOL is_final_block, - const uint8_t * BROTLI_RESTRICT input, + const uint8_t* BROTLI_RESTRICT input, size_t position, size_t mask, size_t len, - size_t * BROTLI_RESTRICT storage_ix, - uint8_t * BROTLI_RESTRICT storage) { + size_t* BROTLI_RESTRICT storage_ix, + uint8_t* BROTLI_RESTRICT storage) { size_t masked_pos = position & mask; BrotliStoreUncompressedMetaBlockHeader(len, storage_ix, storage); JumpToByteBoundary(storage_ix, storage); diff --git a/c/enc/brotli_bit_stream.h b/c/enc/brotli_bit_stream.h index 1324b18..9089b1d 100644 --- a/c/enc/brotli_bit_stream.h +++ b/c/enc/brotli_bit_stream.h @@ -16,10 +16,10 @@ #ifndef BROTLI_ENC_BROTLI_BIT_STREAM_H_ #define BROTLI_ENC_BROTLI_BIT_STREAM_H_ +#include "../common/context.h" #include "../common/platform.h" #include <brotli/types.h> #include "./command.h" -#include "./context.h" #include "./entropy_encode.h" #include "./memory.h" #include "./metablock.h" @@ -32,7 +32,7 @@ extern "C" { position for the current storage. */ BROTLI_INTERNAL void BrotliStoreHuffmanTree(const uint8_t* depths, size_t num, - HuffmanTree* tree, size_t *storage_ix, uint8_t *storage); + HuffmanTree* tree, size_t* storage_ix, uint8_t* storage); BROTLI_INTERNAL void BrotliBuildAndStoreHuffmanTreeFast( MemoryManager* m, const uint32_t* histogram, const size_t histogram_total, @@ -42,51 +42,31 @@ BROTLI_INTERNAL void BrotliBuildAndStoreHuffmanTreeFast( /* REQUIRES: length > 0 */ /* REQUIRES: length <= (1 << 24) */ BROTLI_INTERNAL void BrotliStoreMetaBlock(MemoryManager* m, - const uint8_t* input, - size_t start_pos, - size_t length, - size_t mask, - uint8_t prev_byte, - uint8_t prev_byte2, - BROTLI_BOOL is_final_block, - uint32_t num_direct_distance_codes, - uint32_t distance_postfix_bits, - ContextType literal_context_mode, - const Command* commands, - size_t n_commands, - const MetaBlockSplit* mb, - size_t* storage_ix, - uint8_t* storage); + const uint8_t* input, size_t start_pos, size_t length, size_t mask, + uint8_t prev_byte, uint8_t prev_byte2, BROTLI_BOOL is_last, + const BrotliEncoderParams* params, ContextType literal_context_mode, + const Command* commands, size_t n_commands, const MetaBlockSplit* mb, + size_t* storage_ix, uint8_t* storage); /* Stores the meta-block without doing any block splitting, just collects one histogram per block category and uses that for entropy coding. REQUIRES: length > 0 REQUIRES: length <= (1 << 24) */ BROTLI_INTERNAL void BrotliStoreMetaBlockTrivial(MemoryManager* m, - const uint8_t* input, - size_t start_pos, - size_t length, - size_t mask, - BROTLI_BOOL is_last, - const Command *commands, - size_t n_commands, - size_t* storage_ix, - uint8_t* storage); + const uint8_t* input, size_t start_pos, size_t length, size_t mask, + BROTLI_BOOL is_last, const BrotliEncoderParams* params, + const Command* commands, size_t n_commands, + size_t* storage_ix, uint8_t* storage); /* Same as above, but uses static prefix codes for histograms with a only a few symbols, and uses static code length prefix codes for all other histograms. REQUIRES: length > 0 REQUIRES: length <= (1 << 24) */ BROTLI_INTERNAL void BrotliStoreMetaBlockFast(MemoryManager* m, - const uint8_t* input, - size_t start_pos, - size_t length, - size_t mask, - BROTLI_BOOL is_last, - const Command *commands, - size_t n_commands, - size_t* storage_ix, - uint8_t* storage); + const uint8_t* input, size_t start_pos, size_t length, size_t mask, + BROTLI_BOOL is_last, const BrotliEncoderParams* params, + const Command* commands, size_t n_commands, + size_t* storage_ix, uint8_t* storage); /* This is for storing uncompressed blocks (simple raw storage of bytes-as-bytes). diff --git a/c/enc/command.h b/c/enc/command.h index 3bf0cf7..0526815 100644 --- a/c/enc/command.h +++ b/c/enc/command.h @@ -105,10 +105,13 @@ static BROTLI_INLINE uint32_t GetCopyExtra(uint16_t copycode) { typedef struct Command { uint32_t insert_len_; - /* Stores copy_len in low 24 bits and copy_len XOR copy_code in high 8 bit. */ + /* Stores copy_len in low 25 bits and copy_code - copy_len in high 7 bit. */ uint32_t copy_len_; + /* Stores distance extra bits. */ uint32_t dist_extra_; uint16_t cmd_prefix_; + /* Stores distance code in low 10 bits + and number of extra bits in high 6 bits. */ uint16_t dist_prefix_; } Command; @@ -118,7 +121,7 @@ static BROTLI_INLINE void InitCommand(Command* self, size_t insertlen, /* Don't rely on signed int representation, use honest casts. */ uint32_t delta = (uint8_t)((int8_t)copylen_code_delta); self->insert_len_ = (uint32_t)insertlen; - self->copy_len_ = (uint32_t)(copylen | (delta << 24)); + self->copy_len_ = (uint32_t)(copylen | (delta << 25)); /* The distance prefix and extra bits are stored in this Command as if npostfix and ndirect were 0, they are only recomputed later after the clustering if needed. */ @@ -126,29 +129,29 @@ static BROTLI_INLINE void InitCommand(Command* self, size_t insertlen, distance_code, 0, 0, &self->dist_prefix_, &self->dist_extra_); GetLengthCode( insertlen, (size_t)((int)copylen + copylen_code_delta), - TO_BROTLI_BOOL(self->dist_prefix_ == 0), &self->cmd_prefix_); + TO_BROTLI_BOOL((self->dist_prefix_ & 0x3FF) == 0), &self->cmd_prefix_); } static BROTLI_INLINE void InitInsertCommand(Command* self, size_t insertlen) { self->insert_len_ = (uint32_t)insertlen; - self->copy_len_ = 4 << 24; + self->copy_len_ = 4 << 25; self->dist_extra_ = 0; self->dist_prefix_ = BROTLI_NUM_DISTANCE_SHORT_CODES; GetLengthCode(insertlen, 4, BROTLI_FALSE, &self->cmd_prefix_); } static BROTLI_INLINE uint32_t CommandRestoreDistanceCode(const Command* self) { - if (self->dist_prefix_ < BROTLI_NUM_DISTANCE_SHORT_CODES) { - return self->dist_prefix_; + if ((self->dist_prefix_ & 0x3FF) < BROTLI_NUM_DISTANCE_SHORT_CODES) { + return self->dist_prefix_ & 0x3FF; } else { - uint32_t nbits = self->dist_extra_ >> 24; - uint32_t extra = self->dist_extra_ & 0xffffff; + uint32_t nbits = self->dist_prefix_ >> 10; + uint32_t extra = self->dist_extra_; /* It is assumed that the distance was first encoded with NPOSTFIX = 0 and NDIRECT = 0, so the code itself is of this form: BROTLI_NUM_DISTANCE_SHORT_CODES + 2 * (nbits - 1) + prefix_bit Therefore, the following expression results in (2 + prefix_bit). */ - uint32_t prefix = - self->dist_prefix_ + 4u - BROTLI_NUM_DISTANCE_SHORT_CODES - 2u * nbits; + uint32_t prefix = (self->dist_prefix_ & 0x3FF) + 4u - + BROTLI_NUM_DISTANCE_SHORT_CODES - 2u * nbits; /* Subtract 4 for offset (Chapter 4.) and increase by BROTLI_NUM_DISTANCE_SHORT_CODES - 1 */ return (prefix << nbits) + extra + BROTLI_NUM_DISTANCE_SHORT_CODES - 4u; @@ -165,12 +168,13 @@ static BROTLI_INLINE uint32_t CommandDistanceContext(const Command* self) { } static BROTLI_INLINE uint32_t CommandCopyLen(const Command* self) { - return self->copy_len_ & 0xFFFFFF; + return self->copy_len_ & 0x1FFFFFF; } static BROTLI_INLINE uint32_t CommandCopyLenCode(const Command* self) { - int32_t delta = (int8_t)((uint8_t)(self->copy_len_ >> 24)); - return (uint32_t)((int32_t)(self->copy_len_ & 0xFFFFFF) + delta); + uint32_t modifier = self->copy_len_ >> 25; + int32_t delta = (int8_t)((uint8_t)(modifier | ((modifier & 0x40) << 1))); + return (uint32_t)((int32_t)(self->copy_len_ & 0x1FFFFFF) + delta); } #if defined(__cplusplus) || defined(c_plusplus) diff --git a/c/enc/compress_fragment.c b/c/enc/compress_fragment.c index 40dce3e..f75069b 100644 --- a/c/enc/compress_fragment.c +++ b/c/enc/compress_fragment.c @@ -38,7 +38,7 @@ extern "C" { * There is no effort to ensure that it is a prime, the oddity is enough for this use. * The number has been tuned heuristically against compression benchmarks. */ -static const uint32_t kHashMul32 = 0x1e35a7bd; +static const uint32_t kHashMul32 = 0x1E35A7BD; static BROTLI_INLINE uint32_t Hash(const uint8_t* p, size_t shift) { const uint64_t h = (BROTLI_UNALIGNED_LOAD64LE(p) << 24) * kHashMul32; @@ -343,7 +343,7 @@ static void BrotliStoreMetaBlockHeader( } static void UpdateBits(size_t n_bits, uint32_t bits, size_t pos, - uint8_t *array) { + uint8_t* array) { while (n_bits > 0) { size_t byte_pos = pos >> 3; size_t n_unchanged_bits = pos & 7; diff --git a/c/enc/compress_fragment_two_pass.c b/c/enc/compress_fragment_two_pass.c index 8259817..b8bd6e8 100644 --- a/c/enc/compress_fragment_two_pass.c +++ b/c/enc/compress_fragment_two_pass.c @@ -37,7 +37,7 @@ extern "C" { * There is no effort to ensure that it is a prime, the oddity is enough for this use. * The number has been tuned heuristically against compression benchmarks. */ -static const uint32_t kHashMul32 = 0x1e35a7bd; +static const uint32_t kHashMul32 = 0x1E35A7BD; static BROTLI_INLINE uint32_t Hash(const uint8_t* p, size_t shift) { const uint64_t h = (BROTLI_UNALIGNED_LOAD64LE(p) << 16) * kHashMul32; diff --git a/c/enc/context.h b/c/enc/context.h deleted file mode 100644 index caa4230..0000000 --- a/c/enc/context.h +++ /dev/null @@ -1,184 +0,0 @@ -/* Copyright 2013 Google Inc. All Rights Reserved. - - Distributed under MIT license. - See file LICENSE for detail or copy at https://opensource.org/licenses/MIT -*/ - -/* Functions to map previous bytes into a context id. */ - -#ifndef BROTLI_ENC_CONTEXT_H_ -#define BROTLI_ENC_CONTEXT_H_ - -#include "../common/platform.h" -#include <brotli/types.h> - -#if defined(__cplusplus) || defined(c_plusplus) -extern "C" { -#endif - -/* Second-order context lookup table for UTF8 byte streams. - - If p1 and p2 are the previous two bytes, we calculate the context as - - context = kUTF8ContextLookup[p1] | kUTF8ContextLookup[p2 + 256]. - - If the previous two bytes are ASCII characters (i.e. < 128), this will be - equivalent to - - context = 4 * context1(p1) + context2(p2), - - where context1 is based on the previous byte in the following way: - - 0 : non-ASCII control - 1 : \t, \n, \r - 2 : space - 3 : other punctuation - 4 : " ' - 5 : % - 6 : ( < [ { - 7 : ) > ] } - 8 : , ; : - 9 : . - 10 : = - 11 : number - 12 : upper-case vowel - 13 : upper-case consonant - 14 : lower-case vowel - 15 : lower-case consonant - - and context2 is based on the second last byte: - - 0 : control, space - 1 : punctuation - 2 : upper-case letter, number - 3 : lower-case letter - - If the last byte is ASCII, and the second last byte is not (in a valid UTF8 - stream it will be a continuation byte, value between 128 and 191), the - context is the same as if the second last byte was an ASCII control or space. - - If the last byte is a UTF8 lead byte (value >= 192), then the next byte will - be a continuation byte and the context id is 2 or 3 depending on the LSB of - the last byte and to a lesser extent on the second last byte if it is ASCII. - - If the last byte is a UTF8 continuation byte, the second last byte can be: - - continuation byte: the next byte is probably ASCII or lead byte (assuming - 4-byte UTF8 characters are rare) and the context id is 0 or 1. - - lead byte (192 - 207): next byte is ASCII or lead byte, context is 0 or 1 - - lead byte (208 - 255): next byte is continuation byte, context is 2 or 3 - - The possible value combinations of the previous two bytes, the range of - context ids and the type of the next byte is summarized in the table below: - - |--------\-----------------------------------------------------------------| - | \ Last byte | - | Second \---------------------------------------------------------------| - | last byte \ ASCII | cont. byte | lead byte | - | \ (0-127) | (128-191) | (192-) | - |=============|===================|=====================|==================| - | ASCII | next: ASCII/lead | not valid | next: cont. | - | (0-127) | context: 4 - 63 | | context: 2 - 3 | - |-------------|-------------------|---------------------|------------------| - | cont. byte | next: ASCII/lead | next: ASCII/lead | next: cont. | - | (128-191) | context: 4 - 63 | context: 0 - 1 | context: 2 - 3 | - |-------------|-------------------|---------------------|------------------| - | lead byte | not valid | next: ASCII/lead | not valid | - | (192-207) | | context: 0 - 1 | | - |-------------|-------------------|---------------------|------------------| - | lead byte | not valid | next: cont. | not valid | - | (208-) | | context: 2 - 3 | | - |-------------|-------------------|---------------------|------------------| -*/ -static const uint8_t kUTF8ContextLookup[512] = { - /* Last byte. */ - /* */ - /* ASCII range. */ - 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 0, 0, 4, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 8, 12, 16, 12, 12, 20, 12, 16, 24, 28, 12, 12, 32, 12, 36, 12, - 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 32, 32, 24, 40, 28, 12, - 12, 48, 52, 52, 52, 48, 52, 52, 52, 48, 52, 52, 52, 52, 52, 48, - 52, 52, 52, 52, 52, 48, 52, 52, 52, 52, 52, 24, 12, 28, 12, 12, - 12, 56, 60, 60, 60, 56, 60, 60, 60, 56, 60, 60, 60, 60, 60, 56, - 60, 60, 60, 60, 60, 56, 60, 60, 60, 60, 60, 24, 12, 28, 12, 0, - /* UTF8 continuation byte range. */ - 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, - 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, - 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, - 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, - /* UTF8 lead byte range. */ - 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, - 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, - 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, - 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, - /* Second last byte. */ - /* */ - /* ASCII range. */ - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, - 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, - 1, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, - 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 1, 1, 1, 1, 0, - /* UTF8 continuation byte range. */ - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - /* UTF8 lead byte range. */ - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, -}; - -/* Context lookup table for small signed integers. */ -static const uint8_t kSigned3BitContextLookup[] = { - 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, - 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, - 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, - 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7, -}; - -typedef enum ContextType { - CONTEXT_LSB6 = 0, - CONTEXT_MSB6 = 1, - CONTEXT_UTF8 = 2, - CONTEXT_SIGNED = 3 -} ContextType; - -static BROTLI_INLINE uint8_t Context(uint8_t p1, uint8_t p2, ContextType mode) { - switch (mode) { - case CONTEXT_LSB6: - return p1 & 0x3f; - case CONTEXT_MSB6: - return (uint8_t)(p1 >> 2); - case CONTEXT_UTF8: - return kUTF8ContextLookup[p1] | kUTF8ContextLookup[p2 + 256]; - case CONTEXT_SIGNED: - return (uint8_t)((kSigned3BitContextLookup[p1] << 3) + - kSigned3BitContextLookup[p2]); - default: - return 0; - } -} - -#if defined(__cplusplus) || defined(c_plusplus) -} /* extern "C" */ -#endif - -#endif /* BROTLI_ENC_CONTEXT_H_ */ diff --git a/c/enc/encode.c b/c/enc/encode.c index 794a409..4fd28d0 100644 --- a/c/enc/encode.c +++ b/c/enc/encode.c @@ -11,6 +11,8 @@ #include <stdlib.h> /* free, malloc */ #include <string.h> /* memcpy, memset */ +#include "../common/constants.h" +#include "../common/context.h" #include "../common/platform.h" #include "../common/version.h" #include "./backward_references.h" @@ -19,7 +21,7 @@ #include "./brotli_bit_stream.h" #include "./compress_fragment.h" #include "./compress_fragment_two_pass.h" -#include "./context.h" +#include "./encoder_dict.h" #include "./entropy_encode.h" #include "./fast_log.h" #include "./hash.h" @@ -69,8 +71,8 @@ typedef struct BrotliEncoderStateStruct { uint64_t last_processed_pos_; int dist_cache_[BROTLI_NUM_DISTANCE_SHORT_CODES]; int saved_dist_cache_[4]; - uint8_t last_byte_; - uint8_t last_byte_bits_; + uint16_t last_bytes_; + uint8_t last_bytes_bits_; uint8_t prev_byte_; uint8_t prev_byte2_; size_t storage_size_; @@ -161,6 +163,10 @@ BROTLI_BOOL BrotliEncoderSetParameter( state->params.size_hint = value; return BROTLI_TRUE; + case BROTLI_PARAM_LARGE_WINDOW: + state->params.large_window = TO_BROTLI_BOOL(!!value); + return BROTLI_TRUE; + default: return BROTLI_FALSE; } } @@ -251,20 +257,25 @@ static int* GetHashTable(BrotliEncoderState* s, int quality, return table; } -static void EncodeWindowBits(int lgwin, uint8_t* last_byte, - uint8_t* last_byte_bits) { - if (lgwin == 16) { - *last_byte = 0; - *last_byte_bits = 1; - } else if (lgwin == 17) { - *last_byte = 1; - *last_byte_bits = 7; - } else if (lgwin > 17) { - *last_byte = (uint8_t)(((lgwin - 17) << 1) | 1); - *last_byte_bits = 4; +static void EncodeWindowBits(int lgwin, BROTLI_BOOL large_window, + uint16_t* last_bytes, uint8_t* last_bytes_bits) { + if (large_window) { + *last_bytes = (uint16_t)(((lgwin & 0x3F) << 8) | 0x11); + *last_bytes_bits = 14; } else { - *last_byte = (uint8_t)(((lgwin - 8) << 4) | 1); - *last_byte_bits = 7; + if (lgwin == 16) { + *last_bytes = 0; + *last_bytes_bits = 1; + } else if (lgwin == 17) { + *last_bytes = 1; + *last_bytes_bits = 7; + } else if (lgwin > 17) { + *last_bytes = (uint16_t)(((lgwin - 17) << 1) | 0x01); + *last_bytes_bits = 4; + } else { + *last_bytes = (uint16_t)(((lgwin - 8) << 4) | 0x01); + *last_bytes_bits = 7; + } } } @@ -420,6 +431,7 @@ static BROTLI_BOOL ShouldUseComplexStaticContextMap(const uint8_t* input, double entropy[3]; size_t dummy; size_t i; + ContextLut utf8_lut = BROTLI_CONTEXT_LUT(CONTEXT_UTF8); for (; start_pos + 64 <= end_pos; start_pos += 4096) { const size_t stride_end_pos = start_pos + 64; uint8_t prev2 = input[start_pos & mask]; @@ -430,7 +442,7 @@ static BROTLI_BOOL ShouldUseComplexStaticContextMap(const uint8_t* input, for (pos = start_pos + 2; pos < stride_end_pos; ++pos) { const uint8_t literal = input[pos & mask]; const uint8_t context = (uint8_t)kStaticContextMapComplexUTF8[ - Context(prev1, prev2, CONTEXT_UTF8)]; + BROTLI_CONTEXT(prev1, prev2, utf8_lut)]; ++total; ++combined_histo[literal >> 3]; ++context_histo[context][literal >> 3]; @@ -519,12 +531,26 @@ static BROTLI_BOOL ShouldCompress( return BROTLI_TRUE; } +/* Chooses the literal context mode for a metablock */ +static ContextType ChooseContextMode(const BrotliEncoderParams* params, + const uint8_t* data, const size_t pos, const size_t mask, + const size_t length) { + /* We only do the computation for the option of something else than + CONTEXT_UTF8 for the highest qualities */ + if (params->quality >= MIN_QUALITY_FOR_HQ_BLOCK_SPLITTING && + !BrotliIsMostlyUTF8(data, pos, mask, length, kMinUTF8Ratio)) { + return CONTEXT_SIGNED; + } + return CONTEXT_UTF8; +} + static void WriteMetaBlockInternal(MemoryManager* m, const uint8_t* data, const size_t mask, const uint64_t last_flush_pos, const size_t bytes, const BROTLI_BOOL is_last, + ContextType literal_context_mode, const BrotliEncoderParams* params, const uint8_t prev_byte, const uint8_t prev_byte2, @@ -536,10 +562,9 @@ static void WriteMetaBlockInternal(MemoryManager* m, size_t* storage_ix, uint8_t* storage) { const uint32_t wrapped_last_flush_pos = WrapPosition(last_flush_pos); - uint8_t last_byte; - uint8_t last_byte_bits; - uint32_t num_direct_distance_codes = 0; - uint32_t distance_postfix_bits = 0; + uint16_t last_bytes; + uint8_t last_bytes_bits; + ContextLut literal_context_lut = BROTLI_CONTEXT_LUT(literal_context_mode); if (bytes == 0) { /* Write the ISLAST and ISEMPTY bits. */ @@ -559,31 +584,29 @@ static void WriteMetaBlockInternal(MemoryManager* m, return; } - last_byte = storage[0]; - last_byte_bits = (uint8_t)(*storage_ix & 0xff); - if (params->quality >= MIN_QUALITY_FOR_RECOMPUTE_DISTANCE_PREFIXES && - params->mode == BROTLI_MODE_FONT) { - num_direct_distance_codes = 12; - distance_postfix_bits = 1; + BROTLI_DCHECK(*storage_ix <= 14); + last_bytes = (uint16_t)((storage[1] << 8) | storage[0]); + last_bytes_bits = (uint8_t)(*storage_ix); + if (params->dist.num_direct_distance_codes != 0 || + params->dist.distance_postfix_bits != 0) { RecomputeDistancePrefixes(commands, num_commands, - num_direct_distance_codes, - distance_postfix_bits); + params->dist.num_direct_distance_codes, + params->dist.distance_postfix_bits); } if (params->quality <= MAX_QUALITY_FOR_STATIC_ENTROPY_CODES) { BrotliStoreMetaBlockFast(m, data, wrapped_last_flush_pos, - bytes, mask, is_last, + bytes, mask, is_last, params, commands, num_commands, storage_ix, storage); if (BROTLI_IS_OOM(m)) return; } else if (params->quality < MIN_QUALITY_FOR_BLOCK_SPLIT) { BrotliStoreMetaBlockTrivial(m, data, wrapped_last_flush_pos, - bytes, mask, is_last, + bytes, mask, is_last, params, commands, num_commands, storage_ix, storage); if (BROTLI_IS_OOM(m)) return; } else { - ContextType literal_context_mode = CONTEXT_UTF8; MetaBlockSplit mb; InitMetaBlockSplit(&mb); if (params->quality < MIN_QUALITY_FOR_HQ_BLOCK_SPLITTING) { @@ -596,14 +619,10 @@ static void WriteMetaBlockInternal(MemoryManager* m, &literal_context_map); } BrotliBuildMetaBlockGreedy(m, data, wrapped_last_flush_pos, mask, - prev_byte, prev_byte2, literal_context_mode, num_literal_contexts, + prev_byte, prev_byte2, literal_context_lut, num_literal_contexts, literal_context_map, commands, num_commands, &mb); if (BROTLI_IS_OOM(m)) return; } else { - if (!BrotliIsMostlyUTF8(data, wrapped_last_flush_pos, mask, bytes, - kMinUTF8Ratio)) { - literal_context_mode = CONTEXT_SIGNED; - } BrotliBuildMetaBlock(m, data, wrapped_last_flush_pos, mask, params, prev_byte, prev_byte2, commands, num_commands, @@ -612,15 +631,18 @@ static void WriteMetaBlockInternal(MemoryManager* m, if (BROTLI_IS_OOM(m)) return; } if (params->quality >= MIN_QUALITY_FOR_OPTIMIZE_HISTOGRAMS) { - BrotliOptimizeHistograms(num_direct_distance_codes, - distance_postfix_bits, - &mb); + /* The number of distance symbols effectively used by + "Large Window Brotli" (32-bit). */ + uint32_t num_effective_dist_codes = params->dist.alphabet_size; + if (num_effective_dist_codes > BROTLI_NUM_HISTOGRAM_DISTANCE_SYMBOLS) { + num_effective_dist_codes = BROTLI_NUM_HISTOGRAM_DISTANCE_SYMBOLS; + } + BrotliOptimizeHistograms(num_effective_dist_codes, &mb); } BrotliStoreMetaBlock(m, data, wrapped_last_flush_pos, bytes, mask, prev_byte, prev_byte2, is_last, - num_direct_distance_codes, - distance_postfix_bits, + params, literal_context_mode, commands, num_commands, &mb, @@ -631,20 +653,54 @@ static void WriteMetaBlockInternal(MemoryManager* m, if (bytes + 4 < (*storage_ix >> 3)) { /* Restore the distance cache and last byte. */ memcpy(dist_cache, saved_dist_cache, 4 * sizeof(dist_cache[0])); - storage[0] = last_byte; - *storage_ix = last_byte_bits; + storage[0] = (uint8_t)last_bytes; + storage[1] = (uint8_t)(last_bytes >> 8); + *storage_ix = last_bytes_bits; BrotliStoreUncompressedMetaBlock(is_last, data, wrapped_last_flush_pos, mask, bytes, storage_ix, storage); } } +static void ChooseDistanceParams(BrotliEncoderParams* params) { + uint32_t num_direct_distance_codes = 0; + uint32_t distance_postfix_bits = 0; + uint32_t alphabet_size; + size_t max_distance = BROTLI_MAX_DISTANCE; + + if (params->quality >= MIN_QUALITY_FOR_RECOMPUTE_DISTANCE_PREFIXES && + params->mode == BROTLI_MODE_FONT) { + num_direct_distance_codes = 12; + distance_postfix_bits = 1; + max_distance = (1U << 27) + 4; + } + + alphabet_size = BROTLI_DISTANCE_ALPHABET_SIZE( + num_direct_distance_codes, distance_postfix_bits, + BROTLI_MAX_DISTANCE_BITS); + if (params->large_window) { + max_distance = BROTLI_MAX_ALLOWED_DISTANCE; + if (num_direct_distance_codes != 0 || distance_postfix_bits != 0) { + max_distance = (3U << 29) - 4; + } + alphabet_size = BROTLI_DISTANCE_ALPHABET_SIZE( + num_direct_distance_codes, distance_postfix_bits, + BROTLI_LARGE_MAX_DISTANCE_BITS); + } + + params->dist.num_direct_distance_codes = num_direct_distance_codes; + params->dist.distance_postfix_bits = distance_postfix_bits; + params->dist.alphabet_size = alphabet_size; + params->dist.max_distance = max_distance; +} + static BROTLI_BOOL EnsureInitialized(BrotliEncoderState* s) { if (BROTLI_IS_OOM(&s->memory_manager_)) return BROTLI_FALSE; if (s->is_initialized_) return BROTLI_TRUE; SanitizeParams(&s->params); s->params.lgblock = ComputeLgBlock(&s->params); + ChooseDistanceParams(&s->params); s->remaining_metadata_bytes_ = BROTLI_UINT32_MAX; @@ -657,7 +713,8 @@ static BROTLI_BOOL EnsureInitialized(BrotliEncoderState* s) { s->params.quality == FAST_TWO_PASS_COMPRESSION_QUALITY) { lgwin = BROTLI_MAX(int, lgwin, 18); } - EncodeWindowBits(lgwin, &s->last_byte_, &s->last_byte_bits_); + EncodeWindowBits(lgwin, s->params.large_window, + &s->last_bytes_, &s->last_bytes_bits_); } if (s->params.quality == FAST_ONE_PASS_COMPRESSION_QUALITY) { @@ -671,11 +728,18 @@ static BROTLI_BOOL EnsureInitialized(BrotliEncoderState* s) { static void BrotliEncoderInitParams(BrotliEncoderParams* params) { params->mode = BROTLI_DEFAULT_MODE; + params->large_window = BROTLI_FALSE; params->quality = BROTLI_DEFAULT_QUALITY; params->lgwin = BROTLI_DEFAULT_WINDOW; params->lgblock = 0; params->size_hint = 0; params->disable_literal_context_modeling = BROTLI_FALSE; + BrotliInitEncoderDictionary(¶ms->dictionary); + params->dist.num_direct_distance_codes = 0; + params->dist.distance_postfix_bits = 0; + params->dist.alphabet_size = + BROTLI_DISTANCE_ALPHABET_SIZE(0, 0, BROTLI_MAX_DISTANCE_BITS); + params->dist.max_distance = BROTLI_MAX_DISTANCE; } static void BrotliEncoderInitState(BrotliEncoderState* s) { @@ -837,6 +901,37 @@ static BROTLI_BOOL UpdateLastProcessedPos(BrotliEncoderState* s) { return TO_BROTLI_BOOL(wrapped_input_pos < wrapped_last_processed_pos); } +static void ExtendLastCommand(BrotliEncoderState* s, uint32_t* bytes, + uint32_t* wrapped_last_processed_pos) { + Command* last_command = &s->commands_[s->num_commands_ - 1]; + const uint8_t* data = s->ringbuffer_.buffer_; + const uint32_t mask = s->ringbuffer_.mask_; + uint64_t max_backward_distance = (1u << s->params.lgwin) - BROTLI_WINDOW_GAP; + uint64_t last_copy_len = last_command->copy_len_ & 0x1FFFFFF; + uint64_t last_processed_pos = s->last_processed_pos_ - last_copy_len; + uint64_t max_distance = last_processed_pos < max_backward_distance ? + last_processed_pos : max_backward_distance; + uint64_t cmd_dist = (uint64_t)s->dist_cache_[0]; + uint32_t distance_code = CommandRestoreDistanceCode(last_command); + if (distance_code < BROTLI_NUM_DISTANCE_SHORT_CODES || + distance_code - (BROTLI_NUM_DISTANCE_SHORT_CODES - 1) == cmd_dist) { + if (cmd_dist <= max_distance) { + while (*bytes != 0 && data[*wrapped_last_processed_pos & mask] == + data[(*wrapped_last_processed_pos - cmd_dist) & mask]) { + last_command->copy_len_++; + (*bytes)--; + (*wrapped_last_processed_pos)++; + } + } + /* The copy length is at most the metablock size, and thus expressible. */ + GetLengthCode(last_command->insert_len_, + (size_t)((int)(last_command->copy_len_ & 0x1FFFFFF) + + (int)(last_command->copy_len_ >> 25)), + TO_BROTLI_BOOL((last_command->dist_prefix_ & 0x3FF) == 0), + &last_command->cmd_prefix_); + } +} + /* Processes the accumulated input data and sets |*out_size| to the length of the new output meta-block, or to zero if no new output meta-block has been @@ -853,13 +948,12 @@ static BROTLI_BOOL EncodeData( BrotliEncoderState* s, const BROTLI_BOOL is_last, const BROTLI_BOOL force_flush, size_t* out_size, uint8_t** output) { const uint64_t delta = UnprocessedInputSize(s); - const uint32_t bytes = (uint32_t)delta; - const uint32_t wrapped_last_processed_pos = - WrapPosition(s->last_processed_pos_); + uint32_t bytes = (uint32_t)delta; + uint32_t wrapped_last_processed_pos = WrapPosition(s->last_processed_pos_); uint8_t* data; uint32_t mask; MemoryManager* m = &s->memory_manager_; - const BrotliDictionary* dictionary = BrotliGetDictionary(); + ContextType literal_context_mode; if (!EnsureInitialized(s)) return BROTLI_FALSE; data = s->ringbuffer_.buffer_; @@ -884,7 +978,7 @@ static BROTLI_BOOL EncodeData( if (s->params.quality == FAST_ONE_PASS_COMPRESSION_QUALITY || s->params.quality == FAST_TWO_PASS_COMPRESSION_QUALITY) { uint8_t* storage; - size_t storage_ix = s->last_byte_bits_; + size_t storage_ix = s->last_bytes_bits_; size_t table_size; int* table; @@ -894,9 +988,10 @@ static BROTLI_BOOL EncodeData( *out_size = 0; return BROTLI_TRUE; } - storage = GetBrotliStorage(s, 2 * bytes + 502); + storage = GetBrotliStorage(s, 2 * bytes + 503); if (BROTLI_IS_OOM(m)) return BROTLI_FALSE; - storage[0] = s->last_byte_; + storage[0] = (uint8_t)s->last_bytes_; + storage[1] = (uint8_t)(s->last_bytes_ >> 8); table = GetHashTable(s, s->params.quality, bytes, &table_size); if (BROTLI_IS_OOM(m)) return BROTLI_FALSE; if (s->params.quality == FAST_ONE_PASS_COMPRESSION_QUALITY) { @@ -917,8 +1012,8 @@ static BROTLI_BOOL EncodeData( &storage_ix, storage); if (BROTLI_IS_OOM(m)) return BROTLI_FALSE; } - s->last_byte_ = storage[storage_ix >> 3]; - s->last_byte_bits_ = storage_ix & 7u; + s->last_bytes_ = (uint16_t)(storage[storage_ix >> 3]); + s->last_bytes_bits_ = storage_ix & 7u; UpdateLastProcessedPos(s); *output = &storage[0]; *out_size = storage_ix >> 3; @@ -946,27 +1041,36 @@ static BROTLI_BOOL EncodeData( InitOrStitchToPreviousBlock(m, &s->hasher_, data, mask, &s->params, wrapped_last_processed_pos, bytes, is_last); + + literal_context_mode = ChooseContextMode( + &s->params, data, WrapPosition(s->last_flush_pos_), + mask, (size_t)(s->input_pos_ - s->last_flush_pos_)); + if (BROTLI_IS_OOM(m)) return BROTLI_FALSE; + if (s->num_commands_ && s->last_insert_len_ == 0) { + ExtendLastCommand(s, &bytes, &wrapped_last_processed_pos); + } + if (s->params.quality == ZOPFLIFICATION_QUALITY) { BROTLI_DCHECK(s->params.hasher.type == 10); - BrotliCreateZopfliBackwardReferences( - m, dictionary, bytes, wrapped_last_processed_pos, + BrotliCreateZopfliBackwardReferences(m, + bytes, wrapped_last_processed_pos, data, mask, &s->params, s->hasher_, s->dist_cache_, &s->last_insert_len_, &s->commands_[s->num_commands_], &s->num_commands_, &s->num_literals_); if (BROTLI_IS_OOM(m)) return BROTLI_FALSE; } else if (s->params.quality == HQ_ZOPFLIFICATION_QUALITY) { BROTLI_DCHECK(s->params.hasher.type == 10); - BrotliCreateHqZopfliBackwardReferences( - m, dictionary, bytes, wrapped_last_processed_pos, + BrotliCreateHqZopfliBackwardReferences(m, + bytes, wrapped_last_processed_pos, data, mask, &s->params, s->hasher_, s->dist_cache_, &s->last_insert_len_, &s->commands_[s->num_commands_], &s->num_commands_, &s->num_literals_); if (BROTLI_IS_OOM(m)) return BROTLI_FALSE; } else { BrotliCreateBackwardReferences( - dictionary, bytes, wrapped_last_processed_pos, + bytes, wrapped_last_processed_pos, data, mask, &s->params, s->hasher_, s->dist_cache_, &s->last_insert_len_, &s->commands_[s->num_commands_], &s->num_commands_, &s->num_literals_); @@ -1018,18 +1122,19 @@ static BROTLI_BOOL EncodeData( { const uint32_t metablock_size = (uint32_t)(s->input_pos_ - s->last_flush_pos_); - uint8_t* storage = GetBrotliStorage(s, 2 * metablock_size + 502); - size_t storage_ix = s->last_byte_bits_; + uint8_t* storage = GetBrotliStorage(s, 2 * metablock_size + 503); + size_t storage_ix = s->last_bytes_bits_; if (BROTLI_IS_OOM(m)) return BROTLI_FALSE; - storage[0] = s->last_byte_; + storage[0] = (uint8_t)s->last_bytes_; + storage[1] = (uint8_t)(s->last_bytes_ >> 8); WriteMetaBlockInternal( m, data, mask, s->last_flush_pos_, metablock_size, is_last, - &s->params, s->prev_byte_, s->prev_byte2_, + literal_context_mode, &s->params, s->prev_byte_, s->prev_byte2_, s->num_literals_, s->num_commands_, s->commands_, s->saved_dist_cache_, s->dist_cache_, &storage_ix, storage); if (BROTLI_IS_OOM(m)) return BROTLI_FALSE; - s->last_byte_ = storage[storage_ix >> 3]; - s->last_byte_bits_ = storage_ix & 7u; + s->last_bytes_ = (uint16_t)(storage[storage_ix >> 3]); + s->last_bytes_bits_ = storage_ix & 7u; s->last_flush_pos_ = s->input_pos_; if (UpdateLastProcessedPos(s)) { HasherReset(s->hasher_); @@ -1058,10 +1163,11 @@ static BROTLI_BOOL EncodeData( static size_t WriteMetadataHeader( BrotliEncoderState* s, const size_t block_size, uint8_t* header) { size_t storage_ix; - storage_ix = s->last_byte_bits_; - header[0] = s->last_byte_; - s->last_byte_ = 0; - s->last_byte_bits_ = 0; + storage_ix = s->last_bytes_bits_; + header[0] = (uint8_t)s->last_bytes_; + header[1] = (uint8_t)(s->last_bytes_ >> 8); + s->last_bytes_ = 0; + s->last_bytes_bits_ = 0; BrotliWriteBits(1, 0, &storage_ix, header); BrotliWriteBits(2, 3, &storage_ix, header); @@ -1091,15 +1197,14 @@ static BROTLI_BOOL BrotliCompressBufferQuality10( BROTLI_BOOL ok = BROTLI_TRUE; const size_t max_out_size = *encoded_size; size_t total_out_size = 0; - uint8_t last_byte; - uint8_t last_byte_bits; + uint16_t last_bytes; + uint8_t last_bytes_bits; HasherHandle hasher = NULL; const size_t hasher_eff_size = BROTLI_MIN(size_t, input_size, max_backward_limit + BROTLI_WINDOW_GAP); BrotliEncoderParams params; - const BrotliDictionary* dictionary = BrotliGetDictionary(); const int lgmetablock = BROTLI_MIN(int, 24, lgwin + 1); size_t max_block_size; @@ -1113,14 +1218,18 @@ static BROTLI_BOOL BrotliCompressBufferQuality10( BrotliEncoderInitParams(¶ms); params.quality = 10; params.lgwin = lgwin; + if (lgwin > BROTLI_MAX_WINDOW_BITS) { + params.large_window = BROTLI_TRUE; + } SanitizeParams(¶ms); params.lgblock = ComputeLgBlock(¶ms); + ChooseDistanceParams(¶ms); max_block_size = (size_t)1 << params.lgblock; BrotliInitMemoryManager(m, 0, 0, 0); BROTLI_DCHECK(input_size <= mask + 1); - EncodeWindowBits(lgwin, &last_byte, &last_byte_bits); + EncodeWindowBits(lgwin, params.large_window, &last_bytes, &last_bytes_bits); InitOrStitchToPreviousBlock(m, &hasher, input_buffer, mask, ¶ms, 0, hasher_eff_size, BROTLI_TRUE); if (BROTLI_IS_OOM(m)) goto oom; @@ -1140,6 +1249,9 @@ static BROTLI_BOOL BrotliCompressBufferQuality10( uint8_t* storage; size_t storage_ix; + ContextType literal_context_mode = ChooseContextMode(¶ms, + input_buffer, metablock_start, mask, metablock_end - metablock_start); + size_t block_start; for (block_start = metablock_start; block_start < metablock_end; ) { size_t block_size = @@ -1151,10 +1263,9 @@ static BROTLI_BOOL BrotliCompressBufferQuality10( BrotliInitZopfliNodes(nodes, block_size + 1); StitchToPreviousBlockH10(hasher, block_size, block_start, input_buffer, mask); - path_size = BrotliZopfliComputeShortestPath( - m, dictionary, block_size, block_start, - input_buffer, mask, ¶ms, max_backward_limit, dist_cache, hasher, - nodes); + path_size = BrotliZopfliComputeShortestPath(m, + block_size, block_start, input_buffer, mask, ¶ms, + max_backward_limit, dist_cache, hasher, nodes); if (BROTLI_IS_OOM(m)) goto oom; /* We allocate a command buffer in the first iteration of this loop that will be likely big enough for the whole metablock, so that for most @@ -1197,13 +1308,14 @@ static BROTLI_BOOL BrotliCompressBufferQuality10( is_last = TO_BROTLI_BOOL(metablock_start + metablock_size == input_size); storage = NULL; - storage_ix = last_byte_bits; + storage_ix = last_bytes_bits; if (metablock_size == 0) { /* Write the ISLAST and ISEMPTY bits. */ storage = BROTLI_ALLOC(m, uint8_t, 16); if (BROTLI_IS_OOM(m)) goto oom; - storage[0] = last_byte; + storage[0] = (uint8_t)last_bytes; + storage[1] = (uint8_t)(last_bytes >> 8); BrotliWriteBits(2, 3, &storage_ix, storage); storage_ix = (storage_ix + 7u) & ~7u; } else if (!ShouldCompress(input_buffer, mask, metablock_start, @@ -1213,37 +1325,35 @@ static BROTLI_BOOL BrotliCompressBufferQuality10( memcpy(dist_cache, saved_dist_cache, 4 * sizeof(dist_cache[0])); storage = BROTLI_ALLOC(m, uint8_t, metablock_size + 16); if (BROTLI_IS_OOM(m)) goto oom; - storage[0] = last_byte; + storage[0] = (uint8_t)last_bytes; + storage[1] = (uint8_t)(last_bytes >> 8); BrotliStoreUncompressedMetaBlock(is_last, input_buffer, metablock_start, mask, metablock_size, &storage_ix, storage); } else { - uint32_t num_direct_distance_codes = 0; - uint32_t distance_postfix_bits = 0; - ContextType literal_context_mode = CONTEXT_UTF8; MetaBlockSplit mb; - InitMetaBlockSplit(&mb); - if (!BrotliIsMostlyUTF8(input_buffer, metablock_start, mask, - metablock_size, kMinUTF8Ratio)) { - literal_context_mode = CONTEXT_SIGNED; + /* The number of distance symbols effectively used by + "Large Window Brotli" (32-bit). */ + uint32_t num_effective_dist_codes = params.dist.alphabet_size; + if (num_effective_dist_codes > BROTLI_NUM_HISTOGRAM_DISTANCE_SYMBOLS) { + num_effective_dist_codes = BROTLI_NUM_HISTOGRAM_DISTANCE_SYMBOLS; } + InitMetaBlockSplit(&mb); BrotliBuildMetaBlock(m, input_buffer, metablock_start, mask, ¶ms, prev_byte, prev_byte2, commands, num_commands, literal_context_mode, &mb); if (BROTLI_IS_OOM(m)) goto oom; - BrotliOptimizeHistograms(num_direct_distance_codes, - distance_postfix_bits, - &mb); - storage = BROTLI_ALLOC(m, uint8_t, 2 * metablock_size + 502); + BrotliOptimizeHistograms(num_effective_dist_codes, &mb); + storage = BROTLI_ALLOC(m, uint8_t, 2 * metablock_size + 503); if (BROTLI_IS_OOM(m)) goto oom; - storage[0] = last_byte; + storage[0] = (uint8_t)last_bytes; + storage[1] = (uint8_t)(last_bytes >> 8); BrotliStoreMetaBlock(m, input_buffer, metablock_start, metablock_size, mask, prev_byte, prev_byte2, is_last, - num_direct_distance_codes, - distance_postfix_bits, + ¶ms, literal_context_mode, commands, num_commands, &mb, @@ -1252,16 +1362,17 @@ static BROTLI_BOOL BrotliCompressBufferQuality10( if (metablock_size + 4 < (storage_ix >> 3)) { /* Restore the distance cache and last byte. */ memcpy(dist_cache, saved_dist_cache, 4 * sizeof(dist_cache[0])); - storage[0] = last_byte; - storage_ix = last_byte_bits; + storage[0] = (uint8_t)last_bytes; + storage[1] = (uint8_t)(last_bytes >> 8); + storage_ix = last_bytes_bits; BrotliStoreUncompressedMetaBlock(is_last, input_buffer, metablock_start, mask, metablock_size, &storage_ix, storage); } DestroyMetaBlockSplit(m, &mb); } - last_byte = storage[storage_ix >> 3]; - last_byte_bits = storage_ix & 7u; + last_bytes = (uint16_t)(storage[storage_ix >> 3]); + last_bytes_bits = storage_ix & 7u; metablock_start += metablock_size; if (metablock_start < input_size) { prev_byte = input_buffer[metablock_start - 1]; @@ -1296,8 +1407,8 @@ oom: size_t BrotliEncoderMaxCompressedSize(size_t input_size) { /* [window bits / empty metadata] + N * [uncompressed] + [last empty] */ - size_t num_small_blocks = input_size >> 14; - size_t overhead = 2 + (4 * num_small_blocks) + 3 + 1; + size_t num_large_blocks = input_size >> 14; + size_t overhead = 2 + (4 * num_large_blocks) + 3 + 1; size_t result = input_size + overhead; if (input_size == 0) return 2; return (result < input_size) ? 0 : result; @@ -1360,7 +1471,7 @@ BROTLI_BOOL BrotliEncoderCompress( } if (quality == 10) { /* TODO: Implement this direct path for all quality levels. */ - const int lg_win = BROTLI_MIN(int, BROTLI_MAX_WINDOW_BITS, + const int lg_win = BROTLI_MIN(int, BROTLI_LARGE_MAX_WINDOW_BITS, BROTLI_MAX(int, 16, lgwin)); int ok = BrotliCompressBufferQuality10(lg_win, input_size, input_buffer, encoded_size, encoded_buffer); @@ -1384,6 +1495,9 @@ BROTLI_BOOL BrotliEncoderCompress( BrotliEncoderSetParameter(s, BROTLI_PARAM_LGWIN, (uint32_t)lgwin); BrotliEncoderSetParameter(s, BROTLI_PARAM_MODE, (uint32_t)mode); BrotliEncoderSetParameter(s, BROTLI_PARAM_SIZE_HINT, (uint32_t)input_size); + if (lgwin > BROTLI_MAX_WINDOW_BITS) { + BrotliEncoderSetParameter(s, BROTLI_PARAM_LARGE_WINDOW, BROTLI_TRUE); + } result = BrotliEncoderCompressStream(s, BROTLI_OPERATION_FINISH, &available_in, &next_in, &available_out, &next_out, &total_out); if (!BrotliEncoderIsFinished(s)) result = 0; @@ -1406,11 +1520,11 @@ fallback: } static void InjectBytePaddingBlock(BrotliEncoderState* s) { - uint32_t seal = s->last_byte_; - size_t seal_bits = s->last_byte_bits_; + uint32_t seal = s->last_bytes_; + size_t seal_bits = s->last_bytes_bits_; uint8_t* destination; - s->last_byte_ = 0; - s->last_byte_bits_ = 0; + s->last_bytes_ = 0; + s->last_bytes_bits_ = 0; /* is_last = 0, data_nibbles = 11, reserved = 0, meta_nibbles = 00 */ seal |= 0x6u << seal_bits; seal_bits += 6; @@ -1424,6 +1538,7 @@ static void InjectBytePaddingBlock(BrotliEncoderState* s) { } destination[0] = (uint8_t)seal; if (seal_bits > 8) destination[1] = (uint8_t)(seal >> 8); + if (seal_bits > 16) destination[2] = (uint8_t)(seal >> 16); s->available_out_ += (seal_bits + 7) >> 3; } @@ -1432,7 +1547,7 @@ static void InjectBytePaddingBlock(BrotliEncoderState* s) { static BROTLI_BOOL InjectFlushOrPushOutput(BrotliEncoderState* s, size_t* available_out, uint8_t** next_out, size_t* total_out) { if (s->stream_state_ == BROTLI_STREAM_FLUSH_REQUESTED && - s->last_byte_bits_ != 0) { + s->last_bytes_bits_ != 0) { InjectBytePaddingBlock(s); return BROTLI_TRUE; } @@ -1513,10 +1628,10 @@ static BROTLI_BOOL BrotliEncoderCompressStreamFast( (*available_in == block_size) && (op == BROTLI_OPERATION_FINISH); BROTLI_BOOL force_flush = (*available_in == block_size) && (op == BROTLI_OPERATION_FLUSH); - size_t max_out_size = 2 * block_size + 502; + size_t max_out_size = 2 * block_size + 503; BROTLI_BOOL inplace = BROTLI_TRUE; uint8_t* storage = NULL; - size_t storage_ix = s->last_byte_bits_; + size_t storage_ix = s->last_bytes_bits_; size_t table_size; int* table; @@ -1531,7 +1646,8 @@ static BROTLI_BOOL BrotliEncoderCompressStreamFast( storage = GetBrotliStorage(s, max_out_size); if (BROTLI_IS_OOM(m)) return BROTLI_FALSE; } - storage[0] = s->last_byte_; + storage[0] = (uint8_t)s->last_bytes_; + storage[1] = (uint8_t)(s->last_bytes_ >> 8); table = GetHashTable(s, s->params.quality, block_size, &table_size); if (BROTLI_IS_OOM(m)) return BROTLI_FALSE; @@ -1561,8 +1677,8 @@ static BROTLI_BOOL BrotliEncoderCompressStreamFast( s->next_out_ = storage; s->available_out_ = out_bytes; } - s->last_byte_ = storage[storage_ix >> 3]; - s->last_byte_bits_ = storage_ix & 7u; + s->last_bytes_ = (uint16_t)(storage[storage_ix >> 3]); + s->last_bytes_bits_ = storage_ix & 7u; if (force_flush) s->stream_state_ = BROTLI_STREAM_FLUSH_REQUESTED; if (is_last) s->stream_state_ = BROTLI_STREAM_FINISHED; diff --git a/c/enc/encoder_dict.c b/c/enc/encoder_dict.c new file mode 100755 index 0000000..8b2f6ad --- /dev/null +++ b/c/enc/encoder_dict.c @@ -0,0 +1,32 @@ +/* Copyright 2017 Google Inc. All Rights Reserved. + + Distributed under MIT license. + See file LICENSE for detail or copy at https://opensource.org/licenses/MIT +*/ + +#include "./encoder_dict.h" + +#include "../common/dictionary.h" +#include "../common/transform.h" +#include "./dictionary_hash.h" +#include "./hash.h" + +#if defined(__cplusplus) || defined(c_plusplus) +extern "C" { +#endif + +void BrotliInitEncoderDictionary(BrotliEncoderDictionary* dict) { + dict->words = BrotliGetDictionary(); + + dict->hash_table = kStaticDictionaryHash; + dict->buckets = kStaticDictionaryBuckets; + dict->dict_words = kStaticDictionaryWords; + + dict->cutoffTransformsCount = kCutoffTransformsCount; + dict->cutoffTransforms = kCutoffTransforms; + +} + +#if defined(__cplusplus) || defined(c_plusplus) +} /* extern "C" */ +#endif diff --git a/c/enc/encoder_dict.h b/c/enc/encoder_dict.h new file mode 100755 index 0000000..9ac8b4a --- /dev/null +++ b/c/enc/encoder_dict.h @@ -0,0 +1,42 @@ +/* Copyright 2017 Google Inc. All Rights Reserved. + + Distributed under MIT license. + See file LICENSE for detail or copy at https://opensource.org/licenses/MIT +*/ + +#ifndef BROTLI_ENC_ENCODER_DICT_H_ +#define BROTLI_ENC_ENCODER_DICT_H_ + +#include "../common/dictionary.h" +#include "../common/platform.h" +#include <brotli/types.h> +#include "./static_dict_lut.h" + +#if defined(__cplusplus) || defined(c_plusplus) +extern "C" { +#endif + +/* Dictionary data (words and transforms) for 1 possible context */ +typedef struct BrotliEncoderDictionary { + const BrotliDictionary* words; + + /* cut off for fast encoder */ + uint32_t cutoffTransformsCount; + uint64_t cutoffTransforms; + + /* from dictionary_hash.h, for fast encoder */ + const uint16_t* hash_table; + + /* from static_dict_lut.h, for slow encoder */ + const uint16_t* buckets; + const DictWord* dict_words; +} BrotliEncoderDictionary; + +BROTLI_INTERNAL void BrotliInitEncoderDictionary(BrotliEncoderDictionary* dict); + + +#if defined(__cplusplus) || defined(c_plusplus) +} /* extern "C" */ +#endif + +#endif /* BROTLI_ENC_ENCODER_DICT_H_ */ diff --git a/c/enc/entropy_encode.c b/c/enc/entropy_encode.c index 9e0ea11..97f9dfb 100644 --- a/c/enc/entropy_encode.c +++ b/c/enc/entropy_encode.c @@ -66,11 +66,11 @@ static BROTLI_INLINE BROTLI_BOOL SortHuffmanTree( we are not planning to use this with extremely long blocks. See http://en.wikipedia.org/wiki/Huffman_coding */ -void BrotliCreateHuffmanTree(const uint32_t *data, +void BrotliCreateHuffmanTree(const uint32_t* data, const size_t length, const int tree_limit, HuffmanTree* tree, - uint8_t *depth) { + uint8_t* depth) { uint32_t count_limit; HuffmanTree sentinel; InitHuffmanTree(&sentinel, BROTLI_UINT32_MAX, -1, -1); @@ -371,8 +371,8 @@ void BrotliOptimizeHuffmanCountsForRle(size_t length, uint32_t* counts, } static void DecideOverRleUse(const uint8_t* depth, const size_t length, - BROTLI_BOOL *use_rle_for_non_zero, - BROTLI_BOOL *use_rle_for_zero) { + BROTLI_BOOL* use_rle_for_non_zero, + BROTLI_BOOL* use_rle_for_zero) { size_t total_reps_zero = 0; size_t total_reps_non_zero = 0; size_t count_reps_zero = 1; @@ -454,26 +454,26 @@ void BrotliWriteHuffmanTree(const uint8_t* depth, static uint16_t BrotliReverseBits(size_t num_bits, uint16_t bits) { static const size_t kLut[16] = { /* Pre-reversed 4-bit values. */ - 0x0, 0x8, 0x4, 0xc, 0x2, 0xa, 0x6, 0xe, - 0x1, 0x9, 0x5, 0xd, 0x3, 0xb, 0x7, 0xf + 0x00, 0x08, 0x04, 0x0C, 0x02, 0x0A, 0x06, 0x0E, + 0x01, 0x09, 0x05, 0x0D, 0x03, 0x0B, 0x07, 0x0F }; - size_t retval = kLut[bits & 0xf]; + size_t retval = kLut[bits & 0x0F]; size_t i; for (i = 4; i < num_bits; i += 4) { retval <<= 4; bits = (uint16_t)(bits >> 4); - retval |= kLut[bits & 0xf]; + retval |= kLut[bits & 0x0F]; } - retval >>= ((0 - num_bits) & 0x3); + retval >>= ((0 - num_bits) & 0x03); return (uint16_t)retval; } /* 0..15 are values for bits */ #define MAX_HUFFMAN_BITS 16 -void BrotliConvertBitDepthsToSymbols(const uint8_t *depth, +void BrotliConvertBitDepthsToSymbols(const uint8_t* depth, size_t len, - uint16_t *bits) { + uint16_t* bits) { /* In Brotli, all bit depths are [1..15] 0 bit depth means that the symbol does not exist. */ uint16_t bl_count[MAX_HUFFMAN_BITS] = { 0 }; diff --git a/c/enc/entropy_encode.h b/c/enc/entropy_encode.h index ef7c216..f23d9c3 100644 --- a/c/enc/entropy_encode.h +++ b/c/enc/entropy_encode.h @@ -46,11 +46,11 @@ BROTLI_INTERNAL BROTLI_BOOL BrotliSetDepth( be at least 2 * length + 1 long. See http://en.wikipedia.org/wiki/Huffman_coding */ -BROTLI_INTERNAL void BrotliCreateHuffmanTree(const uint32_t *data, +BROTLI_INTERNAL void BrotliCreateHuffmanTree(const uint32_t* data, const size_t length, const int tree_limit, HuffmanTree* tree, - uint8_t *depth); + uint8_t* depth); /* Change the population counts in a way that the consequent Huffman tree compression, especially its RLE-part will be more @@ -72,9 +72,9 @@ BROTLI_INTERNAL void BrotliWriteHuffmanTree(const uint8_t* depth, uint8_t* extra_bits_data); /* Get the actual bit values for a tree of bit depths. */ -BROTLI_INTERNAL void BrotliConvertBitDepthsToSymbols(const uint8_t *depth, +BROTLI_INTERNAL void BrotliConvertBitDepthsToSymbols(const uint8_t* depth, size_t len, - uint16_t *bits); + uint16_t* bits); /* Input size optimized Shell sort. */ typedef BROTLI_BOOL (*HuffmanTreeComparator)( diff --git a/c/enc/entropy_encode_static.h b/c/enc/entropy_encode_static.h index b2c1fbb..62b99a9 100644 --- a/c/enc/entropy_encode_static.h +++ b/c/enc/entropy_encode_static.h @@ -83,7 +83,7 @@ static const uint32_t kCodeLengthBits[18] = { static BROTLI_INLINE void StoreStaticCodeLengthCode( size_t* storage_ix, uint8_t* storage) { BrotliWriteBits( - 40, BROTLI_MAKE_UINT64_T(0x0000ffU, 0x55555554U), storage_ix, storage); + 40, BROTLI_MAKE_UINT64_T(0x0000FFu, 0x55555554u), storage_ix, storage); } static const uint64_t kZeroRepsBits[BROTLI_NUM_COMMAND_SYMBOLS] = { @@ -529,7 +529,7 @@ static const uint16_t kStaticDistanceCodeBits[64] = { static BROTLI_INLINE void StoreStaticDistanceHuffmanTree( size_t* storage_ix, uint8_t* storage) { - BrotliWriteBits(28, 0x0369dc03U, storage_ix, storage); + BrotliWriteBits(28, 0x0369DC03u, storage_ix, storage); } #if defined(__cplusplus) || defined(c_plusplus) diff --git a/c/enc/hash.h b/c/enc/hash.h index 2a1634d..1827ce6 100644 --- a/c/enc/hash.h +++ b/c/enc/hash.h @@ -16,6 +16,7 @@ #include "../common/dictionary.h" #include "../common/platform.h" #include <brotli/types.h> +#include "./encoder_dict.h" #include "./fast_log.h" #include "./find_match_length.h" #include "./memory.h" @@ -73,10 +74,10 @@ typedef struct HasherSearchResult { * There is no effort to ensure that it is a prime, the oddity is enough for this use. * The number has been tuned heuristically against compression benchmarks. */ -static const uint32_t kHashMul32 = 0x1e35a7bd; -static const uint64_t kHashMul64 = BROTLI_MAKE_UINT64_T(0x1e35a7bd, 0x1e35a7bd); +static const uint32_t kHashMul32 = 0x1E35A7BD; +static const uint64_t kHashMul64 = BROTLI_MAKE_UINT64_T(0x1E35A7BD, 0x1E35A7BD); static const uint64_t kHashMul64Long = - BROTLI_MAKE_UINT64_T(0x1fe35a7bU, 0xd3579bd3U); + BROTLI_MAKE_UINT64_T(0x1FE35A7Bu, 0xD3579BD3u); static BROTLI_INLINE uint32_t Hash14(const uint8_t* data) { uint32_t h = BROTLI_UNALIGNED_LOAD32LE(data) * kHashMul32; @@ -146,8 +147,9 @@ static BROTLI_INLINE score_t BackwardReferencePenaltyUsingLastDistance( } static BROTLI_INLINE BROTLI_BOOL TestStaticDictionaryItem( - const BrotliDictionary* dictionary, size_t item, const uint8_t* data, - size_t max_length, size_t max_backward, HasherSearchResult* out) { + const BrotliEncoderDictionary* dictionary, size_t item, const uint8_t* data, + size_t max_length, size_t max_backward, size_t max_distance, + HasherSearchResult* out) { size_t len; size_t dist; size_t offset; @@ -156,24 +158,24 @@ static BROTLI_INLINE BROTLI_BOOL TestStaticDictionaryItem( score_t score; len = item & 0x1F; dist = item >> 5; - offset = dictionary->offsets_by_length[len] + len * dist; + offset = dictionary->words->offsets_by_length[len] + len * dist; if (len > max_length) { return BROTLI_FALSE; } matchlen = - FindMatchLengthWithLimit(data, &dictionary->data[offset], len); - if (matchlen + kCutoffTransformsCount <= len || matchlen == 0) { + FindMatchLengthWithLimit(data, &dictionary->words->data[offset], len); + if (matchlen + dictionary->cutoffTransformsCount <= len || matchlen == 0) { return BROTLI_FALSE; } { size_t cut = len - matchlen; - size_t transform_id = - (cut << 2) + (size_t)((kCutoffTransforms >> (cut * 6)) & 0x3F); + size_t transform_id = (cut << 2) + + (size_t)((dictionary->cutoffTransforms >> (cut * 6)) & 0x3F); backward = max_backward + dist + 1 + - (transform_id << dictionary->size_bits_by_length[len]); + (transform_id << dictionary->words->size_bits_by_length[len]); } - if (backward >= BROTLI_MAX_DISTANCE) { + if (backward > max_distance) { return BROTLI_FALSE; } score = BackwardReferenceScore(matchlen, backward); @@ -188,9 +190,10 @@ static BROTLI_INLINE BROTLI_BOOL TestStaticDictionaryItem( } static BROTLI_INLINE void SearchInStaticDictionary( - const BrotliDictionary* dictionary, const uint16_t* dictionary_hash, + const BrotliEncoderDictionary* dictionary, HasherHandle handle, const uint8_t* data, size_t max_length, - size_t max_backward, HasherSearchResult* out, BROTLI_BOOL shallow) { + size_t max_backward, size_t max_distance, + HasherSearchResult* out, BROTLI_BOOL shallow) { size_t key; size_t i; HasherCommon* self = GetHasherCommon(handle); @@ -199,11 +202,11 @@ static BROTLI_INLINE void SearchInStaticDictionary( } key = Hash14(data) << 1; for (i = 0; i < (shallow ? 1u : 2u); ++i, ++key) { - size_t item = dictionary_hash[key]; + size_t item = dictionary->hash_table[key]; self->dict_num_lookups++; if (item != 0) { BROTLI_BOOL item_matches = TestStaticDictionaryItem( - dictionary, item, data, max_length, max_backward, out); + dictionary, item, data, max_length, max_backward, max_distance, out); if (item_matches) { self->dict_num_matches++; } diff --git a/c/enc/hash_forgetful_chain_inc.h b/c/enc/hash_forgetful_chain_inc.h index 46d363c..41cb3ff 100644 --- a/c/enc/hash_forgetful_chain_inc.h +++ b/c/enc/hash_forgetful_chain_inc.h @@ -28,7 +28,7 @@ static BROTLI_INLINE size_t FN(HashTypeLength)(void) { return 4; } static BROTLI_INLINE size_t FN(StoreLookahead)(void) { return 4; } /* HashBytes is the function that chooses the bucket to place the address in.*/ -static BROTLI_INLINE size_t FN(HashBytes)(const uint8_t *data) { +static BROTLI_INLINE size_t FN(HashBytes)(const uint8_t* data) { const uint32_t h = BROTLI_UNALIGNED_LOAD32LE(data) * kHashMul32; /* The higher bits contain more mixture from the multiplication, so we take our results from there. */ @@ -115,7 +115,7 @@ static BROTLI_INLINE void FN(Store)(HasherHandle BROTLI_RESTRICT handle, } static BROTLI_INLINE void FN(StoreRange)(HasherHandle handle, - const uint8_t *data, const size_t mask, const size_t ix_start, + const uint8_t* data, const size_t mask, const size_t ix_start, const size_t ix_end) { size_t i; for (i = ix_start; i < ix_end; ++i) { @@ -154,11 +154,12 @@ static BROTLI_INLINE void FN(PrepareDistanceCache)( Writes the best match into |out|. |out|->score is updated only if a better match is found. */ static BROTLI_INLINE void FN(FindLongestMatch)(HasherHandle handle, - const BrotliDictionary* dictionary, const uint16_t* dictionary_hash, + const BrotliEncoderDictionary* dictionary, const uint8_t* BROTLI_RESTRICT data, const size_t ring_buffer_mask, const int* BROTLI_RESTRICT distance_cache, const size_t cur_ix, const size_t max_length, const size_t max_backward, - const size_t gap, HasherSearchResult* BROTLI_RESTRICT out) { + const size_t gap, const size_t max_distance, + HasherSearchResult* BROTLI_RESTRICT out) { HashForgetfulChain* self = FN(Self)(handle); const size_t cur_ix_masked = cur_ix & ring_buffer_mask; /* Don't accept a short copy from far away. */ @@ -240,9 +241,9 @@ static BROTLI_INLINE void FN(FindLongestMatch)(HasherHandle handle, FN(Store)(handle, data, ring_buffer_mask, cur_ix); } if (out->score == min_score) { - SearchInStaticDictionary(dictionary, dictionary_hash, - handle, &data[cur_ix_masked], max_length, max_backward + gap, out, - BROTLI_FALSE); + SearchInStaticDictionary(dictionary, + handle, &data[cur_ix_masked], max_length, max_backward + gap, + max_distance, out, BROTLI_FALSE); } } diff --git a/c/enc/hash_longest_match64_inc.h b/c/enc/hash_longest_match64_inc.h index 6b0697b..e099edf 100644 --- a/c/enc/hash_longest_match64_inc.h +++ b/c/enc/hash_longest_match64_inc.h @@ -20,7 +20,7 @@ static BROTLI_INLINE size_t FN(HashTypeLength)(void) { return 8; } static BROTLI_INLINE size_t FN(StoreLookahead)(void) { return 8; } /* HashBytes is the function that chooses the bucket to place the address in. */ -static BROTLI_INLINE uint32_t FN(HashBytes)(const uint8_t *data, +static BROTLI_INLINE uint32_t FN(HashBytes)(const uint8_t* data, const uint64_t mask, const int shift) { const uint64_t h = (BROTLI_UNALIGNED_LOAD64LE(data) & mask) * kHashMul64Long; @@ -105,7 +105,7 @@ static BROTLI_INLINE size_t FN(HashMemAllocInBytes)( /* Look at 4 bytes at &data[ix & mask]. Compute a hash from these, and store the value of ix at that position. */ -static BROTLI_INLINE void FN(Store)(HasherHandle handle, const uint8_t *data, +static BROTLI_INLINE void FN(Store)(HasherHandle handle, const uint8_t* data, const size_t mask, const size_t ix) { HashLongestMatch* self = FN(Self)(handle); uint16_t* num = FN(Num)(self); @@ -119,7 +119,7 @@ static BROTLI_INLINE void FN(Store)(HasherHandle handle, const uint8_t *data, } static BROTLI_INLINE void FN(StoreRange)(HasherHandle handle, - const uint8_t *data, const size_t mask, const size_t ix_start, + const uint8_t* data, const size_t mask, const size_t ix_start, const size_t ix_end) { size_t i; for (i = ix_start; i < ix_end; ++i) { @@ -158,11 +158,11 @@ static BROTLI_INLINE void FN(PrepareDistanceCache)( Writes the best match into |out|. |out|->score is updated only if a better match is found. */ static BROTLI_INLINE void FN(FindLongestMatch)(HasherHandle handle, - const BrotliDictionary* dictionary, const uint16_t* dictionary_hash, + const BrotliEncoderDictionary* dictionary, const uint8_t* BROTLI_RESTRICT data, const size_t ring_buffer_mask, const int* BROTLI_RESTRICT distance_cache, const size_t cur_ix, const size_t max_length, const size_t max_backward, const size_t gap, - HasherSearchResult* BROTLI_RESTRICT out) { + const size_t max_distance, HasherSearchResult* BROTLI_RESTRICT out) { HasherCommon* common = GetHasherCommon(handle); HashLongestMatch* self = FN(Self)(handle); uint16_t* num = FN(Num)(self); @@ -257,9 +257,9 @@ static BROTLI_INLINE void FN(FindLongestMatch)(HasherHandle handle, ++num[key]; } if (min_score == out->score) { - SearchInStaticDictionary(dictionary, dictionary_hash, - handle, &data[cur_ix_masked], max_length, max_backward + gap, out, - BROTLI_FALSE); + SearchInStaticDictionary(dictionary, + handle, &data[cur_ix_masked], max_length, max_backward + gap, + max_distance, out, BROTLI_FALSE); } } diff --git a/c/enc/hash_longest_match_inc.h b/c/enc/hash_longest_match_inc.h index d24576d..951d7a4 100644 --- a/c/enc/hash_longest_match_inc.h +++ b/c/enc/hash_longest_match_inc.h @@ -20,7 +20,7 @@ static BROTLI_INLINE size_t FN(HashTypeLength)(void) { return 4; } static BROTLI_INLINE size_t FN(StoreLookahead)(void) { return 4; } /* HashBytes is the function that chooses the bucket to place the address in. */ -static uint32_t FN(HashBytes)(const uint8_t *data, const int shift) { +static uint32_t FN(HashBytes)(const uint8_t* data, const int shift) { uint32_t h = BROTLI_UNALIGNED_LOAD32LE(data) * kHashMul32; /* The higher bits contain more mixture from the multiplication, so we take our results from there. */ @@ -112,7 +112,7 @@ static BROTLI_INLINE void FN(Store)(HasherHandle handle, const uint8_t* data, } static BROTLI_INLINE void FN(StoreRange)(HasherHandle handle, - const uint8_t *data, const size_t mask, const size_t ix_start, + const uint8_t* data, const size_t mask, const size_t ix_start, const size_t ix_end) { size_t i; for (i = ix_start; i < ix_end; ++i) { @@ -151,11 +151,11 @@ static BROTLI_INLINE void FN(PrepareDistanceCache)( Writes the best match into |out|. |out|->score is updated only if a better match is found. */ static BROTLI_INLINE void FN(FindLongestMatch)(HasherHandle handle, - const BrotliDictionary* dictionary, const uint16_t* dictionary_hash, + const BrotliEncoderDictionary* dictionary, const uint8_t* BROTLI_RESTRICT data, const size_t ring_buffer_mask, const int* BROTLI_RESTRICT distance_cache, const size_t cur_ix, const size_t max_length, const size_t max_backward, const size_t gap, - HasherSearchResult* BROTLI_RESTRICT out) { + const size_t max_distance, HasherSearchResult* BROTLI_RESTRICT out) { HasherCommon* common = GetHasherCommon(handle); HashLongestMatch* self = FN(Self)(handle); uint16_t* num = FN(Num)(self); @@ -249,9 +249,9 @@ static BROTLI_INLINE void FN(FindLongestMatch)(HasherHandle handle, ++num[key]; } if (min_score == out->score) { - SearchInStaticDictionary(dictionary, dictionary_hash, - handle, &data[cur_ix_masked], max_length, max_backward + gap, out, - BROTLI_FALSE); + SearchInStaticDictionary(dictionary, + handle, &data[cur_ix_masked], max_length, max_backward + gap, + max_distance, out, BROTLI_FALSE); } } diff --git a/c/enc/hash_longest_match_quickly_inc.h b/c/enc/hash_longest_match_quickly_inc.h index 2c78351..a7b9639 100644 --- a/c/enc/hash_longest_match_quickly_inc.h +++ b/c/enc/hash_longest_match_quickly_inc.h @@ -81,7 +81,7 @@ static BROTLI_INLINE size_t FN(HashMemAllocInBytes)( Compute a hash from these, and store the value somewhere within [ix .. ix+3]. */ static BROTLI_INLINE void FN(Store)(HasherHandle handle, - const uint8_t *data, const size_t mask, const size_t ix) { + const uint8_t* data, const size_t mask, const size_t ix) { const uint32_t key = FN(HashBytes)(&data[ix & mask]); /* Wiggle the value with the bucket sweep range. */ const uint32_t off = (ix >> 3) % BUCKET_SWEEP; @@ -89,7 +89,7 @@ static BROTLI_INLINE void FN(Store)(HasherHandle handle, } static BROTLI_INLINE void FN(StoreRange)(HasherHandle handle, - const uint8_t *data, const size_t mask, const size_t ix_start, + const uint8_t* data, const size_t mask, const size_t ix_start, const size_t ix_end) { size_t i; for (i = ix_start; i < ix_end; ++i) { @@ -125,11 +125,12 @@ static BROTLI_INLINE void FN(PrepareDistanceCache)( Writes the best match into |out|. |out|->score is updated only if a better match is found. */ static BROTLI_INLINE void FN(FindLongestMatch)( - HasherHandle handle, const BrotliDictionary* dictionary, - const uint16_t* dictionary_hash, const uint8_t* BROTLI_RESTRICT data, + HasherHandle handle, const BrotliEncoderDictionary* dictionary, + const uint8_t* BROTLI_RESTRICT data, const size_t ring_buffer_mask, const int* BROTLI_RESTRICT distance_cache, const size_t cur_ix, const size_t max_length, const size_t max_backward, - const size_t gap, HasherSearchResult* BROTLI_RESTRICT out) { + const size_t gap, const size_t max_distance, + HasherSearchResult* BROTLI_RESTRICT out) { HashLongestMatchQuickly* self = FN(Self)(handle); const size_t best_len_in = out->len; const size_t cur_ix_masked = cur_ix & ring_buffer_mask; @@ -191,7 +192,7 @@ static BROTLI_INLINE void FN(FindLongestMatch)( } } } else { - uint32_t *bucket = self->buckets_ + key; + uint32_t* bucket = self->buckets_ + key; int i; prev_ix = *bucket++; for (i = 0; i < BUCKET_SWEEP; ++i, prev_ix = *bucket++) { @@ -221,9 +222,9 @@ static BROTLI_INLINE void FN(FindLongestMatch)( } } if (USE_DICTIONARY && min_score == out->score) { - SearchInStaticDictionary(dictionary, dictionary_hash, - handle, &data[cur_ix_masked], max_length, max_backward + gap, out, - BROTLI_TRUE); + SearchInStaticDictionary(dictionary, + handle, &data[cur_ix_masked], max_length, max_backward + gap, + max_distance, out, BROTLI_TRUE); } self->buckets_[key + ((cur_ix >> 3) % BUCKET_SWEEP)] = (uint32_t)cur_ix; } diff --git a/c/enc/hash_to_binary_tree_inc.h b/c/enc/hash_to_binary_tree_inc.h index 73774b2..48097b1 100644 --- a/c/enc/hash_to_binary_tree_inc.h +++ b/c/enc/hash_to_binary_tree_inc.h @@ -24,7 +24,7 @@ static BROTLI_INLINE size_t FN(StoreLookahead)(void) { return MAX_TREE_COMP_LENGTH; } -static uint32_t FN(HashBytes)(const uint8_t *data) { +static uint32_t FN(HashBytes)(const uint8_t* data) { uint32_t h = BROTLI_UNALIGNED_LOAD32LE(data) * kHashMul32; /* The higher bits contain more mixture from the multiplication, so we take our results from there. */ @@ -200,7 +200,7 @@ static BROTLI_INLINE BackwardMatch* FN(StoreAndFindMatches)( sorted by strictly increasing length and (non-strictly) increasing distance. */ static BROTLI_INLINE size_t FN(FindAllMatches)(HasherHandle handle, - const BrotliDictionary* dictionary, const uint8_t* data, + const BrotliEncoderDictionary* dictionary, const uint8_t* data, const size_t ring_buffer_mask, const size_t cur_ix, const size_t max_length, const size_t max_backward, const size_t gap, const BrotliEncoderParams* params, BackwardMatch* matches) { @@ -252,7 +252,7 @@ static BROTLI_INLINE size_t FN(FindAllMatches)(HasherHandle handle, uint32_t dict_id = dict_matches[l]; if (dict_id < kInvalidMatch) { size_t distance = max_backward + gap + (dict_id >> 5) + 1; - if (distance < BROTLI_MAX_DISTANCE) { + if (distance <= params->dist.max_distance) { InitDictionaryBackwardMatch(matches++, distance, l, dict_id & 31); } } @@ -265,7 +265,7 @@ static BROTLI_INLINE size_t FN(FindAllMatches)(HasherHandle handle, /* Stores the hash of the next 4 bytes and re-roots the binary tree at the current sequence, without returning any matches. REQUIRES: ix + MAX_TREE_COMP_LENGTH <= end-of-current-block */ -static BROTLI_INLINE void FN(Store)(HasherHandle handle, const uint8_t *data, +static BROTLI_INLINE void FN(Store)(HasherHandle handle, const uint8_t* data, const size_t mask, const size_t ix) { HashToBinaryTree* self = FN(Self)(handle); /* Maximum distance is window size - 16, see section 9.1. of the spec. */ @@ -275,7 +275,7 @@ static BROTLI_INLINE void FN(Store)(HasherHandle handle, const uint8_t *data, } static BROTLI_INLINE void FN(StoreRange)(HasherHandle handle, - const uint8_t *data, const size_t mask, const size_t ix_start, + const uint8_t* data, const size_t mask, const size_t ix_start, const size_t ix_end) { size_t i = ix_start; size_t j = ix_start; diff --git a/c/enc/histogram.c b/c/enc/histogram.c index bb7b4c5..6da2ff6 100644 --- a/c/enc/histogram.c +++ b/c/enc/histogram.c @@ -8,9 +8,9 @@ #include "./histogram.h" +#include "../common/context.h" #include "./block_splitter.h" #include "./command.h" -#include "./context.h" #if defined(__cplusplus) || defined(c_plusplus) extern "C" { @@ -63,13 +63,16 @@ void BrotliBuildHistogramsWithContext( BlockSplitIteratorNext(&insert_and_copy_it); HistogramAddCommand(&insert_and_copy_histograms[insert_and_copy_it.type_], cmd->cmd_prefix_); + /* TODO: unwrap iterator blocks. */ for (j = cmd->insert_len_; j != 0; --j) { size_t context; BlockSplitIteratorNext(&literal_it); - context = context_modes ? - ((literal_it.type_ << BROTLI_LITERAL_CONTEXT_BITS) + - Context(prev_byte, prev_byte2, context_modes[literal_it.type_])) : - literal_it.type_; + context = literal_it.type_; + if (context_modes) { + ContextLut lut = BROTLI_CONTEXT_LUT(context_modes[context]); + context = (context << BROTLI_LITERAL_CONTEXT_BITS) + + BROTLI_CONTEXT(prev_byte, prev_byte2, lut); + } HistogramAddLiteral(&literal_histograms[context], ringbuffer[pos & mask]); prev_byte2 = prev_byte; @@ -86,7 +89,7 @@ void BrotliBuildHistogramsWithContext( context = (dist_it.type_ << BROTLI_DISTANCE_CONTEXT_BITS) + CommandDistanceContext(cmd); HistogramAddDistance(©_dist_histograms[context], - cmd->dist_prefix_); + cmd->dist_prefix_ & 0x3FF); } } } diff --git a/c/enc/histogram.h b/c/enc/histogram.h index b1b8d11..42af3c3 100644 --- a/c/enc/histogram.h +++ b/c/enc/histogram.h @@ -12,16 +12,19 @@ #include <string.h> /* memset */ #include "../common/constants.h" +#include "../common/context.h" #include "../common/platform.h" #include <brotli/types.h> #include "./block_splitter.h" #include "./command.h" -#include "./context.h" #if defined(__cplusplus) || defined(c_plusplus) extern "C" { #endif +/* The distance symbols effectively used by "Large Window Brotli" (32-bit). */ +#define BROTLI_NUM_HISTOGRAM_DISTANCE_SYMBOLS 544 + #define FN(X) X ## Literal #define DATA_SIZE BROTLI_NUM_LITERAL_SYMBOLS #define DataType uint8_t @@ -38,7 +41,7 @@ extern "C" { #undef FN #define FN(X) X ## Distance -#define DATA_SIZE BROTLI_NUM_DISTANCE_SYMBOLS +#define DATA_SIZE BROTLI_NUM_HISTOGRAM_DISTANCE_SYMBOLS #include "./histogram_inc.h" /* NOLINT(build/include) */ #undef DataType #undef DATA_SIZE diff --git a/c/enc/histogram_inc.h b/c/enc/histogram_inc.h index 7807036..50eaf74 100644 --- a/c/enc/histogram_inc.h +++ b/c/enc/histogram_inc.h @@ -33,7 +33,7 @@ static BROTLI_INLINE void FN(HistogramAdd)(FN(Histogram)* self, size_t val) { } static BROTLI_INLINE void FN(HistogramAddVector)(FN(Histogram)* self, - const DataType *p, size_t n) { + const DataType* p, size_t n) { self->total_count_ += n; n += 1; while (--n) ++self->data_[*p++]; diff --git a/c/enc/literal_cost.c b/c/enc/literal_cost.c index 9bcb680..c231100 100644 --- a/c/enc/literal_cost.c +++ b/c/enc/literal_cost.c @@ -25,7 +25,7 @@ static size_t UTF8Position(size_t last, size_t c, size_t clamp) { return BROTLI_MIN(size_t, 1, clamp); } else { /* Let's decide over the last byte if this ends the sequence. */ - if (last < 0xe0) { + if (last < 0xE0) { return 0; /* Completed two or three byte coding. */ } else { /* Next one is the 'Byte 3' of utf-8 encoding. */ return BROTLI_MIN(size_t, 2, clamp); @@ -34,7 +34,7 @@ static size_t UTF8Position(size_t last, size_t c, size_t clamp) { } static size_t DecideMultiByteStatsLevel(size_t pos, size_t len, size_t mask, - const uint8_t *data) { + const uint8_t* data) { size_t counts[3] = { 0 }; size_t max_utf8 = 1; /* should be 2, but 1 compresses better. */ size_t last_c = 0; @@ -54,7 +54,7 @@ static size_t DecideMultiByteStatsLevel(size_t pos, size_t len, size_t mask, } static void EstimateBitCostsForLiteralsUTF8(size_t pos, size_t len, size_t mask, - const uint8_t *data, float *cost) { + const uint8_t* data, float* cost) { /* max_utf8 is 0 (normal ASCII single byte modeling), 1 (for 2-byte UTF-8 modeling), or 2 (for 3-byte UTF-8 modeling). */ const size_t max_utf8 = DecideMultiByteStatsLevel(pos, len, mask, data); @@ -125,7 +125,7 @@ static void EstimateBitCostsForLiteralsUTF8(size_t pos, size_t len, size_t mask, } void BrotliEstimateBitCostsForLiterals(size_t pos, size_t len, size_t mask, - const uint8_t *data, float *cost) { + const uint8_t* data, float* cost) { if (BrotliIsMostlyUTF8(data, pos, mask, len, kMinUTF8Ratio)) { EstimateBitCostsForLiteralsUTF8(pos, len, mask, data, cost); return; diff --git a/c/enc/literal_cost.h b/c/enc/literal_cost.h index d2f430c..8f53f39 100644 --- a/c/enc/literal_cost.h +++ b/c/enc/literal_cost.h @@ -21,7 +21,7 @@ extern "C" { ring-buffer (data, mask) will take entropy coded and writes these estimates to the cost[0..len) array. */ BROTLI_INTERNAL void BrotliEstimateBitCostsForLiterals( - size_t pos, size_t len, size_t mask, const uint8_t *data, float *cost); + size_t pos, size_t len, size_t mask, const uint8_t* data, float* cost); #if defined(__cplusplus) || defined(c_plusplus) } /* extern "C" */ diff --git a/c/enc/memory.c b/c/enc/memory.c index 3716b98..f6ed7e3 100644 --- a/c/enc/memory.c +++ b/c/enc/memory.c @@ -27,22 +27,12 @@ extern "C" { #define NEW_ALLOCATED_OFFSET MAX_PERM_ALLOCATED #define NEW_FREED_OFFSET (MAX_PERM_ALLOCATED + MAX_NEW_ALLOCATED) -static void* DefaultAllocFunc(void* opaque, size_t size) { - BROTLI_UNUSED(opaque); - return malloc(size); -} - -static void DefaultFreeFunc(void* opaque, void* address) { - BROTLI_UNUSED(opaque); - free(address); -} - void BrotliInitMemoryManager( MemoryManager* m, brotli_alloc_func alloc_func, brotli_free_func free_func, void* opaque) { if (!alloc_func) { - m->alloc_func = DefaultAllocFunc; - m->free_func = DefaultFreeFunc; + m->alloc_func = BrotliDefaultAllocFunc; + m->free_func = BrotliDefaultFreeFunc; m->opaque = 0; } else { m->alloc_func = alloc_func; diff --git a/c/enc/metablock.c b/c/enc/metablock.c index 50f2ea2..6219292 100644 --- a/c/enc/metablock.c +++ b/c/enc/metablock.c @@ -10,12 +10,12 @@ #include "./metablock.h" #include "../common/constants.h" +#include "../common/context.h" #include "../common/platform.h" #include <brotli/types.h> #include "./bit_cost.h" #include "./block_splitter.h" #include "./cluster.h" -#include "./context.h" #include "./entropy_encode.h" #include "./histogram.h" #include "./memory.h" @@ -398,9 +398,9 @@ static void MapStaticContexts(MemoryManager* m, static BROTLI_INLINE void BrotliBuildMetaBlockGreedyInternal( MemoryManager* m, const uint8_t* ringbuffer, size_t pos, size_t mask, - uint8_t prev_byte, uint8_t prev_byte2, ContextType literal_context_mode, + uint8_t prev_byte, uint8_t prev_byte2, ContextLut literal_context_lut, const size_t num_contexts, const uint32_t* static_context_map, - const Command *commands, size_t n_commands, MetaBlockSplit* mb) { + const Command* commands, size_t n_commands, MetaBlockSplit* mb) { union { BlockSplitterLiteral plain; ContextBlockSplitter ctx; @@ -441,7 +441,8 @@ static BROTLI_INLINE void BrotliBuildMetaBlockGreedyInternal( if (num_contexts == 1) { BlockSplitterAddSymbolLiteral(&lit_blocks.plain, literal); } else { - size_t context = Context(prev_byte, prev_byte2, literal_context_mode); + size_t context = + BROTLI_CONTEXT(prev_byte, prev_byte2, literal_context_lut); ContextBlockSplitterAddSymbol(&lit_blocks.ctx, m, literal, static_context_map[context]); if (BROTLI_IS_OOM(m)) return; @@ -455,7 +456,7 @@ static BROTLI_INLINE void BrotliBuildMetaBlockGreedyInternal( prev_byte2 = ringbuffer[(pos - 2) & mask]; prev_byte = ringbuffer[(pos - 1) & mask]; if (cmd.cmd_prefix_ >= 128) { - BlockSplitterAddSymbolDistance(&dist_blocks, cmd.dist_prefix_); + BlockSplitterAddSymbolDistance(&dist_blocks, cmd.dist_prefix_ & 0x3FF); } } } @@ -482,7 +483,7 @@ void BrotliBuildMetaBlockGreedy(MemoryManager* m, size_t mask, uint8_t prev_byte, uint8_t prev_byte2, - ContextType literal_context_mode, + ContextLut literal_context_lut, size_t num_contexts, const uint32_t* static_context_map, const Command* commands, @@ -490,19 +491,17 @@ void BrotliBuildMetaBlockGreedy(MemoryManager* m, MetaBlockSplit* mb) { if (num_contexts == 1) { BrotliBuildMetaBlockGreedyInternal(m, ringbuffer, pos, mask, prev_byte, - prev_byte2, literal_context_mode, 1, NULL, commands, n_commands, mb); + prev_byte2, literal_context_lut, 1, NULL, commands, n_commands, mb); } else { BrotliBuildMetaBlockGreedyInternal(m, ringbuffer, pos, mask, prev_byte, - prev_byte2, literal_context_mode, num_contexts, static_context_map, + prev_byte2, literal_context_lut, num_contexts, static_context_map, commands, n_commands, mb); } } -void BrotliOptimizeHistograms(size_t num_direct_distance_codes, - size_t distance_postfix_bits, +void BrotliOptimizeHistograms(uint32_t num_distance_codes, MetaBlockSplit* mb) { uint8_t good_for_rle[BROTLI_NUM_COMMAND_SYMBOLS]; - size_t num_distance_codes; size_t i; for (i = 0; i < mb->literal_histograms_size; ++i) { BrotliOptimizeHuffmanCountsForRle(256, mb->literal_histograms[i].data_, @@ -513,9 +512,6 @@ void BrotliOptimizeHistograms(size_t num_direct_distance_codes, mb->command_histograms[i].data_, good_for_rle); } - num_distance_codes = BROTLI_NUM_DISTANCE_SHORT_CODES + - num_direct_distance_codes + - ((2 * BROTLI_MAX_DISTANCE_BITS) << distance_postfix_bits); for (i = 0; i < mb->distance_histograms_size; ++i) { BrotliOptimizeHuffmanCountsForRle(num_distance_codes, mb->distance_histograms[i].data_, diff --git a/c/enc/metablock.h b/c/enc/metablock.h index 3fa6d65..76a6594 100644 --- a/c/enc/metablock.h +++ b/c/enc/metablock.h @@ -10,11 +10,11 @@ #ifndef BROTLI_ENC_METABLOCK_H_ #define BROTLI_ENC_METABLOCK_H_ +#include "../common/context.h" #include "../common/platform.h" #include <brotli/types.h> #include "./block_splitter.h" #include "./command.h" -#include "./context.h" #include "./histogram.h" #include "./memory.h" #include "./quality.h" @@ -85,12 +85,11 @@ BROTLI_INTERNAL void BrotliBuildMetaBlock(MemoryManager* m, is the same for all block types. */ BROTLI_INTERNAL void BrotliBuildMetaBlockGreedy( MemoryManager* m, const uint8_t* ringbuffer, size_t pos, size_t mask, - uint8_t prev_byte, uint8_t prev_byte2, ContextType literal_context_mode, + uint8_t prev_byte, uint8_t prev_byte2, ContextLut literal_context_lut, size_t num_contexts, const uint32_t* static_context_map, const Command* commands, size_t n_commands, MetaBlockSplit* mb); -BROTLI_INTERNAL void BrotliOptimizeHistograms(size_t num_direct_distance_codes, - size_t distance_postfix_bits, +BROTLI_INTERNAL void BrotliOptimizeHistograms(uint32_t num_distance_codes, MetaBlockSplit* mb); #if defined(__cplusplus) || defined(c_plusplus) diff --git a/c/enc/params.h b/c/enc/params.h index acb3668..9bcf236 100755 --- a/c/enc/params.h +++ b/c/enc/params.h @@ -10,6 +10,7 @@ #define BROTLI_ENC_PARAMS_H_ #include <brotli/encode.h> +#include "./encoder_dict.h" typedef struct BrotliHasherParams { int type; @@ -19,6 +20,13 @@ typedef struct BrotliHasherParams { int num_last_distances_to_check; } BrotliHasherParams; +typedef struct BrotliDistanceParams { + uint32_t num_direct_distance_codes; + uint32_t distance_postfix_bits; + uint32_t alphabet_size; + size_t max_distance; +} BrotliDistanceParams; + /* Encoding parameters */ typedef struct BrotliEncoderParams { BrotliEncoderMode mode; @@ -27,7 +35,10 @@ typedef struct BrotliEncoderParams { int lgblock; size_t size_hint; BROTLI_BOOL disable_literal_context_modeling; + BROTLI_BOOL large_window; BrotliHasherParams hasher; + BrotliDistanceParams dist; + BrotliEncoderDictionary dictionary; } BrotliEncoderParams; #endif /* BROTLI_ENC_PARAMS_H_ */ diff --git a/c/enc/prefix.h b/c/enc/prefix.h index 0168d4e..fd359a4 100644 --- a/c/enc/prefix.h +++ b/c/enc/prefix.h @@ -39,11 +39,10 @@ static BROTLI_INLINE void PrefixEncodeCopyDistance(size_t distance_code, size_t prefix = (dist >> bucket) & 1; size_t offset = (2 + prefix) << bucket; size_t nbits = bucket - postfix_bits; - *code = (uint16_t)( + *code = (uint16_t)((nbits << 10) | (BROTLI_NUM_DISTANCE_SHORT_CODES + num_direct_codes + ((2 * (nbits - 1) + prefix) << postfix_bits) + postfix)); - *extra_bits = (uint32_t)( - (nbits << 24) | ((dist - offset) >> postfix_bits)); + *extra_bits = (uint32_t)((dist - offset) >> postfix_bits); } } diff --git a/c/enc/quality.h b/c/enc/quality.h index 80b7051..f9b1111 100644 --- a/c/enc/quality.h +++ b/c/enc/quality.h @@ -31,7 +31,7 @@ /* For quality below MIN_QUALITY_FOR_BLOCK_SPLIT there is no block splitting, so we buffer at most this much literals and commands. */ -#define MAX_NUM_DELAYED_SYMBOLS 0x2fff +#define MAX_NUM_DELAYED_SYMBOLS 0x2FFF /* Returns hash-table size for quality levels 0 and 1. */ static BROTLI_INLINE size_t MaxHashTableSize(int quality) { @@ -60,10 +60,15 @@ static BROTLI_INLINE size_t MaxZopfliCandidates( static BROTLI_INLINE void SanitizeParams(BrotliEncoderParams* params) { params->quality = BROTLI_MIN(int, BROTLI_MAX_QUALITY, BROTLI_MAX(int, BROTLI_MIN_QUALITY, params->quality)); + if (params->quality <= MAX_QUALITY_FOR_STATIC_ENTROPY_CODES) { + params->large_window = BROTLI_FALSE; + } if (params->lgwin < BROTLI_MIN_WINDOW_BITS) { params->lgwin = BROTLI_MIN_WINDOW_BITS; - } else if (params->lgwin > BROTLI_MAX_WINDOW_BITS) { - params->lgwin = BROTLI_MAX_WINDOW_BITS; + } else { + int max_lgwin = params->large_window ? BROTLI_LARGE_MAX_WINDOW_BITS : + BROTLI_MAX_WINDOW_BITS; + if (params->lgwin > max_lgwin) params->lgwin = max_lgwin; } } diff --git a/c/enc/ringbuffer.h b/c/enc/ringbuffer.h index 4e58749..86079a8 100644 --- a/c/enc/ringbuffer.h +++ b/c/enc/ringbuffer.h @@ -41,9 +41,9 @@ typedef struct RingBuffer { uint32_t pos_; /* The actual ring buffer containing the copy of the last two bytes, the data, and the copy of the beginning as a tail. */ - uint8_t *data_; + uint8_t* data_; /* The start of the ring-buffer. */ - uint8_t *buffer_; + uint8_t* buffer_; } RingBuffer; static BROTLI_INLINE void RingBufferInit(RingBuffer* rb) { @@ -91,7 +91,7 @@ static BROTLI_INLINE void RingBufferInitBuffer( } static BROTLI_INLINE void RingBufferWriteTail( - const uint8_t *bytes, size_t n, RingBuffer* rb) { + const uint8_t* bytes, size_t n, RingBuffer* rb) { const size_t masked_pos = rb->pos_ & rb->mask_; if (BROTLI_PREDICT_FALSE(masked_pos < rb->tail_size_)) { /* Just fill the tail buffer with the beginning data. */ @@ -103,7 +103,7 @@ static BROTLI_INLINE void RingBufferWriteTail( /* Push bytes into the ring buffer. */ static BROTLI_INLINE void RingBufferWrite( - MemoryManager* m, const uint8_t *bytes, size_t n, RingBuffer* rb) { + MemoryManager* m, const uint8_t* bytes, size_t n, RingBuffer* rb) { if (rb->pos_ == 0 && n < rb->tail_size_) { /* Special case for the first write: to process the first block, we don't need to allocate the whole ring-buffer and we don't need the tail @@ -144,12 +144,16 @@ static BROTLI_INLINE void RingBufferWrite( n - (rb->size_ - masked_pos)); } } - rb->buffer_[-2] = rb->buffer_[rb->size_ - 2]; - rb->buffer_[-1] = rb->buffer_[rb->size_ - 1]; - rb->pos_ += (uint32_t)n; - if (rb->pos_ > (1u << 30)) { - /* Wrap, but preserve not-a-first-lap feature. */ - rb->pos_ = (rb->pos_ & ((1u << 30) - 1)) | (1u << 30); + { + BROTLI_BOOL not_first_lap = (rb->pos_ & (1u << 31)) != 0; + uint32_t rb_pos_mask = (1u << 31) - 1; + rb->buffer_[-2] = rb->buffer_[rb->size_ - 2]; + rb->buffer_[-1] = rb->buffer_[rb->size_ - 1]; + rb->pos_ = (rb->pos_ & rb_pos_mask) + (uint32_t)(n & rb_pos_mask); + if (not_first_lap) { + /* Wrap, but preserve not-a-first-lap feature. */ + rb->pos_ |= 1u << 31; + } } } diff --git a/c/enc/static_dict.c b/c/enc/static_dict.c index 36caa61..758ef80 100644 --- a/c/enc/static_dict.c +++ b/c/enc/static_dict.c @@ -8,19 +8,20 @@ #include "../common/dictionary.h" #include "../common/platform.h" +#include "../common/transform.h" +#include "./encoder_dict.h" #include "./find_match_length.h" -#include "./static_dict_lut.h" #if defined(__cplusplus) || defined(c_plusplus) extern "C" { #endif -static const uint8_t kUppercaseFirst = 10; +/* TODO: use BrotliTransforms.cutOffTransforms instead. */ static const uint8_t kOmitLastNTransforms[10] = { 0, 12, 27, 23, 42, 63, 56, 48, 59, 64, }; -static BROTLI_INLINE uint32_t Hash(const uint8_t *data) { +static BROTLI_INLINE uint32_t Hash(const uint8_t* data) { uint32_t h = BROTLI_UNALIGNED_LOAD32LE(data) * kDictHashMul32; /* The higher bits contain more mixture from the multiplication, so we take our results from there. */ @@ -79,32 +80,33 @@ static BROTLI_INLINE BROTLI_BOOL IsMatch(const BrotliDictionary* dictionary, } BROTLI_BOOL BrotliFindAllStaticDictionaryMatches( - const BrotliDictionary* dictionary, const uint8_t* data, + const BrotliEncoderDictionary* dictionary, const uint8_t* data, size_t min_length, size_t max_length, uint32_t* matches) { BROTLI_BOOL has_found_match = BROTLI_FALSE; { - size_t offset = kStaticDictionaryBuckets[Hash(data)]; + size_t offset = dictionary->buckets[Hash(data)]; BROTLI_BOOL end = !offset; while (!end) { - DictWord w = kStaticDictionaryWords[offset++]; + DictWord w = dictionary->dict_words[offset++]; const size_t l = w.len & 0x1F; - const size_t n = (size_t)1 << dictionary->size_bits_by_length[l]; + const size_t n = (size_t)1 << dictionary->words->size_bits_by_length[l]; const size_t id = w.idx; end = !!(w.len & 0x80); w.len = (uint8_t)l; if (w.transform == 0) { const size_t matchlen = - DictMatchLength(dictionary, data, id, l, max_length); + DictMatchLength(dictionary->words, data, id, l, max_length); const uint8_t* s; size_t minlen; size_t maxlen; size_t len; - /* Transform "" + kIdentity + "" */ + /* Transform "" + BROTLI_TRANSFORM_IDENTITY + "" */ if (matchlen == l) { AddMatch(id, l, l, matches); has_found_match = BROTLI_TRUE; } - /* Transforms "" + kOmitLast1 + "" and "" + kOmitLast1 + "ing " */ + /* Transforms "" + BROTLI_TRANSFORM_OMIT_LAST_1 + "" and + "" + BROTLI_TRANSFORM_OMIT_LAST_1 + "ing " */ if (matchlen >= l - 1) { AddMatch(id + 12 * n, l - 1, l, matches); if (l + 2 < max_length && @@ -114,7 +116,7 @@ BROTLI_BOOL BrotliFindAllStaticDictionaryMatches( } has_found_match = BROTLI_TRUE; } - /* Transform "" + kOmitLastN + "" (N = 2 .. 9) */ + /* Transform "" + BROTLI_TRANSFORM_OMIT_LAST_# + "" (# = 2 .. 9) */ minlen = min_length; if (l > 9) minlen = BROTLI_MAX(size_t, minlen, l - 9); maxlen = BROTLI_MIN(size_t, matchlen, l - 2); @@ -126,7 +128,7 @@ BROTLI_BOOL BrotliFindAllStaticDictionaryMatches( continue; } s = &data[l]; - /* Transforms "" + kIdentity + <suffix> */ + /* Transforms "" + BROTLI_TRANSFORM_IDENTITY + <suffix> */ if (s[0] == ' ') { AddMatch(id + n, l + 1, l, matches); if (s[1] == 'a') { @@ -273,12 +275,13 @@ BROTLI_BOOL BrotliFindAllStaticDictionaryMatches( } } } else { - /* Set is_all_caps=0 for kUppercaseFirst and - is_all_caps=1 otherwise (kUppercaseAll) transform. */ + /* Set is_all_caps=0 for BROTLI_TRANSFORM_UPPERCASE_FIRST and + is_all_caps=1 otherwise (BROTLI_TRANSFORM_UPPERCASE_ALL) + transform. */ const BROTLI_BOOL is_all_caps = - TO_BROTLI_BOOL(w.transform != kUppercaseFirst); + TO_BROTLI_BOOL(w.transform != BROTLI_TRANSFORM_UPPERCASE_FIRST); const uint8_t* s; - if (!IsMatch(dictionary, w, data, max_length)) { + if (!IsMatch(dictionary->words, w, data, max_length)) { continue; } /* Transform "" + kUppercase{First,All} + "" */ @@ -323,27 +326,29 @@ BROTLI_BOOL BrotliFindAllStaticDictionaryMatches( /* Transforms with prefixes " " and "." */ if (max_length >= 5 && (data[0] == ' ' || data[0] == '.')) { BROTLI_BOOL is_space = TO_BROTLI_BOOL(data[0] == ' '); - size_t offset = kStaticDictionaryBuckets[Hash(&data[1])]; + size_t offset = dictionary->buckets[Hash(&data[1])]; BROTLI_BOOL end = !offset; while (!end) { - DictWord w = kStaticDictionaryWords[offset++]; + DictWord w = dictionary->dict_words[offset++]; const size_t l = w.len & 0x1F; - const size_t n = (size_t)1 << dictionary->size_bits_by_length[l]; + const size_t n = (size_t)1 << dictionary->words->size_bits_by_length[l]; const size_t id = w.idx; end = !!(w.len & 0x80); w.len = (uint8_t)l; if (w.transform == 0) { const uint8_t* s; - if (!IsMatch(dictionary, w, &data[1], max_length - 1)) { + if (!IsMatch(dictionary->words, w, &data[1], max_length - 1)) { continue; } - /* Transforms " " + kIdentity + "" and "." + kIdentity + "" */ + /* Transforms " " + BROTLI_TRANSFORM_IDENTITY + "" and + "." + BROTLI_TRANSFORM_IDENTITY + "" */ AddMatch(id + (is_space ? 6 : 32) * n, l + 1, l, matches); has_found_match = BROTLI_TRUE; if (l + 2 >= max_length) { continue; } - /* Transforms " " + kIdentity + <suffix> and "." + kIdentity + <suffix> + /* Transforms " " + BROTLI_TRANSFORM_IDENTITY + <suffix> and + "." + BROTLI_TRANSFORM_IDENTITY + <suffix> */ s = &data[l + 1]; if (s[0] == ' ') { @@ -370,12 +375,13 @@ BROTLI_BOOL BrotliFindAllStaticDictionaryMatches( } } } else if (is_space) { - /* Set is_all_caps=0 for kUppercaseFirst and - is_all_caps=1 otherwise (kUppercaseAll) transform. */ + /* Set is_all_caps=0 for BROTLI_TRANSFORM_UPPERCASE_FIRST and + is_all_caps=1 otherwise (BROTLI_TRANSFORM_UPPERCASE_ALL) + transform. */ const BROTLI_BOOL is_all_caps = - TO_BROTLI_BOOL(w.transform != kUppercaseFirst); + TO_BROTLI_BOOL(w.transform != BROTLI_TRANSFORM_UPPERCASE_FIRST); const uint8_t* s; - if (!IsMatch(dictionary, w, &data[1], max_length - 1)) { + if (!IsMatch(dictionary->words, w, &data[1], max_length - 1)) { continue; } /* Transforms " " + kUppercase{First,All} + "" */ @@ -411,22 +417,22 @@ BROTLI_BOOL BrotliFindAllStaticDictionaryMatches( } } if (max_length >= 6) { - /* Transforms with prefixes "e ", "s ", ", " and "\xc2\xa0" */ + /* Transforms with prefixes "e ", "s ", ", " and "\xC2\xA0" */ if ((data[1] == ' ' && (data[0] == 'e' || data[0] == 's' || data[0] == ',')) || - (data[0] == 0xc2 && data[1] == 0xa0)) { - size_t offset = kStaticDictionaryBuckets[Hash(&data[2])]; + (data[0] == 0xC2 && data[1] == 0xA0)) { + size_t offset = dictionary->buckets[Hash(&data[2])]; BROTLI_BOOL end = !offset; while (!end) { - DictWord w = kStaticDictionaryWords[offset++]; + DictWord w = dictionary->dict_words[offset++]; const size_t l = w.len & 0x1F; - const size_t n = (size_t)1 << dictionary->size_bits_by_length[l]; + const size_t n = (size_t)1 << dictionary->words->size_bits_by_length[l]; const size_t id = w.idx; end = !!(w.len & 0x80); w.len = (uint8_t)l; if (w.transform == 0 && - IsMatch(dictionary, w, &data[2], max_length - 2)) { - if (data[0] == 0xc2) { + IsMatch(dictionary->words, w, &data[2], max_length - 2)) { + if (data[0] == 0xC2) { AddMatch(id + 102 * n, l + 2, l, matches); has_found_match = BROTLI_TRUE; } else if (l + 2 < max_length && data[l + 2] == ' ') { @@ -444,17 +450,17 @@ BROTLI_BOOL BrotliFindAllStaticDictionaryMatches( data[3] == 'e' && data[4] == ' ') || (data[0] == '.' && data[1] == 'c' && data[2] == 'o' && data[3] == 'm' && data[4] == '/')) { - size_t offset = kStaticDictionaryBuckets[Hash(&data[5])]; + size_t offset = dictionary->buckets[Hash(&data[5])]; BROTLI_BOOL end = !offset; while (!end) { - DictWord w = kStaticDictionaryWords[offset++]; + DictWord w = dictionary->dict_words[offset++]; const size_t l = w.len & 0x1F; - const size_t n = (size_t)1 << dictionary->size_bits_by_length[l]; + const size_t n = (size_t)1 << dictionary->words->size_bits_by_length[l]; const size_t id = w.idx; end = !!(w.len & 0x80); w.len = (uint8_t)l; if (w.transform == 0 && - IsMatch(dictionary, w, &data[5], max_length - 5)) { + IsMatch(dictionary->words, w, &data[5], max_length - 5)) { AddMatch(id + (data[0] == ' ' ? 41 : 72) * n, l + 5, l, matches); has_found_match = BROTLI_TRUE; if (l + 5 < max_length) { diff --git a/c/enc/static_dict.h b/c/enc/static_dict.h index 0b3f6b3..6b5d4eb 100644 --- a/c/enc/static_dict.h +++ b/c/enc/static_dict.h @@ -12,13 +12,14 @@ #include "../common/dictionary.h" #include "../common/platform.h" #include <brotli/types.h> +#include "./encoder_dict.h" #if defined(__cplusplus) || defined(c_plusplus) extern "C" { #endif #define BROTLI_MAX_STATIC_DICTIONARY_MATCH_LEN 37 -static const uint32_t kInvalidMatch = 0xfffffff; +static const uint32_t kInvalidMatch = 0xFFFFFFF; /* Matches data against static dictionary words, and for each length l, for which a match is found, updates matches[l] to be the minimum possible @@ -28,7 +29,7 @@ static const uint32_t kInvalidMatch = 0xfffffff; matches array is at least BROTLI_MAX_STATIC_DICTIONARY_MATCH_LEN + 1 long all elements are initialized to kInvalidMatch */ BROTLI_INTERNAL BROTLI_BOOL BrotliFindAllStaticDictionaryMatches( - const BrotliDictionary* dictionary, + const BrotliEncoderDictionary* dictionary, const uint8_t* data, size_t min_length, size_t max_length, uint32_t* matches); diff --git a/c/enc/static_dict_lut.h b/c/enc/static_dict_lut.h index ba94f76..e299cda 100644 --- a/c/enc/static_dict_lut.h +++ b/c/enc/static_dict_lut.h @@ -23,7 +23,7 @@ typedef struct DictWord { } DictWord; static const int kDictNumBits = 15; -static const uint32_t kDictHashMul32 = 0x1e35a7bd; +static const uint32_t kDictHashMul32 = 0x1E35A7BD; static const uint16_t kStaticDictionaryBuckets[32768] = { 1,0,0,0,0,0,0,0,0,3,6,0,0,0,0,0,20,0,0,0,21,0,22,0,0,0,0,0,0,0,0,23,0,0,25,0,29, diff --git a/c/enc/utf8_util.c b/c/enc/utf8_util.c index a334927..04a7805 100644 --- a/c/enc/utf8_util.c +++ b/c/enc/utf8_util.c @@ -25,37 +25,37 @@ static size_t BrotliParseAsUTF8( } /* 2-byte UTF8 */ if (size > 1u && - (input[0] & 0xe0) == 0xc0 && - (input[1] & 0xc0) == 0x80) { - *symbol = (((input[0] & 0x1f) << 6) | - (input[1] & 0x3f)); - if (*symbol > 0x7f) { + (input[0] & 0xE0) == 0xC0 && + (input[1] & 0xC0) == 0x80) { + *symbol = (((input[0] & 0x1F) << 6) | + (input[1] & 0x3F)); + if (*symbol > 0x7F) { return 2; } } /* 3-byte UFT8 */ if (size > 2u && - (input[0] & 0xf0) == 0xe0 && - (input[1] & 0xc0) == 0x80 && - (input[2] & 0xc0) == 0x80) { - *symbol = (((input[0] & 0x0f) << 12) | - ((input[1] & 0x3f) << 6) | - (input[2] & 0x3f)); - if (*symbol > 0x7ff) { + (input[0] & 0xF0) == 0xE0 && + (input[1] & 0xC0) == 0x80 && + (input[2] & 0xC0) == 0x80) { + *symbol = (((input[0] & 0x0F) << 12) | + ((input[1] & 0x3F) << 6) | + (input[2] & 0x3F)); + if (*symbol > 0x7FF) { return 3; } } /* 4-byte UFT8 */ if (size > 3u && - (input[0] & 0xf8) == 0xf0 && - (input[1] & 0xc0) == 0x80 && - (input[2] & 0xc0) == 0x80 && - (input[3] & 0xc0) == 0x80) { + (input[0] & 0xF8) == 0xF0 && + (input[1] & 0xC0) == 0x80 && + (input[2] & 0xC0) == 0x80 && + (input[3] & 0xC0) == 0x80) { *symbol = (((input[0] & 0x07) << 18) | - ((input[1] & 0x3f) << 12) | - ((input[2] & 0x3f) << 6) | - (input[3] & 0x3f)); - if (*symbol > 0xffff && *symbol <= 0x10ffff) { + ((input[1] & 0x3F) << 12) | + ((input[2] & 0x3F) << 6) | + (input[3] & 0x3F)); + if (*symbol > 0xFFFF && *symbol <= 0x10FFFF) { return 4; } } diff --git a/c/enc/write_bits.h b/c/enc/write_bits.h index efa66f8..7733d92 100644 --- a/c/enc/write_bits.h +++ b/c/enc/write_bits.h @@ -35,25 +35,27 @@ extern "C" { and locate the rest in BYTE+1, BYTE+2, etc. */ static BROTLI_INLINE void BrotliWriteBits(size_t n_bits, uint64_t bits, - size_t * BROTLI_RESTRICT pos, - uint8_t * BROTLI_RESTRICT array) { + size_t* BROTLI_RESTRICT pos, + uint8_t* BROTLI_RESTRICT array) { #ifdef BROTLI_LITTLE_ENDIAN /* This branch of the code can write up to 56 bits at a time, 7 bits are lost by being perhaps already in *p and at least 1 bit is needed to initialize the bit-stream ahead (i.e. if 7 bits are in *p and we write 57 bits, then the next write will access a byte that was never initialized). */ - uint8_t *p = &array[*pos >> 3]; + uint8_t* p = &array[*pos >> 3]; uint64_t v = *p; - BROTLI_LOG(("WriteBits %2d 0x%016llx %10d\n", n_bits, bits, *pos)); + BROTLI_LOG(("WriteBits %2d 0x%08x%08x %10d\n", (int)n_bits, + (uint32_t)(bits >> 32), (uint32_t)(bits & 0xFFFFFFFF), + (int)*pos)); BROTLI_DCHECK((bits >> n_bits) == 0); BROTLI_DCHECK(n_bits <= 56); v |= bits << (*pos & 7); BROTLI_UNALIGNED_STORE64LE(p, v); /* Set some bits. */ *pos += n_bits; #else - /* implicit & 0xff is assumed for uint8_t arithmetics */ - uint8_t *array_pos = &array[*pos >> 3]; + /* implicit & 0xFF is assumed for uint8_t arithmetics */ + uint8_t* array_pos = &array[*pos >> 3]; const size_t bits_reserved_in_first_byte = (*pos & 7); size_t bits_left_to_write; bits <<= bits_reserved_in_first_byte; @@ -70,8 +72,8 @@ static BROTLI_INLINE void BrotliWriteBits(size_t n_bits, } static BROTLI_INLINE void BrotliWriteBitsPrepareStorage( - size_t pos, uint8_t *array) { - BROTLI_LOG(("WriteBitsPrepareStorage %10d\n", pos)); + size_t pos, uint8_t* array) { + BROTLI_LOG(("WriteBitsPrepareStorage %10d\n", (int)pos)); BROTLI_DCHECK((pos & 7) == 0); array[pos >> 3] = 0; } |