diff options
author | Eugene Kliuchnikov <eustas@google.com> | 2018-06-20 15:14:10 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2018-06-20 15:14:10 +0200 |
commit | eb12ec04ebb0e9ebb95bb2f13d1edfb62d7ae51d (patch) | |
tree | 16643827f5033fcee3c798ec1c0db9f96b9bfaf6 /c/enc | |
parent | 7505290ef9880ae636a57f57caf77a8dac56d283 (diff) | |
download | brotli-eb12ec04ebb0e9ebb95bb2f13d1edfb62d7ae51d.zip brotli-eb12ec04ebb0e9ebb95bb2f13d1edfb62d7ae51d.tar.gz brotli-eb12ec04ebb0e9ebb95bb2f13d1edfb62d7ae51d.tar.bz2 |
Update (#688)
* add rolling-composite-hasher for large-window mode
* make API methods explicitly public
Diffstat (limited to 'c/enc')
-rw-r--r-- | c/enc/backward_references.c | 15 | ||||
-rw-r--r-- | c/enc/hash.h | 56 | ||||
-rwxr-xr-x | c/enc/hash_composite_inc.h | 133 | ||||
-rwxr-xr-x | c/enc/hash_rolling_inc.h | 215 | ||||
-rw-r--r-- | c/enc/quality.h | 18 |
5 files changed, 432 insertions, 5 deletions
diff --git a/c/enc/backward_references.c b/c/enc/backward_references.c index 3dd897b..cd023d9 100644 --- a/c/enc/backward_references.c +++ b/c/enc/backward_references.c @@ -97,6 +97,21 @@ static BROTLI_INLINE size_t ComputeDistanceCode(size_t distance, #include "./backward_references_inc.h" #undef HASHER +#define HASHER() H35 +/* NOLINTNEXTLINE(build/include) */ +#include "./backward_references_inc.h" +#undef HASHER + +#define HASHER() H55 +/* NOLINTNEXTLINE(build/include) */ +#include "./backward_references_inc.h" +#undef HASHER + +#define HASHER() H65 +/* NOLINTNEXTLINE(build/include) */ +#include "./backward_references_inc.h" +#undef HASHER + #undef PREFIX #undef EXPORT_FN diff --git a/c/enc/hash.h b/c/enc/hash.h index fc7356b..2602490 100644 --- a/c/enc/hash.h +++ b/c/enc/hash.h @@ -153,14 +153,14 @@ static BROTLI_INLINE BROTLI_BOOL TestStaticDictionaryItem( size_t max_length, size_t max_backward, size_t max_distance, HasherSearchResult* out) { size_t len; - size_t dist; + size_t word_idx; size_t offset; size_t matchlen; size_t backward; score_t score; len = item & 0x1F; - dist = item >> 5; - offset = dictionary->words->offsets_by_length[len] + len * dist; + word_idx = item >> 5; + offset = dictionary->words->offsets_by_length[len] + len * word_idx; if (len > max_length) { return BROTLI_FALSE; } @@ -174,7 +174,7 @@ static BROTLI_INLINE BROTLI_BOOL TestStaticDictionaryItem( size_t cut = len - matchlen; size_t transform_id = (cut << 2) + (size_t)((dictionary->cutoffTransforms >> (cut * 6)) & 0x3F); - backward = max_backward + dist + 1 + + backward = max_backward + 1 + word_idx + (transform_id << dictionary->words->size_bits_by_length[len]); } if (backward > max_distance) { @@ -343,11 +343,57 @@ static BROTLI_INLINE size_t BackwardMatchLengthCode(const BackwardMatch* self) { #undef BUCKET_BITS #undef HASHER +/* fast large window hashers */ + +#define HASHER() HROLLING_FAST +#define CHUNKLEN 32 +#define JUMP 4 +#define NUMBUCKETS 16777216 +#define MASK ((NUMBUCKETS * 64) - 1) +#include "./hash_rolling_inc.h" /* NOLINT(build/include) */ +#undef JUMP +#undef HASHER + + +#define HASHER() HROLLING +#define JUMP 1 +#include "./hash_rolling_inc.h" /* NOLINT(build/include) */ +#undef MASK +#undef NUMBUCKETS +#undef JUMP +#undef CHUNKLEN +#undef HASHER + +#define HASHER() H35 +#define HASHER_A H3 +#define HASHER_B HROLLING_FAST +#include "./hash_composite_inc.h" /* NOLINT(build/include) */ +#undef HASHER_A +#undef HASHER_B +#undef HASHER + +#define HASHER() H55 +#define HASHER_A H54 +#define HASHER_B HROLLING_FAST +#include "./hash_composite_inc.h" /* NOLINT(build/include) */ +#undef HASHER_A +#undef HASHER_B +#undef HASHER + +#define HASHER() H65 +#define HASHER_A H6 +#define HASHER_B HROLLING +#include "./hash_composite_inc.h" /* NOLINT(build/include) */ +#undef HASHER_A +#undef HASHER_B +#undef HASHER + #undef FN #undef CAT #undef EXPAND_CAT -#define FOR_GENERIC_HASHERS(H) H(2) H(3) H(4) H(5) H(6) H(40) H(41) H(42) H(54) +#define FOR_GENERIC_HASHERS(H) H(2) H(3) H(4) H(5) H(6) H(40) H(41) H(42) H(54)\ + H(35) H(55) H(65) #define FOR_ALL_HASHERS(H) FOR_GENERIC_HASHERS(H) H(10) static BROTLI_INLINE void DestroyHasher( diff --git a/c/enc/hash_composite_inc.h b/c/enc/hash_composite_inc.h new file mode 100755 index 0000000..f829a97 --- /dev/null +++ b/c/enc/hash_composite_inc.h @@ -0,0 +1,133 @@ +/* NOLINT(build/header_guard) */ +/* Copyright 2018 Google Inc. All Rights Reserved. + + Distributed under MIT license. + See file LICENSE for detail or copy at https://opensource.org/licenses/MIT +*/ + +/* template parameters: FN, HASHER_A, HASHER_B */ + +/* Composite hasher: This hasher allows to combine two other hashers, HASHER_A + and HASHER_B. */ + +#define HashComposite HASHER() + +#define FN_A(X) EXPAND_CAT(X, HASHER_A) +#define FN_B(X) EXPAND_CAT(X, HASHER_B) + +static BROTLI_INLINE size_t FN(HashTypeLength)(void) { + size_t a = FN_A(HashTypeLength)(); + size_t b = FN_B(HashTypeLength)(); + return a > b ? a : b; +} + +static BROTLI_INLINE size_t FN(StoreLookahead)(void) { + size_t a = FN_A(StoreLookahead)(); + size_t b = FN_B(StoreLookahead)(); + return a > b ? a : b; +} + +typedef struct HashComposite { + HasherHandle ha; + HasherHandle hb; + const BrotliEncoderParams* params; +} HashComposite; + +static BROTLI_INLINE HashComposite* FN(Self)(HasherHandle handle) { + return (HashComposite*)&(GetHasherCommon(handle)[1]); +} + +static void FN(Initialize)( + HasherHandle handle, const BrotliEncoderParams* params) { + HashComposite* self = FN(Self)(handle); + self->ha = 0; + self->hb = 0; + self->params = params; + /* TODO: Initialize of the hashers is defered to Prepare (and params + remembered here) because we don't get the one_shot and input_size params + here that are needed to know the memory size of them. Instead provide + those params to all hashers FN(Initialize) */ +} + +static void FN(Prepare)(HasherHandle handle, BROTLI_BOOL one_shot, + size_t input_size, const uint8_t* data) { + HashComposite* self = FN(Self)(handle); + if (!self->ha) { + HasherCommon* common_a; + HasherCommon* common_b; + + self->ha = handle + sizeof(HasherCommon) + sizeof(HashComposite); + common_a = (HasherCommon*)self->ha; + common_a->params = self->params->hasher; + common_a->is_prepared_ = BROTLI_FALSE; + common_a->dict_num_lookups = 0; + common_a->dict_num_matches = 0; + FN_A(Initialize)(self->ha, self->params); + + self->hb = self->ha + sizeof(HasherCommon) + FN_A(HashMemAllocInBytes)( + self->params, one_shot, input_size); + common_b = (HasherCommon*)self->hb; + common_b->params = self->params->hasher; + common_b->is_prepared_ = BROTLI_FALSE; + common_b->dict_num_lookups = 0; + common_b->dict_num_matches = 0; + FN_B(Initialize)(self->hb, self->params); + } + FN_A(Prepare)(self->ha, one_shot, input_size, data); + FN_B(Prepare)(self->hb, one_shot, input_size, data); +} + +static BROTLI_INLINE size_t FN(HashMemAllocInBytes)( + const BrotliEncoderParams* params, BROTLI_BOOL one_shot, + size_t input_size) { + return sizeof(HashComposite) + 2 * sizeof(HasherCommon) + + FN_A(HashMemAllocInBytes)(params, one_shot, input_size) + + FN_B(HashMemAllocInBytes)(params, one_shot, input_size); +} + +static BROTLI_INLINE void FN(Store)(HasherHandle BROTLI_RESTRICT handle, + const uint8_t* BROTLI_RESTRICT data, const size_t mask, const size_t ix) { + HashComposite* self = FN(Self)(handle); + FN_A(Store)(self->ha, data, mask, ix); + FN_B(Store)(self->hb, data, mask, ix); +} + +static BROTLI_INLINE void FN(StoreRange)(HasherHandle handle, + const uint8_t* data, const size_t mask, const size_t ix_start, + const size_t ix_end) { + HashComposite* self = FN(Self)(handle); + FN_A(StoreRange)(self->ha, data, mask, ix_start, ix_end); + FN_B(StoreRange)(self->hb, data, mask, ix_start, ix_end); +} + +static BROTLI_INLINE void FN(StitchToPreviousBlock)(HasherHandle handle, + size_t num_bytes, size_t position, const uint8_t* ringbuffer, + size_t ring_buffer_mask) { + HashComposite* self = FN(Self)(handle); + FN_A(StitchToPreviousBlock)(self->ha, num_bytes, position, ringbuffer, + ring_buffer_mask); + FN_B(StitchToPreviousBlock)(self->hb, num_bytes, position, ringbuffer, + ring_buffer_mask); +} + +static BROTLI_INLINE void FN(PrepareDistanceCache)( + HasherHandle handle, int* BROTLI_RESTRICT distance_cache) { + HashComposite* self = FN(Self)(handle); + FN_A(PrepareDistanceCache)(self->ha, distance_cache); + FN_B(PrepareDistanceCache)(self->hb, distance_cache); +} + +static BROTLI_INLINE void FN(FindLongestMatch)(HasherHandle handle, + const BrotliEncoderDictionary* dictionary, + const uint8_t* BROTLI_RESTRICT data, const size_t ring_buffer_mask, + const int* BROTLI_RESTRICT distance_cache, const size_t cur_ix, + const size_t max_length, const size_t max_backward, const size_t gap, + const size_t max_distance, HasherSearchResult* BROTLI_RESTRICT out) { + HashComposite* self = FN(Self)(handle); + FN_A(FindLongestMatch)(self->ha, dictionary, data, ring_buffer_mask, + distance_cache, cur_ix, max_length, max_backward, gap, max_distance, out); + FN_B(FindLongestMatch)(self->hb, dictionary, data, ring_buffer_mask, + distance_cache, cur_ix, max_length, max_backward, gap, max_distance, out); +} + +#undef HashComposite diff --git a/c/enc/hash_rolling_inc.h b/c/enc/hash_rolling_inc.h new file mode 100755 index 0000000..4d5d14a --- /dev/null +++ b/c/enc/hash_rolling_inc.h @@ -0,0 +1,215 @@ +/* NOLINT(build/header_guard) */ +/* Copyright 2018 Google Inc. All Rights Reserved. + + Distributed under MIT license. + See file LICENSE for detail or copy at https://opensource.org/licenses/MIT +*/ + +/* template parameters: FN, JUMP, NUMBUCKETS, MASK, CHUNKLEN */ +/* NUMBUCKETS / (MASK + 1) = probability of storing and using hash code. */ +/* JUMP = skip bytes for speedup */ + +/* Rolling hash for long distance long string matches. Stores one position + per bucket, bucket key is computed over a long region. */ + +#define HashRolling HASHER() + +static const uint32_t FN(kRollingHashMul32) = 69069; +static const uint32_t FN(kInvalidPos) = 0xffffffff; + +/* This hasher uses a longer forward length, but returning a higher value here + will hurt compression by the main hasher when combined with a composite + hasher. The hasher tests for forward itself instead. */ +static BROTLI_INLINE size_t FN(HashTypeLength)(void) { return 4; } +static BROTLI_INLINE size_t FN(StoreLookahead)(void) { return 4; } + +/* Computes a code from a single byte. A lookup table of 256 values could be + used, but simply adding 1 works about as good. */ +static uint32_t FN(HashByte)(uint8_t byte) { + return (uint32_t)byte + 1u; +} + +static uint32_t FN(HashRollingFunctionInitial)(uint32_t state, uint8_t add, + uint32_t factor) { + return (uint32_t)(factor * state + FN(HashByte)(add)); +} + +static uint32_t FN(HashRollingFunction)(uint32_t state, uint8_t add, + uint8_t rem, uint32_t factor, + uint32_t factor_remove) { + return (uint32_t)(factor * state + + FN(HashByte)(add) - factor_remove * FN(HashByte)(rem)); +} + +typedef struct HashRolling { + uint32_t state; + uint32_t* table; + size_t next_ix; + + uint32_t chunk_len; + uint32_t factor; + uint32_t factor_remove; +} HashRolling; + +static BROTLI_INLINE HashRolling* FN(Self)(HasherHandle handle) { + return (HashRolling*)&(GetHasherCommon(handle)[1]); +} + +static void FN(Initialize)( + HasherHandle handle, const BrotliEncoderParams* params) { + HashRolling* self = FN(Self)(handle); + size_t i; + self->state = 0; + self->next_ix = 0; + + self->factor = FN(kRollingHashMul32); + + /* Compute the factor of the oldest byte to remove: factor**steps modulo + 0xffffffff (the multiplications rely on 32-bit overflow) */ + self->factor_remove = 1; + for (i = 0; i < CHUNKLEN; i += JUMP) { + self->factor_remove *= self->factor; + } + + self->table = (uint32_t*)((HasherHandle)self + sizeof(HashRolling)); + for (i = 0; i < NUMBUCKETS; i++) { + self->table[i] = FN(kInvalidPos); + } + + BROTLI_UNUSED(params); +} + +static void FN(Prepare)(HasherHandle handle, BROTLI_BOOL one_shot, + size_t input_size, const uint8_t* data) { + HashRolling* self = FN(Self)(handle); + size_t i; + /* Too small size, cannot use this hasher. */ + if (input_size < CHUNKLEN) return; + self->state = 0; + for (i = 0; i < CHUNKLEN; i += JUMP) { + self->state = FN(HashRollingFunctionInitial)( + self->state, data[i], self->factor); + } + BROTLI_UNUSED(one_shot); +} + +static BROTLI_INLINE size_t FN(HashMemAllocInBytes)( + const BrotliEncoderParams* params, BROTLI_BOOL one_shot, + size_t input_size) { + return sizeof(HashRolling) + NUMBUCKETS * sizeof(uint32_t); + BROTLI_UNUSED(params); + BROTLI_UNUSED(one_shot); + BROTLI_UNUSED(input_size); +} + +static BROTLI_INLINE void FN(Store)(HasherHandle BROTLI_RESTRICT handle, + const uint8_t* BROTLI_RESTRICT data, const size_t mask, const size_t ix) { + BROTLI_UNUSED(handle); + BROTLI_UNUSED(data); + BROTLI_UNUSED(mask); + BROTLI_UNUSED(ix); +} + +static BROTLI_INLINE void FN(StoreRange)(HasherHandle handle, + const uint8_t* data, const size_t mask, const size_t ix_start, + const size_t ix_end) { + BROTLI_UNUSED(handle); + BROTLI_UNUSED(data); + BROTLI_UNUSED(mask); + BROTLI_UNUSED(ix_start); + BROTLI_UNUSED(ix_end); +} + +static BROTLI_INLINE void FN(StitchToPreviousBlock)(HasherHandle handle, + size_t num_bytes, size_t position, const uint8_t* ringbuffer, + size_t ring_buffer_mask) { + /* In this case we must re-initialize the hasher from scratch from the + current position. */ + HashRolling* self = FN(Self)(handle); + size_t position_masked; + size_t available = num_bytes; + if ((position & (JUMP - 1)) != 0) { + size_t diff = JUMP - (position & (JUMP - 1)); + available = (diff > available) ? 0 : (available - diff); + position += diff; + } + position_masked = position & ring_buffer_mask; + /* wrapping around ringbuffer not handled. */ + if (available > ring_buffer_mask - position_masked) { + available = ring_buffer_mask - position_masked; + } + + FN(Prepare)(handle, BROTLI_FALSE, available, + ringbuffer + (position & ring_buffer_mask)); + self->next_ix = position; + BROTLI_UNUSED(num_bytes); +} + +static BROTLI_INLINE void FN(PrepareDistanceCache)( + HasherHandle handle, int* BROTLI_RESTRICT distance_cache) { + BROTLI_UNUSED(handle); + BROTLI_UNUSED(distance_cache); +} + +static BROTLI_INLINE void FN(FindLongestMatch)(HasherHandle handle, + const BrotliEncoderDictionary* dictionary, + const uint8_t* BROTLI_RESTRICT data, const size_t ring_buffer_mask, + const int* BROTLI_RESTRICT distance_cache, const size_t cur_ix, + const size_t max_length, const size_t max_backward, const size_t gap, + const size_t max_distance, HasherSearchResult* BROTLI_RESTRICT out) { + HashRolling* self = FN(Self)(handle); + const size_t cur_ix_masked = cur_ix & ring_buffer_mask; + size_t pos = self->next_ix; + + if ((cur_ix & (JUMP - 1)) != 0) return; + + /* Not enough lookahead */ + if (max_length < CHUNKLEN) return; + + for (pos = self->next_ix; pos <= cur_ix; pos += JUMP) { + uint32_t code = self->state & MASK; + + uint8_t rem = data[pos & ring_buffer_mask]; + uint8_t add = data[(pos + CHUNKLEN) & ring_buffer_mask]; + size_t found_ix = FN(kInvalidPos); + + self->state = FN(HashRollingFunction)( + self->state, add, rem, self->factor, self->factor_remove); + + if (code < NUMBUCKETS) { + found_ix = self->table[code]; + self->table[code] = (uint32_t)pos; + if (pos == cur_ix && found_ix != FN(kInvalidPos)) { + /* The cast to 32-bit makes backward distances up to 4GB work even + if cur_ix is above 4GB, despite using 32-bit values in the table. */ + size_t backward = (uint32_t)(cur_ix - found_ix); + if (backward <= max_backward) { + const size_t found_ix_masked = found_ix & ring_buffer_mask; + const size_t len = FindMatchLengthWithLimit(&data[found_ix_masked], + &data[cur_ix_masked], + max_length); + if (len >= 4 && len > out->len) { + score_t score = BackwardReferenceScore(len, backward); + if (score > out->score) { + out->len = len; + out->distance = backward; + out->score = score; + out->len_code_delta = 0; + } + } + } + } + } + } + + self->next_ix = cur_ix + JUMP; + + /* NOTE: this hasher does not search in the dictionary. It is used as + backup-hasher, the main hasher already searches in it. */ + BROTLI_UNUSED(dictionary); + BROTLI_UNUSED(distance_cache); + BROTLI_UNUSED(gap); + BROTLI_UNUSED(max_distance); +} + +#undef HashRolling diff --git a/c/enc/quality.h b/c/enc/quality.h index aa7ba0d..5f4d034 100644 --- a/c/enc/quality.h +++ b/c/enc/quality.h @@ -142,6 +142,24 @@ static BROTLI_INLINE void ChooseHasher(const BrotliEncoderParams* params, hparams->num_last_distances_to_check = params->quality < 7 ? 4 : params->quality < 9 ? 10 : 16; } + + if (params->lgwin > 24) { + /* Different hashers for large window brotli: not for qualities <= 2, + these are too fast for large window. Not for qualities >= 10: their + hasher already works well with large window. So the changes are: + H3 --> H35: for quality 3. + H54 --> H55: for quality 4 with size hint > 1MB + H6 --> H65: for qualities 5, 6, 7, 8, 9. */ + if (hparams->type == 3) { + hparams->type = 35; + } + if (hparams->type == 54) { + hparams->type = 55; + } + if (hparams->type == 6) { + hparams->type = 65; + } + } } #endif /* BROTLI_ENC_QUALITY_H_ */ |