From 9351fa7ffb9d5aa133dc8f8b14e175cace52d9e4 Mon Sep 17 00:00:00 2001 From: Brotli Date: Wed, 20 Mar 2024 10:29:20 -0700 Subject: Compare 4 bytes when checking if a longer match is possible. Loading and comparing 4 bytes is ~as fast as 1 byte, but allows us to avoid more full match length calculation. PiperOrigin-RevId: 617556847 --- c/enc/hash.h | 12 +++++++++--- c/enc/hash_forgetful_chain_inc.h | 9 ++++++++- c/enc/hash_longest_match64_inc.h | 9 ++++++++- c/enc/hash_longest_match_inc.h | 9 ++++++++- c/enc/hash_longest_match_quickly_inc.h | 1 + 5 files changed, 34 insertions(+), 6 deletions(-) diff --git a/c/enc/hash.h b/c/enc/hash.h index 5677d82..ba9b0d8 100644 --- a/c/enc/hash.h +++ b/c/enc/hash.h @@ -574,6 +574,11 @@ static BROTLI_INLINE void FindCompoundDictionaryMatch( } } } + /* we require matches of len >4, so increase best_len to 3, so we can compare + * 4 bytes all the time. */ + if (best_len < 3) { + best_len = 3; + } while (item == 0) { size_t offset; size_t distance; @@ -586,9 +591,10 @@ static BROTLI_INLINE void FindCompoundDictionaryMatch( limit = source_size - offset; limit = (limit > max_length) ? max_length : limit; if (distance > max_distance) continue; - if (cur_ix_masked + best_len > ring_buffer_mask || - best_len >= limit || - data[cur_ix_masked + best_len] != source[offset + best_len]) { + if (cur_ix_masked + best_len > ring_buffer_mask || best_len >= limit || + /* compare 4 bytes ending at best_len + 1 */ + BrotliUnalignedRead32(&data[cur_ix_masked + best_len - 3]) != + BrotliUnalignedRead32(&source[offset + best_len - 3])) { continue; } { diff --git a/c/enc/hash_forgetful_chain_inc.h b/c/enc/hash_forgetful_chain_inc.h index 48e1cdc..9a8efeb 100644 --- a/c/enc/hash_forgetful_chain_inc.h +++ b/c/enc/hash_forgetful_chain_inc.h @@ -241,6 +241,11 @@ static BROTLI_INLINE void FN(FindLongestMatch)( } } } + /* we require matches of len >4, so increase best_len to 3, so we can compare + * 4 bytes all the time. */ + if (best_len < 3) { + best_len = 3; + } { const size_t bank = key & (NUM_BANKS - 1); size_t backward = 0; @@ -257,7 +262,9 @@ static BROTLI_INLINE void FN(FindLongestMatch)( delta = banks[bank].slots[last].delta; if (cur_ix_masked + best_len > ring_buffer_mask || prev_ix + best_len > ring_buffer_mask || - data[cur_ix_masked + best_len] != data[prev_ix + best_len]) { + /* compare 4 bytes ending at best_len + 1 */ + BrotliUnalignedRead32(&data[cur_ix_masked + best_len - 3]) != + BrotliUnalignedRead32(&data[prev_ix + best_len - 3])) { continue; } { diff --git a/c/enc/hash_longest_match64_inc.h b/c/enc/hash_longest_match64_inc.h index e48fc61..8f825de 100644 --- a/c/enc/hash_longest_match64_inc.h +++ b/c/enc/hash_longest_match64_inc.h @@ -211,6 +211,11 @@ static BROTLI_INLINE void FN(FindLongestMatch)( } } } + /* we require matches of len >4, so increase best_len to 3, so we can compare + * 4 bytes all the time. */ + if (best_len < 3) { + best_len = 3; + } { const size_t key = FN(HashBytes)(&data[cur_ix_masked], self->hash_mul_); uint32_t* BROTLI_RESTRICT bucket = &buckets[key << self->block_bits_]; @@ -230,7 +235,9 @@ static BROTLI_INLINE void FN(FindLongestMatch)( prev_ix &= ring_buffer_mask; if (cur_ix_masked + best_len > ring_buffer_mask || prev_ix + best_len > ring_buffer_mask || - data[cur_ix_masked + best_len] != data[prev_ix + best_len]) { + /* compare 4 bytes ending at best_len + 1 */ + BrotliUnalignedRead32(&data[cur_ix_masked + best_len - 3]) != + BrotliUnalignedRead32(&data[prev_ix + best_len - 3])) { continue; } current4 = BrotliUnalignedRead32(data + prev_ix); diff --git a/c/enc/hash_longest_match_inc.h b/c/enc/hash_longest_match_inc.h index 788e9ef..608fa66 100644 --- a/c/enc/hash_longest_match_inc.h +++ b/c/enc/hash_longest_match_inc.h @@ -208,6 +208,11 @@ static BROTLI_INLINE void FN(FindLongestMatch)( } } } + /* we require matches of len >4, so increase best_len to 3, so we can compare + * 4 bytes all the time. */ + if (best_len < 3) { + best_len = 3; + } { const uint32_t key = FN(HashBytes)(&data[cur_ix_masked], self->hash_shift_); @@ -223,7 +228,9 @@ static BROTLI_INLINE void FN(FindLongestMatch)( prev_ix &= ring_buffer_mask; if (cur_ix_masked + best_len > ring_buffer_mask || prev_ix + best_len > ring_buffer_mask || - data[cur_ix_masked + best_len] != data[prev_ix + best_len]) { + /* compare 4 bytes ending at best_len + 1 */ + BrotliUnalignedRead32(&data[cur_ix_masked + best_len - 3]) != + BrotliUnalignedRead32(&data[prev_ix + best_len - 3])) { continue; } { diff --git a/c/enc/hash_longest_match_quickly_inc.h b/c/enc/hash_longest_match_quickly_inc.h index 54397ef..1f36022 100644 --- a/c/enc/hash_longest_match_quickly_inc.h +++ b/c/enc/hash_longest_match_quickly_inc.h @@ -155,6 +155,7 @@ static BROTLI_INLINE void FN(FindLongestMatch)( uint32_t* BROTLI_RESTRICT buckets = self->buckets_; const size_t best_len_in = out->len; const size_t cur_ix_masked = cur_ix & ring_buffer_mask; + /* TODO: compare 4 bytes at once (and set the minimum best len to 4) */ int compare_char = data[cur_ix_masked + best_len_in]; size_t key = FN(HashBytes)(&data[cur_ix_masked]); size_t key_out; -- cgit v1.1