aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBrotli <no-reply@google.com>2024-04-18 19:59:28 -0700
committerCopybara-Service <copybara-worker@google.com>2024-04-18 20:00:02 -0700
commit1b3a5ccb6e7b9384b741437532f4dae0730c61f2 (patch)
tree4fe4f6271a54307c9dce11b40d5ed9e71e37dda6
parent443af10a8001c5de7bab306d329de614c3defebc (diff)
downloadbrotli-1b3a5ccb6e7b9384b741437532f4dae0730c61f2.zip
brotli-1b3a5ccb6e7b9384b741437532f4dae0730c61f2.tar.gz
brotli-1b3a5ccb6e7b9384b741437532f4dae0730c61f2.tar.bz2
Prefetch the backreference hashtable bucket.
Place the prefetch before the last distance checks, to give the prefetch enough time to work. PiperOrigin-RevId: 626228820
-rw-r--r--c/common/platform.h15
-rw-r--r--c/enc/hash_longest_match64_inc.h7
-rw-r--r--c/enc/hash_longest_match_inc.h13
3 files changed, 28 insertions, 7 deletions
diff --git a/c/common/platform.h b/c/common/platform.h
index dbba942..c18ff02 100644
--- a/c/common/platform.h
+++ b/c/common/platform.h
@@ -519,6 +519,21 @@ BROTLI_UNUSED_FUNCTION void BrotliSuppressUnusedFunctions(void) {
#if BROTLI_ENABLE_DUMP
BROTLI_UNUSED(&BrotliDump);
#endif
+
+#if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_I86)) && !defined(_M_ARM64EC) /* _mm_prefetch() is not defined outside of x86/x64 */
+# include <mmintrin.h> /* https://msdn.microsoft.com/fr-fr/library/84szxsww(v=vs.90).aspx */
+# define PREFETCH_L1(ptr) _mm_prefetch((const char*)(ptr), _MM_HINT_T0)
+# define PREFETCH_L2(ptr) _mm_prefetch((const char*)(ptr), _MM_HINT_T1)
+#elif BROTLI_GNUC_HAS_BUILTIN(__builtin_prefetch, 3, 1, 0)
+# define PREFETCH_L1(ptr) __builtin_prefetch((ptr), 0 /* rw==read */, 3 /* locality */)
+# define PREFETCH_L2(ptr) __builtin_prefetch((ptr), 0 /* rw==read */, 2 /* locality */)
+#elif defined(__aarch64__)
+# define PREFETCH_L1(ptr) do { __asm__ __volatile__("prfm pldl1keep, %0" ::"Q"(*(ptr))); } while (0)
+# define PREFETCH_L2(ptr) do { __asm__ __volatile__("prfm pldl2keep, %0" ::"Q"(*(ptr))); } while (0)
+#else
+# define PREFETCH_L1(ptr) do { (void)(ptr); } while (0) /* disabled */
+# define PREFETCH_L2(ptr) do { (void)(ptr); } while (0) /* disabled */
+#endif
}
#endif /* BROTLI_COMMON_PLATFORM_H_ */
diff --git a/c/enc/hash_longest_match64_inc.h b/c/enc/hash_longest_match64_inc.h
index 441ab86..7e0b2f5 100644
--- a/c/enc/hash_longest_match64_inc.h
+++ b/c/enc/hash_longest_match64_inc.h
@@ -170,6 +170,11 @@ static BROTLI_INLINE void FN(FindLongestMatch)(
score_t best_score = out->score;
size_t best_len = out->len;
size_t i;
+ /* Precalculate the hash key and prefetch the bucket. */
+ const size_t key = FN(HashBytes)(&data[cur_ix_masked], self->hash_mul_);
+ uint32_t* BROTLI_RESTRICT bucket = &buckets[key << self->block_bits_];
+ PREFETCH_L1(bucket);
+ if (self->block_bits_ > 4) PREFETCH_L1(bucket + 16);
out->len = 0;
out->len_code_delta = 0;
@@ -220,8 +225,6 @@ static BROTLI_INLINE void FN(FindLongestMatch)(
best_len = 3;
}
{
- const size_t key = FN(HashBytes)(&data[cur_ix_masked], self->hash_mul_);
- uint32_t* BROTLI_RESTRICT bucket = &buckets[key << self->block_bits_];
const size_t down =
(num[key] > self->block_size_) ?
(num[key] - self->block_size_) : 0u;
diff --git a/c/enc/hash_longest_match_inc.h b/c/enc/hash_longest_match_inc.h
index 4679dac..b744f4d 100644
--- a/c/enc/hash_longest_match_inc.h
+++ b/c/enc/hash_longest_match_inc.h
@@ -169,11 +169,17 @@ static BROTLI_INLINE void FN(FindLongestMatch)(
score_t best_score = out->score;
size_t best_len = out->len;
size_t i;
+ /* Precalculate the hash key and prefetch the bucket. */
+ const uint32_t key =
+ FN(HashBytes)(&data[cur_ix_masked], self->hash_shift_);
+ uint32_t* BROTLI_RESTRICT bucket = &buckets[key << self->block_bits_];
+ PREFETCH_L1(bucket);
+ if (self->block_bits_ > 4) PREFETCH_L1(bucket + 16);
+ out->len = 0;
+ out->len_code_delta = 0;
BROTLI_DCHECK(cur_ix_masked + max_length <= ring_buffer_mask);
- out->len = 0;
- out->len_code_delta = 0;
/* Try last distance first. */
for (i = 0; i < (size_t)self->num_last_distances_to_check_; ++i) {
const size_t backward = (size_t)distance_cache[i];
@@ -219,9 +225,6 @@ static BROTLI_INLINE void FN(FindLongestMatch)(
best_len = 3;
}
{
- const uint32_t key =
- FN(HashBytes)(&data[cur_ix_masked], self->hash_shift_);
- uint32_t* BROTLI_RESTRICT bucket = &buckets[key << self->block_bits_];
const size_t down =
(num[key] > self->block_size_) ? (num[key] - self->block_size_) : 0u;
for (i = num[key]; i > down;) {