diff options
author | Brotli <no-reply@google.com> | 2024-05-08 17:59:09 -0700 |
---|---|---|
committer | Copybara-Service <copybara-worker@google.com> | 2024-05-08 17:59:58 -0700 |
commit | bb809ac908a4b42772eaf4f5e9fb66ca7886a6c9 (patch) | |
tree | 53b3fe4ecb1d3d1981b662f8b2f419134fecc229 /c/enc/matching_tag_mask.h | |
parent | d01a4caaa80c0072fe1b6bf073814b9400667fcc (diff) | |
download | brotli-bb809ac908a4b42772eaf4f5e9fb66ca7886a6c9.zip brotli-bb809ac908a4b42772eaf4f5e9fb66ca7886a6c9.tar.gz brotli-bb809ac908a4b42772eaf4f5e9fb66ca7886a6c9.tar.bz2 |
Use a hash table header and SIMD to speed up hash table operations (similar to [Swiss Tables](https://abseil.io/about/design/swisstables)).
PiperOrigin-RevId: 631982664
Diffstat (limited to 'c/enc/matching_tag_mask.h')
-rw-r--r-- | c/enc/matching_tag_mask.h | 68 |
1 files changed, 68 insertions, 0 deletions
diff --git a/c/enc/matching_tag_mask.h b/c/enc/matching_tag_mask.h new file mode 100644 index 0000000..c166255 --- /dev/null +++ b/c/enc/matching_tag_mask.h @@ -0,0 +1,68 @@ +#ifndef THIRD_PARTY_BROTLI_ENC_MATCHING_TAG_MASK_H_ +#define THIRD_PARTY_BROTLI_ENC_MATCHING_TAG_MASK_H_ + +#include "../common/platform.h" + +#if defined(__SSE2__) || defined(_M_AMD64) || (defined (_M_IX86) && defined(_M_IX86_FP) && (_M_IX86_FP >= 2)) +#define SUPPORTS_SSE_2 +#endif + +#if defined(SUPPORTS_SSE_2) +#include <immintrin.h> +#endif + + +static BROTLI_INLINE uint64_t GetMatchingTagMask( + size_t chunk_count, const uint8_t tag, + const uint8_t* BROTLI_RESTRICT tag_bucket, const size_t head) { + uint64_t matches = 0; +#if defined(SUPPORTS_SSE_2) + const __m128i comparison_mask = _mm_set1_epi8((char)tag); + size_t i; + for (i = 0; i < chunk_count && i < 4; i++) { + const __m128i chunk = + _mm_loadu_si128((const __m128i*)(const void*)(tag_bucket + 16 * i)); + const __m128i equal_mask = _mm_cmpeq_epi8(chunk, comparison_mask); + matches |= (uint64_t)_mm_movemask_epi8(equal_mask) << 16 * i; + } +#else + const int chunk_size = sizeof(size_t); + const size_t shift_amount = ((chunk_size * 8) - chunk_size); + const size_t xFF = ~((size_t)0); + const size_t x01 = xFF / 0xFF; + const size_t x80 = x01 << 7; + const size_t splat_char = tag * x01; + int i = ((int)chunk_count * 16) - chunk_size; + BROTLI_DCHECK((sizeof(size_t) == 4) || (sizeof(size_t) == 8)); +#if BROTLI_LITTLE_ENDIAN + const size_t extractMagic = (xFF / 0x7F) >> chunk_size; + do { + size_t chunk = BrotliUnalignedReadSizeT(&tag_bucket[i]); + chunk ^= splat_char; + chunk = (((chunk | x80) - x01) | chunk) & x80; + matches <<= chunk_size; + matches |= (chunk * extractMagic) >> shift_amount; + i -= chunk_size; + } while (i >= 0); +#else + const size_t msb = xFF ^ (xFF >> 1); + const size_t extractMagic = (msb / 0x1FF) | msb; + do { + size_t chunk = BrotliUnalignedReadSizeT(&tag_bucket[i]); + chunk ^= splat_char; + chunk = (((chunk | x80) - x01) | chunk) & x80; + matches <<= chunk_size; + matches |= ((chunk >> 7) * extractMagic) >> shift_amount; + i -= chunk_size; + } while (i >= 0); +#endif + matches = ~matches; +#endif + if (chunk_count == 1) return BrotliRotateRight16((uint16_t)matches, head); + if (chunk_count == 2) return BrotliRotateRight32((uint32_t)matches, head); + return BrotliRotateRight64(matches, head); +} + +#undef SUPPORTS_SSE_2 + +#endif // THIRD_PARTY_BROTLI_ENC_MATCHING_TAG_MASK_H_ |