From cb1ced3a258782d67af6914474764d86b5d26e2f Mon Sep 17 00:00:00 2001 From: Evgenii Kliuchnikov Date: Wed, 15 Mar 2023 08:15:19 +0000 Subject: speedup decoder by 0.2%-1.2% PiperOrigin-RevId: 516754779 --- c/dec/bit_reader.h | 68 ++++++++++++++++++++++++++++++++++-------------------- c/dec/decode.c | 32 ++++++++++++------------- 2 files changed, 58 insertions(+), 42 deletions(-) (limited to 'c') diff --git a/c/dec/bit_reader.h b/c/dec/bit_reader.h index 5f48711..b3fddea 100644 --- a/c/dec/bit_reader.h +++ b/c/dec/bit_reader.h @@ -22,6 +22,9 @@ extern "C" { #define BROTLI_SHORT_FILL_BIT_WINDOW_READ (sizeof(brotli_reg_t) >> 1) +/* 162 bits + 7 bytes */ +#define BROTLI_FAST_INPUT_SLACK 28 + BROTLI_INTERNAL extern const brotli_reg_t kBrotliBitMask[33]; static BROTLI_INLINE brotli_reg_t BitMask(brotli_reg_t n) { @@ -38,7 +41,8 @@ typedef struct { brotli_reg_t val_; /* pre-fetched bits */ brotli_reg_t bit_pos_; /* current bit-reading position in val_ */ const uint8_t* next_in; /* the byte we're reading from */ - size_t avail_in; + const uint8_t* guard_in; /* position from which "fast-path" is prohibited */ + const uint8_t* last_in; /* == next_in + avail_in */ } BrotliBitReader; typedef struct { @@ -64,12 +68,28 @@ BROTLI_INTERNAL BROTLI_BOOL BrotliWarmupBitReader(BrotliBitReader* br); BROTLI_INTERNAL BROTLI_NOINLINE BROTLI_BOOL BrotliSafeReadBits32Slow( BrotliBitReader* br, brotli_reg_t n_bits, brotli_reg_t* val); +static BROTLI_INLINE size_t +BrotliBitReaderGetAvailIn(BrotliBitReader* const br) { + return (size_t)(br->last_in - br->next_in); +} + static BROTLI_INLINE void BrotliBitReaderSaveState( BrotliBitReader* const from, BrotliBitReaderState* to) { to->val_ = from->val_; to->bit_pos_ = from->bit_pos_; to->next_in = from->next_in; - to->avail_in = from->avail_in; + to->avail_in = BrotliBitReaderGetAvailIn(from); +} + +static BROTLI_INLINE void BrotliBitReaderSetInput( + BrotliBitReader* const br, const uint8_t* next_in, size_t avail_in) { + br->next_in = next_in; + br->last_in = next_in + avail_in; + if (avail_in + 1 > BROTLI_FAST_INPUT_SLACK) { + br->guard_in = next_in + (avail_in + 1 - BROTLI_FAST_INPUT_SLACK); + } else { + br->guard_in = next_in; + } } static BROTLI_INLINE void BrotliBitReaderRestoreState( @@ -77,7 +97,7 @@ static BROTLI_INLINE void BrotliBitReaderRestoreState( to->val_ = from->val_; to->bit_pos_ = from->bit_pos_; to->next_in = from->next_in; - to->avail_in = from->avail_in; + BrotliBitReaderSetInput(to, from->next_in, from->avail_in); } static BROTLI_INLINE brotli_reg_t BrotliGetAvailableBits( @@ -90,15 +110,16 @@ static BROTLI_INLINE brotli_reg_t BrotliGetAvailableBits( maximal ring-buffer size (larger number won't be utilized anyway). */ static BROTLI_INLINE size_t BrotliGetRemainingBytes(BrotliBitReader* br) { static const size_t kCap = (size_t)1 << BROTLI_LARGE_MAX_WBITS; - if (br->avail_in > kCap) return kCap; - return br->avail_in + (BrotliGetAvailableBits(br) >> 3); + size_t avail_in = BrotliBitReaderGetAvailIn(br); + if (avail_in > kCap) return kCap; + return avail_in + (BrotliGetAvailableBits(br) >> 3); } /* Checks if there is at least |num| bytes left in the input ring-buffer (excluding the bits remaining in br->val_). */ static BROTLI_INLINE BROTLI_BOOL BrotliCheckInputAmount( - BrotliBitReader* const br, size_t num) { - return TO_BROTLI_BOOL(br->avail_in >= num); + BrotliBitReader* const br) { + return TO_BROTLI_BOOL(br->next_in < br->guard_in); } /* Guarantees that there are at least |n_bits| + 1 bits in accumulator. @@ -116,7 +137,6 @@ static BROTLI_INLINE void BrotliFillBitWindow( (br->val_ >> 56) | (BROTLI_UNALIGNED_LOAD64LE(br->next_in) << 8); br->bit_pos_ = bit_pos ^ 56; /* here same as -= 56 because of the if condition */ - br->avail_in -= 7; br->next_in += 7; } } else if (BROTLI_UNALIGNED_READ_FAST && BROTLI_IS_CONSTANT(n_bits) && @@ -127,7 +147,6 @@ static BROTLI_INLINE void BrotliFillBitWindow( (br->val_ >> 48) | (BROTLI_UNALIGNED_LOAD64LE(br->next_in) << 16); br->bit_pos_ = bit_pos ^ 48; /* here same as -= 48 because of the if condition */ - br->avail_in -= 6; br->next_in += 6; } } else { @@ -137,7 +156,6 @@ static BROTLI_INLINE void BrotliFillBitWindow( (((uint64_t)BROTLI_UNALIGNED_LOAD32LE(br->next_in)) << 32); br->bit_pos_ = bit_pos ^ 32; /* here same as -= 32 because of the if condition */ - br->avail_in -= BROTLI_SHORT_FILL_BIT_WINDOW_READ; br->next_in += BROTLI_SHORT_FILL_BIT_WINDOW_READ; } } @@ -150,7 +168,6 @@ static BROTLI_INLINE void BrotliFillBitWindow( (br->val_ >> 24) | (BROTLI_UNALIGNED_LOAD32LE(br->next_in) << 8); br->bit_pos_ = bit_pos ^ 24; /* here same as -= 24 because of the if condition */ - br->avail_in -= 3; br->next_in += 3; } } else { @@ -160,7 +177,6 @@ static BROTLI_INLINE void BrotliFillBitWindow( (((brotli_reg_t)BROTLI_UNALIGNED_LOAD16LE(br->next_in)) << 16); br->bit_pos_ = bit_pos ^ 16; /* here same as -= 16 because of the if condition */ - br->avail_in -= BROTLI_SHORT_FILL_BIT_WINDOW_READ; br->next_in += BROTLI_SHORT_FILL_BIT_WINDOW_READ; } } @@ -176,7 +192,7 @@ static BROTLI_INLINE void BrotliFillBitWindow16(BrotliBitReader* const br) { /* Tries to pull one byte of input to accumulator. Returns BROTLI_FALSE if there is no input available. */ static BROTLI_INLINE BROTLI_BOOL BrotliPullByte(BrotliBitReader* const br) { - if (br->avail_in == 0) { + if (br->next_in == br->last_in) { return BROTLI_FALSE; } br->val_ >>= 8; @@ -186,7 +202,6 @@ static BROTLI_INLINE BROTLI_BOOL BrotliPullByte(BrotliBitReader* const br) { br->val_ |= ((brotli_reg_t)*br->next_in) << 24; #endif br->bit_pos_ -= 8; - --br->avail_in; ++br->next_in; return BROTLI_TRUE; } @@ -236,7 +251,6 @@ static BROTLI_INLINE void BrotliDropBits( static BROTLI_INLINE void BrotliBitReaderUnload(BrotliBitReader* br) { brotli_reg_t unused_bytes = BrotliGetAvailableBits(br) >> 3; brotli_reg_t unused_bits = unused_bytes << 3; - br->avail_in += unused_bytes; br->next_in -= unused_bytes; if (unused_bits == sizeof(br->val_) << 3) { br->val_ = 0; @@ -248,11 +262,13 @@ static BROTLI_INLINE void BrotliBitReaderUnload(BrotliBitReader* br) { /* Reads the specified number of bits from |br| and advances the bit pos. Precondition: accumulator MUST contain at least |n_bits|. */ -static BROTLI_INLINE void BrotliTakeBits( - BrotliBitReader* const br, brotli_reg_t n_bits, brotli_reg_t* val) { +static BROTLI_INLINE void BrotliTakeBits(BrotliBitReader* const br, + brotli_reg_t n_bits, + brotli_reg_t* val) { *val = BrotliGetBitsUnmasked(br) & BitMask(n_bits); BROTLI_LOG(("[BrotliTakeBits] %d %d %d val: %6x\n", - (int)br->avail_in, (int)br->bit_pos_, (int)n_bits, (int)*val)); + (int)BrotliBitReaderGetAvailIn(br), (int)br->bit_pos_, + (int)n_bits, (int)*val)); BrotliDropBits(br, n_bits); } @@ -342,7 +358,6 @@ static BROTLI_INLINE BROTLI_BOOL BrotliJumpToByteBoundary(BrotliBitReader* br) { } static BROTLI_INLINE void BrotliDropBytes(BrotliBitReader* br, size_t num) { - br->avail_in -= num; br->next_in += num; } @@ -365,21 +380,24 @@ static BROTLI_INLINE void BrotliCopyBytes(uint8_t* dest, BROTLI_UNUSED_FUNCTION void BrotliBitReaderSuppressUnusedFunctions(void) { BROTLI_UNUSED(&BrotliBitReaderSuppressUnusedFunctions); - BROTLI_UNUSED(&BrotliBitReaderSaveState); + + BROTLI_UNUSED(&BrotliBitReaderGetAvailIn); BROTLI_UNUSED(&BrotliBitReaderRestoreState); - BROTLI_UNUSED(&BrotliGetRemainingBytes); + BROTLI_UNUSED(&BrotliBitReaderSaveState); + BROTLI_UNUSED(&BrotliBitReaderSetInput); + BROTLI_UNUSED(&BrotliBitReaderUnload); BROTLI_UNUSED(&BrotliCheckInputAmount); + BROTLI_UNUSED(&BrotliCopyBytes); BROTLI_UNUSED(&BrotliFillBitWindow16); BROTLI_UNUSED(&BrotliGet16BitsUnmasked); BROTLI_UNUSED(&BrotliGetBits); - BROTLI_UNUSED(&BrotliSafeGetBits); - BROTLI_UNUSED(&BrotliBitReaderUnload); + BROTLI_UNUSED(&BrotliGetRemainingBytes); + BROTLI_UNUSED(&BrotliJumpToByteBoundary); BROTLI_UNUSED(&BrotliReadBits24); BROTLI_UNUSED(&BrotliReadBits32); + BROTLI_UNUSED(&BrotliSafeGetBits); BROTLI_UNUSED(&BrotliSafeReadBits); BROTLI_UNUSED(&BrotliSafeReadBits32); - BROTLI_UNUSED(&BrotliJumpToByteBoundary); - BROTLI_UNUSED(&BrotliCopyBytes); } #if defined(__cplusplus) || defined(c_plusplus) diff --git a/c/dec/decode.c b/c/dec/decode.c index 6389364..fe8af63 100644 --- a/c/dec/decode.c +++ b/c/dec/decode.c @@ -611,7 +611,7 @@ static BrotliDecoderErrorCode ReadSymbolCodeLengths( const HuffmanCode* p = h->table; brotli_reg_t code_len; BROTLI_HC_MARK_TABLE_FOR_FAST_LOAD(p); - if (!BrotliCheckInputAmount(br, BROTLI_SHORT_FILL_BIT_WINDOW_READ)) { + if (!BrotliCheckInputAmount(br)) { h->symbol = symbol; h->repeat = repeat; h->prev_code_len = prev_code_len; @@ -1876,11 +1876,11 @@ static BROTLI_INLINE BROTLI_BOOL SafeReadCommand( } static BROTLI_INLINE BROTLI_BOOL CheckInputAmount( - int safe, BrotliBitReader* const br, size_t num) { + int safe, BrotliBitReader* const br) { if (safe) { return BROTLI_TRUE; } - return BrotliCheckInputAmount(br, num); + return BrotliCheckInputAmount(br); } #define BROTLI_SAFE(METHOD) \ @@ -1903,7 +1903,7 @@ static BROTLI_INLINE BrotliDecoderErrorCode ProcessCommandsInternal( BrotliBitReader* br = &s->br; int compound_dictionary_size = GetCompoundDictionarySize(s); - if (!CheckInputAmount(safe, br, 28)) { + if (!CheckInputAmount(safe, br)) { result = BROTLI_DECODER_NEEDS_MORE_INPUT; goto saveStateAndReturn; } @@ -1928,7 +1928,7 @@ CommandBegin: if (safe) { s->state = BROTLI_STATE_COMMAND_BEGIN; } - if (!CheckInputAmount(safe, br, 28)) { /* 156 bits + 7 bytes */ + if (!CheckInputAmount(safe, br)) { s->state = BROTLI_STATE_COMMAND_BEGIN; result = BROTLI_DECODER_NEEDS_MORE_INPUT; goto saveStateAndReturn; @@ -1956,7 +1956,7 @@ CommandInner: brotli_reg_t value; PreloadSymbol(safe, s->literal_htree, br, &bits, &value); do { - if (!CheckInputAmount(safe, br, 28)) { /* 162 bits + 7 bytes */ + if (!CheckInputAmount(safe, br)) { s->state = BROTLI_STATE_COMMAND_INNER; result = BROTLI_DECODER_NEEDS_MORE_INPUT; goto saveStateAndReturn; @@ -1990,7 +1990,7 @@ CommandInner: do { const HuffmanCode* hc; uint8_t context; - if (!CheckInputAmount(safe, br, 28)) { /* 162 bits + 7 bytes */ + if (!CheckInputAmount(safe, br)) { s->state = BROTLI_STATE_COMMAND_INNER; result = BROTLI_DECODER_NEEDS_MORE_INPUT; goto saveStateAndReturn; @@ -2315,14 +2315,13 @@ BrotliDecoderResult BrotliDecoderDecompressStream( } if (!*available_out) next_out = 0; if (s->buffer_length == 0) { /* Just connect bit reader to input stream. */ - br->avail_in = *available_in; - br->next_in = *next_in; + BrotliBitReaderSetInput(br, *next_in, *available_in); } else { /* At least one byte of input is required. More than one byte of input may be required to complete the transaction -> reading more data must be done in a loop -> do it in a main loop. */ result = BROTLI_DECODER_NEEDS_MORE_INPUT; - br->next_in = &s->buffer.u8[0]; + BrotliBitReaderSetInput(br, &s->buffer.u8[0], s->buffer_length); } /* State machine */ for (;;) { @@ -2339,15 +2338,14 @@ BrotliDecoderResult BrotliDecoderDecompressStream( } } if (s->buffer_length != 0) { /* Used with internal buffer. */ - if (br->avail_in == 0) { + if (br->next_in == br->last_in) { /* Successfully finished read transaction. Accumulator contains less than 8 bits, because internal buffer is expanded byte-by-byte until it is enough to complete read. */ s->buffer_length = 0; /* Switch to input stream and restart. */ result = BROTLI_DECODER_SUCCESS; - br->avail_in = *available_in; - br->next_in = *next_in; + BrotliBitReaderSetInput(br, *next_in, *available_in); continue; } else if (*available_in != 0) { /* Not enough data in buffer, but can take one more byte from @@ -2355,7 +2353,7 @@ BrotliDecoderResult BrotliDecoderDecompressStream( result = BROTLI_DECODER_SUCCESS; s->buffer.u8[s->buffer_length] = **next_in; s->buffer_length++; - br->avail_in = s->buffer_length; + BrotliBitReaderSetInput(br, &s->buffer.u8[0], s->buffer_length); (*next_in)++; (*available_in)--; /* Retry with more data in buffer. */ @@ -2366,7 +2364,7 @@ BrotliDecoderResult BrotliDecoderDecompressStream( } else { /* Input stream doesn't contain enough input. */ /* Copy tail to internal buffer and return. */ *next_in = br->next_in; - *available_in = br->avail_in; + *available_in = BrotliBitReaderGetAvailIn(br); while (*available_in) { s->buffer.u8[s->buffer_length] = **next_in; s->buffer_length++; @@ -2389,7 +2387,7 @@ BrotliDecoderResult BrotliDecoderDecompressStream( stream it has less than 8 bits in accumulator, so it is safe to return unused accumulator bits there. */ BrotliBitReaderUnload(br); - *available_in = br->avail_in; + *available_in = BrotliBitReaderGetAvailIn(br); *next_in = br->next_in; } break; @@ -2756,7 +2754,7 @@ BrotliDecoderResult BrotliDecoderDecompressStream( } if (s->buffer_length == 0) { BrotliBitReaderUnload(br); - *available_in = br->avail_in; + *available_in = BrotliBitReaderGetAvailIn(br); *next_in = br->next_in; } s->state = BROTLI_STATE_DONE; -- cgit v1.1