diff options
author | Stephen Kyle <stephen.kyle@arm.com> | 2018-09-27 12:15:46 +0100 |
---|---|---|
committer | Eugene Kliuchnikov <eustas@google.com> | 2018-09-27 13:15:46 +0200 |
commit | 9402ac5c08806f4ba400bc390420c29ddd78ecd1 (patch) | |
tree | 40dc55ba7b0e88749e220c342290eb29d0d654f7 /c/dec | |
parent | 67f059eaf5eb6cf9201ffe7776b48229a442d864 (diff) | |
download | brotli-9402ac5c08806f4ba400bc390420c29ddd78ecd1.zip brotli-9402ac5c08806f4ba400bc390420c29ddd78ecd1.tar.gz brotli-9402ac5c08806f4ba400bc390420c29ddd78ecd1.tar.bz2 |
decode: faster huffman code loading on 32-bit Arm (#703)
* platform: add macro for using the 'aligned' attribute
* decode: add accessor macros for HuffmanCode fields
Adds a constructor function for building HuffmanCode values
so they can be accessed quickly on different architectures.
Also adds macros for marking a HuffmanCode table pointer
that can be accessed quickly (BROTLI_HC_MARK_TABLE_FOR_FAST_LOAD),
adjusting the index into that table (BROTLI_HC_ADJUST_TABLE_INDEX),
and getting the .bits or .value fields out of the table at the
current index (BROTLI_HC_GET_BITS/VALUE).
For example, assuming |table| contains a HuffmanCode pointer:
BROTLI_HC_MARK_TABLE_FOR_FAST_LOAD(table);
BROTLI_HC_ADJUST_TABLE_INDEX(table, index_into_table);
*bits = BROTLI_HC_GET_BITS(table);
*value = BROTLI_HC_GET_VALUE(table);
BROTLI_HC_ADJUST_TABLE_INDEX(table, offset);
*bits2 = BROTLI_HC_GET_BITS(table);
*value2 = BROTLI_HC_GET_VALUE(table);
All uses of the HuffmanCode have been updated appropriately.
* decode: add alternative accessors for HuffmanCode on Arm AArch32
Diffstat (limited to 'c/dec')
-rw-r--r-- | c/dec/decode.c | 81 | ||||
-rw-r--r-- | c/dec/huffman.c | 79 | ||||
-rw-r--r-- | c/dec/huffman.h | 55 |
3 files changed, 132 insertions, 83 deletions
diff --git a/c/dec/decode.c b/c/dec/decode.c index 674b829..1fde329 100644 --- a/c/dec/decode.c +++ b/c/dec/decode.c @@ -347,15 +347,17 @@ static BrotliDecoderErrorCode BROTLI_NOINLINE DecodeMetaBlockLength( static BROTLI_INLINE uint32_t DecodeSymbol(uint32_t bits, const HuffmanCode* table, BrotliBitReader* br) { - table += bits & HUFFMAN_TABLE_MASK; - if (table->bits > HUFFMAN_TABLE_BITS) { - uint32_t nbits = table->bits - HUFFMAN_TABLE_BITS; + BROTLI_HC_MARK_TABLE_FOR_FAST_LOAD(table); + BROTLI_HC_ADJUST_TABLE_INDEX(table, bits & HUFFMAN_TABLE_MASK); + if (BROTLI_HC_FAST_LOAD_BITS(table) > HUFFMAN_TABLE_BITS) { + uint32_t nbits = BROTLI_HC_FAST_LOAD_BITS(table) - HUFFMAN_TABLE_BITS; BrotliDropBits(br, HUFFMAN_TABLE_BITS); - table += table->value; - table += (bits >> HUFFMAN_TABLE_BITS) & BitMask(nbits); + BROTLI_HC_ADJUST_TABLE_INDEX(table, + BROTLI_HC_FAST_LOAD_VALUE(table) + + ((bits >> HUFFMAN_TABLE_BITS) & BitMask(nbits))); } - BrotliDropBits(br, table->bits); - return table->value; + BrotliDropBits(br, BROTLI_HC_FAST_LOAD_BITS(table)); + return BROTLI_HC_FAST_LOAD_VALUE(table); } /* Reads and decodes the next Huffman code from bit-stream. @@ -371,19 +373,20 @@ static BROTLI_NOINLINE BROTLI_BOOL SafeDecodeSymbol( const HuffmanCode* table, BrotliBitReader* br, uint32_t* result) { uint32_t val; uint32_t available_bits = BrotliGetAvailableBits(br); + BROTLI_HC_MARK_TABLE_FOR_FAST_LOAD(table); if (available_bits == 0) { - if (table->bits == 0) { - *result = table->value; + if (BROTLI_HC_FAST_LOAD_BITS(table) == 0) { + *result = BROTLI_HC_FAST_LOAD_VALUE(table); return BROTLI_TRUE; } return BROTLI_FALSE; /* No valid bits at all. */ } val = (uint32_t)BrotliGetBitsUnmasked(br); - table += val & HUFFMAN_TABLE_MASK; - if (table->bits <= HUFFMAN_TABLE_BITS) { - if (table->bits <= available_bits) { - BrotliDropBits(br, table->bits); - *result = table->value; + BROTLI_HC_ADJUST_TABLE_INDEX(table, val & HUFFMAN_TABLE_MASK); + if (BROTLI_HC_FAST_LOAD_BITS(table) <= HUFFMAN_TABLE_BITS) { + if (BROTLI_HC_FAST_LOAD_BITS(table) <= available_bits) { + BrotliDropBits(br, BROTLI_HC_FAST_LOAD_BITS(table)); + *result = BROTLI_HC_FAST_LOAD_VALUE(table); return BROTLI_TRUE; } else { return BROTLI_FALSE; /* Not enough bits for the first level. */ @@ -394,15 +397,15 @@ static BROTLI_NOINLINE BROTLI_BOOL SafeDecodeSymbol( } /* Speculatively drop HUFFMAN_TABLE_BITS. */ - val = (val & BitMask(table->bits)) >> HUFFMAN_TABLE_BITS; + val = (val & BitMask(BROTLI_HC_FAST_LOAD_BITS(table))) >> HUFFMAN_TABLE_BITS; available_bits -= HUFFMAN_TABLE_BITS; - table += table->value + val; - if (available_bits < table->bits) { + BROTLI_HC_ADJUST_TABLE_INDEX(table, BROTLI_HC_FAST_LOAD_VALUE(table) + val); + if (available_bits < BROTLI_HC_FAST_LOAD_BITS(table)) { return BROTLI_FALSE; /* Not enough bits for the second level. */ } - BrotliDropBits(br, HUFFMAN_TABLE_BITS + table->bits); - *result = table->value; + BrotliDropBits(br, HUFFMAN_TABLE_BITS + BROTLI_HC_FAST_LOAD_BITS(table)); + *result = BROTLI_HC_FAST_LOAD_VALUE(table); return BROTLI_TRUE; } @@ -425,9 +428,10 @@ static BROTLI_INLINE void PreloadSymbol(int safe, if (safe) { return; } - table += BrotliGetBits(br, HUFFMAN_TABLE_BITS); - *bits = table->bits; - *value = table->value; + BROTLI_HC_MARK_TABLE_FOR_FAST_LOAD(table); + BROTLI_HC_ADJUST_TABLE_INDEX(table, BrotliGetBits(br, HUFFMAN_TABLE_BITS)); + *bits = BROTLI_HC_FAST_LOAD_BITS(table); + *value = BROTLI_HC_FAST_LOAD_VALUE(table); } /* Decodes the next Huffman code using data prepared by PreloadSymbol. @@ -441,10 +445,11 @@ static BROTLI_INLINE uint32_t ReadPreloadedSymbol(const HuffmanCode* table, uint32_t val = BrotliGet16BitsUnmasked(br); const HuffmanCode* ext = table + (val & HUFFMAN_TABLE_MASK) + *value; uint32_t mask = BitMask((*bits - HUFFMAN_TABLE_BITS)); + BROTLI_HC_MARK_TABLE_FOR_FAST_LOAD(ext); BrotliDropBits(br, HUFFMAN_TABLE_BITS); - ext += (val >> HUFFMAN_TABLE_BITS) & mask; - BrotliDropBits(br, ext->bits); - result = ext->value; + BROTLI_HC_ADJUST_TABLE_INDEX(ext, (val >> HUFFMAN_TABLE_BITS) & mask); + BrotliDropBits(br, BROTLI_HC_FAST_LOAD_BITS(ext)); + result = BROTLI_HC_FAST_LOAD_VALUE(ext); } else { BrotliDropBits(br, *bits); } @@ -597,6 +602,7 @@ static BrotliDecoderErrorCode ReadSymbolCodeLengths( while (symbol < alphabet_size && space > 0) { const HuffmanCode* p = s->table; uint32_t code_len; + BROTLI_HC_MARK_TABLE_FOR_FAST_LOAD(p); if (!BrotliCheckInputAmount(br, BROTLI_SHORT_FILL_BIT_WINDOW_READ)) { s->symbol = symbol; s->repeat = repeat; @@ -606,10 +612,10 @@ static BrotliDecoderErrorCode ReadSymbolCodeLengths( return BROTLI_DECODER_NEEDS_MORE_INPUT; } BrotliFillBitWindow16(br); - p += BrotliGetBitsUnmasked(br) & - BitMask(BROTLI_HUFFMAN_MAX_CODE_LENGTH_CODE_LENGTH); - BrotliDropBits(br, p->bits); /* Use 1..5 bits. */ - code_len = p->value; /* code_len == 0..17 */ + BROTLI_HC_ADJUST_TABLE_INDEX(p, BrotliGetBitsUnmasked(br) & + BitMask(BROTLI_HUFFMAN_MAX_CODE_LENGTH_CODE_LENGTH)); + BrotliDropBits(br, BROTLI_HC_FAST_LOAD_BITS(p)); /* Use 1..5 bits. */ + code_len = BROTLI_HC_FAST_LOAD_VALUE(p); /* code_len == 0..17 */ if (code_len < BROTLI_REPEAT_PREVIOUS_CODE_LENGTH) { ProcessSingleCodeLength(code_len, &symbol, &repeat, &space, &prev_code_len, symbol_lists, code_length_histo, next_symbol); @@ -637,31 +643,34 @@ static BrotliDecoderErrorCode SafeReadSymbolCodeLengths( uint32_t code_len; uint32_t available_bits; uint32_t bits = 0; + BROTLI_HC_MARK_TABLE_FOR_FAST_LOAD(p); if (get_byte && !BrotliPullByte(br)) return BROTLI_DECODER_NEEDS_MORE_INPUT; get_byte = BROTLI_FALSE; available_bits = BrotliGetAvailableBits(br); if (available_bits != 0) { bits = (uint32_t)BrotliGetBitsUnmasked(br); } - p += bits & BitMask(BROTLI_HUFFMAN_MAX_CODE_LENGTH_CODE_LENGTH); - if (p->bits > available_bits) { + BROTLI_HC_ADJUST_TABLE_INDEX(p, + bits & BitMask(BROTLI_HUFFMAN_MAX_CODE_LENGTH_CODE_LENGTH)); + if (BROTLI_HC_FAST_LOAD_BITS(p) > available_bits) { get_byte = BROTLI_TRUE; continue; } - code_len = p->value; /* code_len == 0..17 */ + code_len = BROTLI_HC_FAST_LOAD_VALUE(p); /* code_len == 0..17 */ if (code_len < BROTLI_REPEAT_PREVIOUS_CODE_LENGTH) { - BrotliDropBits(br, p->bits); + BrotliDropBits(br, BROTLI_HC_FAST_LOAD_BITS(p)); ProcessSingleCodeLength(code_len, &s->symbol, &s->repeat, &s->space, &s->prev_code_len, s->symbol_lists, s->code_length_histo, s->next_symbol); } else { /* code_len == 16..17, extra_bits == 2..3 */ uint32_t extra_bits = code_len - 14U; - uint32_t repeat_delta = (bits >> p->bits) & BitMask(extra_bits); - if (available_bits < p->bits + extra_bits) { + uint32_t repeat_delta = (bits >> BROTLI_HC_FAST_LOAD_BITS(p)) & + BitMask(extra_bits); + if (available_bits < BROTLI_HC_FAST_LOAD_BITS(p) + extra_bits) { get_byte = BROTLI_TRUE; continue; } - BrotliDropBits(br, p->bits + extra_bits); + BrotliDropBits(br, BROTLI_HC_FAST_LOAD_BITS(p) + extra_bits); ProcessRepeatedCodeLength(code_len, repeat_delta, alphabet_size, &s->symbol, &s->repeat, &s->space, &s->prev_code_len, &s->repeat_code_len, s->symbol_lists, s->code_length_histo, diff --git a/c/dec/huffman.c b/c/dec/huffman.c index c5eafad..9deb42c 100644 --- a/c/dec/huffman.c +++ b/c/dec/huffman.c @@ -142,8 +142,7 @@ void BrotliBuildCodeLengthsHuffmanTable(HuffmanCode* table, /* Special case: all symbols but one have 0 code length. */ if (offset[0] == 0) { - code.bits = 0; - code.value = (uint16_t)sorted[0]; + code = ConstructHuffmanCode(0, (uint16_t)sorted[0]); for (key = 0; key < (brotli_reg_t)table_size; ++key) { table[key] = code; } @@ -157,9 +156,9 @@ void BrotliBuildCodeLengthsHuffmanTable(HuffmanCode* table, bits = 1; step = 2; do { - code.bits = (uint8_t)bits; for (bits_count = count[bits]; bits_count != 0; --bits_count) { - code.value = (uint16_t)sorted[symbol++]; + code = ConstructHuffmanCode((uint8_t)bits, + (uint16_t)sorted[symbol++]); ReplicateValue(&table[BrotliReverseBits(key)], step, table_size, code); key += key_step; } @@ -211,11 +210,10 @@ uint32_t BrotliBuildHuffmanTable(HuffmanCode* root_table, bits = 1; step = 2; do { - code.bits = (uint8_t)bits; symbol = bits - (BROTLI_HUFFMAN_MAX_CODE_LENGTH + 1); for (bits_count = count[bits]; bits_count != 0; --bits_count) { symbol = symbol_lists[symbol]; - code.value = (uint16_t)symbol; + code = ConstructHuffmanCode((uint8_t)bits, (uint16_t)symbol); ReplicateValue(&table[BrotliReverseBits(key)], step, table_size, code); key += key_step; } @@ -244,14 +242,14 @@ uint32_t BrotliBuildHuffmanTable(HuffmanCode* root_table, total_size += table_size; sub_key = BrotliReverseBits(key); key += key_step; - root_table[sub_key].bits = (uint8_t)(table_bits + root_bits); - root_table[sub_key].value = - (uint16_t)(((size_t)(table - root_table)) - sub_key); + root_table[sub_key] = ConstructHuffmanCode( + (uint8_t)(table_bits + root_bits), + (uint16_t)(((size_t)(table - root_table)) - sub_key)); sub_key = 0; } - code.bits = (uint8_t)(len - root_bits); symbol = symbol_lists[symbol]; - code.value = (uint16_t)symbol; + code = ConstructHuffmanCode((uint8_t)(len - root_bits), + (uint16_t)symbol); ReplicateValue( &table[BrotliReverseBits(sub_key)], step, table_size, code); sub_key += sub_key_step; @@ -270,35 +268,28 @@ uint32_t BrotliBuildSimpleHuffmanTable(HuffmanCode* table, const uint32_t goal_size = 1U << root_bits; switch (num_symbols) { case 0: - table[0].bits = 0; - table[0].value = val[0]; + table[0] = ConstructHuffmanCode(0, val[0]); break; case 1: - table[0].bits = 1; - table[1].bits = 1; if (val[1] > val[0]) { - table[0].value = val[0]; - table[1].value = val[1]; + table[0] = ConstructHuffmanCode(1, val[0]); + table[1] = ConstructHuffmanCode(1, val[1]); } else { - table[0].value = val[1]; - table[1].value = val[0]; + table[0] = ConstructHuffmanCode(1, val[1]); + table[1] = ConstructHuffmanCode(1, val[0]); } table_size = 2; break; case 2: - table[0].bits = 1; - table[0].value = val[0]; - table[2].bits = 1; - table[2].value = val[0]; + table[0] = ConstructHuffmanCode(1, val[0]); + table[2] = ConstructHuffmanCode(1, val[0]); if (val[2] > val[1]) { - table[1].value = val[1]; - table[3].value = val[2]; + table[1] = ConstructHuffmanCode(2, val[1]); + table[3] = ConstructHuffmanCode(2, val[2]); } else { - table[1].value = val[2]; - table[3].value = val[1]; + table[1] = ConstructHuffmanCode(2, val[2]); + table[3] = ConstructHuffmanCode(2, val[1]); } - table[1].bits = 2; - table[3].bits = 2; table_size = 4; break; case 3: { @@ -312,33 +303,27 @@ uint32_t BrotliBuildSimpleHuffmanTable(HuffmanCode* table, } } } - for (i = 0; i < 4; ++i) { - table[i].bits = 2; - } - table[0].value = val[0]; - table[2].value = val[1]; - table[1].value = val[2]; - table[3].value = val[3]; + table[0] = ConstructHuffmanCode(2, val[0]); + table[2] = ConstructHuffmanCode(2, val[1]); + table[1] = ConstructHuffmanCode(2, val[2]); + table[3] = ConstructHuffmanCode(2, val[3]); table_size = 4; break; } case 4: { - int i; if (val[3] < val[2]) { uint16_t t = val[3]; val[3] = val[2]; val[2] = t; } - for (i = 0; i < 7; ++i) { - table[i].value = val[0]; - table[i].bits = (uint8_t)(1 + (i & 1)); - } - table[1].value = val[1]; - table[3].value = val[2]; - table[5].value = val[1]; - table[7].value = val[3]; - table[3].bits = 3; - table[7].bits = 3; + table[0] = ConstructHuffmanCode(1, val[0]); + table[1] = ConstructHuffmanCode(2, val[1]); + table[2] = ConstructHuffmanCode(1, val[0]); + table[3] = ConstructHuffmanCode(3, val[2]); + table[4] = ConstructHuffmanCode(1, val[0]); + table[5] = ConstructHuffmanCode(2, val[1]); + table[6] = ConstructHuffmanCode(1, val[0]); + table[7] = ConstructHuffmanCode(3, val[3]); table_size = 8; break; } diff --git a/c/dec/huffman.h b/c/dec/huffman.h index 521ec6e..b9f0716 100644 --- a/c/dec/huffman.h +++ b/c/dec/huffman.h @@ -33,11 +33,66 @@ static const uint16_t kMaxHuffmanTableSize[] = { #define BROTLI_HUFFMAN_MAX_CODE_LENGTH_CODE_LENGTH 5 +#if ((defined(BROTLI_TARGET_ARMV7) || defined(BROTLI_TARGET_ARMV8_32)) && \ + BROTLI_GNUC_HAS_ATTRIBUTE(aligned, 2, 7, 0)) +#define BROTLI_HUFFMAN_CODE_FAST_LOAD +#endif + +#if !defined(BROTLI_HUFFMAN_CODE_FAST_LOAD) +/* Do not create this struct directly - use the ConstructHuffmanCode + * constructor below! */ typedef struct { uint8_t bits; /* number of bits used for this symbol */ uint16_t value; /* symbol value or table offset */ } HuffmanCode; +static BROTLI_INLINE HuffmanCode ConstructHuffmanCode(const uint8_t bits, + const uint16_t value) { + HuffmanCode h; + h.bits = bits; + h.value = value; + return h; +} + +/* Please use the following macros to optimize HuffmanCode accesses in hot + * paths. + * + * For example, assuming |table| contains a HuffmanCode pointer: + * + * BROTLI_HC_MARK_TABLE_FOR_FAST_LOAD(table); + * BROTLI_HC_ADJUST_TABLE_INDEX(table, index_into_table); + * *bits = BROTLI_HC_GET_BITS(table); + * *value = BROTLI_HC_GET_VALUE(table); + * BROTLI_HC_ADJUST_TABLE_INDEX(table, offset); + * *bits2 = BROTLI_HC_GET_BITS(table); + * *value2 = BROTLI_HC_GET_VALUE(table); + * + */ + +#define BROTLI_HC_MARK_TABLE_FOR_FAST_LOAD(H) +#define BROTLI_HC_ADJUST_TABLE_INDEX(H, V) H += (V) + +/* These must be given a HuffmanCode pointer! */ +#define BROTLI_HC_FAST_LOAD_BITS(H) (H->bits) +#define BROTLI_HC_FAST_LOAD_VALUE(H) (H->value) + +#else /* BROTLI_HUFFMAN_CODE_FAST_LOAD */ + +typedef BROTLI_ALIGNED(4) uint32_t HuffmanCode; + +static BROTLI_INLINE HuffmanCode ConstructHuffmanCode(const uint8_t bits, + const uint16_t value) { + return ((value & 0xFFFF) << 16) | (bits & 0xFF); +} + +#define BROTLI_HC_MARK_TABLE_FOR_FAST_LOAD(H) uint32_t __fastload_##H = (*H) +#define BROTLI_HC_ADJUST_TABLE_INDEX(H, V) H += (V); __fastload_##H = (*H) + +/* These must be given a HuffmanCode pointer! */ +#define BROTLI_HC_FAST_LOAD_BITS(H) ((__fastload_##H) & 0xFF) +#define BROTLI_HC_FAST_LOAD_VALUE(H) ((__fastload_##H) >> 16) +#endif /* BROTLI_HUFFMAN_CODE_FAST_LOAD */ + /* Builds Huffman lookup table assuming code lengths are in symbol order. */ BROTLI_INTERNAL void BrotliBuildCodeLengthsHuffmanTable(HuffmanCode* root_table, const uint8_t* const code_lengths, uint16_t* count); |