aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorStephen Kyle <stephen.kyle@arm.com>2018-09-27 12:15:46 +0100
committerEugene Kliuchnikov <eustas@google.com>2018-09-27 13:15:46 +0200
commit9402ac5c08806f4ba400bc390420c29ddd78ecd1 (patch)
tree40dc55ba7b0e88749e220c342290eb29d0d654f7
parent67f059eaf5eb6cf9201ffe7776b48229a442d864 (diff)
downloadbrotli-9402ac5c08806f4ba400bc390420c29ddd78ecd1.zip
brotli-9402ac5c08806f4ba400bc390420c29ddd78ecd1.tar.gz
brotli-9402ac5c08806f4ba400bc390420c29ddd78ecd1.tar.bz2
decode: faster huffman code loading on 32-bit Arm (#703)
* platform: add macro for using the 'aligned' attribute * decode: add accessor macros for HuffmanCode fields Adds a constructor function for building HuffmanCode values so they can be accessed quickly on different architectures. Also adds macros for marking a HuffmanCode table pointer that can be accessed quickly (BROTLI_HC_MARK_TABLE_FOR_FAST_LOAD), adjusting the index into that table (BROTLI_HC_ADJUST_TABLE_INDEX), and getting the .bits or .value fields out of the table at the current index (BROTLI_HC_GET_BITS/VALUE). For example, assuming |table| contains a HuffmanCode pointer: BROTLI_HC_MARK_TABLE_FOR_FAST_LOAD(table); BROTLI_HC_ADJUST_TABLE_INDEX(table, index_into_table); *bits = BROTLI_HC_GET_BITS(table); *value = BROTLI_HC_GET_VALUE(table); BROTLI_HC_ADJUST_TABLE_INDEX(table, offset); *bits2 = BROTLI_HC_GET_BITS(table); *value2 = BROTLI_HC_GET_VALUE(table); All uses of the HuffmanCode have been updated appropriately. * decode: add alternative accessors for HuffmanCode on Arm AArch32
-rwxr-xr-xc/common/platform.h8
-rw-r--r--c/dec/decode.c81
-rw-r--r--c/dec/huffman.c79
-rw-r--r--c/dec/huffman.h55
4 files changed, 139 insertions, 84 deletions
diff --git a/c/common/platform.h b/c/common/platform.h
index 0d84b19..3945a27 100755
--- a/c/common/platform.h
+++ b/c/common/platform.h
@@ -180,6 +180,12 @@ OR:
#define BROTLI_UNUSED_FUNCTION static BROTLI_INLINE
#endif
+#if BROTLI_GNUC_HAS_ATTRIBUTE(aligned, 2, 7, 0)
+#define BROTLI_ALIGNED(N) __attribute__((aligned(N)))
+#else
+#define BROTLI_ALIGNED(N)
+#endif
+
#if (defined(__ARM_ARCH) && (__ARM_ARCH == 7)) || \
(defined(M_ARM) && (M_ARM == 7))
#define BROTLI_TARGET_ARMV7
@@ -343,7 +349,7 @@ static BROTLI_INLINE void BrotliUnalignedWrite64(void* p, uint64_t v) {
/* If __attribute__(aligned) is available, use that. Otherwise, memcpy. */
#if BROTLI_GNUC_HAS_ATTRIBUTE(aligned, 2, 7, 0)
-typedef __attribute__((aligned(1))) uint64_t brotli_unaligned_uint64_t;
+typedef BROTLI_ALIGNED(1) uint64_t brotli_unaligned_uint64_t;
static BROTLI_INLINE uint64_t BrotliUnalignedRead64(const void* p) {
return (uint64_t) ((brotli_unaligned_uint64_t*) p)[0];
diff --git a/c/dec/decode.c b/c/dec/decode.c
index 674b829..1fde329 100644
--- a/c/dec/decode.c
+++ b/c/dec/decode.c
@@ -347,15 +347,17 @@ static BrotliDecoderErrorCode BROTLI_NOINLINE DecodeMetaBlockLength(
static BROTLI_INLINE uint32_t DecodeSymbol(uint32_t bits,
const HuffmanCode* table,
BrotliBitReader* br) {
- table += bits & HUFFMAN_TABLE_MASK;
- if (table->bits > HUFFMAN_TABLE_BITS) {
- uint32_t nbits = table->bits - HUFFMAN_TABLE_BITS;
+ BROTLI_HC_MARK_TABLE_FOR_FAST_LOAD(table);
+ BROTLI_HC_ADJUST_TABLE_INDEX(table, bits & HUFFMAN_TABLE_MASK);
+ if (BROTLI_HC_FAST_LOAD_BITS(table) > HUFFMAN_TABLE_BITS) {
+ uint32_t nbits = BROTLI_HC_FAST_LOAD_BITS(table) - HUFFMAN_TABLE_BITS;
BrotliDropBits(br, HUFFMAN_TABLE_BITS);
- table += table->value;
- table += (bits >> HUFFMAN_TABLE_BITS) & BitMask(nbits);
+ BROTLI_HC_ADJUST_TABLE_INDEX(table,
+ BROTLI_HC_FAST_LOAD_VALUE(table) +
+ ((bits >> HUFFMAN_TABLE_BITS) & BitMask(nbits)));
}
- BrotliDropBits(br, table->bits);
- return table->value;
+ BrotliDropBits(br, BROTLI_HC_FAST_LOAD_BITS(table));
+ return BROTLI_HC_FAST_LOAD_VALUE(table);
}
/* Reads and decodes the next Huffman code from bit-stream.
@@ -371,19 +373,20 @@ static BROTLI_NOINLINE BROTLI_BOOL SafeDecodeSymbol(
const HuffmanCode* table, BrotliBitReader* br, uint32_t* result) {
uint32_t val;
uint32_t available_bits = BrotliGetAvailableBits(br);
+ BROTLI_HC_MARK_TABLE_FOR_FAST_LOAD(table);
if (available_bits == 0) {
- if (table->bits == 0) {
- *result = table->value;
+ if (BROTLI_HC_FAST_LOAD_BITS(table) == 0) {
+ *result = BROTLI_HC_FAST_LOAD_VALUE(table);
return BROTLI_TRUE;
}
return BROTLI_FALSE; /* No valid bits at all. */
}
val = (uint32_t)BrotliGetBitsUnmasked(br);
- table += val & HUFFMAN_TABLE_MASK;
- if (table->bits <= HUFFMAN_TABLE_BITS) {
- if (table->bits <= available_bits) {
- BrotliDropBits(br, table->bits);
- *result = table->value;
+ BROTLI_HC_ADJUST_TABLE_INDEX(table, val & HUFFMAN_TABLE_MASK);
+ if (BROTLI_HC_FAST_LOAD_BITS(table) <= HUFFMAN_TABLE_BITS) {
+ if (BROTLI_HC_FAST_LOAD_BITS(table) <= available_bits) {
+ BrotliDropBits(br, BROTLI_HC_FAST_LOAD_BITS(table));
+ *result = BROTLI_HC_FAST_LOAD_VALUE(table);
return BROTLI_TRUE;
} else {
return BROTLI_FALSE; /* Not enough bits for the first level. */
@@ -394,15 +397,15 @@ static BROTLI_NOINLINE BROTLI_BOOL SafeDecodeSymbol(
}
/* Speculatively drop HUFFMAN_TABLE_BITS. */
- val = (val & BitMask(table->bits)) >> HUFFMAN_TABLE_BITS;
+ val = (val & BitMask(BROTLI_HC_FAST_LOAD_BITS(table))) >> HUFFMAN_TABLE_BITS;
available_bits -= HUFFMAN_TABLE_BITS;
- table += table->value + val;
- if (available_bits < table->bits) {
+ BROTLI_HC_ADJUST_TABLE_INDEX(table, BROTLI_HC_FAST_LOAD_VALUE(table) + val);
+ if (available_bits < BROTLI_HC_FAST_LOAD_BITS(table)) {
return BROTLI_FALSE; /* Not enough bits for the second level. */
}
- BrotliDropBits(br, HUFFMAN_TABLE_BITS + table->bits);
- *result = table->value;
+ BrotliDropBits(br, HUFFMAN_TABLE_BITS + BROTLI_HC_FAST_LOAD_BITS(table));
+ *result = BROTLI_HC_FAST_LOAD_VALUE(table);
return BROTLI_TRUE;
}
@@ -425,9 +428,10 @@ static BROTLI_INLINE void PreloadSymbol(int safe,
if (safe) {
return;
}
- table += BrotliGetBits(br, HUFFMAN_TABLE_BITS);
- *bits = table->bits;
- *value = table->value;
+ BROTLI_HC_MARK_TABLE_FOR_FAST_LOAD(table);
+ BROTLI_HC_ADJUST_TABLE_INDEX(table, BrotliGetBits(br, HUFFMAN_TABLE_BITS));
+ *bits = BROTLI_HC_FAST_LOAD_BITS(table);
+ *value = BROTLI_HC_FAST_LOAD_VALUE(table);
}
/* Decodes the next Huffman code using data prepared by PreloadSymbol.
@@ -441,10 +445,11 @@ static BROTLI_INLINE uint32_t ReadPreloadedSymbol(const HuffmanCode* table,
uint32_t val = BrotliGet16BitsUnmasked(br);
const HuffmanCode* ext = table + (val & HUFFMAN_TABLE_MASK) + *value;
uint32_t mask = BitMask((*bits - HUFFMAN_TABLE_BITS));
+ BROTLI_HC_MARK_TABLE_FOR_FAST_LOAD(ext);
BrotliDropBits(br, HUFFMAN_TABLE_BITS);
- ext += (val >> HUFFMAN_TABLE_BITS) & mask;
- BrotliDropBits(br, ext->bits);
- result = ext->value;
+ BROTLI_HC_ADJUST_TABLE_INDEX(ext, (val >> HUFFMAN_TABLE_BITS) & mask);
+ BrotliDropBits(br, BROTLI_HC_FAST_LOAD_BITS(ext));
+ result = BROTLI_HC_FAST_LOAD_VALUE(ext);
} else {
BrotliDropBits(br, *bits);
}
@@ -597,6 +602,7 @@ static BrotliDecoderErrorCode ReadSymbolCodeLengths(
while (symbol < alphabet_size && space > 0) {
const HuffmanCode* p = s->table;
uint32_t code_len;
+ BROTLI_HC_MARK_TABLE_FOR_FAST_LOAD(p);
if (!BrotliCheckInputAmount(br, BROTLI_SHORT_FILL_BIT_WINDOW_READ)) {
s->symbol = symbol;
s->repeat = repeat;
@@ -606,10 +612,10 @@ static BrotliDecoderErrorCode ReadSymbolCodeLengths(
return BROTLI_DECODER_NEEDS_MORE_INPUT;
}
BrotliFillBitWindow16(br);
- p += BrotliGetBitsUnmasked(br) &
- BitMask(BROTLI_HUFFMAN_MAX_CODE_LENGTH_CODE_LENGTH);
- BrotliDropBits(br, p->bits); /* Use 1..5 bits. */
- code_len = p->value; /* code_len == 0..17 */
+ BROTLI_HC_ADJUST_TABLE_INDEX(p, BrotliGetBitsUnmasked(br) &
+ BitMask(BROTLI_HUFFMAN_MAX_CODE_LENGTH_CODE_LENGTH));
+ BrotliDropBits(br, BROTLI_HC_FAST_LOAD_BITS(p)); /* Use 1..5 bits. */
+ code_len = BROTLI_HC_FAST_LOAD_VALUE(p); /* code_len == 0..17 */
if (code_len < BROTLI_REPEAT_PREVIOUS_CODE_LENGTH) {
ProcessSingleCodeLength(code_len, &symbol, &repeat, &space,
&prev_code_len, symbol_lists, code_length_histo, next_symbol);
@@ -637,31 +643,34 @@ static BrotliDecoderErrorCode SafeReadSymbolCodeLengths(
uint32_t code_len;
uint32_t available_bits;
uint32_t bits = 0;
+ BROTLI_HC_MARK_TABLE_FOR_FAST_LOAD(p);
if (get_byte && !BrotliPullByte(br)) return BROTLI_DECODER_NEEDS_MORE_INPUT;
get_byte = BROTLI_FALSE;
available_bits = BrotliGetAvailableBits(br);
if (available_bits != 0) {
bits = (uint32_t)BrotliGetBitsUnmasked(br);
}
- p += bits & BitMask(BROTLI_HUFFMAN_MAX_CODE_LENGTH_CODE_LENGTH);
- if (p->bits > available_bits) {
+ BROTLI_HC_ADJUST_TABLE_INDEX(p,
+ bits & BitMask(BROTLI_HUFFMAN_MAX_CODE_LENGTH_CODE_LENGTH));
+ if (BROTLI_HC_FAST_LOAD_BITS(p) > available_bits) {
get_byte = BROTLI_TRUE;
continue;
}
- code_len = p->value; /* code_len == 0..17 */
+ code_len = BROTLI_HC_FAST_LOAD_VALUE(p); /* code_len == 0..17 */
if (code_len < BROTLI_REPEAT_PREVIOUS_CODE_LENGTH) {
- BrotliDropBits(br, p->bits);
+ BrotliDropBits(br, BROTLI_HC_FAST_LOAD_BITS(p));
ProcessSingleCodeLength(code_len, &s->symbol, &s->repeat, &s->space,
&s->prev_code_len, s->symbol_lists, s->code_length_histo,
s->next_symbol);
} else { /* code_len == 16..17, extra_bits == 2..3 */
uint32_t extra_bits = code_len - 14U;
- uint32_t repeat_delta = (bits >> p->bits) & BitMask(extra_bits);
- if (available_bits < p->bits + extra_bits) {
+ uint32_t repeat_delta = (bits >> BROTLI_HC_FAST_LOAD_BITS(p)) &
+ BitMask(extra_bits);
+ if (available_bits < BROTLI_HC_FAST_LOAD_BITS(p) + extra_bits) {
get_byte = BROTLI_TRUE;
continue;
}
- BrotliDropBits(br, p->bits + extra_bits);
+ BrotliDropBits(br, BROTLI_HC_FAST_LOAD_BITS(p) + extra_bits);
ProcessRepeatedCodeLength(code_len, repeat_delta, alphabet_size,
&s->symbol, &s->repeat, &s->space, &s->prev_code_len,
&s->repeat_code_len, s->symbol_lists, s->code_length_histo,
diff --git a/c/dec/huffman.c b/c/dec/huffman.c
index c5eafad..9deb42c 100644
--- a/c/dec/huffman.c
+++ b/c/dec/huffman.c
@@ -142,8 +142,7 @@ void BrotliBuildCodeLengthsHuffmanTable(HuffmanCode* table,
/* Special case: all symbols but one have 0 code length. */
if (offset[0] == 0) {
- code.bits = 0;
- code.value = (uint16_t)sorted[0];
+ code = ConstructHuffmanCode(0, (uint16_t)sorted[0]);
for (key = 0; key < (brotli_reg_t)table_size; ++key) {
table[key] = code;
}
@@ -157,9 +156,9 @@ void BrotliBuildCodeLengthsHuffmanTable(HuffmanCode* table,
bits = 1;
step = 2;
do {
- code.bits = (uint8_t)bits;
for (bits_count = count[bits]; bits_count != 0; --bits_count) {
- code.value = (uint16_t)sorted[symbol++];
+ code = ConstructHuffmanCode((uint8_t)bits,
+ (uint16_t)sorted[symbol++]);
ReplicateValue(&table[BrotliReverseBits(key)], step, table_size, code);
key += key_step;
}
@@ -211,11 +210,10 @@ uint32_t BrotliBuildHuffmanTable(HuffmanCode* root_table,
bits = 1;
step = 2;
do {
- code.bits = (uint8_t)bits;
symbol = bits - (BROTLI_HUFFMAN_MAX_CODE_LENGTH + 1);
for (bits_count = count[bits]; bits_count != 0; --bits_count) {
symbol = symbol_lists[symbol];
- code.value = (uint16_t)symbol;
+ code = ConstructHuffmanCode((uint8_t)bits, (uint16_t)symbol);
ReplicateValue(&table[BrotliReverseBits(key)], step, table_size, code);
key += key_step;
}
@@ -244,14 +242,14 @@ uint32_t BrotliBuildHuffmanTable(HuffmanCode* root_table,
total_size += table_size;
sub_key = BrotliReverseBits(key);
key += key_step;
- root_table[sub_key].bits = (uint8_t)(table_bits + root_bits);
- root_table[sub_key].value =
- (uint16_t)(((size_t)(table - root_table)) - sub_key);
+ root_table[sub_key] = ConstructHuffmanCode(
+ (uint8_t)(table_bits + root_bits),
+ (uint16_t)(((size_t)(table - root_table)) - sub_key));
sub_key = 0;
}
- code.bits = (uint8_t)(len - root_bits);
symbol = symbol_lists[symbol];
- code.value = (uint16_t)symbol;
+ code = ConstructHuffmanCode((uint8_t)(len - root_bits),
+ (uint16_t)symbol);
ReplicateValue(
&table[BrotliReverseBits(sub_key)], step, table_size, code);
sub_key += sub_key_step;
@@ -270,35 +268,28 @@ uint32_t BrotliBuildSimpleHuffmanTable(HuffmanCode* table,
const uint32_t goal_size = 1U << root_bits;
switch (num_symbols) {
case 0:
- table[0].bits = 0;
- table[0].value = val[0];
+ table[0] = ConstructHuffmanCode(0, val[0]);
break;
case 1:
- table[0].bits = 1;
- table[1].bits = 1;
if (val[1] > val[0]) {
- table[0].value = val[0];
- table[1].value = val[1];
+ table[0] = ConstructHuffmanCode(1, val[0]);
+ table[1] = ConstructHuffmanCode(1, val[1]);
} else {
- table[0].value = val[1];
- table[1].value = val[0];
+ table[0] = ConstructHuffmanCode(1, val[1]);
+ table[1] = ConstructHuffmanCode(1, val[0]);
}
table_size = 2;
break;
case 2:
- table[0].bits = 1;
- table[0].value = val[0];
- table[2].bits = 1;
- table[2].value = val[0];
+ table[0] = ConstructHuffmanCode(1, val[0]);
+ table[2] = ConstructHuffmanCode(1, val[0]);
if (val[2] > val[1]) {
- table[1].value = val[1];
- table[3].value = val[2];
+ table[1] = ConstructHuffmanCode(2, val[1]);
+ table[3] = ConstructHuffmanCode(2, val[2]);
} else {
- table[1].value = val[2];
- table[3].value = val[1];
+ table[1] = ConstructHuffmanCode(2, val[2]);
+ table[3] = ConstructHuffmanCode(2, val[1]);
}
- table[1].bits = 2;
- table[3].bits = 2;
table_size = 4;
break;
case 3: {
@@ -312,33 +303,27 @@ uint32_t BrotliBuildSimpleHuffmanTable(HuffmanCode* table,
}
}
}
- for (i = 0; i < 4; ++i) {
- table[i].bits = 2;
- }
- table[0].value = val[0];
- table[2].value = val[1];
- table[1].value = val[2];
- table[3].value = val[3];
+ table[0] = ConstructHuffmanCode(2, val[0]);
+ table[2] = ConstructHuffmanCode(2, val[1]);
+ table[1] = ConstructHuffmanCode(2, val[2]);
+ table[3] = ConstructHuffmanCode(2, val[3]);
table_size = 4;
break;
}
case 4: {
- int i;
if (val[3] < val[2]) {
uint16_t t = val[3];
val[3] = val[2];
val[2] = t;
}
- for (i = 0; i < 7; ++i) {
- table[i].value = val[0];
- table[i].bits = (uint8_t)(1 + (i & 1));
- }
- table[1].value = val[1];
- table[3].value = val[2];
- table[5].value = val[1];
- table[7].value = val[3];
- table[3].bits = 3;
- table[7].bits = 3;
+ table[0] = ConstructHuffmanCode(1, val[0]);
+ table[1] = ConstructHuffmanCode(2, val[1]);
+ table[2] = ConstructHuffmanCode(1, val[0]);
+ table[3] = ConstructHuffmanCode(3, val[2]);
+ table[4] = ConstructHuffmanCode(1, val[0]);
+ table[5] = ConstructHuffmanCode(2, val[1]);
+ table[6] = ConstructHuffmanCode(1, val[0]);
+ table[7] = ConstructHuffmanCode(3, val[3]);
table_size = 8;
break;
}
diff --git a/c/dec/huffman.h b/c/dec/huffman.h
index 521ec6e..b9f0716 100644
--- a/c/dec/huffman.h
+++ b/c/dec/huffman.h
@@ -33,11 +33,66 @@ static const uint16_t kMaxHuffmanTableSize[] = {
#define BROTLI_HUFFMAN_MAX_CODE_LENGTH_CODE_LENGTH 5
+#if ((defined(BROTLI_TARGET_ARMV7) || defined(BROTLI_TARGET_ARMV8_32)) && \
+ BROTLI_GNUC_HAS_ATTRIBUTE(aligned, 2, 7, 0))
+#define BROTLI_HUFFMAN_CODE_FAST_LOAD
+#endif
+
+#if !defined(BROTLI_HUFFMAN_CODE_FAST_LOAD)
+/* Do not create this struct directly - use the ConstructHuffmanCode
+ * constructor below! */
typedef struct {
uint8_t bits; /* number of bits used for this symbol */
uint16_t value; /* symbol value or table offset */
} HuffmanCode;
+static BROTLI_INLINE HuffmanCode ConstructHuffmanCode(const uint8_t bits,
+ const uint16_t value) {
+ HuffmanCode h;
+ h.bits = bits;
+ h.value = value;
+ return h;
+}
+
+/* Please use the following macros to optimize HuffmanCode accesses in hot
+ * paths.
+ *
+ * For example, assuming |table| contains a HuffmanCode pointer:
+ *
+ * BROTLI_HC_MARK_TABLE_FOR_FAST_LOAD(table);
+ * BROTLI_HC_ADJUST_TABLE_INDEX(table, index_into_table);
+ * *bits = BROTLI_HC_GET_BITS(table);
+ * *value = BROTLI_HC_GET_VALUE(table);
+ * BROTLI_HC_ADJUST_TABLE_INDEX(table, offset);
+ * *bits2 = BROTLI_HC_GET_BITS(table);
+ * *value2 = BROTLI_HC_GET_VALUE(table);
+ *
+ */
+
+#define BROTLI_HC_MARK_TABLE_FOR_FAST_LOAD(H)
+#define BROTLI_HC_ADJUST_TABLE_INDEX(H, V) H += (V)
+
+/* These must be given a HuffmanCode pointer! */
+#define BROTLI_HC_FAST_LOAD_BITS(H) (H->bits)
+#define BROTLI_HC_FAST_LOAD_VALUE(H) (H->value)
+
+#else /* BROTLI_HUFFMAN_CODE_FAST_LOAD */
+
+typedef BROTLI_ALIGNED(4) uint32_t HuffmanCode;
+
+static BROTLI_INLINE HuffmanCode ConstructHuffmanCode(const uint8_t bits,
+ const uint16_t value) {
+ return ((value & 0xFFFF) << 16) | (bits & 0xFF);
+}
+
+#define BROTLI_HC_MARK_TABLE_FOR_FAST_LOAD(H) uint32_t __fastload_##H = (*H)
+#define BROTLI_HC_ADJUST_TABLE_INDEX(H, V) H += (V); __fastload_##H = (*H)
+
+/* These must be given a HuffmanCode pointer! */
+#define BROTLI_HC_FAST_LOAD_BITS(H) ((__fastload_##H) & 0xFF)
+#define BROTLI_HC_FAST_LOAD_VALUE(H) ((__fastload_##H) >> 16)
+#endif /* BROTLI_HUFFMAN_CODE_FAST_LOAD */
+
/* Builds Huffman lookup table assuming code lengths are in symbol order. */
BROTLI_INTERNAL void BrotliBuildCodeLengthsHuffmanTable(HuffmanCode* root_table,
const uint8_t* const code_lengths, uint16_t* count);