aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorZoltan Szabadka <szabadka@google.com>2015-05-07 16:53:43 +0200
committerZoltan Szabadka <szabadka@google.com>2015-05-07 16:53:43 +0200
commit83aa24dc8686b62177d776a3baab5f69badc1a78 (patch)
tree1ed5472993dd575baaeb0185ffa23a5a40ae86bb
parent47ea76186903fac8c6ed5b3a9f4e6fa5a801f824 (diff)
downloadbrotli-83aa24dc8686b62177d776a3baab5f69badc1a78.zip
brotli-83aa24dc8686b62177d776a3baab5f69badc1a78.tar.gz
brotli-83aa24dc8686b62177d776a3baab5f69badc1a78.tar.bz2
Speed and memory usage improvements for the decoder.
* Change order of members of bit reader state structure. * Remove unused includes for assert. Add BROTLI_DCHECK macros and use it instead of assert. * Do not calculate nbits in common case of ReadSymbol. * Introduce and use PREDICT_TRUE / PREDICT_FALSE macros. * Allocate less memory in the brotli decoder if it knows the result size beforehand. Before this, the decoder would always allocate 16MB if the encoder annotated the window size as 22 bit (which is the default), even if the file is only a few KB uncompressed. Now, it'll only allocate a ringbuffer as large as needed for the result file. But only if it can know the filesize, it's not possible to know that if there are multiple metablocks or too large uncompressed metablock.
-rw-r--r--.gitignore1
-rw-r--r--dec/bit_reader.c4
-rw-r--r--dec/bit_reader.h16
-rw-r--r--dec/decode.c51
-rw-r--r--dec/huffman.c1
-rw-r--r--dec/huffman.h1
-rw-r--r--dec/port.h60
-rw-r--r--dec/safe_malloc.c3
-rw-r--r--dec/safe_malloc.h2
-rw-r--r--enc/port.h12
-rw-r--r--setup.py1
11 files changed, 117 insertions, 35 deletions
diff --git a/.gitignore b/.gitignore
index 83973eb..162f036 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,4 +1,5 @@
*.o
+*.pyc
*.txt.uncompressed
*.bro
*.unbro
diff --git a/dec/bit_reader.c b/dec/bit_reader.c
index 8369397..64736ab 100644
--- a/dec/bit_reader.c
+++ b/dec/bit_reader.c
@@ -15,10 +15,10 @@
/* Bit reading helpers */
-#include <assert.h>
#include <stdlib.h>
#include "./bit_reader.h"
+#include "./port.h"
#if defined(__cplusplus) || defined(c_plusplus)
extern "C" {
@@ -26,7 +26,7 @@ extern "C" {
void BrotliInitBitReader(BrotliBitReader* const br,
BrotliInput input, int finish) {
- assert(br != NULL);
+ BROTLI_DCHECK(br != NULL);
br->finish_ = finish;
br->tmp_bytes_read_ = 0;
diff --git a/dec/bit_reader.h b/dec/bit_reader.h
index 84cddb4..cbd9fae 100644
--- a/dec/bit_reader.h
+++ b/dec/bit_reader.h
@@ -19,6 +19,7 @@
#define BROTLI_DEC_BIT_READER_H_
#include <string.h>
+#include "./port.h"
#include "./streams.h"
#include "./types.h"
@@ -46,11 +47,6 @@ static const uint32_t kBitMask[BROTLI_MAX_NUM_BIT_READ] = {
};
typedef struct {
- /* Input byte buffer, consist of a ringbuffer and a "slack" region where */
- /* bytes from the start of the ringbuffer are copied. */
- uint8_t buf_[BROTLI_IBUF_SIZE];
- uint8_t* buf_ptr_; /* next input will write here */
- BrotliInput input_; /* input callback */
#if (BROTLI_USE_64_BITS)
uint64_t val_; /* pre-fetched bits */
#else
@@ -60,12 +56,18 @@ typedef struct {
uint32_t bit_pos_; /* current bit-reading position in val_ */
uint32_t bit_end_pos_; /* bit-reading end position from LSB of val_ */
int eos_; /* input stream is finished */
+ uint8_t* buf_ptr_; /* next input will write here */
+ BrotliInput input_; /* input callback */
/* Set to 0 to support partial data streaming. Set to 1 to expect full data or
for the last chunk of partial data. */
int finish_;
/* indicates how much bytes already read when reading partial data */
int tmp_bytes_read_;
+
+ /* Input byte buffer, consist of a ringbuffer and a "slack" region where */
+ /* bytes from the start of the ringbuffer are copied. */
+ uint8_t buf_[BROTLI_IBUF_SIZE];
} BrotliBitReader;
/* Initializes the bitreader fields. After this, BrotliWarmupBitReader must
@@ -126,9 +128,9 @@ static BROTLI_INLINE void ShiftBytes32(BrotliBitReader* const br) {
every 32 bytes of input is read.
*/
static BROTLI_INLINE int BrotliReadMoreInput(BrotliBitReader* const br) {
- if (br->bit_end_pos_ > 256) {
+ if (PREDICT_TRUE(br->bit_end_pos_ > 256)) {
return 1;
- } else if (br->eos_) {
+ } else if (PREDICT_FALSE(br->eos_)) {
return br->bit_pos_ <= br->bit_end_pos_;
} else {
uint8_t* dst = br->buf_ptr_;
diff --git a/dec/decode.c b/dec/decode.c
index e94c340..cc1bab5 100644
--- a/dec/decode.c
+++ b/dec/decode.c
@@ -20,6 +20,7 @@
#include "./context.h"
#include "./decode.h"
#include "./dictionary.h"
+#include "./port.h"
#include "./transform.h"
#include "./huffman.h"
#include "./prefix.h"
@@ -149,9 +150,9 @@ static BROTLI_INLINE int ReadSymbol(const HuffmanCode* table,
int nbits;
BrotliFillBitWindow(br);
table += (int)(br->val_ >> br->bit_pos_) & HUFFMAN_TABLE_MASK;
- nbits = table->bits - HUFFMAN_TABLE_BITS;
- if (nbits > 0) {
+ if (PREDICT_FALSE(table->bits > HUFFMAN_TABLE_BITS)) {
br->bit_pos_ += HUFFMAN_TABLE_BITS;
+ nbits = table->bits - HUFFMAN_TABLE_BITS;
table += table->value;
table += (int)(br->val_ >> br->bit_pos_) & ((1 << nbits) - 1);
}
@@ -938,6 +939,7 @@ BrotliResult BrotliDecompressStreaming(BrotliInput input, BrotliOutput output,
- flushing the input s->ringbuffer when decoding uncompressed blocks */
static const int kRingBufferWriteAheadSlack = 128 + BROTLI_READ_SIZE;
+ s->br.input_ = input;
s->br.finish_ = finish;
/* State machine */
@@ -979,17 +981,6 @@ BrotliResult BrotliDecompressStreaming(BrotliInput input, BrotliOutput output,
s->window_bits = DecodeWindowBits(br);
s->max_backward_distance = (1 << s->window_bits) - 16;
- s->ringbuffer_size = 1 << s->window_bits;
- s->ringbuffer_mask = s->ringbuffer_size - 1;
- s->ringbuffer = (uint8_t*)malloc((size_t)(s->ringbuffer_size +
- kRingBufferWriteAheadSlack +
- kMaxDictionaryWordLength));
- if (!s->ringbuffer) {
- result = BROTLI_RESULT_ERROR;
- break;
- }
- s->ringbuffer_end = s->ringbuffer + s->ringbuffer_size;
-
s->block_type_trees = (HuffmanCode*)malloc(
3 * BROTLI_HUFFMAN_MAX_TABLE_SIZE * sizeof(HuffmanCode));
s->block_len_trees = (HuffmanCode*)malloc(
@@ -1002,10 +993,6 @@ BrotliResult BrotliDecompressStreaming(BrotliInput input, BrotliOutput output,
s->state = BROTLI_STATE_METABLOCK_BEGIN;
/* No break, continue to next state */
case BROTLI_STATE_METABLOCK_BEGIN:
- if (!BrotliReadMoreInput(br)) {
- result = BROTLI_RESULT_NEEDS_MORE_INPUT;
- break;
- }
if (s->input_end) {
s->partially_written = 0;
s->state = BROTLI_STATE_DONE;
@@ -1058,6 +1045,34 @@ BrotliResult BrotliDecompressStreaming(BrotliInput input, BrotliOutput output,
break;
}
BROTLI_LOG_UINT(s->meta_block_remaining_len);
+ /* If it is the first metablock, allocate the ringbuffer */
+ if (s->ringbuffer == NULL) {
+ size_t known_size = 0;
+ s->ringbuffer_size = 1 << s->window_bits;
+
+ /* If we know the data size is small, do not allocate more ringbuffer
+ size than needed to reduce memory usage. Since this happens after
+ the first BrotliReadMoreInput call, we can read the bitreader
+ buffer at position 0. */
+ if (BrotliDecompressedSize(BROTLI_READ_SIZE, br->buf_, &known_size)
+ == BROTLI_RESULT_SUCCESS) {
+ while (s->ringbuffer_size >= known_size * 2
+ && s->ringbuffer_size > 0) {
+ s->ringbuffer_size /= 2;
+ }
+ }
+
+ s->ringbuffer_mask = s->ringbuffer_size - 1;
+ s->ringbuffer = (uint8_t*)malloc((size_t)(s->ringbuffer_size +
+ kRingBufferWriteAheadSlack +
+ kMaxDictionaryWordLength));
+ if (!s->ringbuffer) {
+ result = BROTLI_RESULT_ERROR;
+ break;
+ }
+ s->ringbuffer_end = s->ringbuffer + s->ringbuffer_size;
+ }
+
if (s->is_metadata) {
if (!JumpToByteBoundary(&s->br)) {
result = BROTLI_RESULT_ERROR;
@@ -1348,7 +1363,7 @@ BrotliResult BrotliDecompressStreaming(BrotliInput input, BrotliOutput output,
result = BROTLI_RESULT_NEEDS_MORE_INPUT;
break;
}
- assert(s->distance_code < 0);
+ BROTLI_DCHECK(s->distance_code < 0);
if (s->block_length[2] == 0) {
DecodeBlockType(s->num_block_types[2],
diff --git a/dec/huffman.c b/dec/huffman.c
index 9590c76..3925087 100644
--- a/dec/huffman.c
+++ b/dec/huffman.c
@@ -15,7 +15,6 @@
/* Utilities for building Huffman decoding tables. */
-#include <assert.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
diff --git a/dec/huffman.h b/dec/huffman.h
index edadac3..0da2aad 100644
--- a/dec/huffman.h
+++ b/dec/huffman.h
@@ -18,7 +18,6 @@
#ifndef BROTLI_DEC_HUFFMAN_H_
#define BROTLI_DEC_HUFFMAN_H_
-#include <assert.h>
#include "./types.h"
#if defined(__cplusplus) || defined(c_plusplus)
diff --git a/dec/port.h b/dec/port.h
new file mode 100644
index 0000000..5e3baf0
--- /dev/null
+++ b/dec/port.h
@@ -0,0 +1,60 @@
+/* Copyright 2015 Google Inc. All Rights Reserved.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+/* Macros for branch prediction. */
+
+#ifndef BROTLI_DEC_PORT_H_
+#define BROTLI_DEC_PORT_H_
+
+#include<assert.h>
+
+/* Compatibility with non-clang compilers. */
+#ifndef __has_builtin
+#define __has_builtin(x) 0
+#endif
+
+/* Define "PREDICT_TRUE" and "PREDICT_FALSE" macros for capable compilers.
+
+To apply compiler hint, enclose the branching condition into macros, like this:
+
+ if (PREDICT_TRUE(zero == 0)) {
+ // main execution path
+ } else {
+ // compiler should place this code outside of main execution path
+ }
+
+OR:
+
+ if (PREDICT_FALSE(something_rare_or_unexpected_happens)) {
+ // compiler should place this code outside of main execution path
+ }
+
+*/
+#if (__GNUC__ > 2) || (__GNUC__ == 2 && __GNUC_MINOR__ > 95) || \
+ (defined(__llvm__) && __has_builtin(__builtin_expect))
+#define PREDICT_TRUE(x) (__builtin_expect(!!(x), 1))
+#define PREDICT_FALSE(x) (__builtin_expect(x, 0))
+#else
+#define PREDICT_FALSE(x) (x)
+#define PREDICT_TRUE(x) (x)
+#endif
+
+#ifdef BROTLI_DECODE_DEBUG
+#define BROTLI_DCHECK(x) assert(x)
+#else
+#define BROTLI_DCHECK(x)
+#endif
+
+#endif /* BROTLI_DEC_PORT_H_ */
diff --git a/dec/safe_malloc.c b/dec/safe_malloc.c
index a2ee504..fba00f8 100644
--- a/dec/safe_malloc.c
+++ b/dec/safe_malloc.c
@@ -16,6 +16,7 @@
/* Size-checked memory allocation. */
#include <stdlib.h>
+#include "./port.h"
#include "./safe_malloc.h"
#if defined(__cplusplus) || defined(c_plusplus)
@@ -33,7 +34,7 @@ static int CheckSizeArgumentsOverflow(uint64_t nmemb, size_t size) {
void* BrotliSafeMalloc(uint64_t nmemb, size_t size) {
if (!CheckSizeArgumentsOverflow(nmemb, size)) return NULL;
- assert(nmemb * size > 0);
+ BROTLI_DCHECK(nmemb * size > 0);
return malloc((size_t)(nmemb * size));
}
diff --git a/dec/safe_malloc.h b/dec/safe_malloc.h
index 0698239..3558bb8 100644
--- a/dec/safe_malloc.h
+++ b/dec/safe_malloc.h
@@ -18,8 +18,6 @@
#ifndef BROTLI_DEC_SAFE_MALLOC_H_
#define BROTLI_DEC_SAFE_MALLOC_H_
-#include <assert.h>
-
#include "./types.h"
#if defined(__cplusplus) || defined(c_plusplus)
diff --git a/enc/port.h b/enc/port.h
index 1b178de..91076a3 100644
--- a/enc/port.h
+++ b/enc/port.h
@@ -55,12 +55,18 @@
#define IS_LITTLE_ENDIAN
#endif
-#if defined(COMPILER_GCC3)
+/* Compatibility with non-clang compilers. */
+#ifndef __has_builtin
+#define __has_builtin(x) 0
+#endif
+
+#if (__GNUC__ > 2) || (__GNUC__ == 2 && __GNUC_MINOR__ > 95) || \
+ (defined(__llvm__) && __has_builtin(__builtin_expect))
#define PREDICT_FALSE(x) (__builtin_expect(x, 0))
#define PREDICT_TRUE(x) (__builtin_expect(!!(x), 1))
#else
-#define PREDICT_FALSE(x) x
-#define PREDICT_TRUE(x) x
+#define PREDICT_FALSE(x) (x)
+#define PREDICT_TRUE(x) (x)
#endif
// Portable handling of unaligned loads, stores, and copies.
diff --git a/setup.py b/setup.py
index 1f99f6e..37a9e2b 100644
--- a/setup.py
+++ b/setup.py
@@ -141,6 +141,7 @@ brotli = Extension("brotli",
"dec/dictionary.h",
"dec/huffman.h",
"dec/prefix.h",
+ "dec/port.h",
"dec/safe_malloc.h",
"dec/streams.h",
"dec/transform.h",