aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorEvgenii Kliuchnikov <eustas.ru@gmail.com>2022-11-17 13:03:09 +0000
committerEvgenii Kliuchnikov <eustas.ru@gmail.com>2022-11-17 13:03:09 +0000
commita8f5813b843b7ec469dbd3d8a6a8743395359964 (patch)
treee349a984eac9404db22a662e579fe47953dc47fe
parent388d0d53fb29271492537015beeed91b74076411 (diff)
downloadbrotli-a8f5813b843b7ec469dbd3d8a6a8743395359964.zip
brotli-a8f5813b843b7ec469dbd3d8a6a8743395359964.tar.gz
brotli-a8f5813b843b7ec469dbd3d8a6a8743395359964.tar.bz2
Update
Documentation: - add note that brotli is a "stream" format, not an archive-like one - regenerate .1 with Pandoc Build: - drop legacy "BROTLI_BUILD_PORTABLE" option - drop "BROTLI_SANITIZED" definition Code: - c: comb includes - c/enc: extract encoder state into separate header - c/enc: drop designated q10 codepath - c/enc: dealing better with flushing of empty stream - fix MSVC compilation API: - py: use library version instead of one in version.h - c: add pluggable API to report consumed input / produced output - c/java: support "lean" prepared dictionaries (without copy of source)
-rw-r--r--CMakeLists.txt6
-rw-r--r--README.md5
-rw-r--r--c/common/constants.h3
-rw-r--r--c/common/platform.c3
-rw-r--r--c/common/platform.h123
-rw-r--r--c/common/shared_dictionary_internal.h5
-rw-r--r--c/dec/bit_reader.c5
-rw-r--r--c/dec/bit_reader.h17
-rw-r--r--c/dec/decode.c23
-rw-r--r--c/dec/huffman.c9
-rw-r--r--c/dec/huffman.h3
-rw-r--r--c/dec/prefix.h3
-rw-r--r--c/dec/state.c16
-rw-r--r--c/dec/state.h8
-rw-r--r--c/enc/backward_references.c3
-rw-r--r--c/enc/backward_references.h3
-rw-r--r--c/enc/backward_references_hq.c3
-rw-r--r--c/enc/backward_references_hq.h3
-rw-r--r--c/enc/bit_cost.c3
-rw-r--r--c/enc/bit_cost.h3
-rw-r--r--c/enc/block_splitter.h3
-rw-r--r--c/enc/brotli_bit_stream.c3
-rw-r--r--c/enc/brotli_bit_stream.h3
-rw-r--r--c/enc/cluster.c3
-rw-r--r--c/enc/cluster.h3
-rw-r--r--c/enc/command.h3
-rw-r--r--c/enc/compound_dictionary.c25
-rw-r--r--c/enc/compound_dictionary.h22
-rw-r--r--c/enc/compress_fragment.c3
-rw-r--r--c/enc/compress_fragment.h3
-rw-r--r--c/enc/compress_fragment_two_pass.c3
-rw-r--r--c/enc/compress_fragment_two_pass.h3
-rw-r--r--c/enc/encode.c419
-rw-r--r--c/enc/encoder_dict.h8
-rw-r--r--c/enc/entropy_encode.c3
-rw-r--r--c/enc/entropy_encode.h3
-rw-r--r--c/enc/entropy_encode_static.h3
-rw-r--r--c/enc/fast_log.h3
-rw-r--r--c/enc/find_match_length.h3
-rw-r--r--c/enc/hash.h28
-rw-r--r--c/enc/histogram.h3
-rw-r--r--c/enc/literal_cost.c3
-rw-r--r--c/enc/literal_cost.h3
-rw-r--r--c/enc/memory.c3
-rw-r--r--c/enc/memory.h3
-rw-r--r--c/enc/metablock.c3
-rw-r--r--c/enc/metablock.h3
-rw-r--r--c/enc/params.h1
-rw-r--r--c/enc/prefix.h3
-rw-r--r--c/enc/quality.h3
-rw-r--r--c/enc/ringbuffer.h3
-rw-r--r--c/enc/state.h104
-rw-r--r--c/enc/static_dict.h3
-rw-r--r--c/enc/utf8_util.h3
-rw-r--r--c/enc/write_bits.h3
-rw-r--r--c/include/brotli/port.h8
-rw-r--r--c/tools/brotli.c5
-rw-r--r--c/tools/brotli.md24
-rw-r--r--docs/brotli.1253
-rw-r--r--go/WORKSPACE21
-rw-r--r--java/org/brotli/dec/BrotliInputStream.java18
-rw-r--r--java/org/brotli/dec/Decode.java1
-rw-r--r--java/org/brotli/dec/build_defs.bzl13
-rw-r--r--java/org/brotli/wrapper/dec/decoder_jni.cc6
-rw-r--r--java/org/brotli/wrapper/dec/decoder_jni_onload.cc4
-rw-r--r--java/org/brotli/wrapper/enc/EncoderJNI.java7
-rw-r--r--java/org/brotli/wrapper/enc/encoder_jni.cc3
-rw-r--r--python/_brotli.cc8
-rw-r--r--research/brotli_decoder.c4
-rw-r--r--scripts/sources.lst1
70 files changed, 616 insertions, 698 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt
index e86b13b..2776f9f 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -105,12 +105,6 @@ if (ENABLE_SANITIZER)
set(CMAKE_C_FLAGS " ${CMAKE_C_FLAGS} -fsanitize=${ENABLE_SANITIZER}")
set(CMAKE_CXX_FLAGS " ${CMAKE_CXX_FLAGS} -fsanitize=${ENABLE_SANITIZER}")
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -fsanitize=${ENABLE_SANITIZER}")
-
- # By default, brotli depends on undefined behavior, but setting
- # BROTLI_BUILD_PORTABLE should result in a build which does not.
- if(ENABLE_SANITIZER STREQUAL "undefined")
- add_definitions(-DBROTLI_BUILD_PORTABLE)
- endif()
endif ()
include(CheckFunctionExists)
diff --git a/README.md b/README.md
index 6d8219e..0f905e3 100644
--- a/README.md
+++ b/README.md
@@ -18,6 +18,11 @@ The specification of the Brotli Compressed Data Format is defined in [RFC 7932](
Brotli is open-sourced under the MIT License, see the LICENSE file.
+> **Please note:** brotli is a "stream" format; it does not contain
+> meta-information, like checksums or uncompressed data length. It is possible
+> to modify "raw" ranges of the compressed stream and the decoder will not
+> notice that.
+
Brotli mailing list:
https://groups.google.com/forum/#!forum/brotli
diff --git a/c/common/constants.h b/c/common/constants.h
index 433c7b2..31e5bd3 100644
--- a/c/common/constants.h
+++ b/c/common/constants.h
@@ -12,10 +12,11 @@
#ifndef BROTLI_COMMON_CONSTANTS_H_
#define BROTLI_COMMON_CONSTANTS_H_
-#include "platform.h"
#include <brotli/port.h>
#include <brotli/types.h>
+#include "platform.h"
+
/* Specification: 7.3. Encoding of the context map */
#define BROTLI_CONTEXT_MAP_MAX_RLE 16
diff --git a/c/common/platform.c b/c/common/platform.c
index acdc452..25d84a9 100644
--- a/c/common/platform.c
+++ b/c/common/platform.c
@@ -6,9 +6,10 @@
#include <stdlib.h>
-#include "platform.h"
#include <brotli/types.h>
+#include "platform.h"
+
/* Default brotli_alloc_func */
void* BrotliDefaultAllocFunc(void* opaque, size_t size) {
BROTLI_UNUSED(opaque);
diff --git a/c/common/platform.h b/c/common/platform.h
index 0e0e8aa..4186a8e 100644
--- a/c/common/platform.h
+++ b/c/common/platform.h
@@ -12,9 +12,9 @@
* BROTLI_BUILD_BIG_ENDIAN forces to use big-endian optimizations
* BROTLI_BUILD_ENDIAN_NEUTRAL disables endian-aware optimizations
* BROTLI_BUILD_LITTLE_ENDIAN forces to use little-endian optimizations
- * BROTLI_BUILD_PORTABLE disables dangerous optimizations, like unaligned
- read and overlapping memcpy; this reduces decompression speed by 5%
* BROTLI_BUILD_NO_RBIT disables "rbit" optimization for ARM CPUs
+ * BROTLI_BUILD_NO_UNALIGNED_READ_FAST forces off the fast-unaligned-read
+ optimizations (mainly for testing purposes).
* BROTLI_DEBUG dumps file name and line number when decoder detects stream
or memory error
* BROTLI_ENABLE_LOG enables asserts and dumps various state information
@@ -208,15 +208,19 @@ OR:
#define BROTLI_TARGET_RISCV64
#endif
+#if defined(BROTLI_TARGET_X64) || defined(BROTLI_TARGET_ARMV8_64) || \
+ defined(BROTLI_TARGET_POWERPC64) || defined(BROTLI_TARGET_RISCV64)
+#define BROTLI_TARGET_64_BITS 1
+#else
+#define BROTLI_TARGET_64_BITS 0
+#endif
+
#if defined(BROTLI_BUILD_64_BIT)
#define BROTLI_64_BITS 1
#elif defined(BROTLI_BUILD_32_BIT)
#define BROTLI_64_BITS 0
-#elif defined(BROTLI_TARGET_X64) || defined(BROTLI_TARGET_ARMV8_64) || \
- defined(BROTLI_TARGET_POWERPC64) || defined(BROTLI_TARGET_RISCV64)
-#define BROTLI_64_BITS 1
#else
-#define BROTLI_64_BITS 0
+#define BROTLI_64_BITS BROTLI_TARGET_64_BITS
#endif
#if (BROTLI_64_BITS)
@@ -260,18 +264,19 @@ OR:
#undef BROTLI_X_BIG_ENDIAN
#endif
-#if defined(BROTLI_BUILD_PORTABLE)
-#define BROTLI_ALIGNED_READ (!!1)
-#elif defined(BROTLI_TARGET_X86) || defined(BROTLI_TARGET_X64) || \
+#if defined(BROTLI_BUILD_NO_UNALIGNED_READ_FAST)
+#define BROTLI_UNALIGNED_READ_FAST (!!0)
+#elif defined(BROTLI_TARGET_X86) || defined(BROTLI_TARGET_X64) || \
defined(BROTLI_TARGET_ARMV7) || defined(BROTLI_TARGET_ARMV8_ANY) || \
defined(BROTLI_TARGET_RISCV64)
-/* Allow unaligned read only for white-listed CPUs. */
-#define BROTLI_ALIGNED_READ (!!0)
+/* These targets are known to generate efficient code for unaligned reads
+ * (e.g. a single instruction, not multiple 1-byte loads, shifted and or'd
+ * together). */
+#define BROTLI_UNALIGNED_READ_FAST (!!1)
#else
-#define BROTLI_ALIGNED_READ (!!1)
+#define BROTLI_UNALIGNED_READ_FAST (!!0)
#endif
-#if BROTLI_ALIGNED_READ
/* Portable unaligned memory access: read / write values via memcpy. */
static BROTLI_INLINE uint16_t BrotliUnalignedRead16(const void* p) {
uint16_t t;
@@ -291,75 +296,6 @@ static BROTLI_INLINE uint64_t BrotliUnalignedRead64(const void* p) {
static BROTLI_INLINE void BrotliUnalignedWrite64(void* p, uint64_t v) {
memcpy(p, &v, sizeof v);
}
-#else /* BROTLI_ALIGNED_READ */
-/* Unaligned memory access is allowed: just cast pointer to requested type. */
-#if BROTLI_SANITIZED
-/* Consider we have an unaligned load/store of 4 bytes from address 0x...05.
- AddressSanitizer will treat it as a 3-byte access to the range 05:07 and
- will miss a bug if 08 is the first unaddressable byte.
- ThreadSanitizer will also treat this as a 3-byte access to 05:07 and will
- miss a race between this access and some other accesses to 08.
- MemorySanitizer will correctly propagate the shadow on unaligned stores
- and correctly report bugs on unaligned loads, but it may not properly
- update and report the origin of the uninitialized memory.
- For all three tools, replacing an unaligned access with a tool-specific
- callback solves the problem. */
-#if defined(__cplusplus)
-extern "C" {
-#endif /* __cplusplus */
- uint16_t __sanitizer_unaligned_load16(const void* p);
- uint32_t __sanitizer_unaligned_load32(const void* p);
- uint64_t __sanitizer_unaligned_load64(const void* p);
- void __sanitizer_unaligned_store64(void* p, uint64_t v);
-#if defined(__cplusplus)
-} /* extern "C" */
-#endif /* __cplusplus */
-#define BrotliUnalignedRead16 __sanitizer_unaligned_load16
-#define BrotliUnalignedRead32 __sanitizer_unaligned_load32
-#define BrotliUnalignedRead64 __sanitizer_unaligned_load64
-#define BrotliUnalignedWrite64 __sanitizer_unaligned_store64
-#else /* BROTLI_SANITIZED */
-static BROTLI_INLINE uint16_t BrotliUnalignedRead16(const void* p) {
- return *(const uint16_t*)p;
-}
-static BROTLI_INLINE uint32_t BrotliUnalignedRead32(const void* p) {
- return *(const uint32_t*)p;
-}
-#if (BROTLI_64_BITS)
-static BROTLI_INLINE uint64_t BrotliUnalignedRead64(const void* p) {
- return *(const uint64_t*)p;
-}
-static BROTLI_INLINE void BrotliUnalignedWrite64(void* p, uint64_t v) {
- *(uint64_t*)p = v;
-}
-#else /* BROTLI_64_BITS */
-/* Avoid emitting LDRD / STRD, which require properly aligned address. */
-/* If __attribute__(aligned) is available, use that. Otherwise, memcpy. */
-
-#if BROTLI_GNUC_HAS_ATTRIBUTE(aligned, 2, 7, 0)
-typedef BROTLI_ALIGNED(1) uint64_t brotli_unaligned_uint64_t;
-
-static BROTLI_INLINE uint64_t BrotliUnalignedRead64(const void* p) {
- return (uint64_t) ((const brotli_unaligned_uint64_t*) p)[0];
-}
-static BROTLI_INLINE void BrotliUnalignedWrite64(void* p, uint64_t v) {
- brotli_unaligned_uint64_t* dwords = (brotli_unaligned_uint64_t*) p;
- dwords[0] = (brotli_unaligned_uint64_t) v;
-}
-#else /* BROTLI_GNUC_HAS_ATTRIBUTE(aligned, 2, 7, 0) */
-static BROTLI_INLINE uint64_t BrotliUnalignedRead64(const void* p) {
- uint64_t v;
- memcpy(&v, p, sizeof(uint64_t));
- return v;
-}
-
-static BROTLI_INLINE void BrotliUnalignedWrite64(void* p, uint64_t v) {
- memcpy(p, &v, sizeof(uint64_t));
-}
-#endif /* BROTLI_GNUC_HAS_ATTRIBUTE(aligned, 2, 7, 0) */
-#endif /* BROTLI_64_BITS */
-#endif /* BROTLI_SANITIZED */
-#endif /* BROTLI_ALIGNED_READ */
#if BROTLI_LITTLE_ENDIAN
/* Straight endianness. Just read / write values. */
@@ -435,6 +371,16 @@ static BROTLI_INLINE void BROTLI_UNALIGNED_STORE64LE(void* p, uint64_t v) {
}
#endif /* BROTLI_LITTLE_ENDIAN */
+static BROTLI_INLINE void* BROTLI_UNALIGNED_LOAD_PTR(const void* p) {
+ void* v;
+ memcpy(&v, p, sizeof(void*));
+ return v;
+}
+
+static BROTLI_INLINE void BROTLI_UNALIGNED_STORE_PTR(void* p, const void* v) {
+ memcpy(p, &v, sizeof(void*));
+}
+
/* BROTLI_IS_CONSTANT macros returns true for compile-time constants. */
#if BROTLI_GNUC_HAS_BUILTIN(__builtin_constant_p, 3, 0, 1) || \
BROTLI_INTEL_VERSION_CHECK(16, 0, 0)
@@ -467,6 +413,8 @@ static BROTLI_INLINE void BrotliDump(const char* f, int l, const char* fn) {
#define BROTLI_DUMP() (void)(0)
#endif
+/* BrotliRBit assumes brotli_reg_t fits native CPU register type. */
+#if (BROTLI_64_BITS == BROTLI_TARGET_64_BITS)
/* TODO(eustas): add appropriate icc/sunpro/arm/ibm/ti checks. */
#if (BROTLI_GNUC_VERSION_CHECK(3, 0, 0) || defined(__llvm__)) && \
!defined(BROTLI_BUILD_NO_RBIT)
@@ -480,15 +428,14 @@ static BROTLI_INLINE brotli_reg_t BrotliRBit(brotli_reg_t input) {
#define BROTLI_RBIT(x) BrotliRBit(x)
#endif /* armv7 / armv8 */
#endif /* gcc || clang */
+#endif /* brotli_reg_t is native */
#if !defined(BROTLI_RBIT)
static BROTLI_INLINE void BrotliRBit(void) { /* Should break build if used. */ }
#endif /* BROTLI_RBIT */
-#define BROTLI_REPEAT(N, X) { \
- if ((N & 1) != 0) {X;} \
- if ((N & 2) != 0) {X; X;} \
- if ((N & 4) != 0) {X; X; X; X;} \
-}
+#define BROTLI_REPEAT_4(X) {X; X; X; X;}
+#define BROTLI_REPEAT_5(X) {X; X; X; X; X;}
+#define BROTLI_REPEAT_6(X) {X; X; X; X; X; X;}
#define BROTLI_UNUSED(X) (void)(X)
@@ -553,6 +500,8 @@ BROTLI_UNUSED_FUNCTION void BrotliSuppressUnusedFunctions(void) {
BROTLI_UNUSED(&BROTLI_UNALIGNED_LOAD32LE);
BROTLI_UNUSED(&BROTLI_UNALIGNED_LOAD64LE);
BROTLI_UNUSED(&BROTLI_UNALIGNED_STORE64LE);
+ BROTLI_UNUSED(&BROTLI_UNALIGNED_LOAD_PTR);
+ BROTLI_UNUSED(&BROTLI_UNALIGNED_STORE_PTR);
BROTLI_UNUSED(&BrotliRBit);
BROTLI_UNUSED(&brotli_min_double);
BROTLI_UNUSED(&brotli_max_double);
diff --git a/c/common/shared_dictionary_internal.h b/c/common/shared_dictionary_internal.h
index 87ab13b..963762e 100644
--- a/c/common/shared_dictionary_internal.h
+++ b/c/common/shared_dictionary_internal.h
@@ -9,11 +9,12 @@
#ifndef BROTLI_COMMON_SHARED_DICTIONARY_INTERNAL_H_
#define BROTLI_COMMON_SHARED_DICTIONARY_INTERNAL_H_
-#include "dictionary.h"
#include <brotli/shared_dictionary.h>
-#include "transform.h"
#include <brotli/types.h>
+#include "dictionary.h"
+#include "transform.h"
+
#if defined(__cplusplus) || defined(c_plusplus)
extern "C" {
#endif
diff --git a/c/dec/bit_reader.c b/c/dec/bit_reader.c
index 3dc848b..97e21f5 100644
--- a/c/dec/bit_reader.c
+++ b/c/dec/bit_reader.c
@@ -8,9 +8,10 @@
#include "bit_reader.h"
-#include "../common/platform.h"
#include <brotli/types.h>
+#include "../common/platform.h"
+
#if defined(__cplusplus) || defined(c_plusplus)
extern "C" {
#endif
@@ -36,7 +37,7 @@ BROTLI_BOOL BrotliWarmupBitReader(BrotliBitReader* const br) {
/* Fixing alignment after unaligned BrotliFillWindow would result accumulator
overflow. If unalignment is caused by BrotliSafeReadBits, then there is
enough space in accumulator to fix alignment. */
- if (!BROTLI_ALIGNED_READ) {
+ if (BROTLI_UNALIGNED_READ_FAST) {
aligned_read_mask = 0;
}
if (BrotliGetAvailableBits(br) == 0) {
diff --git a/c/dec/bit_reader.h b/c/dec/bit_reader.h
index 3906455..c737bda 100644
--- a/c/dec/bit_reader.h
+++ b/c/dec/bit_reader.h
@@ -11,9 +11,10 @@
#include <string.h> /* memcpy */
+#include <brotli/types.h>
+
#include "../common/constants.h"
#include "../common/platform.h"
-#include <brotli/types.h>
#if defined(__cplusplus) || defined(c_plusplus)
extern "C" {
@@ -53,8 +54,8 @@ BROTLI_INTERNAL void BrotliInitBitReader(BrotliBitReader* const br);
/* Ensures that accumulator is not empty.
May consume up to sizeof(brotli_reg_t) - 1 bytes of input.
Returns BROTLI_FALSE if data is required but there is no input available.
- For BROTLI_ALIGNED_READ this function also prepares bit reader for aligned
- reading. */
+ For !BROTLI_UNALIGNED_READ_FAST this function also prepares bit reader for
+ aligned reading. */
BROTLI_INTERNAL BROTLI_BOOL BrotliWarmupBitReader(BrotliBitReader* const br);
/* Fallback for BrotliSafeReadBits32. Extracted as noninlined method to unburden
@@ -107,7 +108,8 @@ static BROTLI_INLINE BROTLI_BOOL BrotliCheckInputAmount(
static BROTLI_INLINE void BrotliFillBitWindow(
BrotliBitReader* const br, uint32_t n_bits) {
#if (BROTLI_64_BITS)
- if (!BROTLI_ALIGNED_READ && BROTLI_IS_CONSTANT(n_bits) && (n_bits <= 8)) {
+ if (BROTLI_UNALIGNED_READ_FAST && BROTLI_IS_CONSTANT(n_bits) &&
+ (n_bits <= 8)) {
uint32_t bit_pos = br->bit_pos_;
if (bit_pos >= 56) {
br->val_ =
@@ -117,8 +119,8 @@ static BROTLI_INLINE void BrotliFillBitWindow(
br->avail_in -= 7;
br->next_in += 7;
}
- } else if (
- !BROTLI_ALIGNED_READ && BROTLI_IS_CONSTANT(n_bits) && (n_bits <= 16)) {
+ } else if (BROTLI_UNALIGNED_READ_FAST && BROTLI_IS_CONSTANT(n_bits) &&
+ (n_bits <= 16)) {
uint32_t bit_pos = br->bit_pos_;
if (bit_pos >= 48) {
br->val_ =
@@ -140,7 +142,8 @@ static BROTLI_INLINE void BrotliFillBitWindow(
}
}
#else
- if (!BROTLI_ALIGNED_READ && BROTLI_IS_CONSTANT(n_bits) && (n_bits <= 8)) {
+ if (BROTLI_UNALIGNED_READ_FAST && BROTLI_IS_CONSTANT(n_bits) &&
+ (n_bits <= 8)) {
uint32_t bit_pos = br->bit_pos_;
if (bit_pos >= 24) {
br->val_ =
diff --git a/c/dec/decode.c b/c/dec/decode.c
index 41166f9..845f556 100644
--- a/c/dec/decode.c
+++ b/c/dec/decode.c
@@ -113,8 +113,9 @@ void BrotliDecoderDestroyInstance(BrotliDecoderState* state) {
/* Saves error code and converts it to BrotliDecoderResult. */
static BROTLI_NOINLINE BrotliDecoderResult SaveErrorCode(
- BrotliDecoderState* s, BrotliDecoderErrorCode e) {
+ BrotliDecoderState* s, BrotliDecoderErrorCode e, size_t consumed_input) {
s->error_code = (int)e;
+ s->used_input += consumed_input;
switch (e) {
case BROTLI_DECODER_SUCCESS:
return BROTLI_DECODER_RESULT_SUCCESS;
@@ -1172,7 +1173,7 @@ static BROTLI_INLINE void DetectTrivialLiteralBlockTypes(
size_t sample = s->context_map[offset];
size_t j;
for (j = 0; j < (1u << BROTLI_LITERAL_CONTEXT_BITS);) {
- BROTLI_REPEAT(4, error |= s->context_map[offset + j++] ^ sample;)
+ BROTLI_REPEAT_4({ error |= s->context_map[offset + j++] ^ sample; })
}
if (error == 0) {
s->trivial_literal_contexts[i >> 5] |= 1u << (i & 31);
@@ -2243,6 +2244,9 @@ BrotliDecoderResult BrotliDecoderDecompressStream(
size_t* available_out, uint8_t** next_out, size_t* total_out) {
BrotliDecoderErrorCode result = BROTLI_DECODER_SUCCESS;
BrotliBitReader* br = &s->br;
+ size_t input_size = *available_in;
+#define BROTLI_SAVE_ERROR_CODE(code) \
+ SaveErrorCode(s, (code), input_size - *available_in)
/* Ensure that |total_out| is set, even if no data will ever be pushed out. */
if (total_out) {
*total_out = s->partial_pos_out;
@@ -2252,8 +2256,8 @@ BrotliDecoderResult BrotliDecoderDecompressStream(
return BROTLI_DECODER_RESULT_ERROR;
}
if (*available_out && (!next_out || !*next_out)) {
- return SaveErrorCode(
- s, BROTLI_FAILURE(BROTLI_DECODER_ERROR_INVALID_ARGUMENTS));
+ return BROTLI_SAVE_ERROR_CODE(
+ BROTLI_FAILURE(BROTLI_DECODER_ERROR_INVALID_ARGUMENTS));
}
if (!*available_out) next_out = 0;
if (s->buffer_length == 0) { /* Just connect bit reader to input stream. */
@@ -2586,7 +2590,7 @@ BrotliDecoderResult BrotliDecoderDecompressStream(
s, &s->distance_hgroup, distance_alphabet_size_max,
distance_alphabet_size_limit, s->num_dist_htrees);
if (!allocation_success) {
- return SaveErrorCode(s,
+ return BROTLI_SAVE_ERROR_CODE(
BROTLI_FAILURE(BROTLI_DECODER_ERROR_ALLOC_TREE_GROUPS));
}
s->loop_counter = 0;
@@ -2600,7 +2604,7 @@ BrotliDecoderResult BrotliDecoderDecompressStream(
case 0: hgroup = &s->literal_hgroup; break;
case 1: hgroup = &s->insert_copy_hgroup; break;
case 2: hgroup = &s->distance_hgroup; break;
- default: return SaveErrorCode(s, BROTLI_FAILURE(
+ default: return BROTLI_SAVE_ERROR_CODE(BROTLI_FAILURE(
BROTLI_DECODER_ERROR_UNREACHABLE)); /* COV_NF_LINE */
}
result = HuffmanTreeGroupDecode(hgroup, s);
@@ -2710,10 +2714,11 @@ BrotliDecoderResult BrotliDecoderDecompressStream(
break;
}
}
- return SaveErrorCode(s, result);
+ return BROTLI_SAVE_ERROR_CODE(result);
}
}
- return SaveErrorCode(s, result);
+ return BROTLI_SAVE_ERROR_CODE(result);
+#undef BROTLI_SAVE_ERROR_CODE
}
BROTLI_BOOL BrotliDecoderHasMoreOutput(const BrotliDecoderState* s) {
@@ -2743,7 +2748,7 @@ const uint8_t* BrotliDecoderTakeOutput(BrotliDecoderState* s, size_t* size) {
} else {
/* ... or stream is broken. Normally this should be caught by
BrotliDecoderDecompressStream, this is just a safeguard. */
- if ((int)status < 0) SaveErrorCode(s, status);
+ if ((int)status < 0) SaveErrorCode(s, status, 0);
*size = 0;
result = 0;
}
diff --git a/c/dec/huffman.c b/c/dec/huffman.c
index 8f127d7..3806454 100644
--- a/c/dec/huffman.c
+++ b/c/dec/huffman.c
@@ -10,9 +10,10 @@
#include <string.h> /* memcpy, memset */
+#include <brotli/types.h>
+
#include "../common/constants.h"
#include "../common/platform.h"
-#include <brotli/types.h>
#if defined(__cplusplus) || defined(c_plusplus)
extern "C" {
@@ -117,11 +118,13 @@ void BrotliBuildCodeLengthsHuffmanTable(HuffmanCode* table,
int bits_count;
BROTLI_DCHECK(BROTLI_HUFFMAN_MAX_CODE_LENGTH_CODE_LENGTH <=
BROTLI_REVERSE_BITS_MAX);
+ BROTLI_DCHECK(BROTLI_HUFFMAN_MAX_CODE_LENGTH_CODE_LENGTH == 5);
/* Generate offsets into sorted symbol table by code length. */
symbol = -1;
bits = 1;
- BROTLI_REPEAT(BROTLI_HUFFMAN_MAX_CODE_LENGTH_CODE_LENGTH, {
+ /* BROTLI_HUFFMAN_MAX_CODE_LENGTH_CODE_LENGTH == 5 */
+ BROTLI_REPEAT_5({
symbol += count[bits];
offset[bits] = symbol;
bits++;
@@ -132,7 +135,7 @@ void BrotliBuildCodeLengthsHuffmanTable(HuffmanCode* table,
/* Sort symbols by length, by symbol order within each length. */
symbol = BROTLI_CODE_LENGTH_CODES;
do {
- BROTLI_REPEAT(6, {
+ BROTLI_REPEAT_6({
symbol--;
sorted[offset[code_lengths[symbol]]--] = symbol;
});
diff --git a/c/dec/huffman.h b/c/dec/huffman.h
index a8fbc45..5036096 100644
--- a/c/dec/huffman.h
+++ b/c/dec/huffman.h
@@ -9,9 +9,10 @@
#ifndef BROTLI_DEC_HUFFMAN_H_
#define BROTLI_DEC_HUFFMAN_H_
-#include "../common/platform.h"
#include <brotli/types.h>
+#include "../common/platform.h"
+
#if defined(__cplusplus) || defined(c_plusplus)
extern "C" {
#endif
diff --git a/c/dec/prefix.h b/c/dec/prefix.h
index 481a2c7..e8acf07 100644
--- a/c/dec/prefix.h
+++ b/c/dec/prefix.h
@@ -10,9 +10,10 @@
#ifndef BROTLI_DEC_PREFIX_H_
#define BROTLI_DEC_PREFIX_H_
-#include "../common/constants.h"
#include <brotli/types.h>
+#include "../common/constants.h"
+
typedef struct CmdLutElement {
uint8_t insert_len_extra_bits;
uint8_t copy_len_extra_bits;
diff --git a/c/dec/state.c b/c/dec/state.c
index e3170c1..08d4c8b 100644
--- a/c/dec/state.c
+++ b/c/dec/state.c
@@ -8,8 +8,9 @@
#include <stdlib.h> /* free, malloc */
-#include "../common/dictionary.h"
#include <brotli/types.h>
+
+#include "../common/dictionary.h"
#include "huffman.h"
#if defined(__cplusplus) || defined(c_plusplus)
@@ -43,6 +44,7 @@ BROTLI_BOOL BrotliDecoderStateInit(BrotliDecoderState* s,
s->pos = 0;
s->rb_roundtrips = 0;
s->partial_pos_out = 0;
+ s->used_input = 0;
s->block_type_trees = NULL;
s->block_len_trees = NULL;
@@ -129,9 +131,21 @@ void BrotliDecoderStateCleanupAfterMetablock(BrotliDecoderState* s) {
BROTLI_DECODER_FREE(s, s->distance_hgroup.htrees);
}
+#ifdef BROTLI_REPORTING
+/* When BROTLI_REPORTING is defined extra reporting module have to be linked. */
+void BrotliDecoderOnFinish(const BrotliDecoderState* s);
+#define BROTLI_DECODER_ON_FINISH(s) BrotliDecoderOnFinish(s);
+#else
+#if !defined(BROTLI_DECODER_ON_FINISH)
+#define BROTLI_DECODER_ON_FINISH(s) (void)(s);
+#endif
+#endif
+
void BrotliDecoderStateCleanup(BrotliDecoderState* s) {
BrotliDecoderStateCleanupAfterMetablock(s);
+ BROTLI_DECODER_ON_FINISH(s);
+
BROTLI_DECODER_FREE(s, s->compound_dictionary);
BrotliSharedDictionaryDestroyInstance(s->dictionary);
s->dictionary = NULL;
diff --git a/c/dec/state.h b/c/dec/state.h
index 81e6bb6..6ec5c8f 100644
--- a/c/dec/state.h
+++ b/c/dec/state.h
@@ -9,12 +9,13 @@
#ifndef BROTLI_DEC_STATE_H_
#define BROTLI_DEC_STATE_H_
+#include <brotli/shared_dictionary.h>
+#include <brotli/types.h>
+
#include "../common/constants.h"
#include "../common/dictionary.h"
#include "../common/platform.h"
-#include <brotli/shared_dictionary.h>
#include "../common/transform.h"
-#include <brotli/types.h>
#include "bit_reader.h"
#include "huffman.h"
@@ -321,6 +322,9 @@ struct BrotliDecoderStateStruct {
/* Less used attributes are at the end of this struct. */
+ /* For reporting. */
+ uint64_t used_input; /* how many bytes of input are consumed */
+
/* States inside function calls. */
BrotliRunningMetablockHeaderState substate_metablock_header;
BrotliRunningUncompressedState substate_uncompressed;
diff --git a/c/enc/backward_references.c b/c/enc/backward_references.c
index 2cf01d8..ff5b7be 100644
--- a/c/enc/backward_references.c
+++ b/c/enc/backward_references.c
@@ -8,10 +8,11 @@
#include "backward_references.h"
+#include <brotli/types.h>
+
#include "../common/constants.h"
#include "../common/dictionary.h"
#include "../common/platform.h"
-#include <brotli/types.h>
#include "command.h"
#include "compound_dictionary.h"
#include "dictionary_hash.h"
diff --git a/c/enc/backward_references.h b/c/enc/backward_references.h
index b051e18..20fb98a 100644
--- a/c/enc/backward_references.h
+++ b/c/enc/backward_references.h
@@ -9,11 +9,12 @@
#ifndef BROTLI_ENC_BACKWARD_REFERENCES_H_
#define BROTLI_ENC_BACKWARD_REFERENCES_H_
+#include <brotli/types.h>
+
#include "../common/constants.h"
#include "../common/context.h"
#include "../common/dictionary.h"
#include "../common/platform.h"
-#include <brotli/types.h>
#include "command.h"
#include "hash.h"
#include "quality.h"
diff --git a/c/enc/backward_references_hq.c b/c/enc/backward_references_hq.c
index c6a6c8c..6325032 100644
--- a/c/enc/backward_references_hq.c
+++ b/c/enc/backward_references_hq.c
@@ -10,9 +10,10 @@
#include <string.h> /* memcpy, memset */
+#include <brotli/types.h>
+
#include "../common/constants.h"
#include "../common/platform.h"
-#include <brotli/types.h>
#include "command.h"
#include "compound_dictionary.h"
#include "encoder_dict.h"
diff --git a/c/enc/backward_references_hq.h b/c/enc/backward_references_hq.h
index c9dcc80..8acf975 100644
--- a/c/enc/backward_references_hq.h
+++ b/c/enc/backward_references_hq.h
@@ -9,11 +9,12 @@
#ifndef BROTLI_ENC_BACKWARD_REFERENCES_HQ_H_
#define BROTLI_ENC_BACKWARD_REFERENCES_HQ_H_
+#include <brotli/types.h>
+
#include "../common/constants.h"
#include "../common/context.h"
#include "../common/dictionary.h"
#include "../common/platform.h"
-#include <brotli/types.h>
#include "command.h"
#include "hash.h"
#include "memory.h"
diff --git a/c/enc/bit_cost.c b/c/enc/bit_cost.c
index 8ca4ab1..6b7c904 100644
--- a/c/enc/bit_cost.c
+++ b/c/enc/bit_cost.c
@@ -8,9 +8,10 @@
#include "bit_cost.h"
+#include <brotli/types.h>
+
#include "../common/constants.h"
#include "../common/platform.h"
-#include <brotli/types.h>
#include "fast_log.h"
#include "histogram.h"
diff --git a/c/enc/bit_cost.h b/c/enc/bit_cost.h
index 4cf3b18..f6f2773 100644
--- a/c/enc/bit_cost.h
+++ b/c/enc/bit_cost.h
@@ -9,8 +9,9 @@
#ifndef BROTLI_ENC_BIT_COST_H_
#define BROTLI_ENC_BIT_COST_H_
-#include "../common/platform.h"
#include <brotli/types.h>
+
+#include "../common/platform.h"
#include "fast_log.h"
#include "histogram.h"
diff --git a/c/enc/block_splitter.h b/c/enc/block_splitter.h
index 1de072f..6046b90 100644
--- a/c/enc/block_splitter.h
+++ b/c/enc/block_splitter.h
@@ -9,8 +9,9 @@
#ifndef BROTLI_ENC_BLOCK_SPLITTER_H_
#define BROTLI_ENC_BLOCK_SPLITTER_H_
-#include "../common/platform.h"
#include <brotli/types.h>
+
+#include "../common/platform.h"
#include "command.h"
#include "memory.h"
#include "quality.h"
diff --git a/c/enc/brotli_bit_stream.c b/c/enc/brotli_bit_stream.c
index d105102..5fa0c69 100644
--- a/c/enc/brotli_bit_stream.c
+++ b/c/enc/brotli_bit_stream.c
@@ -12,10 +12,11 @@
#include <string.h> /* memcpy, memset */
+#include <brotli/types.h>
+
#include "../common/constants.h"
#include "../common/context.h"
#include "../common/platform.h"
-#include <brotli/types.h>
#include "entropy_encode.h"
#include "entropy_encode_static.h"
#include "fast_log.h"
diff --git a/c/enc/brotli_bit_stream.h b/c/enc/brotli_bit_stream.h
index 4285b7f..a289509 100644
--- a/c/enc/brotli_bit_stream.h
+++ b/c/enc/brotli_bit_stream.h
@@ -16,9 +16,10 @@
#ifndef BROTLI_ENC_BROTLI_BIT_STREAM_H_
#define BROTLI_ENC_BROTLI_BIT_STREAM_H_
+#include <brotli/types.h>
+
#include "../common/context.h"
#include "../common/platform.h"
-#include <brotli/types.h>
#include "command.h"
#include "entropy_encode.h"
#include "memory.h"
diff --git a/c/enc/cluster.c b/c/enc/cluster.c
index b86bbfb..b0faf81 100644
--- a/c/enc/cluster.c
+++ b/c/enc/cluster.c
@@ -8,8 +8,9 @@
#include "cluster.h"
-#include "../common/platform.h"
#include <brotli/types.h>
+
+#include "../common/platform.h"
#include "bit_cost.h" /* BrotliPopulationCost */
#include "fast_log.h"
#include "histogram.h"
diff --git a/c/enc/cluster.h b/c/enc/cluster.h
index 107e8a3..013629c 100644
--- a/c/enc/cluster.h
+++ b/c/enc/cluster.h
@@ -9,8 +9,9 @@
#ifndef BROTLI_ENC_CLUSTER_H_
#define BROTLI_ENC_CLUSTER_H_
-#include "../common/platform.h"
#include <brotli/types.h>
+
+#include "../common/platform.h"
#include "histogram.h"
#include "memory.h"
diff --git a/c/enc/command.h b/c/enc/command.h
index 43e35d7..ba4de7e 100644
--- a/c/enc/command.h
+++ b/c/enc/command.h
@@ -9,9 +9,10 @@
#ifndef BROTLI_ENC_COMMAND_H_
#define BROTLI_ENC_COMMAND_H_
+#include <brotli/types.h>
+
#include "../common/constants.h"
#include "../common/platform.h"
-#include <brotli/types.h>
#include "fast_log.h"
#include "params.h"
#include "prefix.h"
diff --git a/c/enc/compound_dictionary.c b/c/enc/compound_dictionary.c
index d82772f..824e515 100644
--- a/c/enc/compound_dictionary.c
+++ b/c/enc/compound_dictionary.c
@@ -6,8 +6,9 @@
#include "compound_dictionary.h"
-#include "../common/platform.h"
#include <brotli/types.h>
+
+#include "../common/platform.h"
#include "memory.h"
#include "quality.h"
@@ -33,7 +34,7 @@ static PreparedDictionary* CreatePreparedDictionaryWithParams(MemoryManager* m,
uint32_t* slot_offsets = NULL;
uint16_t* heads = NULL;
uint32_t* items = NULL;
- uint8_t* source_copy = NULL;
+ const uint8_t** source_ref = NULL;
uint32_t i;
uint32_t* slot_size = NULL;
uint32_t* slot_limit = NULL;
@@ -97,7 +98,7 @@ static PreparedDictionary* CreatePreparedDictionaryWithParams(MemoryManager* m,
/* Step 3: transfer data to "slim" hasher. */
alloc_size = sizeof(PreparedDictionary) + (sizeof(uint32_t) << slot_bits) +
(sizeof(uint16_t) << bucket_bits) + (sizeof(uint32_t) * total_items) +
- source_size;
+ sizeof(uint8_t*);
result = (PreparedDictionary*)BROTLI_ALLOC(m, uint8_t, alloc_size);
if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(result)) {
@@ -107,14 +108,15 @@ static PreparedDictionary* CreatePreparedDictionaryWithParams(MemoryManager* m,
slot_offsets = (uint32_t*)(&result[1]);
heads = (uint16_t*)(&slot_offsets[num_slots]);
items = (uint32_t*)(&heads[num_buckets]);
- source_copy = (uint8_t*)(&items[total_items]);
+ source_ref = (const uint8_t**)(&items[total_items]);
- result->magic = kPreparedDictionaryMagic;
- result->source_offset = total_items;
+ result->magic = kLeanPreparedDictionaryMagic;
+ result->num_items = total_items;
result->source_size = (uint32_t)source_size;
result->hash_bits = hash_bits;
result->bucket_bits = bucket_bits;
result->slot_bits = slot_bits;
+ BROTLI_UNALIGNED_STORE_PTR(source_ref, source);
total_items = 0;
for (i = 0; i < num_slots; ++i) {
@@ -145,7 +147,6 @@ static PreparedDictionary* CreatePreparedDictionaryWithParams(MemoryManager* m,
}
BROTLI_FREE(m, flat);
- memcpy(source_copy, source, source_size);
return result;
}
@@ -192,8 +193,14 @@ BROTLI_BOOL AttachPreparedDictionary(
uint32_t* slot_offsets = (uint32_t*)(&dictionary[1]);
uint16_t* heads = (uint16_t*)(&slot_offsets[1u << dictionary->slot_bits]);
uint32_t* items = (uint32_t*)(&heads[1u << dictionary->bucket_bits]);
- compound->chunk_source[index] =
- (const uint8_t*)(&items[dictionary->source_offset]);
+ const void* tail = (void*)&items[dictionary->num_items];
+ if (dictionary->magic == kPreparedDictionaryMagic) {
+ compound->chunk_source[index] = (const uint8_t*)tail;
+ } else {
+ /* dictionary->magic == kLeanPreparedDictionaryMagic */
+ compound->chunk_source[index] =
+ (const uint8_t*)BROTLI_UNALIGNED_LOAD_PTR((const uint8_t**)tail);
+ }
}
compound->num_chunks++;
return BROTLI_TRUE;
diff --git a/c/enc/compound_dictionary.h b/c/enc/compound_dictionary.h
index 60b12d2..9c531d5 100644
--- a/c/enc/compound_dictionary.h
+++ b/c/enc/compound_dictionary.h
@@ -7,19 +7,32 @@
#ifndef BROTLI_ENC_PREPARED_DICTIONARY_H_
#define BROTLI_ENC_PREPARED_DICTIONARY_H_
-#include "../common/platform.h"
-#include "../common/constants.h"
#include <brotli/shared_dictionary.h>
#include <brotli/types.h>
+
+#include "../common/platform.h"
+#include "../common/constants.h"
#include "memory.h"
+/* "Fat" prepared dictionary, could be cooked outside of C implementation,
+ * e.g. on Java side. LZ77 data is copied inside PreparedDictionary struct. */
static const uint32_t kPreparedDictionaryMagic = 0xDEBCEDE0;
+
+static const uint32_t kSharedDictionaryMagic = 0xDEBCEDE1;
+
+static const uint32_t kManagedDictionaryMagic = 0xDEBCEDE2;
+
+/* "Lean" prepared dictionary. LZ77 data is referenced, not copied. It is the
+ * responsibility of the caller of "prepare dictionary" to keep the LZ77 data
+ * alive while the prepared dictionary is in use. */
+static const uint32_t kLeanPreparedDictionaryMagic = 0xDEBCEDE3;
+
static const uint64_t kPreparedDictionaryHashMul64Long =
BROTLI_MAKE_UINT64_T(0x1FE35A7Bu, 0xD3579BD3u);
typedef struct PreparedDictionary {
uint32_t magic;
- uint32_t source_offset;
+ uint32_t num_items;
uint32_t source_size;
uint32_t hash_bits;
uint32_t bucket_bits;
@@ -31,7 +44,8 @@ typedef struct PreparedDictionary {
/* uint16_t heads[1 << bucket_bits]; */
/* uint32_t items[variable]; */
- /* uint8_t source[source_size] */
+ /* [maybe] uint8_t* source_ref, depending on magic. */
+ /* [maybe] uint8_t source[source_size], depending on magic. */
} PreparedDictionary;
BROTLI_INTERNAL PreparedDictionary* CreatePreparedDictionary(MemoryManager* m,
diff --git a/c/enc/compress_fragment.c b/c/enc/compress_fragment.c
index 1f478ca..13890ea 100644
--- a/c/enc/compress_fragment.c
+++ b/c/enc/compress_fragment.c
@@ -16,8 +16,9 @@
#include <string.h> /* memcmp, memcpy, memset */
-#include "../common/platform.h"
#include <brotli/types.h>
+
+#include "../common/platform.h"
#include "brotli_bit_stream.h"
#include "entropy_encode.h"
#include "fast_log.h"
diff --git a/c/enc/compress_fragment.h b/c/enc/compress_fragment.h
index 099a979..9c0780f 100644
--- a/c/enc/compress_fragment.h
+++ b/c/enc/compress_fragment.h
@@ -12,9 +12,10 @@
#ifndef BROTLI_ENC_COMPRESS_FRAGMENT_H_
#define BROTLI_ENC_COMPRESS_FRAGMENT_H_
+#include <brotli/types.h>
+
#include "../common/constants.h"
#include "../common/platform.h"
-#include <brotli/types.h>
#include "entropy_encode.h"
#if defined(__cplusplus) || defined(c_plusplus)
diff --git a/c/enc/compress_fragment_two_pass.c b/c/enc/compress_fragment_two_pass.c
index 4cbb418..a762679 100644
--- a/c/enc/compress_fragment_two_pass.c
+++ b/c/enc/compress_fragment_two_pass.c
@@ -14,9 +14,10 @@
#include <string.h> /* memcmp, memcpy, memset */
+#include <brotli/types.h>
+
#include "../common/constants.h"
#include "../common/platform.h"
-#include <brotli/types.h>
#include "bit_cost.h"
#include "brotli_bit_stream.h"
#include "entropy_encode.h"
diff --git a/c/enc/compress_fragment_two_pass.h b/c/enc/compress_fragment_two_pass.h
index f5d0741..6d28d9b 100644
--- a/c/enc/compress_fragment_two_pass.h
+++ b/c/enc/compress_fragment_two_pass.h
@@ -13,9 +13,10 @@
#ifndef BROTLI_ENC_COMPRESS_FRAGMENT_TWO_PASS_H_
#define BROTLI_ENC_COMPRESS_FRAGMENT_TWO_PASS_H_
+#include <brotli/types.h>
+
#include "../common/constants.h"
#include "../common/platform.h"
-#include <brotli/types.h>
#include "entropy_encode.h"
#if defined(__cplusplus) || defined(c_plusplus)
diff --git a/c/enc/encode.c b/c/enc/encode.c
index afceba4..a8ac09a 100644
--- a/c/enc/encode.c
+++ b/c/enc/encode.c
@@ -30,6 +30,7 @@
#include "memory.h"
#include "metablock.h"
#include "prefix.h"
+#include "state.h"
#include "quality.h"
#include "ringbuffer.h"
#include "utf8_util.h"
@@ -41,84 +42,6 @@ extern "C" {
#define COPY_ARRAY(dst, src) memcpy(dst, src, sizeof(src));
-typedef enum BrotliEncoderStreamState {
- /* Default state. */
- BROTLI_STREAM_PROCESSING = 0,
- /* Intermediate state; after next block is emitted, byte-padding should be
- performed before getting back to default state. */
- BROTLI_STREAM_FLUSH_REQUESTED = 1,
- /* Last metablock was produced; no more input is acceptable. */
- BROTLI_STREAM_FINISHED = 2,
- /* Flushing compressed block and writing meta-data block header. */
- BROTLI_STREAM_METADATA_HEAD = 3,
- /* Writing metadata block body. */
- BROTLI_STREAM_METADATA_BODY = 4
-} BrotliEncoderStreamState;
-
-typedef enum BrotliEncoderFlintState {
- BROTLI_FLINT_NEEDS_2_BYTES = 2,
- BROTLI_FLINT_NEEDS_1_BYTE = 1,
- BROTLI_FLINT_WAITING_FOR_PROCESSING = 0,
- BROTLI_FLINT_WAITING_FOR_FLUSHING = -1,
- BROTLI_FLINT_DONE = -2
-} BrotliEncoderFlintState;
-
-typedef struct BrotliEncoderStateStruct {
- BrotliEncoderParams params;
-
- MemoryManager memory_manager_;
-
- uint64_t input_pos_;
- RingBuffer ringbuffer_;
- size_t cmd_alloc_size_;
- Command* commands_;
- size_t num_commands_;
- size_t num_literals_;
- size_t last_insert_len_;
- uint64_t last_flush_pos_;
- uint64_t last_processed_pos_;
- int dist_cache_[BROTLI_NUM_DISTANCE_SHORT_CODES];
- int saved_dist_cache_[4];
- uint16_t last_bytes_;
- uint8_t last_bytes_bits_;
- /* "Flint" is a tiny uncompressed block emitted before the continuation
- block to unwire literal context from previous data. Despite being int8_t,
- field is actually BrotliEncoderFlintState enum. */
- int8_t flint_;
- uint8_t prev_byte_;
- uint8_t prev_byte2_;
- size_t storage_size_;
- uint8_t* storage_;
-
- Hasher hasher_;
-
- /* Hash table for FAST_ONE_PASS_COMPRESSION_QUALITY mode. */
- int small_table_[1 << 10]; /* 4KiB */
- int* large_table_; /* Allocated only when needed */
- size_t large_table_size_;
-
- BrotliOnePassArena* one_pass_arena_;
- BrotliTwoPassArena* two_pass_arena_;
-
- /* Command and literal buffers for FAST_TWO_PASS_COMPRESSION_QUALITY. */
- uint32_t* command_buf_;
- uint8_t* literal_buf_;
-
- uint8_t* next_out_;
- size_t available_out_;
- size_t total_out_;
- /* Temporary buffer for padding flush bits or metadata block header / body. */
- union {
- uint64_t u64[2];
- uint8_t u8[16];
- } tiny_buf_;
- uint32_t remaining_metadata_bytes_;
- BrotliEncoderStreamState stream_state_;
-
- BROTLI_BOOL is_last_block_emitted_;
- BROTLI_BOOL is_initialized_;
-} BrotliEncoderStateStruct;
-
static size_t InputBlockSize(BrotliEncoderState* s) {
return (size_t)1 << s->params.lgblock;
}
@@ -780,6 +703,7 @@ static void BrotliEncoderInitState(BrotliEncoderState* s) {
s->two_pass_arena_ = NULL;
s->command_buf_ = NULL;
s->literal_buf_ = NULL;
+ s->total_in_ = 0;
s->next_out_ = NULL;
s->available_out_ = 0;
s->total_out_ = 0;
@@ -816,12 +740,26 @@ BrotliEncoderState* BrotliEncoderCreateInstance(
return state;
}
+#ifdef BROTLI_REPORTING
+/* When BROTLI_REPORTING is defined, a reporting module must be linked in. */
+void BrotliEncoderOnFinish(const BrotliEncoderState* s);
+#define BROTLI_ENCODER_ON_FINISH(s) BrotliEncoderOnFinish(s)
+#else
+#if !defined(BROTLI_ENCODER_ON_FINISH)
+#define BROTLI_ENCODER_ON_FINISH(s) ((void)(s))
+#endif
+#endif
+
static void BrotliEncoderCleanupState(BrotliEncoderState* s) {
MemoryManager* m = &s->memory_manager_;
+
+ BROTLI_ENCODER_ON_FINISH(s);
+
if (BROTLI_IS_OOM(m)) {
BrotliWipeOutMemoryManager(m);
return;
}
+
BROTLI_FREE(m, s->storage_);
BROTLI_FREE(m, s->commands_);
RingBufferFree(m, &s->ringbuffer_);
@@ -1006,10 +944,38 @@ static BROTLI_BOOL EncodeData(
MemoryManager* m = &s->memory_manager_;
ContextType literal_context_mode;
ContextLut literal_context_lut;
+ BROTLI_BOOL fast_compress =
+ s->params.quality == FAST_ONE_PASS_COMPRESSION_QUALITY ||
+ s->params.quality == FAST_TWO_PASS_COMPRESSION_QUALITY;
data = s->ringbuffer_.buffer_;
mask = s->ringbuffer_.mask_;
+ if (delta == 0) { /* No new input; still might want to flush or finish. */
+ if (!data) { /* No input has been processed so far. */
+ if (is_last) { /* Emit complete finalized stream. */
+ BROTLI_DCHECK(s->last_bytes_bits_ <= 14);
+ s->last_bytes_ |= (uint16_t)(3u << s->last_bytes_bits_);
+ s->last_bytes_bits_ = (uint8_t)(s->last_bytes_bits_ + 2u);
+ s->tiny_buf_.u8[0] = (uint8_t)s->last_bytes_;
+ s->tiny_buf_.u8[1] = (uint8_t)(s->last_bytes_ >> 8);
+ *output = s->tiny_buf_.u8;
+ *out_size = (s->last_bytes_bits_ + 7u) >> 3u;
+ return BROTLI_TRUE;
+ } else { /* No data, not last -> no-op. */
+ *out_size = 0;
+ return BROTLI_TRUE;
+ }
+ } else {
+ /* Fast compress performs flush every block -> flush is no-op. */
+ if (!is_last && (!force_flush || fast_compress)) { /* Another no-op. */
+ *out_size = 0;
+ return BROTLI_TRUE;
+ }
+ }
+ }
+ BROTLI_DCHECK(data);
+
if (s->params.quality > s->params.dictionary.max_quality) return BROTLI_FALSE;
/* Adding more blocks after "last" block is forbidden. */
if (s->is_last_block_emitted_) return BROTLI_FALSE;
@@ -1030,19 +996,12 @@ static BROTLI_BOOL EncodeData(
}
}
- if (s->params.quality == FAST_ONE_PASS_COMPRESSION_QUALITY ||
- s->params.quality == FAST_TWO_PASS_COMPRESSION_QUALITY) {
+ if (fast_compress) {
uint8_t* storage;
size_t storage_ix = s->last_bytes_bits_;
size_t table_size;
int* table;
- if (delta == 0 && !is_last) {
- /* We have no new input data and we don't have to finish the stream, so
- nothing to do. */
- *out_size = 0;
- return BROTLI_TRUE;
- }
storage = GetBrotliStorage(s, 2 * bytes + 503);
if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
storage[0] = (uint8_t)s->last_bytes_;
@@ -1238,242 +1197,6 @@ static size_t WriteMetadataHeader(
return (storage_ix + 7u) >> 3;
}
-static BROTLI_NOINLINE BROTLI_BOOL BrotliCompressBufferQuality10(
- int lgwin, size_t input_size, const uint8_t* input_buffer,
- size_t* encoded_size, uint8_t* encoded_buffer) {
- MemoryManager* m =
- (MemoryManager*)BrotliBootstrapAlloc(sizeof(MemoryManager), 0, 0, 0);
-
- const size_t mask = BROTLI_SIZE_MAX >> 1;
- int dist_cache[4] = { 4, 11, 15, 16 };
- int saved_dist_cache[4] = { 4, 11, 15, 16 };
- BROTLI_BOOL ok = BROTLI_TRUE;
- const size_t max_out_size = *encoded_size;
- size_t total_out_size = 0;
- uint16_t last_bytes;
- uint8_t last_bytes_bits;
-
- const size_t hasher_eff_size = BROTLI_MIN(size_t,
- input_size, BROTLI_MAX_BACKWARD_LIMIT(lgwin) + BROTLI_WINDOW_GAP);
-
- const int lgmetablock = BROTLI_MIN(int, 24, lgwin + 1);
- size_t max_block_size;
- const size_t max_metablock_size = (size_t)1 << lgmetablock;
- const size_t max_literals_per_metablock = max_metablock_size / 8;
- const size_t max_commands_per_metablock = max_metablock_size / 8;
- size_t metablock_start = 0;
- uint8_t prev_byte = 0;
- uint8_t prev_byte2 = 0;
-
- BrotliEncoderParams* params = NULL;
- Hasher* hasher = NULL;
-
- if (m == NULL) return BROTLI_FALSE;
- BrotliInitMemoryManager(m, 0, 0, 0);
- params = BROTLI_ALLOC(m, BrotliEncoderParams, 2);
- hasher = BROTLI_ALLOC(m, Hasher, 1);
- if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(params) || BROTLI_IS_NULL(hasher)) {
- goto oom;
- }
- BrotliEncoderInitParams(params);
- HasherInit(hasher);
-
- params->quality = 10;
- params->lgwin = lgwin;
- if (lgwin > BROTLI_MAX_WINDOW_BITS) {
- params->large_window = BROTLI_TRUE;
- }
- SanitizeParams(params);
- params->lgblock = ComputeLgBlock(params);
- ChooseDistanceParams(params);
- max_block_size = (size_t)1 << params->lgblock;
-
- /* Since default static dictionary is used we assume that
- * params->quality < params->dictionary.max_quality. */
-
- BROTLI_DCHECK(input_size <= mask + 1);
- EncodeWindowBits(lgwin, params->large_window, &last_bytes, &last_bytes_bits);
- InitOrStitchToPreviousBlock(m, hasher, input_buffer, mask, params,
- 0, hasher_eff_size, BROTLI_TRUE);
- if (BROTLI_IS_OOM(m)) goto oom;
-
- while (ok && metablock_start < input_size) {
- const size_t metablock_end =
- BROTLI_MIN(size_t, input_size, metablock_start + max_metablock_size);
- const size_t expected_num_commands =
- (metablock_end - metablock_start) / 12 + 16;
- Command* commands = 0;
- size_t num_commands = 0;
- size_t last_insert_len = 0;
- size_t num_literals = 0;
- size_t metablock_size = 0;
- size_t cmd_alloc_size = 0;
- BROTLI_BOOL is_last;
- uint8_t* storage;
- size_t storage_ix;
-
- ContextType literal_context_mode = ChooseContextMode(params,
- input_buffer, metablock_start, mask, metablock_end - metablock_start);
- ContextLut literal_context_lut = BROTLI_CONTEXT_LUT(literal_context_mode);
-
- size_t block_start;
- for (block_start = metablock_start; block_start < metablock_end; ) {
- size_t block_size =
- BROTLI_MIN(size_t, metablock_end - block_start, max_block_size);
- ZopfliNode* nodes = BROTLI_ALLOC(m, ZopfliNode, block_size + 1);
- size_t path_size;
- size_t new_cmd_alloc_size;
- if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(nodes)) goto oom;
- BrotliInitZopfliNodes(nodes, block_size + 1);
- StitchToPreviousBlockH10(&hasher->privat._H10, block_size, block_start,
- input_buffer, mask);
- path_size = BrotliZopfliComputeShortestPath(m, block_size, block_start,
- input_buffer, mask, literal_context_lut, params, dist_cache, hasher,
- nodes);
- if (BROTLI_IS_OOM(m)) goto oom;
- /* We allocate a command buffer in the first iteration of this loop that
- will be likely big enough for the whole metablock, so that for most
- inputs we will not have to reallocate in later iterations. We do the
- allocation here and not before the loop, because if the input is small,
- this will be allocated after the Zopfli cost model is freed, so this
- will not increase peak memory usage.
- TODO(eustas): If the first allocation is too small, increase command
- buffer size exponentially. */
- new_cmd_alloc_size = BROTLI_MAX(size_t, expected_num_commands,
- num_commands + path_size + 1);
- if (cmd_alloc_size != new_cmd_alloc_size) {
- Command* new_commands = BROTLI_ALLOC(m, Command, new_cmd_alloc_size);
- if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(new_commands)) goto oom;
- cmd_alloc_size = new_cmd_alloc_size;
- if (commands) {
- memcpy(new_commands, commands, sizeof(Command) * num_commands);
- BROTLI_FREE(m, commands);
- }
- commands = new_commands;
- }
- BrotliZopfliCreateCommands(block_size, block_start, &nodes[0], dist_cache,
- &last_insert_len, params, &commands[num_commands], &num_literals);
- num_commands += path_size;
- block_start += block_size;
- metablock_size += block_size;
- BROTLI_FREE(m, nodes);
- if (num_literals > max_literals_per_metablock ||
- num_commands > max_commands_per_metablock) {
- break;
- }
- }
-
- if (last_insert_len > 0) {
- InitInsertCommand(&commands[num_commands++], last_insert_len);
- num_literals += last_insert_len;
- }
-
- is_last = TO_BROTLI_BOOL(metablock_start + metablock_size == input_size);
- storage = NULL;
- storage_ix = last_bytes_bits;
-
- if (metablock_size == 0) {
- /* Write the ISLAST and ISEMPTY bits. */
- storage = BROTLI_ALLOC(m, uint8_t, 16);
- if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(storage)) goto oom;
- storage[0] = (uint8_t)last_bytes;
- storage[1] = (uint8_t)(last_bytes >> 8);
- BrotliWriteBits(2, 3, &storage_ix, storage);
- storage_ix = (storage_ix + 7u) & ~7u;
- } else if (!ShouldCompress(input_buffer, mask, metablock_start,
- metablock_size, num_literals, num_commands)) {
- /* Restore the distance cache, as its last update by
- CreateBackwardReferences is now unused. */
- memcpy(dist_cache, saved_dist_cache, 4 * sizeof(dist_cache[0]));
- storage = BROTLI_ALLOC(m, uint8_t, metablock_size + 16);
- if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(storage)) goto oom;
- storage[0] = (uint8_t)last_bytes;
- storage[1] = (uint8_t)(last_bytes >> 8);
- BrotliStoreUncompressedMetaBlock(is_last, input_buffer,
- metablock_start, mask, metablock_size,
- &storage_ix, storage);
- } else {
- MetaBlockSplit mb;
- BrotliEncoderParams* block_params = params + 1;
- *block_params = *params; /* shallow copy */
- InitMetaBlockSplit(&mb);
- BrotliBuildMetaBlock(m, input_buffer, metablock_start, mask,
- block_params,
- prev_byte, prev_byte2,
- commands, num_commands,
- literal_context_mode,
- &mb);
- if (BROTLI_IS_OOM(m)) goto oom;
- {
- /* The number of distance symbols effectively used for distance
- histograms. It might be less than distance alphabet size
- for "Large Window Brotli" (32-bit). */
- BrotliOptimizeHistograms(block_params->dist.alphabet_size_limit, &mb);
- }
- storage = BROTLI_ALLOC(m, uint8_t, 2 * metablock_size + 503);
- if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(storage)) goto oom;
- storage[0] = (uint8_t)last_bytes;
- storage[1] = (uint8_t)(last_bytes >> 8);
- BrotliStoreMetaBlock(m, input_buffer, metablock_start, metablock_size,
- mask, prev_byte, prev_byte2,
- is_last,
- block_params,
- literal_context_mode,
- commands, num_commands,
- &mb,
- &storage_ix, storage);
- if (BROTLI_IS_OOM(m)) goto oom;
- if (metablock_size + 4 < (storage_ix >> 3)) {
- /* Restore the distance cache and last byte. */
- memcpy(dist_cache, saved_dist_cache, 4 * sizeof(dist_cache[0]));
- storage[0] = (uint8_t)last_bytes;
- storage[1] = (uint8_t)(last_bytes >> 8);
- storage_ix = last_bytes_bits;
- BrotliStoreUncompressedMetaBlock(is_last, input_buffer,
- metablock_start, mask,
- metablock_size, &storage_ix, storage);
- }
- DestroyMetaBlockSplit(m, &mb);
- }
- last_bytes = (uint16_t)(storage[storage_ix >> 3]);
- last_bytes_bits = storage_ix & 7u;
- metablock_start += metablock_size;
- if (metablock_start < input_size) {
- prev_byte = input_buffer[metablock_start - 1];
- prev_byte2 = input_buffer[metablock_start - 2];
- }
- /* Save the state of the distance cache in case we need to restore it for
- emitting an uncompressed block. */
- memcpy(saved_dist_cache, dist_cache, 4 * sizeof(dist_cache[0]));
-
- {
- const size_t out_size = storage_ix >> 3;
- total_out_size += out_size;
- if (total_out_size <= max_out_size) {
- memcpy(encoded_buffer, storage, out_size);
- encoded_buffer += out_size;
- } else {
- ok = BROTLI_FALSE;
- }
- }
- BROTLI_FREE(m, storage);
- BROTLI_FREE(m, commands);
- }
-
- *encoded_size = total_out_size;
- DestroyHasher(m, hasher);
- BROTLI_FREE(m, hasher);
- BrotliEncoderCleanupParams(m, params);
- BROTLI_FREE(m, params);
- BrotliBootstrapFree(m, m);
- return ok;
-
-oom:
- BrotliWipeOutMemoryManager(m);
- BrotliBootstrapFree(m, m);
- return BROTLI_FALSE;
-}
-
size_t BrotliEncoderMaxCompressedSize(size_t input_size) {
/* [window bits / empty metadata] + N * [uncompressed] + [last empty] */
size_t num_large_blocks = input_size >> 14;
@@ -1539,17 +1262,6 @@ BROTLI_BOOL BrotliEncoderCompress(
*encoded_buffer = 6;
return BROTLI_TRUE;
}
- if (quality == 10) {
- /* TODO(eustas): Implement this direct path for all quality levels. */
- const int lg_win = BROTLI_MIN(int, BROTLI_LARGE_MAX_WINDOW_BITS,
- BROTLI_MAX(int, 16, lgwin));
- int ok = BrotliCompressBufferQuality10(lg_win, input_size, input_buffer,
- encoded_size, encoded_buffer);
- if (!ok || (max_out_size && *encoded_size > max_out_size)) {
- goto fallback;
- }
- return BROTLI_TRUE;
- }
s = BrotliEncoderCreateInstance(0, 0, 0);
if (!s) {
@@ -1561,6 +1273,7 @@ BROTLI_BOOL BrotliEncoderCompress(
uint8_t* next_out = encoded_buffer;
size_t total_out = 0;
BROTLI_BOOL result = BROTLI_FALSE;
+ /* TODO(eustas): check that parameters are sane. */
BrotliEncoderSetParameter(s, BROTLI_PARAM_QUALITY, (uint32_t)quality);
BrotliEncoderSetParameter(s, BROTLI_PARAM_LGWIN, (uint32_t)lgwin);
BrotliEncoderSetParameter(s, BROTLI_PARAM_MODE, (uint32_t)mode);
@@ -1612,6 +1325,18 @@ static void InjectBytePaddingBlock(BrotliEncoderState* s) {
s->available_out_ += (seal_bits + 7) >> 3;
}
+/* Publishes the output byte counter through |total_out|; NULL is ignored. */
+static void SetTotalOut(BrotliEncoderState* s, size_t* total_out) {
+  const size_t limit = (size_t)-1;
+  if (!total_out) return;
+  /* uint64_t -> size_t conversion: saturate at |limit|, never truncate. */
+  if (s->total_out_ < limit) {
+    *total_out = (size_t)s->total_out_;
+  } else {
+    *total_out = limit;
+  }
+}
+
/* Injects padding bits or pushes compressed data to output.
Returns false if nothing is done. */
static BROTLI_BOOL InjectFlushOrPushOutput(BrotliEncoderState* s,
@@ -1631,7 +1356,7 @@ static BROTLI_BOOL InjectFlushOrPushOutput(BrotliEncoderState* s,
s->next_out_ += copy_output_size;
s->available_out_ -= copy_output_size;
s->total_out_ += copy_output_size;
- if (total_out) *total_out = s->total_out_;
+ SetTotalOut(s, total_out);
return BROTLI_TRUE;
}
@@ -1740,6 +1465,7 @@ static BROTLI_BOOL BrotliEncoderCompressStreamFast(
if (block_size != 0) {
*next_in += block_size;
*available_in -= block_size;
+ s->total_in_ += block_size;
}
if (inplace) {
size_t out_bytes = storage_ix >> 3;
@@ -1748,7 +1474,7 @@ static BROTLI_BOOL BrotliEncoderCompressStreamFast(
*next_out += out_bytes;
*available_out -= out_bytes;
s->total_out_ += out_bytes;
- if (total_out) *total_out = s->total_out_;
+ SetTotalOut(s, total_out);
} else {
size_t out_bytes = storage_ix >> 3;
s->next_out_ = storage;
@@ -1817,6 +1543,7 @@ static BROTLI_BOOL ProcessMetadata(
memcpy(*next_out, *next_in, copy);
*next_in += copy;
*available_in -= copy;
+ s->total_in_ += copy; /* not actually data input, though */
s->remaining_metadata_bytes_ -= copy;
*next_out += copy;
*available_out -= copy;
@@ -1827,6 +1554,7 @@ static BROTLI_BOOL ProcessMetadata(
memcpy(s->next_out_, *next_in, copy);
*next_in += copy;
*available_in -= copy;
+ s->total_in_ += copy; /* not actually data input, though */
s->remaining_metadata_bytes_ -= copy;
s->available_out_ = copy;
}
@@ -1854,7 +1582,7 @@ static void UpdateSizeHint(BrotliEncoderState* s, size_t available_in) {
BROTLI_BOOL BrotliEncoderCompressStream(
BrotliEncoderState* s, BrotliEncoderOperation op, size_t* available_in,
- const uint8_t** next_in, size_t* available_out,uint8_t** next_out,
+ const uint8_t** next_in, size_t* available_out, uint8_t** next_out,
size_t* total_out) {
if (!EnsureInitialized(s)) return BROTLI_FALSE;
@@ -1896,6 +1624,7 @@ BROTLI_BOOL BrotliEncoderCompressStream(
CopyInputToRingBuffer(s, copy_input_size, *next_in);
*next_in += copy_input_size;
*available_in -= copy_input_size;
+ s->total_in_ += copy_input_size;
if (s->flint_ > 0) s->flint_ = (int8_t)(s->flint_ - (int)copy_input_size);
continue;
}
@@ -2021,7 +1750,7 @@ void BrotliEncoderDestroyPreparedDictionary(
}
if (dict->dictionary == NULL) {
/* This should never ever happen. */
- } else if (*dict->dictionary == kPreparedDictionaryMagic) {
+ } else if (*dict->dictionary == kLeanPreparedDictionaryMagic) {
DestroyPreparedDictionary(
&dict->memory_manager_, (PreparedDictionary*)dict->dictionary);
} else if (*dict->dictionary == kSharedDictionaryMagic) {
@@ -2029,7 +1758,8 @@ void BrotliEncoderDestroyPreparedDictionary(
(SharedEncoderDictionary*)dict->dictionary);
BrotliFree(&dict->memory_manager_, dict->dictionary);
} else {
- /* This should never ever happen. */
+ /* There is also kPreparedDictionaryMagic, but such instances should be
+ * constructed and destroyed by different means. */
}
dict->dictionary = NULL;
BrotliDestroyManagedDictionary(dict);
@@ -2048,7 +1778,8 @@ BROTLI_BOOL BrotliEncoderAttachPreparedDictionary(BrotliEncoderState* state,
dict = (BrotliEncoderPreparedDictionary*)managed_dictionary->dictionary;
}
current = &state->params.dictionary;
- if (magic == kPreparedDictionaryMagic) {
+ if (magic == kPreparedDictionaryMagic ||
+ magic == kLeanPreparedDictionaryMagic) {
const PreparedDictionary* prepared = (const PreparedDictionary*)dict;
if (!AttachPreparedDictionary(&current->compound, prepared)) {
return BROTLI_FALSE;
@@ -2176,7 +1907,15 @@ size_t BrotliEncoderGetPreparedDictionarySize(
return sizeof(PreparedDictionary) + dictionary->source_size +
(sizeof(uint32_t) << dictionary->slot_bits) +
(sizeof(uint16_t) << dictionary->bucket_bits) +
- (sizeof(uint32_t) * dictionary->source_offset) + overhead;
+ (sizeof(uint32_t) * dictionary->num_items) + overhead;
+ } else if (magic == kLeanPreparedDictionaryMagic) {
+ const PreparedDictionary* dictionary =
+ (const PreparedDictionary*)prepared;
+ /* Keep in sync with step 3 of CreatePreparedDictionary */
+ return sizeof(PreparedDictionary) + sizeof(uint8_t*) +
+ (sizeof(uint32_t) << dictionary->slot_bits) +
+ (sizeof(uint16_t) << dictionary->bucket_bits) +
+ (sizeof(uint32_t) * dictionary->num_items) + overhead;
} else if (magic == kSharedDictionaryMagic) {
const SharedEncoderDictionary* dictionary =
(const SharedEncoderDictionary*)prepared;
diff --git a/c/enc/encoder_dict.h b/c/enc/encoder_dict.h
index b5b591d..b291f98 100644
--- a/c/enc/encoder_dict.h
+++ b/c/enc/encoder_dict.h
@@ -7,10 +7,11 @@
#ifndef BROTLI_ENC_ENCODER_DICT_H_
#define BROTLI_ENC_ENCODER_DICT_H_
-#include "../common/dictionary.h"
-#include "../common/platform.h"
#include <brotli/shared_dictionary.h>
#include <brotli/types.h>
+
+#include "../common/dictionary.h"
+#include "../common/platform.h"
#include "compound_dictionary.h"
#include "memory.h"
#include "static_dict_lut.h"
@@ -103,9 +104,6 @@ typedef struct ContextualEncoderDictionary {
BrotliEncoderDictionary* instances_;
} ContextualEncoderDictionary;
-static const uint32_t kSharedDictionaryMagic = 0xDEBCEDE1;
-static const uint32_t kManagedDictionaryMagic = 0xDEBCEDE2;
-
typedef struct SharedEncoderDictionary {
/* Magic value to distinguish this struct from PreparedDictionary for
certain external usages. */
diff --git a/c/enc/entropy_encode.c b/c/enc/entropy_encode.c
index b2dcbbd..9aed43b 100644
--- a/c/enc/entropy_encode.c
+++ b/c/enc/entropy_encode.c
@@ -10,9 +10,10 @@
#include <string.h> /* memset */
+#include <brotli/types.h>
+
#include "../common/constants.h"
#include "../common/platform.h"
-#include <brotli/types.h>
#if defined(__cplusplus) || defined(c_plusplus)
extern "C" {
diff --git a/c/enc/entropy_encode.h b/c/enc/entropy_encode.h
index 9618e1d..e1c779c 100644
--- a/c/enc/entropy_encode.h
+++ b/c/enc/entropy_encode.h
@@ -9,9 +9,10 @@
#ifndef BROTLI_ENC_ENTROPY_ENCODE_H_
#define BROTLI_ENC_ENTROPY_ENCODE_H_
-#include "../common/platform.h"
#include <brotli/types.h>
+#include "../common/platform.h"
+
#if defined(__cplusplus) || defined(c_plusplus)
extern "C" {
#endif
diff --git a/c/enc/entropy_encode_static.h b/c/enc/entropy_encode_static.h
index 2be1c6d..ecff1fe 100644
--- a/c/enc/entropy_encode_static.h
+++ b/c/enc/entropy_encode_static.h
@@ -9,9 +9,10 @@
#ifndef BROTLI_ENC_ENTROPY_ENCODE_STATIC_H_
#define BROTLI_ENC_ENTROPY_ENCODE_STATIC_H_
+#include <brotli/types.h>
+
#include "../common/constants.h"
#include "../common/platform.h"
-#include <brotli/types.h>
#include "write_bits.h"
#if defined(__cplusplus) || defined(c_plusplus)
diff --git a/c/enc/fast_log.h b/c/enc/fast_log.h
index 2094f13..f82f4cf 100644
--- a/c/enc/fast_log.h
+++ b/c/enc/fast_log.h
@@ -11,9 +11,10 @@
#include <math.h>
-#include "../common/platform.h"
#include <brotli/types.h>
+#include "../common/platform.h"
+
#if defined(__cplusplus) || defined(c_plusplus)
extern "C" {
#endif
diff --git a/c/enc/find_match_length.h b/c/enc/find_match_length.h
index f8853a7..dee0414 100644
--- a/c/enc/find_match_length.h
+++ b/c/enc/find_match_length.h
@@ -9,9 +9,10 @@
#ifndef BROTLI_ENC_FIND_MATCH_LENGTH_H_
#define BROTLI_ENC_FIND_MATCH_LENGTH_H_
-#include "../common/platform.h"
#include <brotli/types.h>
+#include "../common/platform.h"
+
#if defined(__cplusplus) || defined(c_plusplus)
extern "C" {
#endif
diff --git a/c/enc/hash.h b/c/enc/hash.h
index 9ead9e6..fc6e334 100644
--- a/c/enc/hash.h
+++ b/c/enc/hash.h
@@ -13,10 +13,12 @@
#include <stdlib.h> /* exit */
#include <string.h> /* memcmp, memset */
+#include <brotli/types.h>
+
#include "../common/constants.h"
#include "../common/dictionary.h"
#include "../common/platform.h"
-#include <brotli/types.h>
+#include "compound_dictionary.h"
#include "encoder_dict.h"
#include "fast_log.h"
#include "find_match_length.h"
@@ -511,7 +513,6 @@ static BROTLI_INLINE void FindCompoundDictionaryMatch(
const size_t ring_buffer_mask, const int* BROTLI_RESTRICT distance_cache,
const size_t cur_ix, const size_t max_length, const size_t distance_offset,
const size_t max_distance, HasherSearchResult* BROTLI_RESTRICT out) {
- const uint32_t source_offset = self->source_offset;
const uint32_t source_size = self->source_size;
const size_t boundary = distance_offset - source_size;
const uint32_t hash_bits = self->hash_bits;
@@ -525,7 +526,7 @@ static BROTLI_INLINE void FindCompoundDictionaryMatch(
const uint32_t* slot_offsets = (uint32_t*)(&self[1]);
const uint16_t* heads = (uint16_t*)(&slot_offsets[1u << slot_bits]);
const uint32_t* items = (uint32_t*)(&heads[1u << bucket_bits]);
- const uint8_t* source = (uint8_t*)(&items[source_offset]);
+ const uint8_t* source = NULL;
const size_t cur_ix_masked = cur_ix & ring_buffer_mask;
score_t best_score = out->score;
@@ -539,6 +540,15 @@ static BROTLI_INLINE void FindCompoundDictionaryMatch(
const uint32_t head = heads[key];
const uint32_t* BROTLI_RESTRICT chain = &items[slot_offsets[slot] + head];
uint32_t item = (head == 0xFFFF) ? 1 : 0;
+
+ const void* tail = (void*)&items[self->num_items];
+ if (self->magic == kPreparedDictionaryMagic) {
+ source = (const uint8_t*)tail;
+ } else {
+ /* kLeanPreparedDictionaryMagic */
+ source = (const uint8_t*)BROTLI_UNALIGNED_LOAD_PTR((const uint8_t**)tail);
+ }
+
for (i = 0; i < 4; ++i) {
const size_t distance = (size_t)distance_cache[i];
size_t offset;
@@ -608,7 +618,6 @@ static BROTLI_INLINE size_t FindAllCompoundDictionaryMatches(
const size_t ring_buffer_mask, const size_t cur_ix, const size_t min_length,
const size_t max_length, const size_t distance_offset,
const size_t max_distance, BackwardMatch* matches, size_t match_limit) {
- const uint32_t source_offset = self->source_offset;
const uint32_t source_size = self->source_size;
const uint32_t hash_bits = self->hash_bits;
const uint32_t bucket_bits = self->bucket_bits;
@@ -621,7 +630,7 @@ static BROTLI_INLINE size_t FindAllCompoundDictionaryMatches(
const uint32_t* slot_offsets = (uint32_t*)(&self[1]);
const uint16_t* heads = (uint16_t*)(&slot_offsets[1u << slot_bits]);
const uint32_t* items = (uint32_t*)(&heads[1u << bucket_bits]);
- const uint8_t* source = (uint8_t*)(&items[source_offset]);
+ const uint8_t* source = NULL;
const size_t cur_ix_masked = cur_ix & ring_buffer_mask;
size_t best_len = min_length;
@@ -634,6 +643,15 @@ static BROTLI_INLINE size_t FindAllCompoundDictionaryMatches(
const uint32_t* BROTLI_RESTRICT chain = &items[slot_offsets[slot] + head];
uint32_t item = (head == 0xFFFF) ? 1 : 0;
size_t found = 0;
+
+ const void* tail = (void*)&items[self->num_items];
+ if (self->magic == kPreparedDictionaryMagic) {
+ source = (const uint8_t*)tail;
+ } else {
+ /* kLeanPreparedDictionaryMagic */
+ source = (const uint8_t*)BROTLI_UNALIGNED_LOAD_PTR((const uint8_t**)tail);
+ }
+
while (item == 0) {
size_t offset;
size_t distance;
diff --git a/c/enc/histogram.h b/c/enc/histogram.h
index b213a8b..d1abd97 100644
--- a/c/enc/histogram.h
+++ b/c/enc/histogram.h
@@ -11,10 +11,11 @@
#include <string.h> /* memset */
+#include <brotli/types.h>
+
#include "../common/constants.h"
#include "../common/context.h"
#include "../common/platform.h"
-#include <brotli/types.h>
#include "block_splitter.h"
#include "command.h"
diff --git a/c/enc/literal_cost.c b/c/enc/literal_cost.c
index 4e5068e..2ac847f 100644
--- a/c/enc/literal_cost.c
+++ b/c/enc/literal_cost.c
@@ -11,8 +11,9 @@
#include <string.h> /* memset */
-#include "../common/platform.h"
#include <brotli/types.h>
+
+#include "../common/platform.h"
#include "fast_log.h"
#include "utf8_util.h"
diff --git a/c/enc/literal_cost.h b/c/enc/literal_cost.h
index efc8e17..284a8e5 100644
--- a/c/enc/literal_cost.h
+++ b/c/enc/literal_cost.h
@@ -10,9 +10,10 @@
#ifndef BROTLI_ENC_LITERAL_COST_H_
#define BROTLI_ENC_LITERAL_COST_H_
-#include "../common/platform.h"
#include <brotli/types.h>
+#include "../common/platform.h"
+
#if defined(__cplusplus) || defined(c_plusplus)
extern "C" {
#endif
diff --git a/c/enc/memory.c b/c/enc/memory.c
index f3afebc..51e1b7f 100644
--- a/c/enc/memory.c
+++ b/c/enc/memory.c
@@ -12,9 +12,10 @@
#include <stdlib.h> /* exit, free, malloc */
#include <string.h> /* memcpy */
-#include "../common/platform.h"
#include <brotli/types.h>
+#include "../common/platform.h"
+
#if defined(__cplusplus) || defined(c_plusplus)
extern "C" {
#endif
diff --git a/c/enc/memory.h b/c/enc/memory.h
index 13b23d4..cbe4e30 100644
--- a/c/enc/memory.h
+++ b/c/enc/memory.h
@@ -11,9 +11,10 @@
#include <string.h> /* memcpy */
-#include "../common/platform.h"
#include <brotli/types.h>
+#include "../common/platform.h"
+
#if defined(__cplusplus) || defined(c_plusplus)
extern "C" {
#endif
diff --git a/c/enc/metablock.c b/c/enc/metablock.c
index 47b577b..0c5c078 100644
--- a/c/enc/metablock.c
+++ b/c/enc/metablock.c
@@ -9,10 +9,11 @@
#include "metablock.h"
+#include <brotli/types.h>
+
#include "../common/constants.h"
#include "../common/context.h"
#include "../common/platform.h"
-#include <brotli/types.h>
#include "bit_cost.h"
#include "block_splitter.h"
#include "cluster.h"
diff --git a/c/enc/metablock.h b/c/enc/metablock.h
index 50bd294..db38f8f 100644
--- a/c/enc/metablock.h
+++ b/c/enc/metablock.h
@@ -10,9 +10,10 @@
#ifndef BROTLI_ENC_METABLOCK_H_
#define BROTLI_ENC_METABLOCK_H_
+#include <brotli/types.h>
+
#include "../common/context.h"
#include "../common/platform.h"
-#include <brotli/types.h>
#include "block_splitter.h"
#include "command.h"
#include "histogram.h"
diff --git a/c/enc/params.h b/c/enc/params.h
index cc74279..baeb319 100644
--- a/c/enc/params.h
+++ b/c/enc/params.h
@@ -10,6 +10,7 @@
#define BROTLI_ENC_PARAMS_H_
#include <brotli/encode.h>
+
#include "encoder_dict.h"
typedef struct BrotliHasherParams {
diff --git a/c/enc/prefix.h b/c/enc/prefix.h
index b58d50b..0f006f1 100644
--- a/c/enc/prefix.h
+++ b/c/enc/prefix.h
@@ -10,9 +10,10 @@
#ifndef BROTLI_ENC_PREFIX_H_
#define BROTLI_ENC_PREFIX_H_
+#include <brotli/types.h>
+
#include "../common/constants.h"
#include "../common/platform.h"
-#include <brotli/types.h>
#include "fast_log.h"
#if defined(__cplusplus) || defined(c_plusplus)
diff --git a/c/enc/quality.h b/c/enc/quality.h
index 392ab00..99891b4 100644
--- a/c/enc/quality.h
+++ b/c/enc/quality.h
@@ -10,8 +10,9 @@
#ifndef BROTLI_ENC_QUALITY_H_
#define BROTLI_ENC_QUALITY_H_
-#include "../common/platform.h"
#include <brotli/encode.h>
+
+#include "../common/platform.h"
#include "params.h"
#define FAST_ONE_PASS_COMPRESSION_QUALITY 0
diff --git a/c/enc/ringbuffer.h b/c/enc/ringbuffer.h
index 0db88cf..27245b7 100644
--- a/c/enc/ringbuffer.h
+++ b/c/enc/ringbuffer.h
@@ -11,8 +11,9 @@
#include <string.h> /* memcpy */
-#include "../common/platform.h"
#include <brotli/types.h>
+
+#include "../common/platform.h"
#include "memory.h"
#include "quality.h"
diff --git a/c/enc/state.h b/c/enc/state.h
new file mode 100644
index 0000000..cb82987
--- /dev/null
+++ b/c/enc/state.h
@@ -0,0 +1,104 @@
+/* Copyright 2022 Google Inc. All Rights Reserved.
+
+ Distributed under MIT license.
+ See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* Encoder state. */
+
+#ifndef BROTLI_ENC_STATE_H_
+#define BROTLI_ENC_STATE_H_
+
+#include <brotli/types.h>
+
+#include "command.h"
+#include "compress_fragment.h"
+#include "compress_fragment_two_pass.h"
+#include "hash.h"
+#include "memory.h"
+#include "params.h"
+#include "ringbuffer.h"
+
+typedef enum BrotliEncoderStreamState { /* Stage of the encoder's output
+ stream; this is the type of the stream_state_ field of
+ BrotliEncoderStateStruct. Values are consecutive, starting at 0. */
+ /* Default state. */
+ BROTLI_STREAM_PROCESSING = 0,
+ /* Intermediate state; after next block is emitted, byte-padding should be
+ performed before getting back to default state. */
+ BROTLI_STREAM_FLUSH_REQUESTED = 1,
+ /* Last metablock was produced; no more input is acceptable. */
+ BROTLI_STREAM_FINISHED = 2,
+ /* Flushing compressed block and writing meta-data block header. */
+ BROTLI_STREAM_METADATA_HEAD = 3,
+ /* Writing metadata block body. */
+ BROTLI_STREAM_METADATA_BODY = 4
+} BrotliEncoderStreamState;
+
+typedef enum BrotliEncoderFlintState { /* Values held by the int8_t flint_
+ field of BrotliEncoderStateStruct; every enumerator fits into int8_t.
+ NOTE(review): the positive values look like a countdown of bytes still
+ required before the flint can be handled -- confirm against encode.c. */
+ BROTLI_FLINT_NEEDS_2_BYTES = 2,
+ BROTLI_FLINT_NEEDS_1_BYTE = 1,
+ BROTLI_FLINT_WAITING_FOR_PROCESSING = 0,
+ BROTLI_FLINT_WAITING_FOR_FLUSHING = -1,
+ BROTLI_FLINT_DONE = -2
+} BrotliEncoderFlintState;
+
+typedef struct BrotliEncoderStateStruct { /* Full encoder instance state:
+ parameters, memory manager, ring buffer, pending commands, hasher,
+ scratch for the fast quality modes and output-stream bookkeeping. */
+ BrotliEncoderParams params;
+
+ MemoryManager memory_manager_;
+
+ uint64_t input_pos_;
+ RingBuffer ringbuffer_;
+ size_t cmd_alloc_size_; /* NOTE(review): presumably the allocated capacity
+ of commands_, as opposed to num_commands_ -- confirm. */
+ Command* commands_;
+ size_t num_commands_;
+ size_t num_literals_;
+ size_t last_insert_len_;
+ uint64_t last_flush_pos_;
+ uint64_t last_processed_pos_;
+ int dist_cache_[BROTLI_NUM_DISTANCE_SHORT_CODES];
+ int saved_dist_cache_[4]; /* NOTE(review): presumably a backup of the
+ leading dist_cache_ entries -- confirm against encode.c. */
+ uint16_t last_bytes_;
+ uint8_t last_bytes_bits_; /* NOTE(review): looks like the number of valid
+ bits in last_bytes_ -- confirm. */
+ /* "Flint" is a tiny uncompressed block emitted before the continuation
+ block to unwire literal context from previous data. Despite being int8_t,
+ field is actually BrotliEncoderFlintState enum. */
+ int8_t flint_;
+ uint8_t prev_byte_;
+ uint8_t prev_byte2_;
+ size_t storage_size_;
+ uint8_t* storage_;
+
+ Hasher hasher_;
+
+ /* Hash table for FAST_ONE_PASS_COMPRESSION_QUALITY mode. */
+ int small_table_[1 << 10]; /* 4KiB */
+ int* large_table_; /* Allocated only when needed */
+ size_t large_table_size_;
+
+ BrotliOnePassArena* one_pass_arena_;
+ BrotliTwoPassArena* two_pass_arena_;
+
+ /* Command and literal buffers for FAST_TWO_PASS_COMPRESSION_QUALITY. */
+ uint32_t* command_buf_;
+ uint8_t* literal_buf_;
+
+ uint64_t total_in_;
+ uint8_t* next_out_;
+ size_t available_out_;
+ uint64_t total_out_;
+ /* Temporary buffer for padding flush bits or metadata block header / body. */
+ union {
+ uint64_t u64[2];
+ uint8_t u8[16];
+ } tiny_buf_; /* Both members overlay the same 16 bytes. */
+ uint32_t remaining_metadata_bytes_;
+ BrotliEncoderStreamState stream_state_;
+
+ BROTLI_BOOL is_last_block_emitted_;
+ BROTLI_BOOL is_initialized_;
+} BrotliEncoderStateStruct;
+
+typedef struct BrotliEncoderStateStruct BrotliEncoderStateInternal; /* The
+ macro below makes BrotliEncoderState expand to this typedef name. */
+#define BrotliEncoderState BrotliEncoderStateInternal
+
+#endif /* BROTLI_ENC_STATE_H_ */
diff --git a/c/enc/static_dict.h b/c/enc/static_dict.h
index f572bc6..ab83220 100644
--- a/c/enc/static_dict.h
+++ b/c/enc/static_dict.h
@@ -9,9 +9,10 @@
#ifndef BROTLI_ENC_STATIC_DICT_H_
#define BROTLI_ENC_STATIC_DICT_H_
+#include <brotli/types.h>
+
#include "../common/dictionary.h"
#include "../common/platform.h"
-#include <brotli/types.h>
#include "encoder_dict.h"
#if defined(__cplusplus) || defined(c_plusplus)
diff --git a/c/enc/utf8_util.h b/c/enc/utf8_util.h
index 8fda80c..a38a953 100644
--- a/c/enc/utf8_util.h
+++ b/c/enc/utf8_util.h
@@ -9,9 +9,10 @@
#ifndef BROTLI_ENC_UTF8_UTIL_H_
#define BROTLI_ENC_UTF8_UTIL_H_
-#include "../common/platform.h"
#include <brotli/types.h>
+#include "../common/platform.h"
+
#if defined(__cplusplus) || defined(c_plusplus)
extern "C" {
#endif
diff --git a/c/enc/write_bits.h b/c/enc/write_bits.h
index f6f88b4..242754b 100644
--- a/c/enc/write_bits.h
+++ b/c/enc/write_bits.h
@@ -9,9 +9,10 @@
#ifndef BROTLI_ENC_WRITE_BITS_H_
#define BROTLI_ENC_WRITE_BITS_H_
-#include "../common/platform.h"
#include <brotli/types.h>
+#include "../common/platform.h"
+
#if defined(__cplusplus) || defined(c_plusplus)
extern "C" {
#endif
diff --git a/c/include/brotli/port.h b/c/include/brotli/port.h
index a681ac4..0d50019 100644
--- a/c/include/brotli/port.h
+++ b/c/include/brotli/port.h
@@ -224,14 +224,6 @@
#define BROTLI_HAS_FEATURE(feature) (0)
#endif
-#if defined(ADDRESS_SANITIZER) || BROTLI_HAS_FEATURE(address_sanitizer) || \
- defined(THREAD_SANITIZER) || BROTLI_HAS_FEATURE(thread_sanitizer) || \
- defined(MEMORY_SANITIZER) || BROTLI_HAS_FEATURE(memory_sanitizer)
-#define BROTLI_SANITIZED 1
-#else
-#define BROTLI_SANITIZED 0
-#endif
-
#if defined(_WIN32) || defined(__CYGWIN__)
#define BROTLI_PUBLIC
#elif BROTLI_GNUC_VERSION_CHECK(3, 3, 0) || \
diff --git a/c/tools/brotli.c b/c/tools/brotli.c
index 0ea45d3..80ead72 100644
--- a/c/tools/brotli.c
+++ b/c/tools/brotli.c
@@ -20,11 +20,12 @@
#include <sys/types.h>
#include <time.h>
-#include "../common/constants.h"
-#include "../common/version.h"
#include <brotli/decode.h>
#include <brotli/encode.h>
+#include "../common/constants.h"
+#include "../common/version.h"
+
#if !defined(_WIN32)
#include <unistd.h>
#include <utime.h>
diff --git a/c/tools/brotli.md b/c/tools/brotli.md
index 895c955..cb6d6f3 100644
--- a/c/tools/brotli.md
+++ b/c/tools/brotli.md
@@ -1,15 +1,15 @@
+# NAME
+
brotli(1) -- brotli, unbrotli - compress or decompress files
-================================================================
-SYNOPSIS
---------
+# SYNOPSIS
`brotli` [*OPTION|FILE*]...
`unbrotli` is equivalent to `brotli --decompress`
-DESCRIPTION
------------
+# DESCRIPTION
+
`brotli` is a generic-purpose lossless compression algorithm that compresses
data using a combination of a modern variant of the **LZ77** algorithm, Huffman
coding and 2-nd order context modeling, with a compression ratio comparable to
@@ -52,8 +52,7 @@ Default suffix is `.br`, but it could be specified with `--suffix` option.
Conflicting or duplicate _options_ are not allowed.
-OPTIONS
--------
+# OPTIONS
* `-#`:
compression level (0-9); bigger values cause denser, but slower compression
@@ -81,8 +80,8 @@ OPTIONS
increase output verbosity
* `-w NUM`, `--lgwin=NUM`:
set LZ77 window size (0, 10-24) (default: 24); window size is
- `(2**NUM - 16)`; 0 lets compressor decide over the optimal value; bigger
- windows size improve density; decoder might require up to window size
+ `(pow(2, NUM) - 16)`; 0 lets compressor decide over the optimal value;
+ bigger window sizes improve density; decoder might require up to window size
memory to operate
* `-D FILE`, `--dictionary=FILE`:
use FILE as raw (LZ77) dictionary; same dictionary MUST be used both for
@@ -94,8 +93,7 @@ OPTIONS
* `-Z`, `--best`:
use best compression level (default); same as "`-q 11`"
-SEE ALSO
---------
+# SEE ALSO
`brotli` file format is defined in
[RFC 7932](https://www.ietf.org/rfc/rfc7932.txt).
@@ -105,6 +103,6 @@ SEE ALSO
Mailing list: https://groups.google.com/forum/#!forum/brotli
-BUGS
-----
+# BUGS
+
Report bugs at: https://github.com/google/brotli/issues
diff --git a/docs/brotli.1 b/docs/brotli.1
index 1970606..7ca1355 100644
--- a/docs/brotli.1
+++ b/docs/brotli.1
@@ -1,136 +1,129 @@
-.TH "BROTLI" "1" "August 2021" "" "User commands"
-.SH "NAME"
-\fBbrotli\fR \- brotli, unbrotli \- compress or decompress files
+.\" Automatically generated by Pandoc 2.7.3
+.\"
+.TH "brotli" "1" "August 14 2021" "brotli 1.0.9" "User Manual"
+.hy
+.SH NAME
+.PP
+brotli, unbrotli \- compress or decompress files
.SH SYNOPSIS
-.P
-\fBbrotli\fP [\fIOPTION|FILE\fR]\.\.\.
-.P
-\fBunbrotli\fP is equivalent to \fBbrotli \-\-decompress\fP
+.PP
+\f[B]brotli\f[R] [\f[I]OPTION|FILE\f[R]]\&...
+.PP
+\f[B]unbrotli\f[R] is equivalent to \f[B]brotli --decompress\f[R]
.SH DESCRIPTION
-.P
-\fBbrotli\fP is a generic\-purpose lossless compression algorithm that compresses
-data using a combination of a modern variant of the \fBLZ77\fR algorithm, Huffman
-coding and 2\-nd order context modeling, with a compression ratio comparable to
-the best currently available general\-purpose compression methods\. It is similar
-in speed with deflate but offers more dense compression\.
-.P
-\fBbrotli\fP command line syntax similar to \fBgzip (1)\fP and \fBzstd (1)\fP\|\.
-Unlike \fBgzip (1)\fP, source files are preserved by default\. It is possible to
-remove them after processing by using the \fB\-\-rm\fP \fIoption\fR\|\.
-.P
-Arguments that look like "\fB\-\-name\fP" or "\fB\-\-name=value\fP" are \fIoptions\fR\|\. Every
-\fIoption\fR has a short form "\fB\-x\fP" or "\fB\-x value\fP"\. Multiple short form \fIoptions\fR
-could be coalesced:
-.RS 0
-.IP \(bu 2
-"\fB\-\-decompress \-\-stdout \-\-suffix=\.b\fP" works the same as
-.IP \(bu 2
-"\fB\-d \-s \-S \.b\fP" and
-.IP \(bu 2
-"\fB\-dsS \.b\fP"
-
-.RE
-.P
-\fBbrotli\fP has 3 operation modes:
-.RS 0
-.IP \(bu 2
+.PP
+\f[B]brotli\f[R] is a generic-purpose lossless compression algorithm
+that compresses data using a combination of a modern variant of the
+\f[B]LZ77\f[R] algorithm, Huffman coding and 2-nd order context
+modeling, with a compression ratio comparable to the best currently
+available general-purpose compression methods.
+It is similar in speed with deflate but offers more dense compression.
+.PP
+\f[B]brotli\f[R] command line syntax is similar to \f[B]gzip (1)\f[R] and
+\f[B]zstd (1)\f[R].
+Unlike \f[B]gzip (1)\f[R], source files are preserved by default.
+It is possible to remove them after processing by using the
+\f[B]--rm\f[R] \f[I]option\f[R].
+.PP
+Arguments that look like \[lq]\f[B]--name\f[R]\[rq] or
+\[lq]\f[B]--name=value\f[R]\[rq] are \f[I]options\f[R].
+Every \f[I]option\f[R] has a short form \[lq]\f[B]-x\f[R]\[rq] or
+\[lq]\f[B]-x value\f[R]\[rq].
+Multiple short form \f[I]options\f[R] could be coalesced:
+.IP \[bu] 2
+\[lq]\f[B]--decompress --stdout --suffix=.b\f[R]\[rq] works the same as
+.IP \[bu] 2
+\[lq]\f[B]-d -s -S .b\f[R]\[rq] and
+.IP \[bu] 2
+\[lq]\f[B]-dsS .b\f[R]\[rq]
+.PP
+\f[B]brotli\f[R] has 3 operation modes:
+.IP \[bu] 2
default mode is compression;
-.IP \(bu 2
-\fB\-\-decompress\fP option activates decompression mode;
-.IP \(bu 2
-\fB\-\-test\fP option switches to integrity test mode; this option is equivalent to
-"\fB\-\-decompress \-\-stdout\fP" except that the decompressed data is discarded
-instead of being written to standard output\.
-
-.RE
-.P
-Every non\-option argument is a \fIfile\fR entry\. If no \fIfiles\fR are given or \fIfile\fR
-is "\fB\-\fP", \fBbrotli\fP reads from standard input\. All arguments after "\fB\-\-\fP" are
-\fIfile\fR entries\.
-.P
-Unless \fB\-\-stdout\fP or \fB\-\-output\fP is specified, \fIfiles\fR are written to a new file
-whose name is derived from the source \fIfile\fR name:
-.RS 0
-.IP \(bu 2
-when compressing, a suffix is appended to the source filename to
-get the target filename
-.IP \(bu 2
-when decompressing, a suffix is removed from the source filename to
-get the target filename
-
-.RE
-.P
-Default suffix is \fB\|\.br\fP, but it could be specified with \fB\-\-suffix\fP option\.
-.P
-Conflicting or duplicate \fIoptions\fR are not allowed\.
+.IP \[bu] 2
+\f[B]--decompress\f[R] option activates decompression mode;
+.IP \[bu] 2
+\f[B]--test\f[R] option switches to integrity test mode; this option is
+equivalent to \[lq]\f[B]--decompress --stdout\f[R]\[rq] except that the
+decompressed data is discarded instead of being written to standard
+output.
+.PP
+Every non-option argument is a \f[I]file\f[R] entry.
+If no \f[I]files\f[R] are given or \f[I]file\f[R] is
+\[lq]\f[B]-\f[R]\[rq], \f[B]brotli\f[R] reads from standard input.
+All arguments after \[lq]\f[B]--\f[R]\[rq] are \f[I]file\f[R] entries.
+.PP
+Unless \f[B]--stdout\f[R] or \f[B]--output\f[R] is specified,
+\f[I]files\f[R] are written to a new file whose name is derived from the
+source \f[I]file\f[R] name:
+.IP \[bu] 2
+when compressing, a suffix is appended to the source filename to get the
+target filename
+.IP \[bu] 2
+when decompressing, a suffix is removed from the source filename to get
+the target filename
+.PP
+Default suffix is \f[B].br\f[R], but it could be specified with
+\f[B]--suffix\f[R] option.
+.PP
+Conflicting or duplicate \f[I]options\f[R] are not allowed.
.SH OPTIONS
-.RS 0
-.IP \(bu 2
-\fB\-#\fP:
- compression level (0\-9); bigger values cause denser, but slower compression
-.IP \(bu 2
-\fB\-c\fP, \fB\-\-stdout\fP:
- write on standard output
-.IP \(bu 2
-\fB\-d\fP, \fB\-\-decompress\fP:
- decompress mode
-.IP \(bu 2
-\fB\-f\fP, \fB\-\-force\fP:
- force output file overwrite
-.IP \(bu 2
-\fB\-h\fP, \fB\-\-help\fP:
- display this help and exit
-.IP \(bu 2
-\fB\-j\fP, \fB\-\-rm\fP:
- remove source file(s); \fBgzip (1)\fP\-like behaviour
-.IP \(bu 2
-\fB\-k\fP, \fB\-\-keep\fP:
- keep source file(s); \fBzstd (1)\fP\-like behaviour
-.IP \(bu 2
-\fB\-n\fP, \fB\-\-no\-copy\-stat\fP:
- do not copy source file(s) attributes
-.IP \(bu 2
-\fB\-o FILE\fP, \fB\-\-output=FILE\fP
- output file; valid only if there is a single input entry
-.IP \(bu 2
-\fB\-q NUM\fP, \fB\-\-quality=NUM\fP:
- compression level (0\-11); bigger values cause denser, but slower compression
-.IP \(bu 2
-\fB\-t\fP, \fB\-\-test\fP:
- test file integrity mode
-.IP \(bu 2
-\fB\-v\fP, \fB\-\-verbose\fP:
- increase output verbosity
-.IP \(bu 2
-\fB\-w NUM\fP, \fB\-\-lgwin=NUM\fP:
- set LZ77 window size (0, 10\-24) (default: 24); window size is
- \fB(2**NUM \- 16)\fP; 0 lets compressor decide over the optimal value; bigger
- windows size improve density; decoder might require up to window size
- memory to operate
-.IP \(bu 2
-\fB\-D FILE\fP, \fB\-\-dictionary=FILE\fP:
- use FILE as raw (LZ77) dictionary; same dictionary MUST be used both for
- compression and decompression
-.IP \(bu 2
-\fB\-S SUF\fP, \fB\-\-suffix=SUF\fP:
- output file suffix (default: \fB\|\.br\fP)
-.IP \(bu 2
-\fB\-V\fP, \fB\-\-version\fP:
- display version and exit
-.IP \(bu 2
-\fB\-Z\fP, \fB\-\-best\fP:
- use best compression level (default); same as "\fB\-q 11\fP"
-
-.RE
+.IP \[bu] 2
+\f[B]-#\f[R]: compression level (0-9); bigger values cause denser, but
+slower compression
+.IP \[bu] 2
+\f[B]-c\f[R], \f[B]--stdout\f[R]: write on standard output
+.IP \[bu] 2
+\f[B]-d\f[R], \f[B]--decompress\f[R]: decompress mode
+.IP \[bu] 2
+\f[B]-f\f[R], \f[B]--force\f[R]: force output file overwrite
+.IP \[bu] 2
+\f[B]-h\f[R], \f[B]--help\f[R]: display this help and exit
+.IP \[bu] 2
+\f[B]-j\f[R], \f[B]--rm\f[R]: remove source file(s); \f[B]gzip
+(1)\f[R]-like behaviour
+.IP \[bu] 2
+\f[B]-k\f[R], \f[B]--keep\f[R]: keep source file(s); \f[B]zstd
+(1)\f[R]-like behaviour
+.IP \[bu] 2
+\f[B]-n\f[R], \f[B]--no-copy-stat\f[R]: do not copy source file(s)
+attributes
+.IP \[bu] 2
+\f[B]-o FILE\f[R], \f[B]--output=FILE\f[R]: output file; valid only if
+there is a single input entry
+.IP \[bu] 2
+\f[B]-q NUM\f[R], \f[B]--quality=NUM\f[R]: compression level (0-11);
+bigger values cause denser, but slower compression
+.IP \[bu] 2
+\f[B]-t\f[R], \f[B]--test\f[R]: test file integrity mode
+.IP \[bu] 2
+\f[B]-v\f[R], \f[B]--verbose\f[R]: increase output verbosity
+.IP \[bu] 2
+\f[B]-w NUM\f[R], \f[B]--lgwin=NUM\f[R]: set LZ77 window size (0, 10-24)
+(default: 24); window size is \f[B](pow(2, NUM) - 16)\f[R]; 0 lets
+compressor decide over the optimal value; bigger window sizes improve
+density; decoder might require up to window size memory to operate
+.IP \[bu] 2
+\f[B]-D FILE\f[R], \f[B]--dictionary=FILE\f[R]: use FILE as raw (LZ77)
+dictionary; same dictionary MUST be used both for compression and
+decompression
+.IP \[bu] 2
+\f[B]-S SUF\f[R], \f[B]--suffix=SUF\f[R]: output file suffix (default:
+\f[B].br\f[R])
+.IP \[bu] 2
+\f[B]-V\f[R], \f[B]--version\f[R]: display version and exit
+.IP \[bu] 2
+\f[B]-Z\f[R], \f[B]--best\f[R]: use best compression level (default);
+same as \[lq]\f[B]-q 11\f[R]\[rq]
.SH SEE ALSO
-.P
-\fBbrotli\fP file format is defined in
-RFC 7932 \fIhttps://www\.ietf\.org/rfc/rfc7932\.txt\fR\|\.
-.P
-\fBbrotli\fP is open\-sourced under the
-MIT License \fIhttps://opensource\.org/licenses/MIT\fR\|\.
-.P
-Mailing list: https://groups\.google\.com/forum/#!forum/brotli
+.PP
+\f[B]brotli\f[R] file format is defined in RFC
+7932 (https://www.ietf.org/rfc/rfc7932.txt).
+.PP
+\f[B]brotli\f[R] is open-sourced under the MIT
+License (https://opensource.org/licenses/MIT).
+.PP
+Mailing list: https://groups.google.com/forum/#!forum/brotli
.SH BUGS
-.P
-Report bugs at: https://github\.com/google/brotli/issues
+.PP
+Report bugs at: https://github.com/google/brotli/issues
diff --git a/go/WORKSPACE b/go/WORKSPACE
index 03d38da..570e250 100644
--- a/go/WORKSPACE
+++ b/go/WORKSPACE
@@ -9,10 +9,10 @@ load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive")
http_archive(
name = "io_bazel_rules_go",
- sha256 = "69de5c704a05ff37862f7e0f5534d4f479418afc21806c887db544a316f3cb6b",
+ sha256 = "2b1641428dff9018f9e85c0384f03ec6c10660d935b750e3fa1492a281a53b0f",
urls = [
- "https://mirror.bazel.build/github.com/bazelbuild/rules_go/releases/download/v0.27.0/rules_go-v0.27.0.tar.gz",
- "https://github.com/bazelbuild/rules_go/releases/download/v0.27.0/rules_go-v0.27.0.tar.gz",
+ "https://mirror.bazel.build/github.com/bazelbuild/rules_go/releases/download/v0.29.0/rules_go-v0.29.0.zip",
+ "https://github.com/bazelbuild/rules_go/releases/download/v0.29.0/rules_go-v0.29.0.zip",
],
)
@@ -20,4 +20,17 @@ load("@io_bazel_rules_go//go:deps.bzl", "go_register_toolchains", "go_rules_depe
go_rules_dependencies()
-go_register_toolchains(version = "1.16")
+go_register_toolchains(version = "1.17.1")
+
+http_archive(
+ name = "bazel_gazelle",
+ sha256 = "de69a09dc70417580aabf20a28619bb3ef60d038470c7cf8442fafcf627c21cb",
+ urls = [
+ "https://mirror.bazel.build/github.com/bazelbuild/bazel-gazelle/releases/download/v0.24.0/bazel-gazelle-v0.24.0.tar.gz",
+ "https://github.com/bazelbuild/bazel-gazelle/releases/download/v0.24.0/bazel-gazelle-v0.24.0.tar.gz",
+ ],
+)
+
+load("@bazel_gazelle//:deps.bzl", "gazelle_dependencies")
+
+gazelle_dependencies()
diff --git a/java/org/brotli/dec/BrotliInputStream.java b/java/org/brotli/dec/BrotliInputStream.java
index 5eca238..7bbe2f6 100644
--- a/java/org/brotli/dec/BrotliInputStream.java
+++ b/java/org/brotli/dec/BrotliInputStream.java
@@ -19,6 +19,14 @@ public class BrotliInputStream extends InputStream {
public static final int DEFAULT_INTERNAL_BUFFER_SIZE = 256;
/**
+ * Value expected by InputStream contract when stream is over.
+ *
+ * In Java it is -1.
+ * In C# it is 0 (should be patched during transpilation).
+ */
+ private static final int END_OF_STREAM_MARKER = -1;
+
+ /**
* Internal buffer used for efficient byte-by-byte reading.
*/
private byte[] buffer;
@@ -112,7 +120,8 @@ public class BrotliInputStream extends InputStream {
if (bufferOffset >= remainingBufferBytes) {
remainingBufferBytes = read(buffer, 0, buffer.length);
bufferOffset = 0;
- if (remainingBufferBytes == -1) {
+ if (remainingBufferBytes == END_OF_STREAM_MARKER) {
+ // Both Java and C# return the same value for EOF on single-byte read.
return -1;
}
}
@@ -151,10 +160,9 @@ public class BrotliInputStream extends InputStream {
state.outputLength = destLen;
state.outputUsed = 0;
Decode.decompress(state);
- if (state.outputUsed == 0) {
- return -1;
- }
- return state.outputUsed + copyLen;
+ copyLen += state.outputUsed;
+ copyLen = (copyLen > 0) ? copyLen : END_OF_STREAM_MARKER;
+ return copyLen;
} catch (BrotliRuntimeException ex) {
throw new IOException("Brotli stream decoding failed", ex);
}
diff --git a/java/org/brotli/dec/Decode.java b/java/org/brotli/dec/Decode.java
index b139ef7..c386995 100644
--- a/java/org/brotli/dec/Decode.java
+++ b/java/org/brotli/dec/Decode.java
@@ -919,6 +919,7 @@ final class Decode {
private static int writeRingBuffer(State s) {
int toWrite = Math.min(s.outputLength - s.outputUsed,
s.ringBufferBytesReady - s.ringBufferBytesWritten);
+ // TODO(eustas): DCHECK(toWrite >= 0)
if (toWrite != 0) {
System.arraycopy(s.ringBuffer, s.ringBufferBytesWritten, s.output,
s.outputOffset + s.outputUsed, toWrite);
diff --git a/java/org/brotli/dec/build_defs.bzl b/java/org/brotli/dec/build_defs.bzl
index fd23a0d..d4f280b 100644
--- a/java/org/brotli/dec/build_defs.bzl
+++ b/java/org/brotli/dec/build_defs.bzl
@@ -5,13 +5,20 @@ _TEST_JVM_FLAGS = [
]
def brotli_java_test(name, main_class = None, jvm_flags = None, **kwargs):
- """test duplication rule that creates 32/64-bit test pair."""
+ """test duplication rule that creates 32/64-bit test pair.
+
+ Args:
+ name: target name prefix
+ main_class: override for test_class
+ jvm_flags: base Java VM options
+ **kwargs: pass-through
+ """
if jvm_flags == None:
jvm_flags = []
jvm_flags = jvm_flags + _TEST_JVM_FLAGS
- test_package = native.package_name().replace("/", ".").replace("javatests.", "")
+ test_package = native.package_name().replace("/", ".").replace("third_party.brotli.java.", "")
if main_class == None:
test_class = test_package + "." + name
@@ -23,6 +30,7 @@ def brotli_java_test(name, main_class = None, jvm_flags = None, **kwargs):
main_class = main_class,
test_class = test_class,
jvm_flags = jvm_flags + ["-DBROTLI_32_BIT_CPU=true"],
+ visibility = ["//visibility:private"],
**kwargs
)
@@ -31,5 +39,6 @@ def brotli_java_test(name, main_class = None, jvm_flags = None, **kwargs):
main_class = main_class,
test_class = test_class,
jvm_flags = jvm_flags + ["-DBROTLI_32_BIT_CPU=false"],
+ visibility = ["//visibility:private"],
**kwargs
)
diff --git a/java/org/brotli/wrapper/dec/decoder_jni.cc b/java/org/brotli/wrapper/dec/decoder_jni.cc
index 3328a1a..42e6bae 100644
--- a/java/org/brotli/wrapper/dec/decoder_jni.cc
+++ b/java/org/brotli/wrapper/dec/decoder_jni.cc
@@ -4,12 +4,12 @@
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
-#include "decoder_jni.h"
-
-#include <new>
+#include "decoder_jni.h" // NOLINT: build/include
#include <brotli/decode.h>
+#include <new>
+
namespace {
/* A structure used to persist the decoder's state in between calls. */
typedef struct DecoderHandle {
diff --git a/java/org/brotli/wrapper/dec/decoder_jni_onload.cc b/java/org/brotli/wrapper/dec/decoder_jni_onload.cc
index 2f93de0..b69f954 100644
--- a/java/org/brotli/wrapper/dec/decoder_jni_onload.cc
+++ b/java/org/brotli/wrapper/dec/decoder_jni_onload.cc
@@ -6,7 +6,7 @@
#include <jni.h>
-#include "decoder_jni.h"
+#include "decoder_jni.h" // NOLINT: build/include
#ifdef __cplusplus
extern "C" {
@@ -36,7 +36,7 @@ JNIEXPORT jint JNI_OnLoad(JavaVM* vm, void* reserved) {
}
jclass clazz =
- env->FindClass("com/google/compression/brotli/wrapper/dec/DecoderJNI");
+ env->FindClass("org/brotli/wrapper/dec/DecoderJNI");
if (clazz == nullptr) {
return -1;
}
diff --git a/java/org/brotli/wrapper/enc/EncoderJNI.java b/java/org/brotli/wrapper/enc/EncoderJNI.java
index 3e77207..b8e32d2 100644
--- a/java/org/brotli/wrapper/enc/EncoderJNI.java
+++ b/java/org/brotli/wrapper/enc/EncoderJNI.java
@@ -30,8 +30,10 @@ class EncoderJNI {
private static class PreparedDictionaryImpl implements PreparedDictionary {
private ByteBuffer data;
+ /** Reference to (non-copied) LZ data. */
+ private ByteBuffer rawData;
- private PreparedDictionaryImpl(ByteBuffer data) {
+ /**
+ * Wraps a native prepared dictionary.
+ *
+ * @param data handle of the native prepared dictionary
+ * @param rawData source LZ data the dictionary was built from; retained
+ * because it is not copied and so must stay reachable while this
+ * wrapper is alive
+ */
+ private PreparedDictionaryImpl(ByteBuffer data, ByteBuffer rawData) {
this.data = data;
+ // Without this assignment the rawData field stays null and nothing here
+ // keeps the non-copied source buffer from being garbage collected.
+ this.rawData = rawData;
}
@@ -45,6 +47,7 @@ class EncoderJNI {
try {
ByteBuffer data = this.data;
this.data = null;
+ this.rawData = null;
nativeDestroyDictionary(data);
} finally {
super.finalize();
@@ -66,7 +69,7 @@ class EncoderJNI {
if (dictionaryData == null) {
throw new IllegalStateException("OOM");
}
- return new PreparedDictionaryImpl(dictionaryData);
+ return new PreparedDictionaryImpl(dictionaryData, dictionary);
}
static class Wrapper {
diff --git a/java/org/brotli/wrapper/enc/encoder_jni.cc b/java/org/brotli/wrapper/enc/encoder_jni.cc
index adcc7bf..796908b 100644
--- a/java/org/brotli/wrapper/enc/encoder_jni.cc
+++ b/java/org/brotli/wrapper/enc/encoder_jni.cc
@@ -4,12 +4,11 @@
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
+#include <brotli/encode.h>
#include <jni.h>
#include <new>
-#include <brotli/encode.h>
-
namespace {
/* A structure used to persist the encoder's state in between calls. */
typedef struct EncoderHandle {
diff --git a/python/_brotli.cc b/python/_brotli.cc
index d4075bd..54c7363 100644
--- a/python/_brotli.cc
+++ b/python/_brotli.cc
@@ -2,11 +2,12 @@
#include <Python.h>
#include <bytesobject.h>
#include <structmember.h>
-#include <vector>
-#include "../common/version.h"
+
#include <brotli/decode.h>
#include <brotli/encode.h>
+#include <vector>
+
#if PY_MAJOR_VERSION >= 3
#define PyInt_Check PyLong_Check
#define PyInt_AsLong PyLong_AsLong
@@ -745,8 +746,9 @@ PyMODINIT_FUNC INIT_BROTLI(void) {
PyModule_AddIntConstant(m, "MODE_FONT", (int) BROTLI_MODE_FONT);
char version[16];
+ uint32_t decoderVersion = BrotliDecoderVersion();
snprintf(version, sizeof(version), "%d.%d.%d",
- BROTLI_VERSION >> 24, (BROTLI_VERSION >> 12) & 0xFFF, BROTLI_VERSION & 0xFFF);
+ decoderVersion >> 24, (decoderVersion >> 12) & 0xFFF, decoderVersion & 0xFFF);
PyModule_AddStringConstant(m, "__version__", version);
RETURN_BROTLI;
diff --git a/research/brotli_decoder.c b/research/brotli_decoder.c
index 3febcbd..f50fb34 100644
--- a/research/brotli_decoder.c
+++ b/research/brotli_decoder.c
@@ -7,6 +7,8 @@
#include <stdio.h>
#include <stdlib.h>
+#include <brotli/decode.h>
+
#if !defined(_WIN32)
#include <unistd.h>
#else
@@ -18,8 +20,6 @@
#endif
#endif
-#include <brotli/decode.h>
-
#define BUFFER_SIZE (1u << 20)
typedef struct Context {
diff --git a/scripts/sources.lst b/scripts/sources.lst
index dd50a45..2848cc5 100644
--- a/scripts/sources.lst
+++ b/scripts/sources.lst
@@ -97,6 +97,7 @@ BROTLI_ENC_H = \
c/enc/prefix.h \
c/enc/quality.h \
c/enc/ringbuffer.h \
+ c/enc/state.h \
c/enc/static_dict.h \
c/enc/static_dict_lut.h \
c/enc/utf8_util.h \