aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorEugene Kliuchnikov <eustas.ru@gmail.com>2021-08-04 14:42:02 +0200
committerGitHub <noreply@github.com>2021-08-04 14:42:02 +0200
commit19d86fb9a60aa7034d4981b69a5b656f5b90017e (patch)
tree603765aee7a83b4777b88152bbe38fa5c25710cb
parent630b5084ee255549d25d6da7ec50b7a53861d95a (diff)
downloadbrotli-19d86fb9a60aa7034d4981b69a5b656f5b90017e.zip
brotli-19d86fb9a60aa7034d4981b69a5b656f5b90017e.tar.gz
brotli-19d86fb9a60aa7034d4981b69a5b656f5b90017e.tar.bz2
Merge-in SharedDictionary feature (#916)
Co-authored-by: Eugene Kliuchnikov <eustas@chromium.org>
-rw-r--r--c/common/shared_dictionary.c515
-rw-r--r--c/common/shared_dictionary_internal.h74
-rw-r--r--c/dec/decode.c194
-rw-r--r--c/dec/state.c10
-rw-r--r--c/dec/state.h19
-rw-r--r--c/enc/backward_references.c61
-rw-r--r--c/enc/backward_references_hq.c98
-rw-r--r--c/enc/backward_references_inc.h36
-rw-r--r--c/enc/compound_dictionary.c200
-rw-r--r--c/enc/compound_dictionary.h60
-rw-r--r--c/enc/encode.c163
-rw-r--r--c/enc/encoder_dict.c597
-rw-r--r--c/enc/encoder_dict.h113
-rw-r--r--c/enc/hash.h200
-rw-r--r--c/enc/params.h3
-rw-r--r--c/enc/static_dict.c56
-rw-r--r--c/include/brotli/decode.h26
-rw-r--r--c/include/brotli/encode.h46
-rw-r--r--c/include/brotli/shared_dictionary.h97
-rw-r--r--c/tools/brotli.c97
-rw-r--r--c/tools/brotli.md3
-rw-r--r--java/org/brotli/wrapper/dec/BUILD14
-rw-r--r--java/org/brotli/wrapper/dec/BrotliDecoderChannel.java5
-rw-r--r--java/org/brotli/wrapper/dec/BrotliInputStream.java5
-rw-r--r--java/org/brotli/wrapper/dec/CornerCasesTest.java33
-rw-r--r--java/org/brotli/wrapper/dec/Decoder.java6
-rw-r--r--java/org/brotli/wrapper/dec/DecoderJNI.java14
-rw-r--r--java/org/brotli/wrapper/dec/decoder_jni.cc50
-rw-r--r--java/org/brotli/wrapper/enc/BUILD7
-rw-r--r--java/org/brotli/wrapper/enc/BrotliEncoderChannel.java6
-rw-r--r--java/org/brotli/wrapper/enc/BrotliOutputStream.java5
-rw-r--r--java/org/brotli/wrapper/enc/Encoder.java23
-rw-r--r--java/org/brotli/wrapper/enc/EncoderJNI.java58
-rw-r--r--java/org/brotli/wrapper/enc/UseCompoundDictionaryTest.java114
-rw-r--r--java/org/brotli/wrapper/enc/encoder_jni.cc87
-rw-r--r--scripts/sources.lst5
-rw-r--r--setup.py4
37 files changed, 3066 insertions, 38 deletions
diff --git a/c/common/shared_dictionary.c b/c/common/shared_dictionary.c
new file mode 100644
index 0000000..c7d05c5
--- /dev/null
+++ b/c/common/shared_dictionary.c
@@ -0,0 +1,515 @@
+/* Copyright 2017 Google Inc. All Rights Reserved.
+
+ Distributed under MIT license.
+ See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* Shared Dictionary definition and utilities. */
+
+#include <brotli/shared_dictionary.h>
+
+#include <memory.h>
+#include <stdlib.h> /* malloc, free */
+#include <stdio.h>
+
+#include "./dictionary.h"
+#include "./platform.h"
+#include "./shared_dictionary_internal.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+#define BROTLI_NUM_ENCODED_LENGTHS (SHARED_BROTLI_MAX_DICTIONARY_WORD_LENGTH \
+ - SHARED_BROTLI_MIN_DICTIONARY_WORD_LENGTH + 1)
+
+/* Max allowed by spec */
+#define BROTLI_MAX_SIZE_BITS 15u
+
+/* Returns BROTLI_TRUE on success, BROTLI_FALSE on failure. */
+static BROTLI_BOOL ReadBool(const uint8_t* encoded, size_t size, size_t* pos,
+ BROTLI_BOOL* result) {
+ uint8_t value;
+ size_t position = *pos;
+ if (position >= size) return BROTLI_FALSE; /* past file end */
+ value = encoded[position++];
+ if (value > 1) return BROTLI_FALSE; /* invalid bool */
+ *result = TO_BROTLI_BOOL(value);
+ *pos = position;
+ return BROTLI_TRUE; /* success */
+}
+
+/* Returns BROTLI_TRUE on success, BROTLI_FALSE on failure. */
+static BROTLI_BOOL ReadUint8(const uint8_t* encoded, size_t size, size_t* pos,
+ uint8_t* result) {
+ size_t position = *pos;
+ if (position + sizeof(uint8_t) > size) return BROTLI_FALSE;
+ *result = encoded[position++];
+ *pos = position;
+ return BROTLI_TRUE;
+}
+
+/* Returns BROTLI_TRUE on success, BROTLI_FALSE on failure. */
+static BROTLI_BOOL ReadUint16(const uint8_t* encoded, size_t size, size_t* pos,
+ uint16_t* result) {
+ size_t position = *pos;
+ if (position + sizeof(uint16_t) > size) return BROTLI_FALSE;
+ *result = BROTLI_UNALIGNED_LOAD16LE(&encoded[position]);
+ position += 2;
+ *pos = position;
+ return BROTLI_TRUE;
+}
+
+/* Reads a varint into a uint32_t, and returns error if it's too large */
+/* Returns BROTLI_TRUE on success, BROTLI_FALSE on failure. */
+static BROTLI_BOOL ReadVarint32(const uint8_t* encoded, size_t size,
+ size_t* pos, uint32_t* result) {
+ int num = 0;
+ uint8_t byte;
+ *result = 0;
+ for (;;) {
+ if (*pos >= size) return BROTLI_FALSE;
+ byte = encoded[(*pos)++];
+ if (num == 4 && byte > 15) return BROTLI_FALSE;
+ *result |= (uint32_t)(byte & 127) << (num * 7);
+ if (byte < 128) return BROTLI_TRUE;
+ num++;
+ }
+}
+
+/* Returns the total length of word list. */
+static size_t BrotliSizeBitsToOffsets(const uint8_t* size_bits_by_length,
+ uint32_t* offsets_by_length) {
+ uint32_t pos = 0;
+ uint32_t i;
+ for (i = 0; i <= SHARED_BROTLI_MAX_DICTIONARY_WORD_LENGTH; i++) {
+ offsets_by_length[i] = pos;
+ if (size_bits_by_length[i] != 0) {
+ pos += i << size_bits_by_length[i];
+ }
+ }
+ return pos;
+}
+
+static BROTLI_BOOL ParseWordList(size_t size, const uint8_t* encoded,
+ size_t* pos, BrotliDictionary* out) {
+ size_t offset;
+ size_t i;
+ size_t position = *pos;
+ if (position + BROTLI_NUM_ENCODED_LENGTHS > size) {
+ return BROTLI_FALSE;
+ }
+
+ memset(out->size_bits_by_length, 0, SHARED_BROTLI_MIN_DICTIONARY_WORD_LENGTH);
+ memcpy(out->size_bits_by_length + SHARED_BROTLI_MIN_DICTIONARY_WORD_LENGTH,
+ &encoded[position], BROTLI_NUM_ENCODED_LENGTHS);
+ for (i = SHARED_BROTLI_MIN_DICTIONARY_WORD_LENGTH;
+ i <= SHARED_BROTLI_MAX_DICTIONARY_WORD_LENGTH; i++) {
+ if (out->size_bits_by_length[i] > BROTLI_MAX_SIZE_BITS) {
+ return BROTLI_FALSE;
+ }
+ }
+ position += BROTLI_NUM_ENCODED_LENGTHS;
+ offset = BrotliSizeBitsToOffsets(
+ out->size_bits_by_length, out->offsets_by_length);
+
+ out->data = &encoded[position];
+ out->data_size = offset;
+ position += offset;
+ if (position > size) return BROTLI_FALSE;
+ *pos = position;
+ return BROTLI_TRUE;
+}
+
+/* Computes the cutOffTransforms of a BrotliTransforms which already has the
+ transforms data correctly filled in. */
+static void ComputeCutoffTransforms(BrotliTransforms* transforms) {
+ uint32_t i;
+ for (i = 0; i < BROTLI_TRANSFORMS_MAX_CUT_OFF + 1; i++) {
+ transforms->cutOffTransforms[i] = -1;
+ }
+ for (i = 0; i < transforms->num_transforms; i++) {
+ const uint8_t* prefix = BROTLI_TRANSFORM_PREFIX(transforms, i);
+ uint8_t type = BROTLI_TRANSFORM_TYPE(transforms, i);
+ const uint8_t* suffix = BROTLI_TRANSFORM_SUFFIX(transforms, i);
+ if (type <= BROTLI_TRANSFORM_OMIT_LAST_9 && *prefix == 0 && *suffix == 0 &&
+ transforms->cutOffTransforms[type] == -1) {
+ transforms->cutOffTransforms[type] = (int16_t)i;
+ }
+ }
+}
+
+static BROTLI_BOOL ParsePrefixSuffixTable(size_t size, const uint8_t* encoded,
+ size_t* pos, BrotliTransforms* out, uint16_t* out_table,
+ size_t* out_table_size) {
+ size_t position = *pos;
+ size_t offset = 0;
+ size_t stringlet_count = 0; /* NUM_PREFIX_SUFFIX */
+ size_t data_length = 0;
+
+ /* PREFIX_SUFFIX_LENGTH */
+ if (!ReadUint16(encoded, size, &position, &out->prefix_suffix_size)) {
+ return BROTLI_FALSE;
+ }
+ data_length = out->prefix_suffix_size;
+
+ /* Must at least have space for null terminator. */
+ if (data_length < 1) return BROTLI_FALSE;
+ out->prefix_suffix = &encoded[position];
+ if (position + data_length >= size) return BROTLI_FALSE;
+ while (BROTLI_TRUE) {
+ /* STRING_LENGTH */
+ size_t stringlet_len = encoded[position + offset];
+ out_table[stringlet_count] = (uint16_t)offset;
+ stringlet_count++;
+ offset++;
+ if (stringlet_len == 0) {
+ if (offset == data_length) {
+ break;
+ } else {
+ return BROTLI_FALSE;
+ }
+ }
+ if (stringlet_count > 255) return BROTLI_FALSE;
+ offset += stringlet_len;
+ if (offset >= data_length) return BROTLI_FALSE;
+ }
+
+ position += data_length;
+ *pos = position;
+ *out_table_size = (uint16_t)stringlet_count;
+ return BROTLI_TRUE;
+}
+
+static BROTLI_BOOL ParseTransformsList(size_t size, const uint8_t* encoded,
+ size_t* pos, BrotliTransforms* out, uint16_t* prefix_suffix_table,
+ size_t* prefix_suffix_count) {
+ uint32_t i;
+ BROTLI_BOOL has_params = BROTLI_FALSE;
+ BROTLI_BOOL prefix_suffix_ok = BROTLI_FALSE;
+ size_t position = *pos;
+ size_t stringlet_cnt = 0;
+ if (position >= size) return BROTLI_FALSE;
+
+ prefix_suffix_ok = ParsePrefixSuffixTable(
+ size, encoded, &position, out, prefix_suffix_table, &stringlet_cnt);
+ if (!prefix_suffix_ok) return BROTLI_FALSE;
+ out->prefix_suffix_map = prefix_suffix_table;
+ *prefix_suffix_count = stringlet_cnt;
+
+ out->num_transforms = encoded[position++];
+ out->transforms = &encoded[position];
+ position += (size_t)out->num_transforms * 3;
+ if (position > size) return BROTLI_FALSE;
+ /* Check for errors and read extra parameters. */
+ for (i = 0; i < out->num_transforms; i++) {
+ uint8_t prefix_id = BROTLI_TRANSFORM_PREFIX_ID(out, i);
+ uint8_t type = BROTLI_TRANSFORM_TYPE(out, i);
+ uint8_t suffix_id = BROTLI_TRANSFORM_SUFFIX_ID(out, i);
+ if (prefix_id >= stringlet_cnt) return BROTLI_FALSE;
+ if (type >= BROTLI_NUM_TRANSFORM_TYPES) return BROTLI_FALSE;
+ if (suffix_id >= stringlet_cnt) return BROTLI_FALSE;
+ if (type == BROTLI_TRANSFORM_SHIFT_FIRST ||
+ type == BROTLI_TRANSFORM_SHIFT_ALL) {
+ has_params = BROTLI_TRUE;
+ }
+ }
+ if (has_params) {
+ out->params = &encoded[position];
+ position += (size_t)out->num_transforms * 2;
+ if (position > size) return BROTLI_FALSE;
+ for (i = 0; i < out->num_transforms; i++) {
+ uint8_t type = BROTLI_TRANSFORM_TYPE(out, i);
+ if (type != BROTLI_TRANSFORM_SHIFT_FIRST &&
+ type != BROTLI_TRANSFORM_SHIFT_ALL) {
+ if (out->params[i * 2] != 0 || out->params[i * 2 + 1] != 0) {
+ return BROTLI_FALSE;
+ }
+ }
+ }
+ } else {
+ out->params = NULL;
+ }
+ ComputeCutoffTransforms(out);
+ *pos = position;
+ return BROTLI_TRUE;
+}
+
+static BROTLI_BOOL DryParseDictionary(const uint8_t* encoded,
+ size_t size, uint32_t* num_prefix, BROTLI_BOOL* is_custom_static_dict) {
+ size_t pos = 0;
+ uint32_t chunk_size = 0;
+ uint8_t num_word_lists;
+ uint8_t num_transform_lists;
+ *is_custom_static_dict = BROTLI_FALSE;
+ *num_prefix = 0;
+
+ /* Skip magic header bytes. */
+ pos += 2;
+
+ /* LZ77_DICTIONARY_LENGTH */
+ if (!ReadVarint32(encoded, size, &pos, &chunk_size)) return BROTLI_FALSE;
+ if (chunk_size != 0) {
+ /* This limitation is not specified but the 32-bit Brotli decoder for now */
+ if (chunk_size > 1073741823) return BROTLI_FALSE;
+ *num_prefix = 1;
+ if (pos + chunk_size > size) return BROTLI_FALSE;
+ pos += chunk_size;
+ }
+
+ if (!ReadUint8(encoded, size, &pos, &num_word_lists)) {
+ return BROTLI_FALSE;
+ }
+ if (!ReadUint8(encoded, size, &pos, &num_transform_lists)) {
+ return BROTLI_FALSE;
+ }
+
+ if (num_word_lists > 0 || num_transform_lists > 0) {
+ *is_custom_static_dict = BROTLI_TRUE;
+ }
+
+ return BROTLI_TRUE;
+}
+
+static BROTLI_BOOL ParseDictionary(const uint8_t* encoded, size_t size,
+ BrotliSharedDictionary* dict) {
+ uint32_t i;
+ size_t pos = 0;
+ uint32_t chunk_size = 0;
+ size_t total_prefix_suffix_count = 0;
+ size_t trasform_list_start[SHARED_BROTLI_NUM_DICTIONARY_CONTEXTS];
+ uint16_t temporary_prefix_suffix_table[256];
+
+ /* Skip magic header bytes. */
+ pos += 2;
+
+ /* LZ77_DICTIONARY_LENGTH */
+ if (!ReadVarint32(encoded, size, &pos, &chunk_size)) return BROTLI_FALSE;
+ if (chunk_size != 0) {
+ if (pos + chunk_size > size) return BROTLI_FALSE;
+ dict->prefix_size[dict->num_prefix] = chunk_size;
+ dict->prefix[dict->num_prefix] = &encoded[pos];
+ dict->num_prefix++;
+ /* LZ77_DICTIONARY_LENGTH bytes. */
+ pos += chunk_size;
+ }
+
+ /* NUM_WORD_LISTS */
+ if (!ReadUint8(encoded, size, &pos, &dict->num_word_lists)) {
+ return BROTLI_FALSE;
+ }
+ if (dict->num_word_lists > SHARED_BROTLI_NUM_DICTIONARY_CONTEXTS) {
+ return BROTLI_FALSE;
+ }
+
+ if (dict->num_word_lists != 0) {
+ dict->words_instances = (BrotliDictionary*)dict->alloc_func(
+ dict->memory_manager_opaque,
+ dict->num_word_lists * sizeof(*dict->words_instances));
+ if (!dict->words_instances) return BROTLI_FALSE; /* OOM */
+ }
+ for (i = 0; i < dict->num_word_lists; i++) {
+ if (!ParseWordList(size, encoded, &pos, &dict->words_instances[i])) {
+ return BROTLI_FALSE;
+ }
+ }
+
+ /* NUM_TRANSFORM_LISTS */
+ if (!ReadUint8(encoded, size, &pos, &dict->num_transform_lists)) {
+ return BROTLI_FALSE;
+ }
+ if (dict->num_transform_lists > SHARED_BROTLI_NUM_DICTIONARY_CONTEXTS) {
+ return BROTLI_FALSE;
+ }
+
+ if (dict->num_transform_lists != 0) {
+ dict->transforms_instances = (BrotliTransforms*)dict->alloc_func(
+ dict->memory_manager_opaque,
+ dict->num_transform_lists * sizeof(*dict->transforms_instances));
+ if (!dict->transforms_instances) return BROTLI_FALSE; /* OOM */
+ }
+ for (i = 0; i < dict->num_transform_lists; i++) {
+ BROTLI_BOOL ok = BROTLI_FALSE;
+ size_t prefix_suffix_count = 0;
+ trasform_list_start[i] = pos;
+ dict->transforms_instances[i].prefix_suffix_map =
+ temporary_prefix_suffix_table;
+ ok = ParseTransformsList(
+ size, encoded, &pos, &dict->transforms_instances[i],
+ temporary_prefix_suffix_table, &prefix_suffix_count);
+ if (!ok) return BROTLI_FALSE;
+ total_prefix_suffix_count += prefix_suffix_count;
+ }
+ if (total_prefix_suffix_count != 0) {
+ dict->prefix_suffix_maps = (uint16_t*)dict->alloc_func(
+ dict->memory_manager_opaque,
+ total_prefix_suffix_count * sizeof(*dict->prefix_suffix_maps));
+ if (!dict->prefix_suffix_maps) return BROTLI_FALSE; /* OOM */
+ }
+ total_prefix_suffix_count = 0;
+ for (i = 0; i < dict->num_transform_lists; i++) {
+ size_t prefix_suffix_count = 0;
+ size_t position = trasform_list_start[i];
+ uint16_t* prefix_suffix_map =
+ &dict->prefix_suffix_maps[total_prefix_suffix_count];
+ BROTLI_BOOL ok = ParsePrefixSuffixTable(
+ size, encoded, &position, &dict->transforms_instances[i],
+ prefix_suffix_map, &prefix_suffix_count);
+ if (!ok) return BROTLI_FALSE;
+ dict->transforms_instances[i].prefix_suffix_map = prefix_suffix_map;
+ total_prefix_suffix_count += prefix_suffix_count;
+ }
+
+ if (dict->num_word_lists != 0 || dict->num_transform_lists != 0) {
+ if (!ReadUint8(encoded, size, &pos, &dict->num_dictionaries)) {
+ return BROTLI_FALSE;
+ }
+ if (dict->num_dictionaries == 0 ||
+ dict->num_dictionaries > SHARED_BROTLI_NUM_DICTIONARY_CONTEXTS) {
+ return BROTLI_FALSE;
+ }
+ for (i = 0; i < dict->num_dictionaries; i++) {
+ uint8_t words_index;
+ uint8_t transforms_index;
+ if (!ReadUint8(encoded, size, &pos, &words_index)) {
+ return BROTLI_FALSE;
+ }
+ if (words_index > dict->num_word_lists) return BROTLI_FALSE;
+ if (!ReadUint8(encoded, size, &pos, &transforms_index)) {
+ return BROTLI_FALSE;
+ }
+ if (transforms_index > dict->num_transform_lists) return BROTLI_FALSE;
+ dict->words[i] = words_index == dict->num_word_lists ?
+ BrotliGetDictionary() : &dict->words_instances[words_index];
+ dict->transforms[i] = transforms_index == dict->num_transform_lists ?
+ BrotliGetTransforms(): &dict->transforms_instances[transforms_index];
+ }
+ /* CONTEXT_ENABLED */
+ if (!ReadBool(encoded, size, &pos, &dict->context_based)) {
+ return BROTLI_FALSE;
+ }
+
+ /* CONTEXT_MAP */
+ if (dict->context_based) {
+ for (i = 0; i < SHARED_BROTLI_NUM_DICTIONARY_CONTEXTS; i++) {
+ if (!ReadUint8(encoded, size, &pos, &dict->context_map[i])) {
+ return BROTLI_FALSE;
+ }
+ if (dict->context_map[i] >= dict->num_dictionaries) {
+ return BROTLI_FALSE;
+ }
+ }
+ }
+ } else {
+ dict->context_based = BROTLI_FALSE;
+ dict->num_dictionaries = 1;
+ dict->words[0] = BrotliGetDictionary();
+ dict->transforms[0] = BrotliGetTransforms();
+ }
+
+ return BROTLI_TRUE;
+}
+
+/* Decodes shared dictionary and verifies correctness.
+ Returns BROTLI_TRUE if dictionary is valid, BROTLI_FALSE otherwise.
+ The BrotliSharedDictionary must already have been initialized. If the
+ BrotliSharedDictionary already contains data, compound dictionaries
+ will be appended, but an error will be returned if it already has
+ custom words or transforms.
+ TODO: link to RFC for shared brotli once published. */
+static BROTLI_BOOL DecodeSharedDictionary(
+ const uint8_t* encoded, size_t size, BrotliSharedDictionary* dict) {
+ uint32_t num_prefix = 0;
+ BROTLI_BOOL is_custom_static_dict = BROTLI_FALSE;
+ BROTLI_BOOL has_custom_static_dict =
+ dict->num_word_lists > 0 || dict->num_transform_lists > 0;
+
+ /* Check magic header bytes. */
+ if (size < 2) return BROTLI_FALSE;
+ if (encoded[0] != 0x91 || encoded[1] != 0) return BROTLI_FALSE;
+
+ if (!DryParseDictionary(encoded, size, &num_prefix, &is_custom_static_dict)) {
+ return BROTLI_FALSE;
+ }
+
+ if (num_prefix + dict->num_prefix > SHARED_BROTLI_MAX_COMPOUND_DICTS) {
+ return BROTLI_FALSE;
+ }
+
+ /* Cannot combine different static dictionaries, only prefix dictionaries */
+ if (has_custom_static_dict && is_custom_static_dict) return BROTLI_FALSE;
+
+ return ParseDictionary(encoded, size, dict);
+}
+
+void BrotliSharedDictionaryDestroyInstance(
+ BrotliSharedDictionary* dict) {
+ if (!dict) {
+ return;
+ } else {
+ brotli_free_func free_func = dict->free_func;
+ void* opaque = dict->memory_manager_opaque;
+ /* Cleanup. */
+ free_func(opaque, dict->words_instances);
+ free_func(opaque, dict->transforms_instances);
+ free_func(opaque, dict->prefix_suffix_maps);
+ /* Self-destruction. */
+ free_func(opaque, dict);
+ }
+}
+
+BROTLI_BOOL BrotliSharedDictionaryAttach(
+ BrotliSharedDictionary* dict, BrotliSharedDictionaryType type,
+ size_t data_size, const uint8_t* data) {
+ if (!dict) {
+ return BROTLI_FALSE;
+ }
+ if (type == BROTLI_SHARED_DICTIONARY_SERIALIZED) {
+ return DecodeSharedDictionary(data, data_size, dict);
+ } else if (type == BROTLI_SHARED_DICTIONARY_RAW) {
+ if (dict->num_prefix >= SHARED_BROTLI_MAX_COMPOUND_DICTS) {
+ return BROTLI_FALSE;
+ }
+ dict->prefix_size[dict->num_prefix] = data_size;
+ dict->prefix[dict->num_prefix] = data;
+ dict->num_prefix++;
+ return BROTLI_TRUE;
+ } else {
+ return BROTLI_FALSE;
+ }
+}
+
+BrotliSharedDictionary* BrotliSharedDictionaryCreateInstance(
+ brotli_alloc_func alloc_func, brotli_free_func free_func, void* opaque) {
+ BrotliSharedDictionary* dict = 0;
+ if (!alloc_func && !free_func) {
+ dict = (BrotliSharedDictionary*)malloc(sizeof(BrotliSharedDictionary));
+ } else if (alloc_func && free_func) {
+ dict = (BrotliSharedDictionary*)alloc_func(
+ opaque, sizeof(BrotliSharedDictionary));
+ }
+ if (dict == 0) {
+ return 0;
+ }
+
+ /* TODO: explicitly initialize all the fields? */
+ memset(dict, 0, sizeof(BrotliSharedDictionary));
+
+ dict->context_based = BROTLI_FALSE;
+ dict->num_dictionaries = 1;
+ dict->num_word_lists = 0;
+ dict->num_transform_lists = 0;
+
+ dict->words[0] = BrotliGetDictionary();
+ dict->transforms[0] = BrotliGetTransforms();
+
+ dict->alloc_func = alloc_func ? alloc_func : BrotliDefaultAllocFunc;
+ dict->free_func = free_func ? free_func : BrotliDefaultFreeFunc;
+ dict->memory_manager_opaque = opaque;
+
+ return dict;
+}
+
+#if defined(__cplusplus) || defined(c_plusplus)
+} /* extern "C" */
+#endif
diff --git a/c/common/shared_dictionary_internal.h b/c/common/shared_dictionary_internal.h
new file mode 100644
index 0000000..a2b76ee
--- /dev/null
+++ b/c/common/shared_dictionary_internal.h
@@ -0,0 +1,74 @@
+/* Copyright 2017 Google Inc. All Rights Reserved.
+
+ Distributed under MIT license.
+ See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* (Transparent) Shared Dictionary definition. */
+
+#ifndef BROTLI_COMMON_SHARED_DICTIONARY_INTERNAL_H_
+#define BROTLI_COMMON_SHARED_DICTIONARY_INTERNAL_H_
+
+#include "./dictionary.h"
+#include <brotli/shared_dictionary.h>
+#include "./transform.h"
+#include <brotli/types.h>
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+struct BrotliSharedDictionaryStruct {
+ /* LZ77 prefixes (compound dictionary). */
+ uint32_t num_prefix; /* max SHARED_BROTLI_MAX_COMPOUND_DICTS */
+ size_t prefix_size[SHARED_BROTLI_MAX_COMPOUND_DICTS];
+ const uint8_t* prefix[SHARED_BROTLI_MAX_COMPOUND_DICTS];
+
+ /* If set, the context map is used to select word and transform list from 64
+ contexts, if not set, the context map is not used and only words[0] and
+ transforms[0] are to be used. */
+ BROTLI_BOOL context_based;
+
+ uint8_t context_map[SHARED_BROTLI_NUM_DICTIONARY_CONTEXTS];
+
+ /* Amount of word_list+transform_list combinations. */
+ uint8_t num_dictionaries;
+
+ /* Must use num_dictionaries values. */
+ const BrotliDictionary* words[SHARED_BROTLI_NUM_DICTIONARY_CONTEXTS];
+
+ /* Must use num_dictionaries values. */
+ const BrotliTransforms* transforms[SHARED_BROTLI_NUM_DICTIONARY_CONTEXTS];
+
+ /* Amount of custom word lists. May be 0 if only Brotli's built-in is used */
+ uint8_t num_word_lists;
+
+ /* Contents of the custom words lists. Must be NULL if num_word_lists is 0. */
+ BrotliDictionary* words_instances;
+
+ /* Amount of custom transform lists. May be 0 if only Brotli's built-in is
+ used */
+ uint8_t num_transform_lists;
+
+ /* Contents of the custom transform lists. Must be NULL if num_transform_lists
+ is 0. */
+ BrotliTransforms* transforms_instances;
+
+ /* Concatenated prefix_suffix_maps of the custom transform lists. Must be NULL
+ if num_transform_lists is 0. */
+ uint16_t* prefix_suffix_maps;
+
+ /* Memory management */
+ brotli_alloc_func alloc_func;
+ brotli_free_func free_func;
+ void* memory_manager_opaque;
+};
+
+typedef struct BrotliSharedDictionaryStruct BrotliSharedDictionaryInternal;
+#define BrotliSharedDictionary BrotliSharedDictionaryInternal
+
+#if defined(__cplusplus) || defined(c_plusplus)
+} /* extern "C" */
+#endif
+
+#endif /* BROTLI_COMMON_SHARED_DICTIONARY_INTERNAL_H_ */
diff --git a/c/dec/decode.c b/c/dec/decode.c
index 7eee968..12cd502 100644
--- a/c/dec/decode.c
+++ b/c/dec/decode.c
@@ -13,6 +13,7 @@
#include "../common/context.h"
#include "../common/dictionary.h"
#include "../common/platform.h"
+#include "../common/shared_dictionary_internal.h"
#include "../common/transform.h"
#include "../common/version.h"
#include "./bit_reader.h"
@@ -42,8 +43,8 @@ extern "C" {
/* We need the slack region for the following reasons:
- doing up to two 16-byte copies for fast backward copying
- inserting transformed dictionary word:
- 5 prefix + 24 base + 8 suffix */
-static const uint32_t kRingBufferWriteAheadSlack = 42;
+ 255 prefix + 32 base + 255 suffix */
+static const uint32_t kRingBufferWriteAheadSlack = 542;
static const uint8_t kCodeLengthCodeOrder[BROTLI_CODE_LENGTH_CODES] = {
1, 2, 3, 4, 0, 5, 17, 6, 16, 7, 8, 9, 10, 11, 12, 13, 14, 15,
@@ -1403,6 +1404,114 @@ static BrotliDecoderErrorCode BROTLI_NOINLINE CopyUncompressedBlockToOutput(
BROTLI_DCHECK(0); /* Unreachable */
}
+static BROTLI_BOOL AttachCompoundDictionary(
+ BrotliDecoderState* state, const uint8_t* data, size_t size) {
+ BrotliDecoderCompoundDictionary* addon = state->compound_dictionary;
+ if (state->state != BROTLI_STATE_UNINITED) return BROTLI_FALSE;
+ if (!addon) {
+ addon = (BrotliDecoderCompoundDictionary*)BROTLI_DECODER_ALLOC(
+ state, sizeof(BrotliDecoderCompoundDictionary));
+ if (!addon) return BROTLI_FALSE;
+ addon->num_chunks = 0;
+ addon->total_size = 0;
+ addon->br_length = 0;
+ addon->br_copied = 0;
+ addon->block_bits = -1;
+ addon->chunk_offsets[0] = 0;
+ state->compound_dictionary = addon;
+ }
+ if (addon->num_chunks == 15) return BROTLI_FALSE;
+ addon->chunks[addon->num_chunks] = data;
+ addon->num_chunks++;
+ addon->total_size += (int)size;
+ addon->chunk_offsets[addon->num_chunks] = addon->total_size;
+ return BROTLI_TRUE;
+}
+
+static void EnsureCoumpoundDictionaryInitialized(BrotliDecoderState* state) {
+ BrotliDecoderCompoundDictionary* addon = state->compound_dictionary;
+ /* 256 = (1 << 8) slots in block map. */
+ int block_bits = 8;
+ int cursor = 0;
+ int index = 0;
+ if (addon->block_bits != -1) return;
+ while (((addon->total_size - 1) >> block_bits) != 0) block_bits++;
+ block_bits -= 8;
+ addon->block_bits = block_bits;
+ while (cursor < addon->total_size) {
+ while (addon->chunk_offsets[index + 1] < cursor) index++;
+ addon->block_map[cursor >> block_bits] = (uint8_t)index;
+ cursor += 1 << block_bits;
+ }
+}
+
+static BROTLI_BOOL InitializeCompoundDictionaryCopy(BrotliDecoderState* s,
+ int address, int length) {
+ BrotliDecoderCompoundDictionary* addon = s->compound_dictionary;
+ int index;
+ EnsureCoumpoundDictionaryInitialized(s);
+ index = addon->block_map[address >> addon->block_bits];
+ while (address >= addon->chunk_offsets[index + 1]) index++;
+ if (addon->total_size < address + length) return BROTLI_FALSE;
+ /* Update the recent distances cache. */
+ s->dist_rb[s->dist_rb_idx & 3] = s->distance_code;
+ ++s->dist_rb_idx;
+ s->meta_block_remaining_len -= length;
+ addon->br_index = index;
+ addon->br_offset = address - addon->chunk_offsets[index];
+ addon->br_length = length;
+ addon->br_copied = 0;
+ return BROTLI_TRUE;
+}
+
+static int GetCompoundDictionarySize(BrotliDecoderState* s) {
+ return s->compound_dictionary ? s->compound_dictionary->total_size : 0;
+}
+
+static int CopyFromCompoundDictionary(BrotliDecoderState* s, int pos) {
+ BrotliDecoderCompoundDictionary* addon = s->compound_dictionary;
+ int orig_pos = pos;
+ while (addon->br_length != addon->br_copied) {
+ uint8_t* copy_dst = &s->ringbuffer[pos];
+ const uint8_t* copy_src =
+ addon->chunks[addon->br_index] + addon->br_offset;
+ int space = s->ringbuffer_size - pos;
+ int rem_chunk_length = (addon->chunk_offsets[addon->br_index + 1] -
+ addon->chunk_offsets[addon->br_index]) - addon->br_offset;
+ int length = addon->br_length - addon->br_copied;
+ if (length > rem_chunk_length) length = rem_chunk_length;
+ if (length > space) length = space;
+ memcpy(copy_dst, copy_src, (size_t)length);
+ pos += length;
+ addon->br_offset += length;
+ addon->br_copied += length;
+ if (length == rem_chunk_length) {
+ addon->br_index++;
+ addon->br_offset = 0;
+ }
+ if (pos == s->ringbuffer_size) break;
+ }
+ return pos - orig_pos;
+}
+
+BROTLI_BOOL BrotliDecoderAttachDictionary(BrotliDecoderState* state,
+ BrotliSharedDictionaryType type, size_t data_size, const uint8_t* data) {
+ uint32_t i;
+ uint32_t num_prefix_before = state->dictionary->num_prefix;
+ if (state->state != BROTLI_STATE_UNINITED) return BROTLI_FALSE;
+ if (!BrotliSharedDictionaryAttach(state->dictionary, type, data_size, data)) {
+ return BROTLI_FALSE;
+ }
+ for (i = num_prefix_before; i < state->dictionary->num_prefix; i++) {
+ if (!AttachCompoundDictionary(
+ state, state->dictionary->prefix[i],
+ state->dictionary->prefix_size[i])) {
+ return BROTLI_FALSE;
+ }
+ }
+ return BROTLI_TRUE;
+}
+
/* Calculates the smallest feasible ring buffer.
If we know the data size is small, do not allocate more ring buffer
@@ -1737,6 +1846,7 @@ static BROTLI_INLINE BrotliDecoderErrorCode ProcessCommandsInternal(
int i = s->loop_counter;
BrotliDecoderErrorCode result = BROTLI_DECODER_SUCCESS;
BrotliBitReader* br = &s->br;
+ int compound_dictionary_size = GetCompoundDictionarySize(s);
if (!CheckInputAmount(safe, br, 28)) {
result = BROTLI_DECODER_NEEDS_MORE_INPUT;
@@ -1903,20 +2013,75 @@ CommandPostDecodeLiterals:
pos, s->distance_code, i, s->meta_block_remaining_len));
return BROTLI_FAILURE(BROTLI_DECODER_ERROR_FORMAT_DISTANCE);
}
- if (i >= BROTLI_MIN_DICTIONARY_WORD_LENGTH &&
- i <= BROTLI_MAX_DICTIONARY_WORD_LENGTH) {
- int address = s->distance_code - s->max_distance - 1;
- const BrotliDictionary* words = s->dictionary;
- const BrotliTransforms* transforms = s->transforms;
- int offset = (int)s->dictionary->offsets_by_length[i];
- uint32_t shift = s->dictionary->size_bits_by_length[i];
-
+ if (s->distance_code - s->max_distance - 1 < compound_dictionary_size) {
+ int address = compound_dictionary_size -
+ (s->distance_code - s->max_distance);
+ if (!InitializeCompoundDictionaryCopy(s, address, i)) {
+ return BROTLI_FAILURE(BROTLI_DECODER_ERROR_COMPOUND_DICTIONARY);
+ }
+ pos += CopyFromCompoundDictionary(s, pos);
+ if (pos >= s->ringbuffer_size) {
+ s->state = BROTLI_STATE_COMMAND_POST_WRITE_1;
+ goto saveStateAndReturn;
+ }
+ } else if (i >= SHARED_BROTLI_MIN_DICTIONARY_WORD_LENGTH &&
+ i <= SHARED_BROTLI_MAX_DICTIONARY_WORD_LENGTH) {
+ uint8_t p1 = s->ringbuffer[(pos - 1) & s->ringbuffer_mask];
+ uint8_t p2 = s->ringbuffer[(pos - 2) & s->ringbuffer_mask];
+ uint8_t dict_id = s->dictionary->context_based ?
+ s->dictionary->context_map[BROTLI_CONTEXT(p1, p2, s->context_lookup)]
+ : 0;
+ const BrotliDictionary* words = s->dictionary->words[dict_id];
+ const BrotliTransforms* transforms = s->dictionary->transforms[dict_id];
+ int offset = (int)words->offsets_by_length[i];
+ uint32_t shift = words->size_bits_by_length[i];
+ int address =
+ s->distance_code - s->max_distance - 1 - compound_dictionary_size;
int mask = (int)BitMask(shift);
int word_idx = address & mask;
int transform_idx = address >> shift;
/* Compensate double distance-ring-buffer roll. */
s->dist_rb_idx += s->distance_context;
offset += word_idx * i;
+ /* If the distance is out of bound, select a next static dictionary if
+ there exist multiple. */
+ if ((transform_idx >= (int)transforms->num_transforms ||
+ words->size_bits_by_length[i] == 0) &&
+ s->dictionary->num_dictionaries > 1) {
+ uint8_t dict_id2;
+ int dist_remaining = address -
+ (int)(((1u << shift) & ~1u)) * (int)transforms->num_transforms;
+ for (dict_id2 = 0; dict_id2 < s->dictionary->num_dictionaries;
+ dict_id2++) {
+ const BrotliDictionary* words2 = s->dictionary->words[dict_id2];
+ if (dict_id2 != dict_id && words2->size_bits_by_length[i] != 0) {
+ const BrotliTransforms* transforms2 =
+ s->dictionary->transforms[dict_id2];
+ uint32_t shift2 = words2->size_bits_by_length[i];
+ int num = (int)((1u << shift2) & ~1u) *
+ (int)transforms2->num_transforms;
+ if (dist_remaining < num) {
+ dict_id = dict_id2;
+ words = words2;
+ transforms = transforms2;
+ address = dist_remaining;
+ shift = shift2;
+ mask = (int)BitMask(shift);
+ word_idx = address & mask;
+ transform_idx = address >> shift;
+ offset = (int)words->offsets_by_length[i] + word_idx * i;
+ break;
+ }
+ dist_remaining -= num;
+ }
+ }
+ }
+ if (BROTLI_PREDICT_FALSE(words->size_bits_by_length[i] == 0)) {
+ BROTLI_LOG(("Invalid backward reference. pos: %d distance: %d "
+ "len: %d bytes left: %d\n",
+ pos, s->distance_code, i, s->meta_block_remaining_len));
+ return BROTLI_FAILURE(BROTLI_DECODER_ERROR_FORMAT_DICTIONARY);
+ }
if (BROTLI_PREDICT_FALSE(!words->data)) {
return BROTLI_FAILURE(BROTLI_DECODER_ERROR_DICTIONARY_NOT_SET);
}
@@ -1933,6 +2098,10 @@ CommandPostDecodeLiterals:
BROTLI_LOG(("[ProcessCommandsInternal] dictionary word: [%.*s],"
" transform_idx = %d, transformed: [%.*s]\n",
i, word, transform_idx, len, &s->ringbuffer[pos]));
+ if (len == 0 && s->distance_code <= 120) {
+ BROTLI_LOG(("Invalid length-0 dictionary word after transform\n"));
+ return BROTLI_FAILURE(BROTLI_DECODER_ERROR_FORMAT_TRANSFORM);
+ }
}
pos += len;
s->meta_block_remaining_len -= len;
@@ -2483,6 +2652,11 @@ BrotliDecoderResult BrotliDecoderDecompressStream(
s->max_distance = s->max_backward_distance;
}
if (s->state == BROTLI_STATE_COMMAND_POST_WRITE_1) {
+ BrotliDecoderCompoundDictionary* addon = s->compound_dictionary;
+ if (addon && (addon->br_length != addon->br_copied)) {
+ s->pos += CopyFromCompoundDictionary(s, s->pos);
+ if (s->pos >= s->ringbuffer_size) continue;
+ }
if (s->meta_block_remaining_len == 0) {
/* Next metablock, if any. */
s->state = BROTLI_STATE_METABLOCK_DONE;
diff --git a/c/dec/state.c b/c/dec/state.c
index f847836..66d6820 100644
--- a/c/dec/state.c
+++ b/c/dec/state.c
@@ -8,6 +8,7 @@
#include <stdlib.h> /* free, malloc */
+#include "../common/dictionary.h"
#include <brotli/types.h>
#include "./huffman.h"
@@ -81,8 +82,10 @@ BROTLI_BOOL BrotliDecoderStateInit(BrotliDecoderState* s,
s->mtf_upper_bound = 63;
- s->dictionary = BrotliGetDictionary();
- s->transforms = BrotliGetTransforms();
+ s->compound_dictionary = NULL;
+ s->dictionary =
+ BrotliSharedDictionaryCreateInstance(alloc_func, free_func, opaque);
+ if (!s->dictionary) return BROTLI_FALSE;
return BROTLI_TRUE;
}
@@ -129,6 +132,9 @@ void BrotliDecoderStateCleanupAfterMetablock(BrotliDecoderState* s) {
void BrotliDecoderStateCleanup(BrotliDecoderState* s) {
BrotliDecoderStateCleanupAfterMetablock(s);
+ BROTLI_DECODER_FREE(s, s->compound_dictionary);
+ BrotliSharedDictionaryDestroyInstance(s->dictionary);
+ s->dictionary = NULL;
BROTLI_DECODER_FREE(s, s->ringbuffer);
BROTLI_DECODER_FREE(s, s->block_type_trees);
}
diff --git a/c/dec/state.h b/c/dec/state.h
index 54dab69..7127581 100644
--- a/c/dec/state.h
+++ b/c/dec/state.h
@@ -12,6 +12,7 @@
#include "../common/constants.h"
#include "../common/dictionary.h"
#include "../common/platform.h"
+#include <brotli/shared_dictionary.h>
#include "../common/transform.h"
#include <brotli/types.h>
#include "./bit_reader.h"
@@ -189,6 +190,20 @@ typedef enum {
BROTLI_STATE_READ_BLOCK_LENGTH_SUFFIX
} BrotliRunningReadBlockLengthState;
+/* BrotliDecoderState addon, used for Compound Dictionary functionality. */
+typedef struct BrotliDecoderCompoundDictionary {
+ int num_chunks;
+ int total_size;
+ int br_index;
+ int br_offset;
+ int br_length;
+ int br_copied;
+ const uint8_t* chunks[16];
+ int chunk_offsets[16];
+ int block_bits;
+ uint8_t block_map[256];
+} BrotliDecoderCompoundDictionary;
+
typedef struct BrotliMetablockHeaderArena {
BrotliRunningTreeGroupState substate_tree_group;
BrotliRunningContextMapState substate_context_map;
@@ -327,8 +342,8 @@ struct BrotliDecoderStateStruct {
uint8_t* context_map;
uint8_t* context_modes;
- const BrotliDictionary* dictionary;
- const BrotliTransforms* transforms;
+ BrotliSharedDictionary* dictionary;
+ BrotliDecoderCompoundDictionary* compound_dictionary;
uint32_t trivial_literal_contexts[8]; /* 256 bits */
diff --git a/c/enc/backward_references.c b/c/enc/backward_references.c
index a07a617..1ae1146 100644
--- a/c/enc/backward_references.c
+++ b/c/enc/backward_references.c
@@ -9,12 +9,13 @@
#include "./backward_references.h"
#include "../common/constants.h"
-#include "../common/context.h"
#include "../common/dictionary.h"
#include "../common/platform.h"
#include <brotli/types.h>
#include "./command.h"
+#include "./compound_dictionary.h"
#include "./dictionary_hash.h"
+#include "./encoder_dict.h"
#include "./memory.h"
#include "./quality.h"
@@ -52,6 +53,7 @@ static BROTLI_INLINE size_t ComputeDistanceCode(size_t distance,
#define EXPORT_FN(X) EXPAND_CAT(X, EXPAND_CAT(PREFIX(), HASHER()))
#define PREFIX() N
+#define ENABLE_COMPOUND_DICTIONARY 0
#define HASHER() H2
/* NOLINTNEXTLINE(build/include) */
@@ -113,6 +115,41 @@ static BROTLI_INLINE size_t ComputeDistanceCode(size_t distance,
#include "./backward_references_inc.h"
#undef HASHER
+#undef ENABLE_COMPOUND_DICTIONARY
+#undef PREFIX
+#define PREFIX() D
+#define ENABLE_COMPOUND_DICTIONARY 1
+
+#define HASHER() H5
+/* NOLINTNEXTLINE(build/include) */
+#include "./backward_references_inc.h"
+#undef HASHER
+#define HASHER() H6
+/* NOLINTNEXTLINE(build/include) */
+#include "./backward_references_inc.h"
+#undef HASHER
+#define HASHER() H40
+/* NOLINTNEXTLINE(build/include) */
+#include "./backward_references_inc.h"
+#undef HASHER
+#define HASHER() H41
+/* NOLINTNEXTLINE(build/include) */
+#include "./backward_references_inc.h"
+#undef HASHER
+#define HASHER() H42
+/* NOLINTNEXTLINE(build/include) */
+#include "./backward_references_inc.h"
+#undef HASHER
+#define HASHER() H55
+/* NOLINTNEXTLINE(build/include) */
+#include "./backward_references_inc.h"
+#undef HASHER
+#define HASHER() H65
+/* NOLINTNEXTLINE(build/include) */
+#include "./backward_references_inc.h"
+#undef HASHER
+
+#undef ENABLE_COMPOUND_DICTIONARY
#undef PREFIX
#undef EXPORT_FN
@@ -125,6 +162,28 @@ void BrotliCreateBackwardReferences(size_t num_bytes,
ContextLut literal_context_lut, const BrotliEncoderParams* params,
Hasher* hasher, int* dist_cache, size_t* last_insert_len,
Command* commands, size_t* num_commands, size_t* num_literals) {
+ if (params->dictionary.compound.num_chunks != 0) {
+ switch (params->hasher.type) {
+#define CASE_(N) \
+ case N: \
+ CreateBackwardReferencesDH ## N(num_bytes, \
+ position, ringbuffer, ringbuffer_mask, \
+ literal_context_lut, params, hasher, dist_cache, \
+ last_insert_len, commands, num_commands, num_literals); \
+ return;
+ CASE_(5)
+ CASE_(6)
+ CASE_(40)
+ CASE_(41)
+ CASE_(42)
+ CASE_(55)
+ CASE_(65)
+#undef CASE_
+ default:
+ break;
+ }
+ }
+
switch (params->hasher.type) {
#define CASE_(N) \
case N: \
diff --git a/c/enc/backward_references_hq.c b/c/enc/backward_references_hq.c
index 61b7ff1..5e17c84 100644
--- a/c/enc/backward_references_hq.c
+++ b/c/enc/backward_references_hq.c
@@ -11,10 +11,11 @@
#include <string.h> /* memcpy, memset */
#include "../common/constants.h"
-#include "../common/context.h"
#include "../common/platform.h"
#include <brotli/types.h>
#include "./command.h"
+#include "./compound_dictionary.h"
+#include "./encoder_dict.h"
#include "./fast_log.h"
#include "./find_match_length.h"
#include "./literal_cost.h"
@@ -430,7 +431,8 @@ static size_t UpdateNodes(
size_t min_len;
size_t result = 0;
size_t k;
- size_t gap = 0;
+ const CompoundDictionary* addon = &params->dictionary.compound;
+ size_t gap = addon->total_size;
EvaluateNode(block_start + stream_offset, pos, max_backward_limit, gap,
starting_dist_cache, model, queue, nodes);
@@ -484,6 +486,24 @@ static size_t UpdateNodes(
len = FindMatchLengthWithLimit(&ringbuffer[prev_ix],
&ringbuffer[cur_ix_masked],
max_len);
+ } else if (backward > dictionary_start) {
+ size_t d = 0;
+ size_t offset;
+ size_t limit;
+ const uint8_t* source;
+ offset = dictionary_start + 1 + addon->total_size - 1;
+ while (offset >= backward + addon->chunk_offsets[d + 1]) d++;
+ source = addon->chunk_source[d];
+ offset = offset - addon->chunk_offsets[d] - backward;
+ limit = addon->chunk_offsets[d + 1] - addon->chunk_offsets[d] - offset;
+ limit = limit > max_len ? max_len : limit;
+ if (best_len >= limit ||
+ continuation != source[offset + best_len]) {
+ continue;
+ }
+ len = FindMatchLengthWithLimit(&source[offset],
+ &ringbuffer[cur_ix_masked],
+ limit);
} else {
/* "Gray" area. It is addressable by decoder, but this encoder
instance does not have that data -> should not touch it. */
@@ -589,7 +609,7 @@ void BrotliZopfliCreateCommands(const size_t num_bytes,
size_t pos = 0;
uint32_t offset = nodes[0].u.next;
size_t i;
- size_t gap = 0;
+ size_t gap = params->dictionary.compound.total_size;
for (i = 0; offset != BROTLI_UINT32_MAX; i++) {
const ZopfliNode* next = &nodes[pos + offset];
size_t copy_length = ZopfliNodeCopyLength(next);
@@ -665,6 +685,23 @@ static size_t ZopfliIterate(size_t num_bytes, size_t position,
return ComputeShortestPathFromNodes(num_bytes, nodes);
}
+static void MergeMatches(BackwardMatch* dst,
+ BackwardMatch* src1, size_t len1, BackwardMatch* src2, size_t len2) {
+ while (len1 > 0 && len2 > 0) {
+ size_t l1 = BackwardMatchLength(src1);
+ size_t l2 = BackwardMatchLength(src2);
+ if (l1 < l2 || ((l1 == l2) && (src1->distance < src2->distance))) {
+ *dst++ = *src1++;
+ len1--;
+ } else {
+ *dst++ = *src2++;
+ len2--;
+ }
+ }
+ while (len1-- > 0) *dst++ = *src1++;
+ while (len2-- > 0) *dst++ = *src2++;
+}
+
/* REQUIRES: nodes != NULL and len(nodes) >= num_bytes + 1 */
size_t BrotliZopfliComputeShortestPath(MemoryManager* m, size_t num_bytes,
size_t position, const uint8_t* ringbuffer, size_t ringbuffer_mask,
@@ -679,10 +716,11 @@ size_t BrotliZopfliComputeShortestPath(MemoryManager* m, size_t num_bytes,
const size_t store_end = num_bytes >= StoreLookaheadH10() ?
position + num_bytes - StoreLookaheadH10() + 1 : position;
size_t i;
- size_t gap = 0;
- size_t lz_matches_offset = 0;
+ const CompoundDictionary* addon = &params->dictionary.compound;
+ size_t gap = addon->total_size;
+ size_t lz_matches_offset =
+ (addon->num_chunks != 0) ? (MAX_NUM_MATCHES_H10 + 128) : 0;
ZopfliCostModel* model = BROTLI_ALLOC(m, ZopfliCostModel, 1);
- BROTLI_UNUSED(literal_context_lut);
if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(model) || BROTLI_IS_NULL(matches)) {
return 0;
}
@@ -700,10 +738,28 @@ size_t BrotliZopfliComputeShortestPath(MemoryManager* m, size_t num_bytes,
pos + stream_offset, max_backward_limit);
size_t skip;
size_t num_matches;
+ int dict_id = 0;
+ if (params->dictionary.contextual.context_based) {
+ uint8_t p1 = pos >= 1 ?
+ ringbuffer[(size_t)(pos - 1) & ringbuffer_mask] : 0;
+ uint8_t p2 = pos >= 2 ?
+ ringbuffer[(size_t)(pos - 2) & ringbuffer_mask] : 0;
+ dict_id = params->dictionary.contextual.context_map[
+ BROTLI_CONTEXT(p1, p2, literal_context_lut)];
+ }
num_matches = FindAllMatchesH10(&hasher->privat._H10,
- &params->dictionary,
+ params->dictionary.contextual.dict[dict_id],
ringbuffer, ringbuffer_mask, pos, num_bytes - i, max_distance,
dictionary_start + gap, params, &matches[lz_matches_offset]);
+ if (addon->num_chunks != 0) {
+ size_t cd_matches = LookupAllCompoundDictionaryMatches(addon,
+ ringbuffer, ringbuffer_mask, pos, 3, num_bytes - i,
+ dictionary_start, params->dist.max_distance,
+ &matches[lz_matches_offset - 64], 64);
+ MergeMatches(matches, &matches[lz_matches_offset - 64], cd_matches,
+ &matches[lz_matches_offset], num_matches);
+ num_matches += cd_matches;
+ }
if (num_matches > 0 &&
BackwardMatchLength(&matches[num_matches - 1]) > max_zopfli_len) {
matches[0] = matches[num_matches - 1];
@@ -774,9 +830,10 @@ void BrotliCreateHqZopfliBackwardReferences(MemoryManager* m, size_t num_bytes,
ZopfliCostModel* model = BROTLI_ALLOC(m, ZopfliCostModel, 1);
ZopfliNode* nodes;
BackwardMatch* matches = BROTLI_ALLOC(m, BackwardMatch, matches_size);
- size_t gap = 0;
- size_t shadow_matches = 0;
- BROTLI_UNUSED(literal_context_lut);
+ const CompoundDictionary* addon = &params->dictionary.compound;
+ size_t gap = addon->total_size;
+ size_t shadow_matches =
+ (addon->num_chunks != 0) ? (MAX_NUM_MATCHES_H10 + 128) : 0;
if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(model) ||
BROTLI_IS_NULL(num_matches) || BROTLI_IS_NULL(matches)) {
return;
@@ -790,15 +847,34 @@ void BrotliCreateHqZopfliBackwardReferences(MemoryManager* m, size_t num_bytes,
size_t num_found_matches;
size_t cur_match_end;
size_t j;
+ int dict_id = 0;
+ if (params->dictionary.contextual.context_based) {
+ uint8_t p1 = pos >= 1 ?
+ ringbuffer[(size_t)(pos - 1) & ringbuffer_mask] : 0;
+ uint8_t p2 = pos >= 2 ?
+ ringbuffer[(size_t)(pos - 2) & ringbuffer_mask] : 0;
+ dict_id = params->dictionary.contextual.context_map[
+ BROTLI_CONTEXT(p1, p2, literal_context_lut)];
+ }
/* Ensure that we have enough free slots. */
BROTLI_ENSURE_CAPACITY(m, BackwardMatch, matches, matches_size,
cur_match_pos + MAX_NUM_MATCHES_H10 + shadow_matches);
if (BROTLI_IS_OOM(m)) return;
num_found_matches = FindAllMatchesH10(&hasher->privat._H10,
- &params->dictionary,
+ params->dictionary.contextual.dict[dict_id],
ringbuffer, ringbuffer_mask, pos, max_length,
max_distance, dictionary_start + gap, params,
&matches[cur_match_pos + shadow_matches]);
+ if (addon->num_chunks != 0) {
+ size_t cd_matches = LookupAllCompoundDictionaryMatches(addon,
+ ringbuffer, ringbuffer_mask, pos, 3, max_length,
+ dictionary_start, params->dist.max_distance,
+ &matches[cur_match_pos + shadow_matches - 64], 64);
+ MergeMatches(&matches[cur_match_pos],
+ &matches[cur_match_pos + shadow_matches - 64], cd_matches,
+ &matches[cur_match_pos + shadow_matches], num_found_matches);
+ num_found_matches += cd_matches;
+ }
cur_match_end = cur_match_pos + num_found_matches;
for (j = cur_match_pos; j + 1 < cur_match_end; ++j) {
BROTLI_DCHECK(BackwardMatchLength(&matches[j]) <=
diff --git a/c/enc/backward_references_inc.h b/c/enc/backward_references_inc.h
index 766bf91..752c12e 100644
--- a/c/enc/backward_references_inc.h
+++ b/c/enc/backward_references_inc.h
@@ -28,13 +28,11 @@ static BROTLI_NOINLINE void EXPORT_FN(CreateBackwardReferences)(
const size_t random_heuristics_window_size =
LiteralSpreeLengthForSparseSearch(params);
size_t apply_random_heuristics = position + random_heuristics_window_size;
- const size_t gap = 0;
+ const size_t gap = params->dictionary.compound.total_size;
/* Minimum score to accept a backward reference. */
const score_t kMinScore = BROTLI_SCORE_BASE + 100;
- BROTLI_UNUSED(literal_context_lut);
-
FN(PrepareDistanceCache)(privat, dist_cache);
while (position + FN(HashTypeLength)() < pos_end) {
@@ -43,13 +41,29 @@ static BROTLI_NOINLINE void EXPORT_FN(CreateBackwardReferences)(
size_t dictionary_start = BROTLI_MIN(size_t,
position + position_offset, max_backward_limit);
HasherSearchResult sr;
+ int dict_id = 0;
+ uint8_t p1 = 0;
+ uint8_t p2 = 0;
+ if (params->dictionary.contextual.context_based) {
+ p1 = position >= 1 ?
+ ringbuffer[(size_t)(position - 1) & ringbuffer_mask] : 0;
+ p2 = position >= 2 ?
+ ringbuffer[(size_t)(position - 2) & ringbuffer_mask] : 0;
+ dict_id = params->dictionary.contextual.context_map[
+ BROTLI_CONTEXT(p1, p2, literal_context_lut)];
+ }
sr.len = 0;
sr.len_code_delta = 0;
sr.distance = 0;
sr.score = kMinScore;
- FN(FindLongestMatch)(privat, &params->dictionary,
+ FN(FindLongestMatch)(privat, params->dictionary.contextual.dict[dict_id],
ringbuffer, ringbuffer_mask, dist_cache, position, max_length,
max_distance, dictionary_start + gap, params->dist.max_distance, &sr);
+ if (ENABLE_COMPOUND_DICTIONARY) {
+ LookupCompoundDictionaryMatch(&params->dictionary.compound, ringbuffer,
+ ringbuffer_mask, dist_cache, position, max_length,
+ dictionary_start, params->dist.max_distance, &sr);
+ }
if (sr.score > kMinScore) {
/* Found a match. Let's look for something even better ahead. */
int delayed_backward_references_in_row = 0;
@@ -65,11 +79,23 @@ static BROTLI_NOINLINE void EXPORT_FN(CreateBackwardReferences)(
max_distance = BROTLI_MIN(size_t, position + 1, max_backward_limit);
dictionary_start = BROTLI_MIN(size_t,
position + 1 + position_offset, max_backward_limit);
+ if (params->dictionary.contextual.context_based) {
+ p2 = p1;
+ p1 = ringbuffer[position & ringbuffer_mask];
+ dict_id = params->dictionary.contextual.context_map[
+ BROTLI_CONTEXT(p1, p2, literal_context_lut)];
+ }
FN(FindLongestMatch)(privat,
- &params->dictionary,
+ params->dictionary.contextual.dict[dict_id],
ringbuffer, ringbuffer_mask, dist_cache, position + 1, max_length,
max_distance, dictionary_start + gap, params->dist.max_distance,
&sr2);
+ if (ENABLE_COMPOUND_DICTIONARY) {
+ LookupCompoundDictionaryMatch(
+ &params->dictionary.compound, ringbuffer,
+ ringbuffer_mask, dist_cache, position + 1, max_length,
+ dictionary_start, params->dist.max_distance, &sr2);
+ }
if (sr2.score >= sr.score + cost_diff_lazy) {
/* Ok, let's just write one byte for now and start a match from the
next byte. */
diff --git a/c/enc/compound_dictionary.c b/c/enc/compound_dictionary.c
new file mode 100644
index 0000000..f2a3f37
--- /dev/null
+++ b/c/enc/compound_dictionary.c
@@ -0,0 +1,200 @@
+/* Copyright 2017 Google Inc. All Rights Reserved.
+
+ Distributed under MIT license.
+ See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+#include "./compound_dictionary.h"
+
+#include "../common/platform.h"
+#include <brotli/types.h>
+#include "./memory.h"
+#include "./quality.h"
+
+static PreparedDictionary* CreatePreparedDictionaryWithParams(MemoryManager* m,
+ const uint8_t* source, size_t source_size, uint32_t bucket_bits,
+ uint32_t slot_bits, uint32_t hash_bits, uint16_t bucket_limit) {
+ /* Step 1: create "bloated" hasher. */
+ uint32_t num_slots = 1u << slot_bits;
+ uint32_t num_buckets = 1u << bucket_bits;
+ uint32_t hash_shift = 64u - bucket_bits;
+ uint64_t hash_mask = (~((uint64_t)0U)) >> (64 - hash_bits);
+ uint32_t slot_mask = num_slots - 1;
+ size_t alloc_size = (sizeof(uint32_t) << slot_bits) +
+ (sizeof(uint32_t) << slot_bits) +
+ (sizeof(uint16_t) << bucket_bits) +
+ (sizeof(uint32_t) << bucket_bits) +
+ (sizeof(uint32_t) * source_size);
+ uint8_t* flat = NULL;
+ PreparedDictionary* result = NULL;
+ uint16_t* num = NULL;
+ uint32_t* bucket_heads = NULL;
+ uint32_t* next_bucket = NULL;
+ uint32_t* slot_offsets = NULL;
+ uint16_t* heads = NULL;
+ uint32_t* items = NULL;
+ uint8_t* source_copy = NULL;
+ uint32_t i;
+ uint32_t* slot_size = NULL;
+ uint32_t* slot_limit = NULL;
+ uint32_t total_items = 0;
+ if (slot_bits > 16) return NULL;
+ if (slot_bits > bucket_bits) return NULL;
+ if (bucket_bits - slot_bits >= 16) return NULL;
+
+ flat = BROTLI_ALLOC(m, uint8_t, alloc_size);
+ if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(flat)) return NULL;
+
+ slot_size = (uint32_t*)flat;
+ slot_limit = (uint32_t*)(&slot_size[num_slots]);
+ num = (uint16_t*)(&slot_limit[num_slots]);
+ bucket_heads = (uint32_t*)(&num[num_buckets]);
+ next_bucket = (uint32_t*)(&bucket_heads[num_buckets]);
+ memset(num, 0, num_buckets * sizeof(num[0]));
+
+ /* TODO: apply custom "store" order. */
+ for (i = 0; i + 7 < source_size; ++i) {
+ const uint64_t h = (BROTLI_UNALIGNED_LOAD64LE(&source[i]) & hash_mask) *
+ kPreparedDictionaryHashMul64Long;
+ const uint32_t key = (uint32_t)(h >> hash_shift);
+ uint16_t count = num[key];
+ next_bucket[i] = (count == 0) ? ((uint32_t)(-1)) : bucket_heads[key];
+ bucket_heads[key] = i;
+ count++;
+ if (count > bucket_limit) count = bucket_limit;
+ num[key] = count;
+ }
+
+ /* Step 2: find slot limits. */
+ for (i = 0; i < num_slots; ++i) {
+ BROTLI_BOOL overflow = BROTLI_FALSE;
+ slot_limit[i] = bucket_limit;
+ while (BROTLI_TRUE) {
+ uint32_t limit = slot_limit[i];
+ size_t j;
+ uint32_t count = 0;
+ overflow = BROTLI_FALSE;
+ for (j = i; j < num_buckets; j += num_slots) {
+ uint32_t size = num[j];
+ /* Last chain may span behind 64K limit; overflow happens only if
+ we are about to use 0xFFFF+ as item offset. */
+ if (count >= 0xFFFF) {
+ overflow = BROTLI_TRUE;
+ break;
+ }
+ if (size > limit) size = limit;
+ count += size;
+ }
+ if (!overflow) {
+ slot_size[i] = count;
+ total_items += count;
+ break;
+ }
+ slot_limit[i]--;
+ }
+ }
+
+ /* Step 3: transfer data to "slim" hasher. */
+ alloc_size = sizeof(PreparedDictionary) + (sizeof(uint32_t) << slot_bits) +
+ (sizeof(uint16_t) << bucket_bits) + (sizeof(uint32_t) * total_items) +
+ source_size;
+
+ result = (PreparedDictionary*)BROTLI_ALLOC(m, uint8_t, alloc_size);
+ if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(result)) {
+ BROTLI_FREE(m, flat);
+ return NULL;
+ }
+ slot_offsets = (uint32_t*)(&result[1]);
+ heads = (uint16_t*)(&slot_offsets[num_slots]);
+ items = (uint32_t*)(&heads[num_buckets]);
+ source_copy = (uint8_t*)(&items[total_items]);
+
+ result->magic = kPreparedDictionaryMagic;
+ result->source_offset = total_items;
+ result->source_size = (uint32_t)source_size;
+ result->hash_bits = hash_bits;
+ result->bucket_bits = bucket_bits;
+ result->slot_bits = slot_bits;
+
+ total_items = 0;
+ for (i = 0; i < num_slots; ++i) {
+ slot_offsets[i] = total_items;
+ total_items += slot_size[i];
+ slot_size[i] = 0;
+ }
+ for (i = 0; i < num_buckets; ++i) {
+ uint32_t slot = i & slot_mask;
+ uint32_t count = num[i];
+ uint32_t pos;
+ size_t j;
+ size_t cursor = slot_size[slot];
+ if (count > slot_limit[slot]) count = slot_limit[slot];
+ if (count == 0) {
+ heads[i] = 0xFFFF;
+ continue;
+ }
+ heads[i] = (uint16_t)cursor;
+ cursor += slot_offsets[slot];
+ slot_size[slot] += count;
+ pos = bucket_heads[i];
+ for (j = 0; j < count; j++) {
+ items[cursor++] = pos;
+ pos = next_bucket[pos];
+ }
+ items[cursor - 1] |= 0x80000000;
+ }
+
+ BROTLI_FREE(m, flat);
+ memcpy(source_copy, source, source_size);
+ return result;
+}
+
+PreparedDictionary* CreatePreparedDictionary(MemoryManager* m,
+ const uint8_t* source, size_t source_size) {
+ uint32_t bucket_bits = 17;
+ uint32_t slot_bits = 7;
+ uint32_t hash_bits = 40;
+ uint16_t bucket_limit = 32;
+ size_t volume = 16u << bucket_bits;
+ /* Tune parameters to fit dictionary size. */
+ while (volume < source_size && bucket_bits < 22) {
+ bucket_bits++;
+ slot_bits++;
+ volume <<= 1;
+ }
+ return CreatePreparedDictionaryWithParams(m,
+ source, source_size, bucket_bits, slot_bits, hash_bits, bucket_limit);
+}
+
+void DestroyPreparedDictionary(MemoryManager* m,
+ PreparedDictionary* dictionary) {
+ if (!dictionary) return;
+ BROTLI_FREE(m, dictionary);
+}
+
+BROTLI_BOOL AttachPreparedDictionary(
+ CompoundDictionary* compound, const PreparedDictionary* dictionary) {
+ size_t length = 0;
+ size_t index = 0;
+
+ if (compound->num_chunks == SHARED_BROTLI_MAX_COMPOUND_DICTS) {
+ return BROTLI_FALSE;
+ }
+
+ if (!dictionary) return BROTLI_FALSE;
+
+ length = dictionary->source_size;
+ index = compound->num_chunks;
+ compound->total_size += length;
+ compound->chunks[index] = dictionary;
+ compound->chunk_offsets[index + 1] = compound->total_size;
+ {
+ uint32_t* slot_offsets = (uint32_t*)(&dictionary[1]);
+ uint16_t* heads = (uint16_t*)(&slot_offsets[1u << dictionary->slot_bits]);
+ uint32_t* items = (uint32_t*)(&heads[1u << dictionary->bucket_bits]);
+ compound->chunk_source[index] =
+ (const uint8_t*)(&items[dictionary->source_offset]);
+ }
+ compound->num_chunks++;
+ return BROTLI_TRUE;
+}
diff --git a/c/enc/compound_dictionary.h b/c/enc/compound_dictionary.h
new file mode 100644
index 0000000..5e1dfe2
--- /dev/null
+++ b/c/enc/compound_dictionary.h
@@ -0,0 +1,60 @@
+/* Copyright 2017 Google Inc. All Rights Reserved.
+
+ Distributed under MIT license.
+ See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+#ifndef BROTLI_ENC_PREPARED_DICTIONARY_H_
+#define BROTLI_ENC_PREPARED_DICTIONARY_H_
+
+#include "../common/platform.h"
+#include "../common/constants.h"
+#include <brotli/shared_dictionary.h>
+#include <brotli/types.h>
+#include "./memory.h"
+
+static const uint32_t kPreparedDictionaryMagic = 0xDEBCEDE0;
+static const uint64_t kPreparedDictionaryHashMul64Long =
+ BROTLI_MAKE_UINT64_T(0x1FE35A7Bu, 0xD3579BD3u);
+
+typedef struct PreparedDictionary {
+ uint32_t magic;
+ uint32_t source_offset;
+ uint32_t source_size;
+ uint32_t hash_bits;
+ uint32_t bucket_bits;
+ uint32_t slot_bits;
+
+ /* --- Dynamic size members --- */
+
+ /* uint32_t slot_offsets[1 << slot_bits]; */
+ /* uint16_t heads[1 << bucket_bits]; */
+ /* uint32_t items[variable]; */
+
+ /* uint8_t source[source_size] */
+} PreparedDictionary;
+
+BROTLI_INTERNAL PreparedDictionary* CreatePreparedDictionary(MemoryManager* m,
+ const uint8_t* source, size_t source_size);
+
+BROTLI_INTERNAL void DestroyPreparedDictionary(MemoryManager* m,
+ PreparedDictionary* dictionary);
+
+typedef struct CompoundDictionary {
+ /* LZ77 prefix, compound dictionary */
+ size_t num_chunks;
+ size_t total_size;
+ /* Client instances. */
+ const PreparedDictionary* chunks[SHARED_BROTLI_MAX_COMPOUND_DICTS + 1];
+ const uint8_t* chunk_source[SHARED_BROTLI_MAX_COMPOUND_DICTS + 1];
+ size_t chunk_offsets[SHARED_BROTLI_MAX_COMPOUND_DICTS + 1];
+
+ size_t num_prepared_instances_;
+ /* Owned instances. */
+ PreparedDictionary* prepared_instances_[SHARED_BROTLI_MAX_COMPOUND_DICTS + 1];
+} CompoundDictionary;
+
+BROTLI_INTERNAL BROTLI_BOOL AttachPreparedDictionary(
+ CompoundDictionary* compound, const PreparedDictionary* dictionary);
+
+#endif /* BROTLI_ENC_PREPARED_DICTIONARY */
diff --git a/c/enc/encode.c b/c/enc/encode.c
index 23b1d3b..4a68c3c 100644
--- a/c/enc/encode.c
+++ b/c/enc/encode.c
@@ -21,6 +21,7 @@
#include "./brotli_bit_stream.h"
#include "./compress_fragment.h"
#include "./compress_fragment_two_pass.h"
+#include "./dictionary_hash.h"
#include "./encoder_dict.h"
#include "./entropy_encode.h"
#include "./fast_log.h"
@@ -746,7 +747,7 @@ static void BrotliEncoderInitParams(BrotliEncoderParams* params) {
params->stream_offset = 0;
params->size_hint = 0;
params->disable_literal_context_modeling = BROTLI_FALSE;
- BrotliInitEncoderDictionary(&params->dictionary);
+ BrotliInitSharedEncoderDictionary(&params->dictionary);
params->dist.distance_postfix_bits = 0;
params->dist.num_direct_distance_codes = 0;
params->dist.alphabet_size_max =
@@ -755,6 +756,11 @@ static void BrotliEncoderInitParams(BrotliEncoderParams* params) {
params->dist.max_distance = BROTLI_MAX_DISTANCE;
}
+static void BrotliEncoderCleanupParams(MemoryManager* m,
+ BrotliEncoderParams* params) {
+ BrotliCleanupSharedEncoderDictionary(m, &params->dictionary);
+}
+
static void BrotliEncoderInitState(BrotliEncoderState* s) {
BrotliEncoderInitParams(&s->params);
s->input_pos_ = 0;
@@ -825,6 +831,7 @@ static void BrotliEncoderCleanupState(BrotliEncoderState* s) {
BROTLI_FREE(m, s->two_pass_arena_);
BROTLI_FREE(m, s->command_buf_);
BROTLI_FREE(m, s->literal_buf_);
+ BrotliEncoderCleanupParams(m, &s->params);
}
/* Deinitializes and frees BrotliEncoderState instance. */
@@ -922,6 +929,8 @@ static void ExtendLastCommand(BrotliEncoderState* s, uint32_t* bytes,
uint64_t cmd_dist = (uint64_t)s->dist_cache_[0];
uint32_t distance_code = CommandRestoreDistanceCode(last_command,
&s->params.dist);
+ const CompoundDictionary* dict = &s->params.dictionary.compound;
+ size_t compound_dictionary_size = dict->total_size;
if (distance_code < BROTLI_NUM_DISTANCE_SHORT_CODES ||
distance_code - (BROTLI_NUM_DISTANCE_SHORT_CODES - 1) == cmd_dist) {
if (cmd_dist <= max_distance) {
@@ -932,6 +941,38 @@ static void ExtendLastCommand(BrotliEncoderState* s, uint32_t* bytes,
(*wrapped_last_processed_pos)++;
}
} else {
+ if ((cmd_dist - max_distance - 1) < compound_dictionary_size &&
+ last_copy_len < cmd_dist - max_distance) {
+ size_t address =
+ compound_dictionary_size - (size_t)(cmd_dist - max_distance) +
+ (size_t)last_copy_len;
+ size_t br_index = 0;
+ size_t br_offset;
+ const uint8_t* chunk;
+ size_t chunk_length;
+ while (address >= dict->chunk_offsets[br_index + 1]) br_index++;
+ br_offset = address - dict->chunk_offsets[br_index];
+ chunk = dict->chunk_source[br_index];
+ chunk_length =
+ dict->chunk_offsets[br_index + 1] - dict->chunk_offsets[br_index];
+ while (*bytes != 0 && data[*wrapped_last_processed_pos & mask] ==
+ chunk[br_offset]) {
+ last_command->copy_len_++;
+ (*bytes)--;
+ (*wrapped_last_processed_pos)++;
+ if (++br_offset == chunk_length) {
+ br_index++;
+ br_offset = 0;
+ if (br_index != dict->num_chunks) {
+ chunk = dict->chunk_source[br_index];
+ chunk_length = dict->chunk_offsets[br_index + 1] -
+ dict->chunk_offsets[br_index];
+ } else {
+ break;
+ }
+ }
+ }
+ }
}
/* The copy length is at most the metablock size, and thus expressible. */
GetLengthCode(last_command->insert_len_,
@@ -969,6 +1010,7 @@ static BROTLI_BOOL EncodeData(
data = s->ringbuffer_.buffer_;
mask = s->ringbuffer_.mask_;
+ if (s->params.quality > s->params.dictionary.max_quality) return BROTLI_FALSE;
/* Adding more blocks after "last" block is forbidden. */
if (s->is_last_block_emitted_) return BROTLI_FALSE;
if (is_last) s->is_last_block_emitted_ = BROTLI_TRUE;
@@ -1421,6 +1463,7 @@ static BROTLI_BOOL BrotliCompressBufferQuality10(
*encoded_size = total_out_size;
DestroyHasher(m, hasher);
BROTLI_FREE(m, hasher);
+ BrotliEncoderCleanupParams(m, params);
BROTLI_FREE(m, params);
BrotliBootstrapFree(m, m);
return ok;
@@ -1930,6 +1973,124 @@ uint32_t BrotliEncoderVersion(void) {
return BROTLI_VERSION;
}
+BrotliEncoderPreparedDictionary* BrotliEncoderPrepareDictionary(
+ BrotliSharedDictionaryType type, size_t size, const uint8_t* data,
+ int quality,
+ brotli_alloc_func alloc_func, brotli_free_func free_func, void* opaque) {
+ ManagedDictionary* managed_dictionary = NULL;
+ if (type != BROTLI_SHARED_DICTIONARY_RAW &&
+ type != BROTLI_SHARED_DICTIONARY_SERIALIZED) {
+ return NULL;
+ }
+ managed_dictionary =
+ BrotliCreateManagedDictionary(alloc_func, free_func, opaque);
+ if (managed_dictionary == NULL) {
+ return NULL;
+ }
+ if (type == BROTLI_SHARED_DICTIONARY_RAW) {
+ managed_dictionary->dictionary = (uint32_t*)CreatePreparedDictionary(
+ &managed_dictionary->memory_manager_, data, size);
+ } else {
+ SharedEncoderDictionary* dict = (SharedEncoderDictionary*)BrotliAllocate(
+ &managed_dictionary->memory_manager_, sizeof(SharedEncoderDictionary));
+ managed_dictionary->dictionary = (uint32_t*)dict;
+ if (dict != NULL) {
+ BROTLI_BOOL ok = BrotliInitCustomSharedEncoderDictionary(
+ &managed_dictionary->memory_manager_, data, size, quality, dict);
+ if (!ok) {
+ BrotliFree(&managed_dictionary->memory_manager_, dict);
+ managed_dictionary->dictionary = NULL;
+ }
+ }
+ }
+ if (managed_dictionary->dictionary == NULL) {
+ BrotliDestroyManagedDictionary(managed_dictionary);
+ return NULL;
+ }
+ return (BrotliEncoderPreparedDictionary*)managed_dictionary;
+}
+
+void BrotliEncoderDestroyPreparedDictionary(
+ BrotliEncoderPreparedDictionary* dictionary) {
+ ManagedDictionary* dict = (ManagedDictionary*)dictionary;
+ if (!dictionary) return;
+ /* First field of dictionary structs. */
+ /* Only managed dictionaries are eligible for destruction by this method. */
+ if (dict->magic != kManagedDictionaryMagic) {
+ return;
+ }
+ if (dict->dictionary == NULL) {
+ /* This should never ever happen. */
+ } else if (*dict->dictionary == kPreparedDictionaryMagic) {
+ DestroyPreparedDictionary(
+ &dict->memory_manager_, (PreparedDictionary*)dict->dictionary);
+ } else if (*dict->dictionary == kSharedDictionaryMagic) {
+ BrotliCleanupSharedEncoderDictionary(&dict->memory_manager_,
+ (SharedEncoderDictionary*)dict->dictionary);
+ BrotliFree(&dict->memory_manager_, dict->dictionary);
+ } else {
+ /* This should never ever happen. */
+ }
+ dict->dictionary = NULL;
+ BrotliDestroyManagedDictionary(dict);
+}
+
+BROTLI_BOOL BrotliEncoderAttachPreparedDictionary(BrotliEncoderState* state,
+ const BrotliEncoderPreparedDictionary* dictionary) {
+ /* First field of dictionary structs */
+ const BrotliEncoderPreparedDictionary* dict = dictionary;
+ uint32_t magic = *((const uint32_t*)dict);
+ SharedEncoderDictionary* current = NULL;
+ if (magic == kManagedDictionaryMagic) {
+ /* Unwrap managed dictionary. */
+ ManagedDictionary* managed_dictionary = (ManagedDictionary*)dict;
+ magic = *managed_dictionary->dictionary;
+ dict = (BrotliEncoderPreparedDictionary*)managed_dictionary->dictionary;
+ }
+ current = &state->params.dictionary;
+ if (magic == kPreparedDictionaryMagic) {
+ const PreparedDictionary* prepared = (const PreparedDictionary*)dict;
+ if (!AttachPreparedDictionary(&current->compound, prepared)) {
+ return BROTLI_FALSE;
+ }
+ } else if (magic == kSharedDictionaryMagic) {
+ const SharedEncoderDictionary* attached =
+ (const SharedEncoderDictionary*)dict;
+ BROTLI_BOOL was_default = !current->contextual.context_based &&
+ current->contextual.num_dictionaries == 1 &&
+ current->contextual.dict[0]->hash_table_words ==
+ kStaticDictionaryHashWords &&
+ current->contextual.dict[0]->hash_table_lengths ==
+ kStaticDictionaryHashLengths;
+ BROTLI_BOOL new_default = !attached->contextual.context_based &&
+ attached->contextual.num_dictionaries == 1 &&
+ attached->contextual.dict[0]->hash_table_words ==
+ kStaticDictionaryHashWords &&
+ attached->contextual.dict[0]->hash_table_lengths ==
+ kStaticDictionaryHashLengths;
+ size_t i;
+ if (state->is_initialized_) return BROTLI_FALSE;
+ current->max_quality =
+ BROTLI_MIN(int, current->max_quality, attached->max_quality);
+ for (i = 0; i < attached->compound.num_chunks; i++) {
+ if (!AttachPreparedDictionary(&current->compound,
+ attached->compound.chunks[i])) {
+ return BROTLI_FALSE;
+ }
+ }
+ if (!new_default) {
+ if (!was_default) return BROTLI_FALSE;
+ /* Copy by value, but then set num_instances_ to 0 because their memory
+ is managed by attached, not by current */
+ current->contextual = attached->contextual;
+ current->contextual.num_instances_ = 0;
+ }
+ } else {
+ return BROTLI_FALSE;
+ }
+ return BROTLI_TRUE;
+}
+
#if defined(__cplusplus) || defined(c_plusplus)
} /* extern "C" */
#endif
diff --git a/c/enc/encoder_dict.c b/c/enc/encoder_dict.c
index c9e963b..098727c 100644
--- a/c/enc/encoder_dict.c
+++ b/c/enc/encoder_dict.c
@@ -6,16 +6,43 @@
#include "./encoder_dict.h"
+#include <stdlib.h> /* malloc, free */
+
#include "../common/dictionary.h"
+#include "../common/platform.h"
+#include "../common/shared_dictionary_internal.h"
#include "../common/transform.h"
+#include "./compound_dictionary.h"
#include "./dictionary_hash.h"
+#include "./memory.h"
+#include "./quality.h"
#include "./hash.h"
#if defined(__cplusplus) || defined(c_plusplus)
extern "C" {
#endif
-void BrotliInitEncoderDictionary(BrotliEncoderDictionary* dict) {
+#define NUM_HASH_BITS 15u
+#define NUM_HASH_BUCKETS (1u << NUM_HASH_BITS)
+
+static void BrotliTrieInit(BrotliTrie* trie) {
+ trie->pool_capacity = 0;
+ trie->pool_size = 0;
+ trie->pool = 0;
+
+ /* Set up the root node */
+ trie->root.single = 0;
+ trie->root.len_ = 0;
+ trie->root.idx_ = 0;
+ trie->root.sub = 0;
+}
+
+static void BrotliTrieFree(MemoryManager* m, BrotliTrie* trie) {
+ BrotliFree(m, trie->pool);
+}
+
+/* Initializes to RFC 7932 static dictionary / transforms. */
+static void InitEncoderDictionary(BrotliEncoderDictionary* dict) {
dict->words = BrotliGetDictionary();
dict->num_transforms = (uint32_t)BrotliGetTransforms()->num_transforms;
@@ -26,6 +53,574 @@ void BrotliInitEncoderDictionary(BrotliEncoderDictionary* dict) {
dict->cutoffTransformsCount = kCutoffTransformsCount;
dict->cutoffTransforms = kCutoffTransforms;
+
+ dict->parent = 0;
+
+ dict->hash_table_data_words_ = 0;
+ dict->hash_table_data_lengths_ = 0;
+ dict->buckets_alloc_size_ = 0;
+ dict->buckets_data_ = 0;
+ dict->dict_words_alloc_size_ = 0;
+ dict->dict_words_data_ = 0;
+ dict->words_instance_ = 0;
+ dict->has_words_heavy = BROTLI_FALSE;
+ BrotliTrieInit(&dict->trie);
+}
+
+static void BrotliDestroyEncoderDictionary(MemoryManager* m,
+ BrotliEncoderDictionary* dict) {
+ BrotliFree(m, dict->hash_table_data_words_);
+ BrotliFree(m, dict->hash_table_data_lengths_);
+ BrotliFree(m, dict->buckets_data_);
+ BrotliFree(m, dict->dict_words_data_);
+ BrotliFree(m, dict->words_instance_);
+ BrotliTrieFree(m, &dict->trie);
+}
+
+/* Word length must be at least 4 bytes */
+static uint32_t Hash(const uint8_t* data, int bits) {
+ uint32_t h = BROTLI_UNALIGNED_LOAD32LE(data) * kHashMul32;
+ /* The higher bits contain more mixture from the multiplication,
+ so we take our results from there. */
+ return h >> (32 - bits);
+}
+
+/* Theoretical max possible word size after transform */
+#define kTransformedBufferSize \
+ (256 + 256 + SHARED_BROTLI_MAX_DICTIONARY_WORD_LENGTH)
+
+/* To be safe buffer must have at least kTransformedBufferSize */
+static void TransformedDictionaryWord(uint32_t word_idx, int len, int transform,
+ const BrotliTransforms* transforms,
+ const BrotliEncoderDictionary* dict,
+ uint8_t* buffer, size_t* size) {
+ const uint8_t* dict_word = &dict->words->data[
+ dict->words->offsets_by_length[len] + (uint32_t)len * word_idx];
+ *size = (size_t)BrotliTransformDictionaryWord(buffer, dict_word, len,
+ transforms, transform);
+}
+
+static DictWord MakeDictWord(uint8_t len, uint8_t transform, uint16_t idx) {
+ DictWord result;
+ result.len = len;
+ result.transform = transform;
+ result.idx = idx;
+ return result;
+}
+
+static uint32_t BrotliTrieAlloc(MemoryManager* m, size_t num, BrotliTrie* trie,
+ BrotliTrieNode** keep) {
+ uint32_t result;
+ uint32_t keep_index = 0;
+ if (keep && *keep != &trie->root) {
+ /* Optional node to keep, since address may change after re-allocating */
+ keep_index = (uint32_t)(*keep - trie->pool);
+ }
+ if (trie->pool_size == 0) {
+ /* Have a dummy node in the front. We do not want the result to be 0, it
+ must be at least 1, 0 represents "null pointer" */
+ trie->pool_size = 1;
+ }
+ BROTLI_ENSURE_CAPACITY(m, BrotliTrieNode, trie->pool, trie->pool_capacity,
+ trie->pool_size + num);
+ if (BROTLI_IS_OOM(m)) return 0;
+ /* Init the new nodes to empty */
+ memset(trie->pool + trie->pool_size, 0, sizeof(*trie->pool) * num);
+ result = (uint32_t)trie->pool_size;
+ trie->pool_size += num;
+ if (keep && *keep != &trie->root) {
+ *keep = trie->pool + keep_index;
+ }
+ return result;
+}
+
+/**
+ * len and idx: payload for last node
+ * word, size: the string
+ * index: position in the string
+ */
+static BROTLI_BOOL BrotliTrieNodeAdd(MemoryManager* m, uint8_t len,
+ uint32_t idx, const uint8_t* word, size_t size, int index,
+ BrotliTrieNode* node, BrotliTrie* trie) {
+ BrotliTrieNode* child = 0;
+ uint8_t c;
+ if ((size_t)index == size) {
+ if (!node->len_ || idx < node->idx_) {
+ node->len_ = len;
+ node->idx_ = idx;
+ }
+ return BROTLI_TRUE;
+ }
+ c = word[index];
+ if (node->single && c != node->c) {
+ BrotliTrieNode old = trie->pool[node->sub];
+ uint32_t new_nodes = BrotliTrieAlloc(m, 32, trie, &node);
+ if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
+ node->single = 0;
+ node->sub = new_nodes;
+ trie->pool[node->sub + (node->c >> 4)].sub = new_nodes + 16;
+ trie->pool[trie->pool[node->sub + (node->c >> 4)].sub + (node->c & 15)] =
+ old;
+ }
+ if (!node->sub) {
+ uint32_t new_node = BrotliTrieAlloc(m, 1, trie, &node);
+ if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
+ node->single = 1;
+ node->c = c;
+ node->sub = new_node;
+ }
+ if (node->single) {
+ child = &trie->pool[node->sub];
+ } else {
+ if (!trie->pool[node->sub + (c >> 4)].sub) {
+ uint32_t new_nodes = BrotliTrieAlloc(m, 16, trie, &node);
+ if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
+ trie->pool[node->sub + (c >> 4)].sub = new_nodes;
+ }
+ child = &trie->pool[trie->pool[node->sub + (c >> 4)].sub + (c & 15)];
+ }
+ return BrotliTrieNodeAdd(m, len, idx, word, size, index + 1, child, trie);
+}
+
+static BROTLI_BOOL BrotliTrieAdd(MemoryManager* m, uint8_t len, uint32_t idx,
+ const uint8_t* word, size_t size, BrotliTrie* trie) {
+ return BrotliTrieNodeAdd(m, len, idx, word, size, 0, &trie->root, trie);
+}
+
+const BrotliTrieNode* BrotliTrieSub(const BrotliTrie* trie,
+ const BrotliTrieNode* node, uint8_t c) {
+ BrotliTrieNode* temp_node;
+ if (node->single) {
+ if (node->c == c) return &trie->pool[node->sub];
+ return 0;
+ }
+ if (!node->sub) return 0;
+ temp_node = &trie->pool[node->sub + (c >> 4)];
+ if (!temp_node->sub) return 0;
+ return &trie->pool[temp_node->sub + (c & 15)];
+}
+
+static const BrotliTrieNode* BrotliTrieFind(const BrotliTrie* trie,
+ const uint8_t* word, size_t size) {
+ const BrotliTrieNode* node = &trie->root;
+ size_t i;
+ for (i = 0; i < size; i++) {
+ node = BrotliTrieSub(trie, node, word[i]);
+ if (!node) return 0;
+ }
+ return node;
+}
+
+static BROTLI_BOOL BuildDictionaryLut(MemoryManager* m,
+ const BrotliTransforms* transforms,
+ BrotliEncoderDictionary* dict) {
+ uint32_t i;
+ DictWord* dict_words;
+ uint16_t* buckets;
+ DictWord** words_by_hash;
+ size_t* words_by_hash_size;
+ size_t* words_by_hash_capacity;
+ BrotliTrie dedup;
+ uint8_t word[kTransformedBufferSize];
+ size_t word_size;
+ size_t total = 0;
+ uint8_t l;
+ uint16_t idx;
+
+ BrotliTrieInit(&dedup);
+
+ words_by_hash = (DictWord**)BrotliAllocate(m,
+ sizeof(*words_by_hash) * NUM_HASH_BUCKETS);
+ words_by_hash_size = (size_t*)BrotliAllocate(m,
+ sizeof(*words_by_hash_size) * NUM_HASH_BUCKETS);
+ words_by_hash_capacity = (size_t*)BrotliAllocate(m,
+ sizeof(*words_by_hash_capacity) * NUM_HASH_BUCKETS);
+ if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
+ memset(words_by_hash, 0, sizeof(*words_by_hash) * NUM_HASH_BUCKETS);
+ memset(words_by_hash_size, 0, sizeof(*words_by_hash_size) * NUM_HASH_BUCKETS);
+ memset(words_by_hash_capacity, 0,
+ sizeof(*words_by_hash_capacity) * NUM_HASH_BUCKETS);
+
+ if (transforms->num_transforms > 0) {
+ for (l = SHARED_BROTLI_MIN_DICTIONARY_WORD_LENGTH;
+ l <= SHARED_BROTLI_MAX_DICTIONARY_WORD_LENGTH; ++l) {
+ uint16_t n = dict->words->size_bits_by_length[l] ?
+ (uint16_t)(1 << dict->words->size_bits_by_length[l]) : 0u;
+ for (idx = 0; idx < n; ++idx) {
+ uint32_t key;
+ /* First transform (usually identity) */
+ TransformedDictionaryWord(idx, l, 0, transforms, dict, word,
+ &word_size);
+ /* Cannot hash words smaller than 4 bytes */
+ if (word_size < 4) {
+ /* Break instead of continue, all next words of this length will have
+ same length after transform */
+ break;
+ }
+ if (!BrotliTrieAdd(m, 0, idx, word, word_size, &dedup)) {
+ return BROTLI_FALSE;
+ }
+ key = Hash(word, NUM_HASH_BITS);
+ BROTLI_ENSURE_CAPACITY_APPEND(m, DictWord, words_by_hash[key],
+ words_by_hash_capacity[key], words_by_hash_size[key],
+ MakeDictWord(l, 0, idx));
+ ++total;
+ }
+ }
+ }
+
+ /* These LUT transforms only supported if no custom transforms. This is
+ ok, we will use the heavy trie instead. */
+ if (transforms == BrotliGetTransforms()) {
+ for (l = SHARED_BROTLI_MIN_DICTIONARY_WORD_LENGTH;
+ l <= SHARED_BROTLI_MAX_DICTIONARY_WORD_LENGTH; ++l) {
+ uint16_t n = dict->words->size_bits_by_length[l] ?
+ (uint16_t)(1 << dict->words->size_bits_by_length[l]) : 0u;
+ for (idx = 0; idx < n; ++idx) {
+ int k;
+ BROTLI_BOOL is_ascii = BROTLI_TRUE;
+ size_t offset = dict->words->offsets_by_length[l] + (size_t)l * idx;
+ const uint8_t* data = &dict->words->data[offset];
+ for (k = 0; k < l; ++k) {
+ if (data[k] >= 128) is_ascii = BROTLI_FALSE;
+ }
+ if (data[0] < 128) {
+ int transform = 9; /* {empty, uppercase first, empty} */
+ uint32_t ix = idx + (uint32_t)transform * n;
+ const BrotliTrieNode* it;
+ TransformedDictionaryWord(idx, l, transform, transforms,
+ dict, word, &word_size);
+ it = BrotliTrieFind(&dedup, word, word_size);
+ if (!it || it->idx_ > ix) {
+ uint32_t key = Hash(word, NUM_HASH_BITS);
+ if (!BrotliTrieAdd(m, 0, ix, word, word_size, &dedup)) {
+ return BROTLI_FALSE;
+ }
+ BROTLI_ENSURE_CAPACITY_APPEND(m, DictWord, words_by_hash[key],
+ words_by_hash_capacity[key], words_by_hash_size[key],
+ MakeDictWord(l, BROTLI_TRANSFORM_UPPERCASE_FIRST, idx));
+ ++total;
+ }
+ }
+ if (is_ascii) {
+ int transform = 44; /* {empty, uppercase all, empty} */
+ uint32_t ix = idx + (uint32_t)transform * n;
+ const BrotliTrieNode* it;
+ TransformedDictionaryWord(idx, l, transform, transforms,
+ dict, word, &word_size);
+ it = BrotliTrieFind(&dedup, word, word_size);
+ if (!it || it->idx_ > ix) {
+ uint32_t key = Hash(word, NUM_HASH_BITS);
+ if (!BrotliTrieAdd(m, 0, ix, word, word_size, &dedup)) {
+ return BROTLI_FALSE;
+ }
+ BROTLI_ENSURE_CAPACITY_APPEND(m, DictWord, words_by_hash[key],
+ words_by_hash_capacity[key], words_by_hash_size[key],
+ MakeDictWord(l, BROTLI_TRANSFORM_UPPERCASE_ALL, idx));
+ ++total;
+ }
+ }
+ }
+ }
+ }
+
+ dict_words = (DictWord*)BrotliAllocate(m,
+ sizeof(*dict->dict_words) * (total + 1));
+ buckets = (uint16_t*)BrotliAllocate(m,
+ sizeof(*dict->buckets) * NUM_HASH_BUCKETS);
+ if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
+ dict->dict_words_alloc_size_ = total + 1;
+ dict->dict_words = dict->dict_words_data_ = dict_words;
+ dict->buckets_alloc_size_ = NUM_HASH_BUCKETS;
+ dict->buckets = dict->buckets_data_ = buckets;
+
+ /* Unused; makes offsets start from 1. */
+ dict_words[0] = MakeDictWord(0, 0, 0);
+ total = 1;
+ for (i = 0; i < NUM_HASH_BUCKETS; ++i) {
+ size_t num_words = words_by_hash_size[i];
+ if (num_words > 0) {
+ buckets[i] = (uint16_t)(total);
+ memcpy(&dict_words[total], &words_by_hash[i][0],
+ sizeof(dict_words[0]) * num_words);
+ total += num_words;
+ dict_words[total - 1].len |= 0x80;
+ } else {
+ buckets[i] = 0;
+ }
+ }
+
+ for (i = 0; i < NUM_HASH_BUCKETS; ++i) {
+ BrotliFree(m, words_by_hash[i]);
+ }
+ BrotliFree(m, words_by_hash);
+ BrotliFree(m, words_by_hash_size);
+ BrotliFree(m, words_by_hash_capacity);
+ BrotliTrieFree(m, &dedup);
+
+ return BROTLI_TRUE;
+}
+
+static void BuildDictionaryHashTable(uint16_t* hash_table_words,
+ uint8_t* hash_table_lengths, const BrotliDictionary* dict) {
+ int j, len;
+ /* The order of the loops is such that in case of collision, words with
+ shorter length are preferred, and in case of same length, words with
+ smaller index. There is only a single word per bucket. */
+ /* TODO: consider adding optional user-supplied frequency_map to use
+ for preferred words instead, this can make the encoder better for
+ quality 9 and below without affecting the decoder */
+ memset(hash_table_words, 0, sizeof(kStaticDictionaryHashWords));
+ memset(hash_table_lengths, 0, sizeof(kStaticDictionaryHashLengths));
+ for (len = SHARED_BROTLI_MAX_DICTIONARY_WORD_LENGTH;
+ len >= SHARED_BROTLI_MIN_DICTIONARY_WORD_LENGTH; --len) {
+ const size_t num_words = dict->size_bits_by_length[len] ?
+ (1u << dict->size_bits_by_length[len]) : 0;
+ for (j = (int)num_words - 1; j >= 0; --j) {
+ size_t offset = dict->offsets_by_length[len] +
+ (size_t)len * (size_t)j;
+ const uint8_t* word = &dict->data[offset];
+ const uint32_t key = Hash(word, 14);
+ int idx = (int)(key << 1) + (len < 8 ? 1 : 0);
+ BROTLI_DCHECK(idx < (int)NUM_HASH_BUCKETS);
+ hash_table_words[idx] = (uint16_t)j;
+ hash_table_lengths[idx] = (uint8_t)len;
+ }
+ }
+}
+
+static BROTLI_BOOL GenerateWordsHeavy(MemoryManager* m,
+ const BrotliTransforms* transforms,
+ BrotliEncoderDictionary* dict) {
+ int i, j, l;
+ for (j = (int)transforms->num_transforms - 1; j >= 0 ; --j) {
+ for (l = 0; l < 32; l++) {
+ int num = (int)((1u << dict->words->size_bits_by_length[l]) & ~1u);
+ for (i = 0; i < num; i++) {
+ uint8_t transformed[kTransformedBufferSize];
+ size_t size;
+ TransformedDictionaryWord(
+ (uint32_t)i, l, j, transforms, dict, transformed, &size);
+ if (size < 4) continue;
+ if (!BrotliTrieAdd(m, (uint8_t)l, (uint32_t)(i + num * j),
+ transformed, size, &dict->trie)) {
+ return BROTLI_FALSE;
+ }
+ }
+ }
+ }
+ return BROTLI_TRUE;
+}
+
+/* Computes cutoffTransformsCount (in count) and cutoffTransforms (in data) for
+ the custom transforms, where possible within the limits of the
+ cutoffTransforms encoding. The fast encoder uses this to do fast lookup for
+ transforms that remove the N last characters (OmitLast). */
+static void ComputeCutoffTransforms(
+ const BrotliTransforms* transforms,
+ uint32_t* count, uint64_t* data) {
+ int i;
+ /* The encoding in a 64-bit integer of transform N in the data is: (N << 2) +
+ ((cutoffTransforms >> (N * 6)) & 0x3F), so for example the identity
+ transform code must be 0-63, for N=1 the transform code must be 4-67, ...,
+ for N=9 it must be 36-99.
+ TODO: consider a simple flexible uint8_t[10] instead of the uint64_t
+ for the cutoff transforms, so that shared dictionaries can have the
+ OmitLast transforms anywhere without loss. */
+ *count = 0;
+ *data = 0;
+ for (i = 0; i < BROTLI_TRANSFORMS_MAX_CUT_OFF + 1; i++) {
+ int idx = transforms->cutOffTransforms[i];
+ if (idx == -1) break; /* Not found */
+ if (idx < (i << 2)) break; /* Too small for the encoding */
+ if (idx >= (i << 2) + 64) break; /* Too large for the encoding */
+ (*count)++;
+ *data |= (uint64_t)(((uint64_t)idx -
+ ((uint64_t)i << 2u)) << ((uint64_t)i * 6u));
+ }
+}
+
+static BROTLI_BOOL ComputeDictionary(MemoryManager* m, int quality,
+ const BrotliTransforms* transforms,
+ BrotliEncoderDictionary* current) {
+ int default_words = current->words == BrotliGetDictionary();
+ int default_transforms = transforms == BrotliGetTransforms();
+
+ if (default_words && default_transforms) {
+ /* hashes are already set to Brotli defaults */
+ return BROTLI_TRUE;
+ }
+
+ current->hash_table_data_words_ = (uint16_t*)BrotliAllocate(
+ m, sizeof(kStaticDictionaryHashWords));
+ current->hash_table_data_lengths_ = (uint8_t*)BrotliAllocate(
+ m, sizeof(kStaticDictionaryHashLengths));
+ if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
+ current->hash_table_words = current->hash_table_data_words_;
+ current->hash_table_lengths = current->hash_table_data_lengths_;
+
+ BuildDictionaryHashTable(current->hash_table_data_words_,
+ current->hash_table_data_lengths_, current->words);
+
+ ComputeCutoffTransforms(transforms,
+ &current->cutoffTransformsCount, &current->cutoffTransforms);
+
+ /* Only compute the data for slow encoder if the requested quality is high
+ enough to need it */
+ if (quality >= ZOPFLIFICATION_QUALITY) {
+ if (!BuildDictionaryLut(m, transforms, current)) return BROTLI_FALSE;
+
+ /* For the built-in Brotli transforms, there is a hard-coded function to
+ handle all transforms, but for custom transforms, we use the following
+ large hammer instead */
+ current->has_words_heavy = !default_transforms;
+ if (current->has_words_heavy) {
+ if (!GenerateWordsHeavy(m, transforms, current)) return BROTLI_FALSE;
+ }
+ }
+
+ return BROTLI_TRUE;
+}
+
+void BrotliInitSharedEncoderDictionary(SharedEncoderDictionary* dict) {
+ dict->magic = kSharedDictionaryMagic;
+
+ dict->compound.num_chunks = 0;
+ dict->compound.total_size = 0;
+ dict->compound.chunk_offsets[0] = 0;
+ dict->compound.num_prepared_instances_ = 0;
+
+ dict->contextual.context_based = 0;
+ dict->contextual.num_dictionaries = 1;
+ dict->contextual.instances_ = 0;
+ dict->contextual.num_instances_ = 1; /* The instance_ field */
+ dict->contextual.dict[0] = &dict->contextual.instance_;
+ InitEncoderDictionary(&dict->contextual.instance_);
+ dict->contextual.instance_.parent = &dict->contextual;
+
+ dict->max_quality = BROTLI_MAX_QUALITY;
+}
+
+/* TODO: make sure that tooling will warn user if not all the cutoff
+ transforms are available (for low-quality encoder). */
+static BROTLI_BOOL InitCustomSharedEncoderDictionary(
+ MemoryManager* m, const BrotliSharedDictionary* decoded_dict,
+ int quality, SharedEncoderDictionary* dict) {
+ ContextualEncoderDictionary* contextual;
+ CompoundDictionary* compound;
+ BrotliEncoderDictionary* instances;
+ int i;
+ BrotliInitSharedEncoderDictionary(dict);
+
+ contextual = &dict->contextual;
+ compound = &dict->compound;
+
+ for (i = 0; i < (int)decoded_dict->num_prefix; i++) {
+ PreparedDictionary* prepared = CreatePreparedDictionary(m,
+ decoded_dict->prefix[i], decoded_dict->prefix_size[i]);
+ AttachPreparedDictionary(compound, prepared);
+ /* remember for cleanup */
+ compound->prepared_instances_[
+ compound->num_prepared_instances_++] = prepared;
+ }
+
+ dict->max_quality = quality;
+ contextual->context_based = decoded_dict->context_based;
+ if (decoded_dict->context_based) {
+ memcpy(contextual->context_map, decoded_dict->context_map,
+ SHARED_BROTLI_NUM_DICTIONARY_CONTEXTS);
+ }
+
+ contextual->num_dictionaries = decoded_dict->num_dictionaries;
+ contextual->num_instances_ = decoded_dict->num_dictionaries;
+ if (contextual->num_instances_ == 1) {
+ instances = &contextual->instance_;
+ } else {
+ contextual->instances_ = (BrotliEncoderDictionary*)
+ BrotliAllocate(m, sizeof(*contextual->instances_) *
+ contextual->num_instances_);
+ if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
+ instances = contextual->instances_;
+ }
+ for (i = 0; i < (int)contextual->num_instances_; i++) {
+ BrotliEncoderDictionary* current = &instances[i];
+ InitEncoderDictionary(current);
+ current->parent = &dict->contextual;
+ if (decoded_dict->words[i] == BrotliGetDictionary()) {
+ current->words = BrotliGetDictionary();
+ } else {
+ current->words_instance_ = (BrotliDictionary*)BrotliAllocate(
+ m, sizeof(BrotliDictionary));
+ if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
+ *current->words_instance_ = *decoded_dict->words[i];
+ current->words = current->words_instance_;
+ }
+ current->num_transforms =
+ (uint32_t)decoded_dict->transforms[i]->num_transforms;
+ if (!ComputeDictionary(
+ m, quality, decoded_dict->transforms[i], current)) {
+ return BROTLI_FALSE;
+ }
+
+ contextual->dict[i] = current;
+ }
+
+ return BROTLI_TRUE; /* success */
+}
+
+BROTLI_BOOL BrotliInitCustomSharedEncoderDictionary(
+ MemoryManager* m, const uint8_t* encoded_dict, size_t size,
+ int quality, SharedEncoderDictionary* dict) {
+ BROTLI_BOOL success = BROTLI_FALSE;
+ BrotliSharedDictionary* decoded_dict = BrotliSharedDictionaryCreateInstance(
+ m->alloc_func, m->free_func, m->opaque);
+ if (!decoded_dict) { /* OOM */
+ return BROTLI_FALSE;
+ }
+ success = BrotliSharedDictionaryAttach(
+ decoded_dict, BROTLI_SHARED_DICTIONARY_SERIALIZED, size, encoded_dict);
+ if (success) {
+ success = InitCustomSharedEncoderDictionary(m,
+ decoded_dict, quality, dict);
+ }
+ BrotliSharedDictionaryDestroyInstance(decoded_dict);
+ return success;
+}
+
+void BrotliCleanupSharedEncoderDictionary(MemoryManager* m,
+ SharedEncoderDictionary* dict) {
+ size_t i;
+ for (i = 0; i < dict->compound.num_prepared_instances_; i++) {
+ DestroyPreparedDictionary(m,
+ (PreparedDictionary*)dict->compound.prepared_instances_[i]);
+ }
+ if (dict->contextual.num_instances_ == 1) {
+ BrotliDestroyEncoderDictionary(m, &dict->contextual.instance_);
+ } else if (dict->contextual.num_instances_ > 1) {
+ for (i = 0; i < dict->contextual.num_instances_; i++) {
+ BrotliDestroyEncoderDictionary(m, &dict->contextual.instances_[i]);
+ }
+ BrotliFree(m, dict->contextual.instances_);
+ }
+}
+
+ManagedDictionary* BrotliCreateManagedDictionary(
+ brotli_alloc_func alloc_func, brotli_free_func free_func, void* opaque) {
+ ManagedDictionary* result = (ManagedDictionary*)BrotliBootstrapAlloc(
+ sizeof(ManagedDictionary), alloc_func, free_func, opaque);
+ if (result == NULL) return NULL;
+
+ result->magic = kManagedDictionaryMagic;
+ BrotliInitMemoryManager(
+ &result->memory_manager_, alloc_func, free_func, opaque);
+ result->dictionary = NULL;
+
+ return result;
+}
+
+void BrotliDestroyManagedDictionary(ManagedDictionary* dictionary) {
+ if (!dictionary) return;
+ BrotliBootstrapFree(dictionary, &dictionary->memory_manager_);
}
#if defined(__cplusplus) || defined(c_plusplus)
diff --git a/c/enc/encoder_dict.h b/c/enc/encoder_dict.h
index a1c329f..dfeb796 100644
--- a/c/enc/encoder_dict.h
+++ b/c/enc/encoder_dict.h
@@ -9,13 +9,50 @@
#include "../common/dictionary.h"
#include "../common/platform.h"
+#include <brotli/shared_dictionary.h>
#include <brotli/types.h>
+#include "./compound_dictionary.h"
+#include "./memory.h"
#include "./static_dict_lut.h"
#if defined(__cplusplus) || defined(c_plusplus)
extern "C" {
#endif
+/*
+Dictionary hierarchy for Encoder:
+-SharedEncoderDictionary
+--CompoundDictionary
+---PreparedDictionary [up to 15x]
+ = prefix dictionary with precomputed hashes
+--ContextualEncoderDictionary
+---BrotliEncoderDictionary [up to 64x]
+ = for each context, precomputed static dictionary with words + transforms
+
+Dictionary hiearchy from common: similar, but without precomputed hashes
+-BrotliSharedDictionary
+--BrotliDictionary [up to 64x]
+--BrotliTransforms [up to 64x]
+--const uint8_t* prefix [up to 15x]: compound dictionaries
+*/
+
+typedef struct BrotliTrieNode {
+ uint8_t single; /* if 1, sub is a single node for c instead of 256 */
+ uint8_t c;
+ uint8_t len_; /* untransformed length */
+ uint32_t idx_; /* word index + num words * transform index */
+ uint32_t sub; /* index of sub node(s) in the pool */
+} BrotliTrieNode;
+
+typedef struct BrotliTrie {
+ BrotliTrieNode* pool;
+ size_t pool_capacity;
+ size_t pool_size;
+ BrotliTrieNode root;
+} BrotliTrie;
+
+BROTLI_INTERNAL const BrotliTrieNode* BrotliTrieSub(const BrotliTrie* trie,
+ const BrotliTrieNode* node, uint8_t c);
/* Dictionary data (words and transforms) for 1 possible context */
typedef struct BrotliEncoderDictionary {
const BrotliDictionary* words;
@@ -32,9 +69,83 @@ typedef struct BrotliEncoderDictionary {
/* from static_dict_lut.h, for slow encoder */
const uint16_t* buckets;
const DictWord* dict_words;
+ /* Heavy version, for use by slow encoder when there are custom transforms.
+ Contains every possible transformed dictionary word in a trie. It encodes
+ about as fast as the non-heavy encoder but consumes a lot of memory and
+ takes time to build. */
+ BrotliTrie trie;
+ BROTLI_BOOL has_words_heavy;
+
+ /* Reference to other dictionaries. */
+ const struct ContextualEncoderDictionary* parent;
+
+ /* Allocated memory, used only when not using the Brotli defaults */
+ uint16_t* hash_table_data_words_;
+ uint8_t* hash_table_data_lengths_;
+ size_t buckets_alloc_size_;
+ uint16_t* buckets_data_;
+ size_t dict_words_alloc_size_;
+ DictWord* dict_words_data_;
+ BrotliDictionary* words_instance_;
} BrotliEncoderDictionary;
-BROTLI_INTERNAL void BrotliInitEncoderDictionary(BrotliEncoderDictionary* dict);
+/* Dictionary data for all 64 contexts */
+typedef struct ContextualEncoderDictionary {
+ BROTLI_BOOL context_based;
+ uint8_t num_dictionaries;
+ uint8_t context_map[SHARED_BROTLI_NUM_DICTIONARY_CONTEXTS];
+ const BrotliEncoderDictionary* dict[SHARED_BROTLI_NUM_DICTIONARY_CONTEXTS];
+
+ /* If num_instances_ is 1, instance_ is used, else dynamic allocation with
+ instances_ is used. */
+ size_t num_instances_;
+ BrotliEncoderDictionary instance_;
+ BrotliEncoderDictionary* instances_;
+} ContextualEncoderDictionary;
+
+static const uint32_t kSharedDictionaryMagic = 0xDEBCEDE1;
+static const uint32_t kManagedDictionaryMagic = 0xDEBCEDE2;
+
+typedef struct SharedEncoderDictionary {
+ /* Magic value to distinguish this struct from PreparedDictionary for
+ certain external usages. */
+ uint32_t magic;
+
+ /* LZ77 prefix, compound dictionary */
+ CompoundDictionary compound;
+
+ /* Custom static dictionary (optionally context-based) */
+ ContextualEncoderDictionary contextual;
+
+ /* The maximum quality the dictionary was computed for */
+ int max_quality;
+} SharedEncoderDictionary;
+
+typedef struct ManagedDictionary {
+ uint32_t magic;
+ MemoryManager memory_manager_;
+ uint32_t* dictionary;
+} ManagedDictionary;
+
+/* Initializes to the brotli built-in dictionary */
+BROTLI_INTERNAL void BrotliInitSharedEncoderDictionary(
+ SharedEncoderDictionary* dict);
+
+/* Initializes to shared dictionary that will be parsed from
+ encoded_dict. Requires that you keep the encoded_dict buffer
+ around, parts of data will point to it. */
+BROTLI_INTERNAL BROTLI_BOOL BrotliInitCustomSharedEncoderDictionary(
+ MemoryManager* m, const uint8_t* encoded_dict, size_t size,
+ int quality, SharedEncoderDictionary* dict);
+
+BROTLI_INTERNAL void BrotliCleanupSharedEncoderDictionary(
+ MemoryManager* m, SharedEncoderDictionary* dict);
+
+BROTLI_INTERNAL ManagedDictionary* BrotliCreateManagedDictionary(
+ brotli_alloc_func alloc_func, brotli_free_func free_func, void* opaque);
+
+BROTLI_INTERNAL void BrotliDestroyManagedDictionary(
+ ManagedDictionary* dictionary);
#if defined(__cplusplus) || defined(c_plusplus)
} /* extern "C" */
diff --git a/c/enc/hash.h b/c/enc/hash.h
index 114b621..7a54be2 100644
--- a/c/enc/hash.h
+++ b/c/enc/hash.h
@@ -504,6 +504,206 @@ static BROTLI_INLINE void InitOrStitchToPreviousBlock(
}
}
+/* NB: when seamless dictionary-ring-buffer copies are implemented, don't forget
+ to add proper guards for non-zero-BROTLI_PARAM_STREAM_OFFSET. */
+static BROTLI_INLINE void FindCompoundDictionaryMatch(
+ const PreparedDictionary* self, const uint8_t* BROTLI_RESTRICT data,
+ const size_t ring_buffer_mask, const int* BROTLI_RESTRICT distance_cache,
+ const size_t cur_ix, const size_t max_length, const size_t distance_offset,
+ const size_t max_distance, HasherSearchResult* BROTLI_RESTRICT out) {
+ const uint32_t source_offset = self->source_offset;
+ const uint32_t source_size = self->source_size;
+ const size_t boundary = distance_offset - source_size;
+ const uint32_t hash_bits = self->hash_bits;
+ const uint32_t bucket_bits = self->bucket_bits;
+ const uint32_t slot_bits = self->slot_bits;
+
+ const uint32_t hash_shift = 64u - bucket_bits;
+ const uint32_t slot_mask = (~((uint32_t)0U)) >> (32 - slot_bits);
+ const uint64_t hash_mask = (~((uint64_t)0U)) >> (64 - hash_bits);
+
+ const uint32_t* slot_offsets = (uint32_t*)(&self[1]);
+ const uint16_t* heads = (uint16_t*)(&slot_offsets[1u << slot_bits]);
+ const uint32_t* items = (uint32_t*)(&heads[1u << bucket_bits]);
+ const uint8_t* source = (uint8_t*)(&items[source_offset]);
+
+ const size_t cur_ix_masked = cur_ix & ring_buffer_mask;
+ score_t best_score = out->score;
+ size_t best_len = out->len;
+ size_t i;
+ const uint64_t h =
+ (BROTLI_UNALIGNED_LOAD64LE(&data[cur_ix_masked]) & hash_mask) *
+ kPreparedDictionaryHashMul64Long;
+ const uint32_t key = (uint32_t)(h >> hash_shift);
+ const uint32_t slot = key & slot_mask;
+ const uint32_t head = heads[key];
+ const uint32_t* BROTLI_RESTRICT chain = &items[slot_offsets[slot] + head];
+ uint32_t item = (head == 0xFFFF) ? 1 : 0;
+ for (i = 0; i < 4; ++i) {
+ const size_t distance = (size_t)distance_cache[i];
+ size_t offset;
+ size_t limit;
+ size_t len;
+ if (distance <= boundary || distance > distance_offset) continue;
+ offset = distance_offset - distance;
+ limit = source_size - offset;
+ limit = limit > max_length ? max_length : limit;
+ len = FindMatchLengthWithLimit(&source[offset], &data[cur_ix_masked],
+ limit);
+ if (len >= 2) {
+ score_t score = BackwardReferenceScoreUsingLastDistance(len);
+ if (best_score < score) {
+ if (i != 0) score -= BackwardReferencePenaltyUsingLastDistance(i);
+ if (best_score < score) {
+ best_score = score;
+ if (len > best_len) best_len = len;
+ out->len = len;
+ out->len_code_delta = 0;
+ out->distance = distance;
+ out->score = best_score;
+ }
+ }
+ }
+ }
+ while (item == 0) {
+ size_t offset;
+ size_t distance;
+ size_t limit;
+ item = *chain;
+ chain++;
+ offset = item & 0x7FFFFFFF;
+ item &= 0x80000000;
+ distance = distance_offset - offset;
+ limit = source_size - offset;
+ limit = (limit > max_length) ? max_length : limit;
+ if (distance > max_distance) continue;
+ if (cur_ix_masked + best_len > ring_buffer_mask ||
+ best_len >= limit ||
+ data[cur_ix_masked + best_len] != source[offset + best_len]) {
+ continue;
+ }
+ {
+ const size_t len = FindMatchLengthWithLimit(&source[offset],
+ &data[cur_ix_masked],
+ limit);
+ if (len >= 4) {
+ score_t score = BackwardReferenceScore(len, distance);
+ if (best_score < score) {
+ best_score = score;
+ best_len = len;
+ out->len = best_len;
+ out->len_code_delta = 0;
+ out->distance = distance;
+ out->score = best_score;
+ }
+ }
+ }
+ }
+}
+
+/* NB: when seamless dictionary-ring-buffer copies are implemented, don't forget
+ to add proper guards for non-zero-BROTLI_PARAM_STREAM_OFFSET. */
+static BROTLI_INLINE size_t FindAllCompoundDictionaryMatches(
+ const PreparedDictionary* self, const uint8_t* BROTLI_RESTRICT data,
+ const size_t ring_buffer_mask, const size_t cur_ix, const size_t min_length,
+ const size_t max_length, const size_t distance_offset,
+ const size_t max_distance, BackwardMatch* matches, size_t match_limit) {
+ const uint32_t source_offset = self->source_offset;
+ const uint32_t source_size = self->source_size;
+ const uint32_t hash_bits = self->hash_bits;
+ const uint32_t bucket_bits = self->bucket_bits;
+ const uint32_t slot_bits = self->slot_bits;
+
+ const uint32_t hash_shift = 64u - bucket_bits;
+ const uint32_t slot_mask = (~((uint32_t)0U)) >> (32 - slot_bits);
+ const uint64_t hash_mask = (~((uint64_t)0U)) >> (64 - hash_bits);
+
+ const uint32_t* slot_offsets = (uint32_t*)(&self[1]);
+ const uint16_t* heads = (uint16_t*)(&slot_offsets[1u << slot_bits]);
+ const uint32_t* items = (uint32_t*)(&heads[1u << bucket_bits]);
+ const uint8_t* source = (uint8_t*)(&items[source_offset]);
+
+ const size_t cur_ix_masked = cur_ix & ring_buffer_mask;
+ size_t best_len = min_length;
+ const uint64_t h =
+ (BROTLI_UNALIGNED_LOAD64LE(&data[cur_ix_masked]) & hash_mask) *
+ kPreparedDictionaryHashMul64Long;
+ const uint32_t key = (uint32_t)(h >> hash_shift);
+ const uint32_t slot = key & slot_mask;
+ const uint32_t head = heads[key];
+ const uint32_t* BROTLI_RESTRICT chain = &items[slot_offsets[slot] + head];
+ uint32_t item = (head == 0xFFFF) ? 1 : 0;
+ size_t found = 0;
+ while (item == 0) {
+ size_t offset;
+ size_t distance;
+ size_t limit;
+ size_t len;
+ item = *chain;
+ chain++;
+ offset = item & 0x7FFFFFFF;
+ item &= 0x80000000;
+ distance = distance_offset - offset;
+ limit = source_size - offset;
+ limit = (limit > max_length) ? max_length : limit;
+ if (distance > max_distance) continue;
+ if (cur_ix_masked + best_len > ring_buffer_mask ||
+ best_len >= limit ||
+ data[cur_ix_masked + best_len] != source[offset + best_len]) {
+ continue;
+ }
+ len = FindMatchLengthWithLimit(
+ &source[offset], &data[cur_ix_masked], limit);
+ if (len > best_len) {
+ best_len = len;
+ InitBackwardMatch(matches++, distance, len);
+ found++;
+ if (found == match_limit) break;
+ }
+ }
+ return found;
+}
+
+static BROTLI_INLINE void LookupCompoundDictionaryMatch(
+ const CompoundDictionary* addon, const uint8_t* BROTLI_RESTRICT data,
+ const size_t ring_buffer_mask, const int* BROTLI_RESTRICT distance_cache,
+ const size_t cur_ix, const size_t max_length,
+ const size_t max_ring_buffer_distance, const size_t max_distance,
+ HasherSearchResult* sr) {
+ size_t base_offset = max_ring_buffer_distance + 1 + addon->total_size - 1;
+ size_t d;
+ for (d = 0; d < addon->num_chunks; ++d) {
+ /* Only one prepared dictionary type is currently supported. */
+ FindCompoundDictionaryMatch(
+ (const PreparedDictionary*)addon->chunks[d], data, ring_buffer_mask,
+ distance_cache, cur_ix, max_length,
+ base_offset - addon->chunk_offsets[d], max_distance, sr);
+ }
+}
+
+static BROTLI_INLINE size_t LookupAllCompoundDictionaryMatches(
+ const CompoundDictionary* addon, const uint8_t* BROTLI_RESTRICT data,
+ const size_t ring_buffer_mask, const size_t cur_ix, size_t min_length,
+ const size_t max_length, const size_t max_ring_buffer_distance,
+ const size_t max_distance, BackwardMatch* matches,
+ size_t match_limit) {
+ size_t base_offset = max_ring_buffer_distance + 1 + addon->total_size - 1;
+ size_t d;
+ size_t total_found = 0;
+ for (d = 0; d < addon->num_chunks; ++d) {
+ /* Only one prepared dictionary type is currently supported. */
+ total_found += FindAllCompoundDictionaryMatches(
+ (const PreparedDictionary*)addon->chunks[d], data, ring_buffer_mask,
+ cur_ix, min_length, max_length, base_offset - addon->chunk_offsets[d],
+ max_distance, matches + total_found, match_limit - total_found);
+ if (total_found == match_limit) break;
+ if (total_found > 0) {
+ min_length = BackwardMatchLength(&matches[total_found - 1]);
+ }
+ }
+ return total_found;
+}
+
#if defined(__cplusplus) || defined(c_plusplus)
} /* extern "C" */
#endif
diff --git a/c/enc/params.h b/c/enc/params.h
index 54a7f00..c4c0642 100644
--- a/c/enc/params.h
+++ b/c/enc/params.h
@@ -40,7 +40,8 @@ typedef struct BrotliEncoderParams {
BROTLI_BOOL large_window;
BrotliHasherParams hasher;
BrotliDistanceParams dist;
- BrotliEncoderDictionary dictionary;
+ /* TODO(eustas): rename to BrotliShared... */
+ SharedEncoderDictionary dictionary;
} BrotliEncoderParams;
#endif /* BROTLI_ENC_PARAMS_H_ */
diff --git a/c/enc/static_dict.c b/c/enc/static_dict.c
index 7299ab7..c0e0ec4 100644
--- a/c/enc/static_dict.c
+++ b/c/enc/static_dict.c
@@ -74,10 +74,25 @@ static BROTLI_INLINE BROTLI_BOOL IsMatch(const BrotliDictionary* dictionary,
}
}
-BROTLI_BOOL BrotliFindAllStaticDictionaryMatches(
+/* Finds matches for a single static dictionary */
+static BROTLI_BOOL BrotliFindAllStaticDictionaryMatchesFor(
const BrotliEncoderDictionary* dictionary, const uint8_t* data,
size_t min_length, size_t max_length, uint32_t* matches) {
BROTLI_BOOL has_found_match = BROTLI_FALSE;
+ if (dictionary->has_words_heavy) {
+ const BrotliTrieNode* node = &dictionary->trie.root;
+ size_t l = 0;
+ while (node && l < max_length) {
+ uint8_t c;
+ if (l >= min_length && node->len_) {
+ AddMatch(node->idx_, l, node->len_, matches);
+ has_found_match = BROTLI_TRUE;
+ }
+ c = data[l++];
+ node = BrotliTrieSub(&dictionary->trie, node, c);
+ }
+ return has_found_match;
+ }
{
size_t offset = dictionary->buckets[Hash(data)];
BROTLI_BOOL end = !offset;
@@ -481,6 +496,45 @@ BROTLI_BOOL BrotliFindAllStaticDictionaryMatches(
return has_found_match;
}
+/* Finds matches for one or more dictionaries, if multiple are present
+ in the contextual dictionary */
+BROTLI_BOOL BrotliFindAllStaticDictionaryMatches(
+ const BrotliEncoderDictionary* dictionary, const uint8_t* data,
+ size_t min_length, size_t max_length, uint32_t* matches) {
+ BROTLI_BOOL has_found_match =
+ BrotliFindAllStaticDictionaryMatchesFor(
+ dictionary, data, min_length, max_length, matches);
+
+ if (!!dictionary->parent && dictionary->parent->num_dictionaries > 1) {
+ uint32_t matches2[BROTLI_MAX_STATIC_DICTIONARY_MATCH_LEN + 1];
+ int l;
+ const BrotliEncoderDictionary* dictionary2 = dictionary->parent->dict[0];
+ if (dictionary2 == dictionary) {
+ dictionary2 = dictionary->parent->dict[1];
+ }
+
+ for (l = 0; l < BROTLI_MAX_STATIC_DICTIONARY_MATCH_LEN + 1; l++) {
+ matches2[l] = kInvalidMatch;
+ }
+
+ has_found_match |= BrotliFindAllStaticDictionaryMatchesFor(
+ dictionary2, data, min_length, max_length, matches2);
+
+ for (l = 0; l < BROTLI_MAX_STATIC_DICTIONARY_MATCH_LEN + 1; l++) {
+ if (matches2[l] != kInvalidMatch) {
+ uint32_t dist = (uint32_t)(matches2[l] >> 5);
+ uint32_t len_code = matches2[l] & 31;
+ uint32_t skipdist = (uint32_t)((uint32_t)(1 << dictionary->words->
+ size_bits_by_length[len_code]) & ~1u) *
+ (uint32_t)dictionary->num_transforms;
+ /* TODO: check for dist overflow */
+ dist += skipdist;
+ AddMatch(dist, (size_t)l, len_code, matches);
+ }
+ }
+ }
+ return has_found_match;
+}
#if defined(__cplusplus) || defined(c_plusplus)
} /* extern "C" */
#endif
diff --git a/c/include/brotli/decode.h b/c/include/brotli/decode.h
index 0f5c8f9..98b3c7b 100644
--- a/c/include/brotli/decode.h
+++ b/c/include/brotli/decode.h
@@ -13,6 +13,7 @@
#define BROTLI_DEC_DECODE_H_
#include <brotli/port.h>
+#include <brotli/shared_dictionary.h>
#include <brotli/types.h>
#if defined(__cplusplus) || defined(c_plusplus)
@@ -85,8 +86,9 @@ typedef enum {
BROTLI_ERROR_CODE(_ERROR_FORMAT_, PADDING_2, -15) SEPARATOR \
BROTLI_ERROR_CODE(_ERROR_FORMAT_, DISTANCE, -16) SEPARATOR \
\
- /* -17..-18 codes are reserved */ \
+ /* -17 code is reserved */ \
\
+ BROTLI_ERROR_CODE(_ERROR_, COMPOUND_DICTIONARY, -18) SEPARATOR \
BROTLI_ERROR_CODE(_ERROR_, DICTIONARY_NOT_SET, -19) SEPARATOR \
BROTLI_ERROR_CODE(_ERROR_, INVALID_ARGUMENTS, -20) SEPARATOR \
\
@@ -155,6 +157,28 @@ BROTLI_DEC_API BROTLI_BOOL BrotliDecoderSetParameter(
BrotliDecoderState* state, BrotliDecoderParameter param, uint32_t value);
/**
+ * Adds LZ77 prefix dictionary, adds or replaces built-in static dictionary and
+ * transforms.
+ *
+ * Attached dictionary ownership is not transferred.
+ * Data provided to this method should be kept accessible until
+ * decoding is finished and decoder instance is destroyed.
+ *
+ * @note Dictionaries could NOT be attached after actual decoding is started.
+ *
+ * @param state decoder instance
+ * @param type dictionary data format
+ * @param data_size length of memory region pointed by @p data
+ * @param data dictionary data in format corresponding to @p type
+ * @returns ::BROTLI_FALSE if dictionary is corrupted,
+ * or dictionary count limit is reached
+ * @returns ::BROTLI_TRUE if dictionary is accepted / attached
+ */
+BROTLI_DEC_API BROTLI_BOOL BrotliDecoderAttachDictionary(
+ BrotliDecoderState* state, BrotliSharedDictionaryType type,
+ size_t data_size, const uint8_t data[BROTLI_ARRAY_PARAM(data_size)]);
+
+/**
* Creates an instance of ::BrotliDecoderState and initializes it.
*
* The instance can be used once for decoding and should then be destroyed with
diff --git a/c/include/brotli/encode.h b/c/include/brotli/encode.h
index b2774cb..72668e1 100644
--- a/c/include/brotli/encode.h
+++ b/c/include/brotli/encode.h
@@ -13,6 +13,7 @@
#define BROTLI_ENC_ENCODE_H_
#include <brotli/port.h>
+#include <brotli/shared_dictionary.h>
#include <brotli/types.h>
#if defined(__cplusplus) || defined(c_plusplus)
@@ -269,6 +270,51 @@ BROTLI_ENC_API BrotliEncoderState* BrotliEncoderCreateInstance(
*/
BROTLI_ENC_API void BrotliEncoderDestroyInstance(BrotliEncoderState* state);
+/* Opaque type for pointer to different possible internal structures containing
+ dictionary prepared for the encoder */
+typedef struct BrotliEncoderPreparedDictionaryStruct
+ BrotliEncoderPreparedDictionary;
+
+/**
+ * Prepares a shared dictionary from the given file format for the encoder.
+ *
+ * @p alloc_func and @p free_func @b MUST be both zero or both non-zero. In the
+ * case they are both zero, default memory allocators are used. @p opaque is
+ * passed to @p alloc_func and @p free_func when they are called. @p free_func
+ * has to return without doing anything when asked to free a NULL pointer.
+ *
+ * @param type type of dictionary stored in data
+ * @param data_size size of @p data buffer
+ * @param data pointer to the dictionary data
+ * @param quality the maximum Brotli quality to prepare the dictionary for,
+ * use BROTLI_MAX_QUALITY by default
+ * @param alloc_func custom memory allocation function
+ * @param free_func custom memory free function
+ * @param opaque custom memory manager handle
+ */
+BROTLI_ENC_API BrotliEncoderPreparedDictionary*
+BrotliEncoderPrepareDictionary(BrotliSharedDictionaryType type,
+ size_t data_size, const uint8_t data[BROTLI_ARRAY_PARAM(data_size)],
+ int quality,
+ brotli_alloc_func alloc_func, brotli_free_func free_func, void* opaque);
+
+BROTLI_ENC_API void BrotliEncoderDestroyPreparedDictionary(
+ BrotliEncoderPreparedDictionary* dictionary);
+
+/**
+ * Attaches a prepared dictionary of any type to the encoder. Can be used
+ * multiple times to attach multiple dictionaries. The dictionary type was
+ * determined by BrotliEncoderPrepareDictionary. Multiple raw prefix
+ * dictionaries and/or max 1 serialized dictionary with custom words can be
+ * attached.
+ *
+ * @returns ::BROTLI_FALSE in case of error
+ * @returns ::BROTLI_TRUE otherwise
+ */
+BROTLI_ENC_API BROTLI_BOOL BrotliEncoderAttachPreparedDictionary(
+ BrotliEncoderState* state,
+ const BrotliEncoderPreparedDictionary* dictionary);
+
/**
* Calculates the output size bound for the given @p input_size.
*
diff --git a/c/include/brotli/shared_dictionary.h b/c/include/brotli/shared_dictionary.h
new file mode 100644
index 0000000..ceb6cf1
--- /dev/null
+++ b/c/include/brotli/shared_dictionary.h
@@ -0,0 +1,97 @@
+/* Copyright 2017 Google Inc. All Rights Reserved.
+
+ Distributed under MIT license.
+ See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* (Opaque) Shared Dictionary definition and utilities. */
+
+#ifndef BROTLI_COMMON_SHARED_DICTIONARY_H_
+#define BROTLI_COMMON_SHARED_DICTIONARY_H_
+
+#include <brotli/port.h>
+#include <brotli/types.h>
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+#define SHARED_BROTLI_MIN_DICTIONARY_WORD_LENGTH 4
+#define SHARED_BROTLI_MAX_DICTIONARY_WORD_LENGTH 31
+#define SHARED_BROTLI_NUM_DICTIONARY_CONTEXTS 64
+#define SHARED_BROTLI_MAX_COMPOUND_DICTS 15
+
+/**
+ * Opaque structure that holds shared dictionary data.
+ *
+ * Allocated and initialized with ::BrotliSharedDictionaryCreateInstance.
+ * Cleaned up and deallocated with ::BrotliSharedDictionaryDestroyInstance.
+ */
+typedef struct BrotliSharedDictionaryStruct BrotliSharedDictionary;
+
+/**
+ * Input data type for ::BrotliSharedDictionaryAttach.
+ */
+typedef enum BrotliSharedDictionaryType {
+ /** Raw LZ77 prefix dictionary. */
+ BROTLI_SHARED_DICTIONARY_RAW = 0,
+ /** Serialized shared dictionary. */
+ BROTLI_SHARED_DICTIONARY_SERIALIZED = 1
+} BrotliSharedDictionaryType;
+
+/**
+ * Creates an instance of ::BrotliSharedDictionary.
+ *
+ * Fresh instance has default word dictionary and transforms
+ * and no LZ77 prefix dictionary.
+ *
+ * @p alloc_func and @p free_func @b MUST be both zero or both non-zero. In the
+ * case they are both zero, default memory allocators are used. @p opaque is
+ * passed to @p alloc_func and @p free_func when they are called. @p free_func
+ * has to return without doing anything when asked to free a NULL pointer.
+ *
+ * @param alloc_func custom memory allocation function
+ * @param free_func custom memory free function
+ * @param opaque custom memory manager handle
+ * @returns @c 0 if instance can not be allocated or initialized
+ * @returns pointer to initialized ::BrotliSharedDictionary otherwise
+ */
+BROTLI_COMMON_API BrotliSharedDictionary* BrotliSharedDictionaryCreateInstance(
+ brotli_alloc_func alloc_func, brotli_free_func free_func, void* opaque);
+
+/**
+ * Deinitializes and frees ::BrotliSharedDictionary instance.
+ *
+ * @param dict shared dictionary instance to be cleaned up and deallocated
+ */
+BROTLI_COMMON_API void BrotliSharedDictionaryDestroyInstance(
+ BrotliSharedDictionary* dict);
+
+/**
+ * Attaches dictionary to a given instance of ::BrotliSharedDictionary.
+ *
+ * Dictionary to be attached is represented in a serialized format as a region
+ * of memory.
+ *
+ * Provided data it partially referenced by a resulting (compound) dictionary,
+ * and should be kept untouched, while at least one compound dictionary uses it.
+ * This way memory overhead is kept minimal by the cost of additional resource
+ * management.
+ *
+ * @param dict dictionary to extend
+ * @param type type of dictionary to attach
+ * @param data_size size of @p data
+ * @param data serialized dictionary of type @p type, with at least @p data_size
+ * addressable bytes
+ * @returns ::BROTLI_TRUE if provided dictionary is successfully attached
+ * @returns ::BROTLI_FALSE otherwise
+ */
+BROTLI_COMMON_API BROTLI_BOOL BrotliSharedDictionaryAttach(
+ BrotliSharedDictionary* dict, BrotliSharedDictionaryType type,
+ size_t data_size, const uint8_t data[BROTLI_ARRAY_PARAM(data_size)]);
+
+#if defined(__cplusplus) || defined(c_plusplus)
+} /* extern "C" */
+#endif
+
+#endif /* BROTLI_COMMON_SHARED_DICTIONARY_H_ */
diff --git a/c/tools/brotli.c b/c/tools/brotli.c
index 77752b2..049371c 100644
--- a/c/tools/brotli.c
+++ b/c/tools/brotli.c
@@ -100,6 +100,7 @@ typedef struct {
BROTLI_BOOL decompress;
BROTLI_BOOL large_window;
const char* output_path;
+ const char* dictionary_path;
const char* suffix;
int not_input_indices[MAX_OPTIONS];
size_t longest_path_len;
@@ -108,6 +109,9 @@ typedef struct {
/* Inner state */
int argc;
char** argv;
+ uint8_t* dictionary;
+ size_t dictionary_size;
+ BrotliEncoderPreparedDictionary* prepared_dictionary;
char* modified_path; /* Storage for path with appended / cut suffix */
int iterator;
int ignore;
@@ -359,6 +363,12 @@ static Command ParseParams(Context* params) {
params->lgwin, BROTLI_MIN_WINDOW_BITS);
return COMMAND_INVALID;
}
+ } else if (c == 'D') {
+ if (params->dictionary_path) {
+ fprintf(stderr, "dictionary path already set\n");
+ return COMMAND_INVALID;
+ }
+ params->dictionary_path = argv[i];
} else if (c == 'S') {
if (suffix_set) {
fprintf(stderr, "suffix already set\n");
@@ -446,7 +456,13 @@ static Command ParseParams(Context* params) {
}
key_len = (size_t)(value - arg);
value++;
- if (strncmp("lgwin", arg, key_len) == 0) {
+ if (strncmp("dictionary", arg, key_len) == 0) {
+ if (params->dictionary_path) {
+ fprintf(stderr, "dictionary path already set\n");
+ return COMMAND_INVALID;
+ }
+ params->dictionary_path = value;
+ } else if (strncmp("lgwin", arg, key_len) == 0) {
if (lgwin_set) {
fprintf(stderr, "lgwin parameter already set\n");
return COMMAND_INVALID;
@@ -575,6 +591,8 @@ static void PrintHelp(const char* name, BROTLI_BOOL error) {
" decodable with regular brotli decoders\n",
BROTLI_MIN_WINDOW_BITS, BROTLI_LARGE_MAX_WINDOW_BITS);
fprintf(media,
+" -D FILE, --dictionary=FILE use FILE as raw (LZ77) dictionary\n");
+ fprintf(media,
" -S SUF, --suffix=SUF output file suffix (default:'%s')\n",
DEFAULT_SUFFIX);
fprintf(media,
@@ -678,6 +696,69 @@ static void CopyStat(const char* input_path, const char* output_path) {
}
}
+/* Result ownership is passed to caller.
+ |*dictionary_size| is set to resulting buffer size. */
+static BROTLI_BOOL ReadDictionary(Context* context, Command command) {
+ static const int kMaxDictionarySize =
+ BROTLI_MAX_DISTANCE - BROTLI_MAX_BACKWARD_LIMIT(24);
+ FILE* f;
+ int64_t file_size_64;
+ uint8_t* buffer;
+ size_t bytes_read;
+
+ if (context->dictionary_path == NULL) return BROTLI_TRUE;
+ f = fopen(context->dictionary_path, "rb");
+ if (f == NULL) {
+ fprintf(stderr, "failed to open dictionary file [%s]: %s\n",
+ PrintablePath(context->dictionary_path), strerror(errno));
+ return BROTLI_FALSE;
+ }
+
+ file_size_64 = FileSize(context->dictionary_path);
+ if (file_size_64 == -1) {
+ fprintf(stderr, "could not get size of dictionary file [%s]",
+ PrintablePath(context->dictionary_path));
+ fclose(f);
+ return BROTLI_FALSE;
+ }
+
+ if (file_size_64 > kMaxDictionarySize) {
+ fprintf(stderr, "dictionary [%s] is larger than maximum allowed: %d\n",
+ PrintablePath(context->dictionary_path), kMaxDictionarySize);
+ fclose(f);
+ return BROTLI_FALSE;
+ }
+ context->dictionary_size = (size_t)file_size_64;
+
+ buffer = (uint8_t*)malloc(context->dictionary_size);
+ if (!buffer) {
+ fprintf(stderr, "could not read dictionary: out of memory\n");
+ fclose(f);
+ return BROTLI_FALSE;
+ }
+ bytes_read = fread(buffer, sizeof(uint8_t), context->dictionary_size, f);
+ if (bytes_read != context->dictionary_size) {
+ free(buffer);
+ fprintf(stderr, "failed to read dictionary [%s]: %s\n",
+ PrintablePath(context->dictionary_path), strerror(errno));
+ fclose(f);
+ return BROTLI_FALSE;
+ }
+ fclose(f);
+ context->dictionary = buffer;
+ if (command == COMMAND_COMPRESS) {
+ context->prepared_dictionary = BrotliEncoderPrepareDictionary(
+ BROTLI_SHARED_DICTIONARY_RAW, context->dictionary_size,
+ context->dictionary, BROTLI_MAX_QUALITY, NULL, NULL, NULL);
+ if (context->prepared_dictionary == NULL) {
+ fprintf(stderr, "failed to prepare dictionary [%s]\n",
+ PrintablePath(context->dictionary_path));
+ return BROTLI_FALSE;
+ }
+ }
+ return BROTLI_TRUE;
+}
+
static BROTLI_BOOL NextFile(Context* context) {
const char* arg;
size_t arg_len;
@@ -933,6 +1014,10 @@ static BROTLI_BOOL DecompressFiles(Context* context) {
fragmentation (new builds decode streams that old builds don't),
it is better from used experience perspective. */
BrotliDecoderSetParameter(s, BROTLI_DECODER_PARAM_LARGE_WINDOW, 1u);
+ if (context->dictionary) {
+ BrotliDecoderAttachDictionary(s, BROTLI_SHARED_DICTIONARY_RAW,
+ context->dictionary_size, context->dictionary);
+ }
is_ok = OpenFiles(context);
if (is_ok && !context->current_input_path &&
!context->force_overwrite && isatty(STDIN_FILENO)) {
@@ -1020,6 +1105,9 @@ static BROTLI_BOOL CompressFiles(Context* context) {
(uint32_t)context->input_file_length : (1u << 30);
BrotliEncoderSetParameter(s, BROTLI_PARAM_SIZE_HINT, size_hint);
}
+ if (context->dictionary) {
+ BrotliEncoderAttachPreparedDictionary(s, context->prepared_dictionary);
+ }
is_ok = OpenFiles(context);
if (is_ok && !context->current_output_path &&
!context->force_overwrite && isatty(STDOUT_FILENO)) {
@@ -1051,6 +1139,7 @@ int main(int argc, char** argv) {
context.decompress = BROTLI_FALSE;
context.large_window = BROTLI_FALSE;
context.output_path = NULL;
+ context.dictionary_path = NULL;
context.suffix = DEFAULT_SUFFIX;
for (i = 0; i < MAX_OPTIONS; ++i) context.not_input_indices[i] = 0;
context.longest_path_len = 1;
@@ -1058,6 +1147,9 @@ int main(int argc, char** argv) {
context.argc = argc;
context.argv = argv;
+ context.dictionary = NULL;
+ context.dictionary_size = 0;
+ context.prepared_dictionary = NULL;
context.modified_path = NULL;
context.iterator = 0;
context.ignore = 0;
@@ -1072,6 +1164,7 @@ int main(int argc, char** argv) {
if (command == COMMAND_COMPRESS || command == COMMAND_DECOMPRESS ||
command == COMMAND_TEST_INTEGRITY) {
+ if (!ReadDictionary(&context, command)) is_ok = BROTLI_FALSE;
if (is_ok) {
size_t modified_path_len =
context.longest_path_len + strlen(context.suffix) + 1;
@@ -1116,6 +1209,8 @@ int main(int argc, char** argv) {
if (context.iterator_error) is_ok = BROTLI_FALSE;
+ BrotliEncoderDestroyPreparedDictionary(context.prepared_dictionary);
+ free(context.dictionary);
free(context.modified_path);
free(context.buffer);
diff --git a/c/tools/brotli.md b/c/tools/brotli.md
index c029869..86253b8 100644
--- a/c/tools/brotli.md
+++ b/c/tools/brotli.md
@@ -84,6 +84,9 @@ OPTIONS
`(2**NUM - 16)`; 0 lets compressor decide over the optimal value; bigger
windows size improve density; decoder might require up to window size
memory to operate
+* `-D FILE`, `--dictionary=FILE`:
+ use FILE as raw (LZ77) dictionary; same dictionary MUST be used both for
+ compression and decompression
* `-S SUF`, `--suffix=SUF`:
output file suffix (default: `.br`)
* `-V`, `--version`:
diff --git a/java/org/brotli/wrapper/dec/BUILD b/java/org/brotli/wrapper/dec/BUILD
index 4fd51d0..6190504 100644
--- a/java/org/brotli/wrapper/dec/BUILD
+++ b/java/org/brotli/wrapper/dec/BUILD
@@ -24,6 +24,7 @@ java_library(
":dec",
"//org/brotli/integration:brotli_jni_test_base",
"//org/brotli/integration:bundle_helper",
+ "//org/brotli/wrapper/enc",
"@maven//:junit_junit",
],
)
@@ -82,3 +83,16 @@ java_test(
test_class = "org.brotli.wrapper.dec.DecoderTest",
runtime_deps = [":test_lib"],
)
+
+java_test(
+ name = "CornerCasesTest",
+ size = "large",
+ data = [
+ ":brotli_jni", # Bazel JNI workaround
+ ],
+ jvm_flags = [
+ "-DBROTLI_JNI_LIBRARY=$(location :brotli_jni)",
+ ],
+ test_class = "org.brotli.wrapper.dec.CornerCasesTest",
+ runtime_deps = [":test_lib"],
+)
diff --git a/java/org/brotli/wrapper/dec/BrotliDecoderChannel.java b/java/org/brotli/wrapper/dec/BrotliDecoderChannel.java
index 6dfe8f2..30f6f1d 100644
--- a/java/org/brotli/wrapper/dec/BrotliDecoderChannel.java
+++ b/java/org/brotli/wrapper/dec/BrotliDecoderChannel.java
@@ -36,6 +36,11 @@ public class BrotliDecoderChannel extends Decoder implements ReadableByteChannel
}
@Override
+ public void attachDictionary(ByteBuffer dictionary) throws IOException {
+ super.attachDictionary(dictionary);
+ }
+
+ @Override
public boolean isOpen() {
synchronized (mutex) {
return !closed;
diff --git a/java/org/brotli/wrapper/dec/BrotliInputStream.java b/java/org/brotli/wrapper/dec/BrotliInputStream.java
index 6e2e6e5..13a8880 100644
--- a/java/org/brotli/wrapper/dec/BrotliInputStream.java
+++ b/java/org/brotli/wrapper/dec/BrotliInputStream.java
@@ -8,6 +8,7 @@ package org.brotli.wrapper.dec;
import java.io.IOException;
import java.io.InputStream;
+import java.nio.ByteBuffer;
import java.nio.channels.Channels;
/**
@@ -34,6 +35,10 @@ public class BrotliInputStream extends InputStream {
this(source, DEFAULT_BUFFER_SIZE);
}
+ public void attachDictionary(ByteBuffer dictionary) throws IOException {
+ decoder.attachDictionary(dictionary);
+ }
+
public void enableEagerOutput() {
decoder.enableEagerOutput();
}
diff --git a/java/org/brotli/wrapper/dec/CornerCasesTest.java b/java/org/brotli/wrapper/dec/CornerCasesTest.java
new file mode 100644
index 0000000..ab02f23
--- /dev/null
+++ b/java/org/brotli/wrapper/dec/CornerCasesTest.java
@@ -0,0 +1,33 @@
+/* Copyright 2021 Google Inc. All Rights Reserved.
+
+ Distributed under MIT license.
+ See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+package org.brotli.wrapper.dec;
+
+import static org.junit.Assert.assertEquals;
+
+import org.brotli.integration.BrotliJniTestBase;
+import org.brotli.wrapper.enc.Encoder;
+import java.io.IOException;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.JUnit4;
+
+/** Tests for {@link org.brotli.wrapper.enc.Encoder}. */
+@RunWith(JUnit4.class)
+public class CornerCasesTest extends BrotliJniTestBase {
+ @Test
+ public void testPowerOfTwoInput() throws IOException {
+ // 24 == max window bits to ensure ring-buffer size is not greater than input.
+ int len = 1 << 24;
+ byte[] data = new byte[len];
+ for (int i = 0; i < len; ++i) {
+ data[i] = (byte) Integer.bitCount(i);
+ }
+ byte[] encoded = Encoder.compress(data);
+ byte[] decoded = Decoder.decompress(encoded);
+ assertEquals(len, decoded.length);
+ }
+}
diff --git a/java/org/brotli/wrapper/dec/Decoder.java b/java/org/brotli/wrapper/dec/Decoder.java
index 018317d..de69d3a 100644
--- a/java/org/brotli/wrapper/dec/Decoder.java
+++ b/java/org/brotli/wrapper/dec/Decoder.java
@@ -50,6 +50,12 @@ public class Decoder {
throw new IOException(message);
}
+ void attachDictionary(ByteBuffer dictionary) throws IOException {
+ if (!decoder.attachDictionary(dictionary)) {
+ fail("failed to attach dictionary");
+ }
+ }
+
public void enableEagerOutput() {
this.eager = true;
}
diff --git a/java/org/brotli/wrapper/dec/DecoderJNI.java b/java/org/brotli/wrapper/dec/DecoderJNI.java
index 2319b1e..fc5225d 100644
--- a/java/org/brotli/wrapper/dec/DecoderJNI.java
+++ b/java/org/brotli/wrapper/dec/DecoderJNI.java
@@ -17,6 +17,7 @@ public class DecoderJNI {
private static native void nativePush(long[] context, int length);
private static native ByteBuffer nativePull(long[] context);
private static native void nativeDestroy(long[] context);
+ private static native boolean nativeAttachDictionary(long[] context, ByteBuffer dictionary);
public enum Status {
ERROR,
@@ -40,6 +41,19 @@ public class DecoderJNI {
}
}
+ public boolean attachDictionary(ByteBuffer dictionary) {
+ if (!dictionary.isDirect()) {
+ throw new IllegalArgumentException("only direct buffers allowed");
+ }
+ if (context[0] == 0) {
+ throw new IllegalStateException("brotli decoder is already destroyed");
+ }
+ if (!fresh) {
+ throw new IllegalStateException("decoding is already started");
+ }
+ return nativeAttachDictionary(context, dictionary);
+ }
+
public void push(int length) {
if (length < 0) {
throw new IllegalArgumentException("negative block length");
diff --git a/java/org/brotli/wrapper/dec/decoder_jni.cc b/java/org/brotli/wrapper/dec/decoder_jni.cc
index 268a10b..e10ffe3 100644
--- a/java/org/brotli/wrapper/dec/decoder_jni.cc
+++ b/java/org/brotli/wrapper/dec/decoder_jni.cc
@@ -15,6 +15,9 @@ namespace {
typedef struct DecoderHandle {
BrotliDecoderState* state;
+ jobject dictionary_refs[15];
+ size_t dictionary_count;
+
uint8_t* input_start;
size_t input_offset;
size_t input_length;
@@ -54,6 +57,10 @@ Java_org_brotli_wrapper_dec_DecoderJNI_nativeCreate(
ok = !!handle;
if (ok) {
+ for (int i = 0; i < 15; ++i) {
+ handle->dictionary_refs[i] = nullptr;
+ }
+ handle->dictionary_count = 0;
handle->input_offset = 0;
handle->input_length = 0;
handle->input_start = nullptr;
@@ -193,10 +200,53 @@ Java_org_brotli_wrapper_dec_DecoderJNI_nativeDestroy(
env->GetLongArrayRegion(ctx, 0, 3, context);
DecoderHandle* handle = getHandle(reinterpret_cast<void*>(context[0]));
BrotliDecoderDestroyInstance(handle->state);
+ for (size_t i = 0; i < handle->dictionary_count; ++i) {
+ env->DeleteGlobalRef(handle->dictionary_refs[i]);
+ }
delete[] handle->input_start;
delete handle;
}
+JNIEXPORT jboolean JNICALL
+Java_org_brotli_wrapper_dec_DecoderJNI_nativeAttachDictionary(
+ JNIEnv* env, jobject /*jobj*/, jlongArray ctx, jobject dictionary) {
+ jlong context[3];
+ env->GetLongArrayRegion(ctx, 0, 3, context);
+ DecoderHandle* handle = getHandle(reinterpret_cast<void*>(context[0]));
+ jobject ref = nullptr;
+ uint8_t* address = nullptr;
+ jlong capacity = 0;
+
+ bool ok = true;
+ if (ok && !dictionary) {
+ ok = false;
+ }
+ if (ok && handle->dictionary_count >= 15) {
+ ok = false;
+ }
+ if (ok) {
+ ref = env->NewGlobalRef(dictionary);
+ ok = !!ref;
+ }
+ if (ok) {
+ handle->dictionary_refs[handle->dictionary_count] = ref;
+ handle->dictionary_count++;
+ address = static_cast<uint8_t*>(env->GetDirectBufferAddress(ref));
+ ok = !!address;
+ }
+ if (ok) {
+ capacity = env->GetDirectBufferCapacity(ref);
+ ok = (capacity > 0) && (capacity < (1 << 30));
+ }
+ if (ok) {
+ size_t size = static_cast<size_t>(capacity);
+ ok = !!BrotliDecoderAttachDictionary(handle->state,
+ BROTLI_SHARED_DICTIONARY_RAW, size, address);
+ }
+
+ return static_cast<jboolean>(ok);
+}
+
#ifdef __cplusplus
}
#endif
diff --git a/java/org/brotli/wrapper/enc/BUILD b/java/org/brotli/wrapper/enc/BUILD
index 22154d2..0f8d2e1 100644
--- a/java/org/brotli/wrapper/enc/BUILD
+++ b/java/org/brotli/wrapper/enc/BUILD
@@ -18,6 +18,10 @@ java_library(
["*.java"],
exclude = ["*Test*.java"],
),
+ deps = [
+ "//org/brotli/common:shared_dictionary",
+ "//org/brotli/enc:prepared_dictionary",
+ ],
resource_jars = ["//:license"],
)
@@ -27,8 +31,11 @@ java_library(
srcs = glob(["*Test*.java"]),
deps = [
":enc",
+ "//org/brotli/common:shared_dictionary",
+ "//org/brotli/enc:prepared_dictionary",
"//org/brotli/integration:brotli_jni_test_base",
"//org/brotli/integration:bundle_helper",
+ "//org/brotli/wrapper/common",
"//org/brotli/wrapper/dec",
"@maven//:junit_junit",
],
diff --git a/java/org/brotli/wrapper/enc/BrotliEncoderChannel.java b/java/org/brotli/wrapper/enc/BrotliEncoderChannel.java
index 047c356..2ee2ba6 100644
--- a/java/org/brotli/wrapper/enc/BrotliEncoderChannel.java
+++ b/java/org/brotli/wrapper/enc/BrotliEncoderChannel.java
@@ -6,6 +6,7 @@
package org.brotli.wrapper.enc;
+import org.brotli.enc.PreparedDictionary;
import java.io.IOException;
import java.nio.Buffer;
import java.nio.ByteBuffer;
@@ -43,6 +44,11 @@ public class BrotliEncoderChannel extends Encoder implements WritableByteChannel
}
@Override
+ public void attachDictionary(PreparedDictionary dictionary) throws IOException {
+ super.attachDictionary(dictionary);
+ }
+
+ @Override
public boolean isOpen() {
synchronized (mutex) {
return !closed;
diff --git a/java/org/brotli/wrapper/enc/BrotliOutputStream.java b/java/org/brotli/wrapper/enc/BrotliOutputStream.java
index 5bd3957..09cbd5a 100644
--- a/java/org/brotli/wrapper/enc/BrotliOutputStream.java
+++ b/java/org/brotli/wrapper/enc/BrotliOutputStream.java
@@ -6,6 +6,7 @@
package org.brotli.wrapper.enc;
+import org.brotli.enc.PreparedDictionary;
import java.io.IOException;
import java.io.OutputStream;
import java.nio.channels.Channels;
@@ -40,6 +41,10 @@ public class BrotliOutputStream extends OutputStream {
this(destination, new Encoder.Parameters());
}
+ public void attachDictionary(PreparedDictionary dictionary) throws IOException {
+ encoder.attachDictionary(dictionary);
+ }
+
@Override
public void close() throws IOException {
encoder.close();
diff --git a/java/org/brotli/wrapper/enc/Encoder.java b/java/org/brotli/wrapper/enc/Encoder.java
index af579a3..2aa9890 100644
--- a/java/org/brotli/wrapper/enc/Encoder.java
+++ b/java/org/brotli/wrapper/enc/Encoder.java
@@ -6,17 +6,20 @@
package org.brotli.wrapper.enc;
+import org.brotli.enc.PreparedDictionary;
import java.io.IOException;
import java.nio.Buffer;
import java.nio.ByteBuffer;
import java.nio.channels.WritableByteChannel;
import java.util.ArrayList;
+import java.util.List;
/**
* Base class for OutputStream / Channel implementations.
*/
public class Encoder {
private final WritableByteChannel destination;
+ private final List<PreparedDictionary> dictionaries;
private final EncoderJNI.Wrapper encoder;
private ByteBuffer buffer;
final ByteBuffer inputBuffer;
@@ -111,6 +114,7 @@ public class Encoder {
if (destination == null) {
throw new NullPointerException("destination can not be null");
}
+ this.dictionaries = new ArrayList<PreparedDictionary>();
this.destination = destination;
this.encoder = new EncoderJNI.Wrapper(inputBufferSize, params.quality, params.lgwin, params.mode);
this.inputBuffer = this.encoder.getInputBuffer();
@@ -125,6 +129,14 @@ public class Encoder {
throw new IOException(message);
}
+ public void attachDictionary(PreparedDictionary dictionary) throws IOException {
+ if (!encoder.attachDictionary(dictionary.getData())) {
+ fail("failed to attach dictionary");
+ }
+ // Reference to native prepared dictionary wrapper should be held till the end of encoding.
+ dictionaries.add(dictionary);
+ }
+
/**
* @param force repeat pushing until all output is consumed
* @return true if all encoder output is consumed
@@ -239,4 +251,15 @@ public class Encoder {
public static byte[] compress(byte[] data) throws IOException {
return compress(data, new Parameters());
}
+
+ /**
+ * Prepares raw or serialized dictionary for being used by encoder.
+ *
+ * @param dictionary raw / serialized dictionary data; MUST be direct
+ * @param sharedDictionaryType dictionary data type
+ */
+ public static PreparedDictionary prepareDictionary(ByteBuffer dictionary,
+ int sharedDictionaryType) {
+ return EncoderJNI.prepareDictionary(dictionary, sharedDictionaryType);
+ }
}
diff --git a/java/org/brotli/wrapper/enc/EncoderJNI.java b/java/org/brotli/wrapper/enc/EncoderJNI.java
index d0c5fac..1cbd7c1 100644
--- a/java/org/brotli/wrapper/enc/EncoderJNI.java
+++ b/java/org/brotli/wrapper/enc/EncoderJNI.java
@@ -6,6 +6,7 @@
package org.brotli.wrapper.enc;
+import org.brotli.enc.PreparedDictionary;
import java.io.IOException;
import java.nio.ByteBuffer;
@@ -17,6 +18,9 @@ class EncoderJNI {
private static native void nativePush(long[] context, int length);
private static native ByteBuffer nativePull(long[] context);
private static native void nativeDestroy(long[] context);
+ private static native boolean nativeAttachDictionary(long[] context, ByteBuffer dictionary);
+ private static native ByteBuffer nativePrepareDictionary(ByteBuffer dictionary, long type);
+ private static native void nativeDestroyDictionary(ByteBuffer dictionary);
enum Operation {
PROCESS,
@@ -24,6 +28,47 @@ class EncoderJNI {
FINISH
}
+ private static class PreparedDictionaryImpl implements PreparedDictionary {
+ private ByteBuffer data;
+
+ private PreparedDictionaryImpl(ByteBuffer data) {
+ this.data = data;
+ }
+
+ @Override
+ public ByteBuffer getData() {
+ return data;
+ }
+
+ @Override
+ protected void finalize() throws Throwable {
+ try {
+ ByteBuffer data = this.data;
+ this.data = null;
+ nativeDestroyDictionary(data);
+ } finally {
+ super.finalize();
+ }
+ }
+ }
+
+ /**
+ * Prepares raw or serialized dictionary for being used by encoder.
+ *
+ * @param dictionary raw / serialized dictionary data; MUST be direct
+ * @param sharedDictionaryType dictionary data type
+ */
+ static PreparedDictionary prepareDictionary(ByteBuffer dictionary, int sharedDictionaryType) {
+ if (!dictionary.isDirect()) {
+ throw new IllegalArgumentException("only direct buffers allowed");
+ }
+ ByteBuffer dictionaryData = nativePrepareDictionary(dictionary, sharedDictionaryType);
+ if (dictionaryData == null) {
+ throw new IllegalStateException("OOM");
+ }
+ return new PreparedDictionaryImpl(dictionaryData);
+ }
+
static class Wrapper {
protected final long[] context = new long[5];
private final ByteBuffer inputBuffer;
@@ -48,6 +93,19 @@ class EncoderJNI {
this.context[4] = 0;
}
+ boolean attachDictionary(ByteBuffer dictionary) {
+ if (!dictionary.isDirect()) {
+ throw new IllegalArgumentException("only direct buffers allowed");
+ }
+ if (context[0] == 0) {
+ throw new IllegalStateException("brotli decoder is already destroyed");
+ }
+ if (!fresh) {
+ throw new IllegalStateException("decoding is already started");
+ }
+ return nativeAttachDictionary(context, dictionary);
+ }
+
void push(Operation op, int length) {
if (length < 0) {
throw new IllegalArgumentException("negative block length");
diff --git a/java/org/brotli/wrapper/enc/UseCompoundDictionaryTest.java b/java/org/brotli/wrapper/enc/UseCompoundDictionaryTest.java
new file mode 100644
index 0000000..cbfd0d9
--- /dev/null
+++ b/java/org/brotli/wrapper/enc/UseCompoundDictionaryTest.java
@@ -0,0 +1,114 @@
+package org.brotli.wrapper.enc;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+import org.brotli.common.SharedDictionaryType;
+import org.brotli.enc.PreparedDictionary;
+import org.brotli.enc.PreparedDictionaryGenerator;
+import org.brotli.integration.BrotliJniTestBase;
+import org.brotli.integration.BundleHelper;
+import org.brotli.wrapper.common.BrotliCommon;
+import org.brotli.wrapper.dec.BrotliInputStream;
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.nio.ByteBuffer;
+import java.util.List;
+import junit.framework.TestCase;
+import junit.framework.TestSuite;
+import org.junit.runner.RunWith;
+import org.junit.runners.AllTests;
+
+/** Tests for compression / decompression aided with LZ77 dictionary. */
+@RunWith(AllTests.class)
+public class UseCompoundDictionaryTest extends BrotliJniTestBase {
+
+ static InputStream getBundle() throws IOException {
+ return new FileInputStream(System.getProperty("TEST_BUNDLE"));
+ }
+
+ /** Creates a test suite. */
+ public static TestSuite suite() throws IOException {
+ TestSuite suite = new TestSuite();
+ InputStream bundle = getBundle();
+ try {
+ List<String> entries = BundleHelper.listEntries(bundle);
+ for (String entry : entries) {
+ suite.addTest(new UseCompoundDictionaryTestCase(entry));
+ }
+ } finally {
+ bundle.close();
+ }
+ return suite;
+ }
+
+ /** Test case with a unique name. */
+ static class UseCompoundDictionaryTestCase extends TestCase {
+ final String entryName;
+ UseCompoundDictionaryTestCase(String entryName) {
+ super("UseCompoundDictionaryTest." + entryName);
+ this.entryName = entryName;
+ }
+
+ @Override
+ protected void runTest() throws Throwable {
+ UseCompoundDictionaryTest.run(entryName);
+ }
+ }
+
+ private static PreparedDictionary prepareRawDictionary(String entryName, ByteBuffer data) {
+ if (entryName.endsWith("E.coli.txt")) {
+ // Default prepared dictionary parameters doesn't work well for DNA data.
+ // Should work well with 8-byte hash.
+ return PreparedDictionaryGenerator.generate(data, 17, 3, 64, 5);
+ } else {
+ return Encoder.prepareDictionary(data, SharedDictionaryType.RAW);
+ }
+ }
+
+ private static void run(String entryName) throws Throwable {
+ InputStream bundle = getBundle();
+ byte[] original;
+ try {
+ original = BundleHelper.readEntry(bundle, entryName);
+ } finally {
+ bundle.close();
+ }
+
+ if (original == null) {
+ throw new RuntimeException("Can't read bundle entry: " + entryName);
+ }
+
+ ByteBuffer dictionary = BrotliCommon.makeNative(original);
+ PreparedDictionary preparedDictionary = prepareRawDictionary(entryName, dictionary);
+
+ ByteArrayOutputStream dst = new ByteArrayOutputStream();
+ BrotliOutputStream encoder =
+ new BrotliOutputStream(dst, new Encoder.Parameters().setQuality(9).setWindow(23), 1 << 23);
+ encoder.attachDictionary(preparedDictionary);
+ try {
+ encoder.write(original);
+ } finally {
+ encoder.close();
+ }
+
+ byte[] compressed = dst.toByteArray();
+
+ // Just copy self from LZ77 dictionary -> ultimate compression ratio.
+ assertTrue(compressed.length < 80 + original.length / 65536);
+
+ BrotliInputStream decoder =
+ new BrotliInputStream(new ByteArrayInputStream(compressed), 1 << 23);
+ decoder.attachDictionary(dictionary);
+ try {
+ long originalCrc = BundleHelper.fingerprintStream(new ByteArrayInputStream(original));
+ long crc = BundleHelper.fingerprintStream(decoder);
+ assertEquals(originalCrc, crc);
+ } finally {
+ decoder.close();
+ }
+ }
+} \ No newline at end of file
diff --git a/java/org/brotli/wrapper/enc/encoder_jni.cc b/java/org/brotli/wrapper/enc/encoder_jni.cc
index e69f719..00fbfd8 100644
--- a/java/org/brotli/wrapper/enc/encoder_jni.cc
+++ b/java/org/brotli/wrapper/enc/encoder_jni.cc
@@ -15,6 +15,9 @@ namespace {
typedef struct EncoderHandle {
BrotliEncoderState* state;
+ jobject dictionary_refs[15];
+ size_t dictionary_count;
+
uint8_t* input_start;
size_t input_offset;
size_t input_last;
@@ -53,6 +56,10 @@ Java_org_brotli_wrapper_enc_EncoderJNI_nativeCreate(
ok = !!handle;
if (ok) {
+ for (int i = 0; i < 15; ++i) {
+ handle->dictionary_refs[i] = nullptr;
+ }
+ handle->dictionary_count = 0;
handle->input_offset = 0;
handle->input_last = 0;
handle->input_start = nullptr;
@@ -190,10 +197,90 @@ Java_org_brotli_wrapper_enc_EncoderJNI_nativeDestroy(
env->GetLongArrayRegion(ctx, 0, 2, context);
EncoderHandle* handle = getHandle(reinterpret_cast<void*>(context[0]));
BrotliEncoderDestroyInstance(handle->state);
+ for (size_t i = 0; i < handle->dictionary_count; ++i) {
+ env->DeleteGlobalRef(handle->dictionary_refs[i]);
+ }
delete[] handle->input_start;
delete handle;
}
+JNIEXPORT jboolean JNICALL
+Java_org_brotli_wrapper_enc_EncoderJNI_nativeAttachDictionary(
+ JNIEnv* env, jobject /*jobj*/, jlongArray ctx, jobject dictionary) {
+ jlong context[2];
+ env->GetLongArrayRegion(ctx, 0, 2, context);
+ EncoderHandle* handle = getHandle(reinterpret_cast<void*>(context[0]));
+ jobject ref = nullptr;
+ uint8_t* address = nullptr;
+
+ bool ok = true;
+ if (ok && !dictionary) {
+ ok = false;
+ }
+ if (ok && handle->dictionary_count >= 15) {
+ ok = false;
+ }
+ if (ok) {
+ ref = env->NewGlobalRef(dictionary);
+ ok = !!ref;
+ }
+ if (ok) {
+ handle->dictionary_refs[handle->dictionary_count] = ref;
+ handle->dictionary_count++;
+ address = static_cast<uint8_t*>(env->GetDirectBufferAddress(ref));
+ ok = !!address;
+ }
+ if (ok) {
+ ok = !!BrotliEncoderAttachPreparedDictionary(handle->state,
+ reinterpret_cast<BrotliEncoderPreparedDictionary*>(address));
+ }
+
+ return static_cast<jboolean>(ok);
+}
+
+JNIEXPORT void JNICALL
+Java_org_brotli_wrapper_enc_EncoderJNI_nativeDestroyDictionary(
+ JNIEnv* env, jobject /*jobj*/, jobject dictionary) {
+ if (!dictionary) {
+ return;
+ }
+ uint8_t* address =
+ static_cast<uint8_t*>(env->GetDirectBufferAddress(dictionary));
+ if (!address) {
+ return;
+ }
+ BrotliEncoderDestroyPreparedDictionary(
+ reinterpret_cast<BrotliEncoderPreparedDictionary*>(address));
+}
+
+JNIEXPORT jobject JNICALL
+Java_org_brotli_wrapper_enc_EncoderJNI_nativePrepareDictionary(
+ JNIEnv* env, jobject /*jobj*/, jobject dictionary, jlong type) {
+ if (!dictionary) {
+ return nullptr;
+ }
+ uint8_t* address =
+ static_cast<uint8_t*>(env->GetDirectBufferAddress(dictionary));
+ if (!address) {
+ return nullptr;
+ }
+ jlong capacity = env->GetDirectBufferCapacity(dictionary);
+ if ((capacity <= 0) || (capacity >= (1 << 30))) {
+ return nullptr;
+ }
+ BrotliSharedDictionaryType dictionary_type =
+ static_cast<BrotliSharedDictionaryType>(type);
+ size_t size = static_cast<size_t>(capacity);
+ BrotliEncoderPreparedDictionary* prepared_dictionary =
+ BrotliEncoderPrepareDictionary(dictionary_type, size, address,
+ BROTLI_MAX_QUALITY, nullptr, nullptr, nullptr);
+ if (!prepared_dictionary) {
+ return nullptr;
+ }
+ /* Size is 4 - just enough to check magic bytes. */
+ return env->NewDirectByteBuffer(prepared_dictionary, 4);
+}
+
#ifdef __cplusplus
}
#endif
diff --git a/scripts/sources.lst b/scripts/sources.lst
index 19a6d00..18d3867 100644
--- a/scripts/sources.lst
+++ b/scripts/sources.lst
@@ -9,6 +9,7 @@ BROTLI_COMMON_C = \
c/common/context.c \
c/common/dictionary.c \
c/common/platform.c \
+ c/common/shared_dictionary.c \
c/common/transform.c
BROTLI_COMMON_H = \
@@ -16,6 +17,7 @@ BROTLI_COMMON_H = \
c/common/context.h \
c/common/dictionary.h \
c/common/platform.h \
+ c/common/shared_dictionary_internal.h \
c/common/transform.h \
c/common/version.h
@@ -39,6 +41,7 @@ BROTLI_ENC_C = \
c/enc/brotli_bit_stream.c \
c/enc/cluster.c \
c/enc/command.c \
+ c/enc/compound_dictionary.c \
c/enc/compress_fragment.c \
c/enc/compress_fragment_two_pass.c \
c/enc/dictionary_hash.c \
@@ -66,6 +69,7 @@ BROTLI_ENC_H = \
c/enc/cluster.h \
c/enc/cluster_inc.h \
c/enc/command.h \
+ c/enc/compound_dictionary.h \
c/enc/compress_fragment.h \
c/enc/compress_fragment_two_pass.h \
c/enc/dictionary_hash.h \
@@ -101,4 +105,5 @@ BROTLI_INCLUDE = \
c/include/brotli/decode.h \
c/include/brotli/encode.h \
c/include/brotli/port.h \
+ c/include/brotli/shared_dictionary.h \
c/include/brotli/types.h
diff --git a/setup.py b/setup.py
index 25626ec..3afb035 100644
--- a/setup.py
+++ b/setup.py
@@ -185,6 +185,7 @@ EXT_MODULES = [
'c/common/context.c',
'c/common/dictionary.c',
'c/common/platform.c',
+ 'c/common/shared_dictionary.c',
'c/common/transform.c',
'c/dec/bit_reader.c',
'c/dec/decode.c',
@@ -197,6 +198,7 @@ EXT_MODULES = [
'c/enc/brotli_bit_stream.c',
'c/enc/cluster.c',
'c/enc/command.c',
+ 'c/enc/compound_dictionary.c',
'c/enc/compress_fragment.c',
'c/enc/compress_fragment_two_pass.c',
'c/enc/dictionary_hash.c',
@@ -216,6 +218,7 @@ EXT_MODULES = [
'c/common/context.h',
'c/common/dictionary.h',
'c/common/platform.h',
+ 'c/common/shared_dictionary_internal.h',
'c/common/transform.h',
'c/common/version.h',
'c/dec/bit_reader.h',
@@ -234,6 +237,7 @@ EXT_MODULES = [
'c/enc/cluster.h',
'c/enc/cluster_inc.h',
'c/enc/command.h',
+ 'c/enc/compound_dictionary.h',
'c/enc/compress_fragment.h',
'c/enc/compress_fragment_two_pass.h',
'c/enc/dictionary_hash.h',