aboutsummaryrefslogtreecommitdiff
path: root/c/enc
diff options
context:
space:
mode:
authorEugene Kliuchnikov <eustas@google.com>2018-06-20 15:14:10 +0200
committerGitHub <noreply@github.com>2018-06-20 15:14:10 +0200
commiteb12ec04ebb0e9ebb95bb2f13d1edfb62d7ae51d (patch)
tree16643827f5033fcee3c798ec1c0db9f96b9bfaf6 /c/enc
parent7505290ef9880ae636a57f57caf77a8dac56d283 (diff)
downloadbrotli-eb12ec04ebb0e9ebb95bb2f13d1edfb62d7ae51d.zip
brotli-eb12ec04ebb0e9ebb95bb2f13d1edfb62d7ae51d.tar.gz
brotli-eb12ec04ebb0e9ebb95bb2f13d1edfb62d7ae51d.tar.bz2
Update (#688)
* add rolling-composite-hasher for large-window mode * make API methods explicitly public
Diffstat (limited to 'c/enc')
-rw-r--r--c/enc/backward_references.c15
-rw-r--r--c/enc/hash.h56
-rwxr-xr-xc/enc/hash_composite_inc.h133
-rwxr-xr-xc/enc/hash_rolling_inc.h215
-rw-r--r--c/enc/quality.h18
5 files changed, 432 insertions, 5 deletions
diff --git a/c/enc/backward_references.c b/c/enc/backward_references.c
index 3dd897b..cd023d9 100644
--- a/c/enc/backward_references.c
+++ b/c/enc/backward_references.c
@@ -97,6 +97,21 @@ static BROTLI_INLINE size_t ComputeDistanceCode(size_t distance,
#include "./backward_references_inc.h"
#undef HASHER
+#define HASHER() H35
+/* NOLINTNEXTLINE(build/include) */
+#include "./backward_references_inc.h"
+#undef HASHER
+
+#define HASHER() H55
+/* NOLINTNEXTLINE(build/include) */
+#include "./backward_references_inc.h"
+#undef HASHER
+
+#define HASHER() H65
+/* NOLINTNEXTLINE(build/include) */
+#include "./backward_references_inc.h"
+#undef HASHER
+
#undef PREFIX
#undef EXPORT_FN
diff --git a/c/enc/hash.h b/c/enc/hash.h
index fc7356b..2602490 100644
--- a/c/enc/hash.h
+++ b/c/enc/hash.h
@@ -153,14 +153,14 @@ static BROTLI_INLINE BROTLI_BOOL TestStaticDictionaryItem(
size_t max_length, size_t max_backward, size_t max_distance,
HasherSearchResult* out) {
size_t len;
- size_t dist;
+ size_t word_idx;
size_t offset;
size_t matchlen;
size_t backward;
score_t score;
len = item & 0x1F;
- dist = item >> 5;
- offset = dictionary->words->offsets_by_length[len] + len * dist;
+ word_idx = item >> 5;
+ offset = dictionary->words->offsets_by_length[len] + len * word_idx;
if (len > max_length) {
return BROTLI_FALSE;
}
@@ -174,7 +174,7 @@ static BROTLI_INLINE BROTLI_BOOL TestStaticDictionaryItem(
size_t cut = len - matchlen;
size_t transform_id = (cut << 2) +
(size_t)((dictionary->cutoffTransforms >> (cut * 6)) & 0x3F);
- backward = max_backward + dist + 1 +
+ backward = max_backward + 1 + word_idx +
(transform_id << dictionary->words->size_bits_by_length[len]);
}
if (backward > max_distance) {
@@ -343,11 +343,57 @@ static BROTLI_INLINE size_t BackwardMatchLengthCode(const BackwardMatch* self) {
#undef BUCKET_BITS
#undef HASHER
+/* fast large window hashers */
+
+#define HASHER() HROLLING_FAST
+#define CHUNKLEN 32
+#define JUMP 4
+#define NUMBUCKETS 16777216
+#define MASK ((NUMBUCKETS * 64) - 1)
+#include "./hash_rolling_inc.h" /* NOLINT(build/include) */
+#undef JUMP
+#undef HASHER
+
+
+#define HASHER() HROLLING
+#define JUMP 1
+#include "./hash_rolling_inc.h" /* NOLINT(build/include) */
+#undef MASK
+#undef NUMBUCKETS
+#undef JUMP
+#undef CHUNKLEN
+#undef HASHER
+
+#define HASHER() H35
+#define HASHER_A H3
+#define HASHER_B HROLLING_FAST
+#include "./hash_composite_inc.h" /* NOLINT(build/include) */
+#undef HASHER_A
+#undef HASHER_B
+#undef HASHER
+
+#define HASHER() H55
+#define HASHER_A H54
+#define HASHER_B HROLLING_FAST
+#include "./hash_composite_inc.h" /* NOLINT(build/include) */
+#undef HASHER_A
+#undef HASHER_B
+#undef HASHER
+
+#define HASHER() H65
+#define HASHER_A H6
+#define HASHER_B HROLLING
+#include "./hash_composite_inc.h" /* NOLINT(build/include) */
+#undef HASHER_A
+#undef HASHER_B
+#undef HASHER
+
#undef FN
#undef CAT
#undef EXPAND_CAT
-#define FOR_GENERIC_HASHERS(H) H(2) H(3) H(4) H(5) H(6) H(40) H(41) H(42) H(54)
+#define FOR_GENERIC_HASHERS(H) H(2) H(3) H(4) H(5) H(6) H(40) H(41) H(42) H(54)\
+ H(35) H(55) H(65)
#define FOR_ALL_HASHERS(H) FOR_GENERIC_HASHERS(H) H(10)
static BROTLI_INLINE void DestroyHasher(
diff --git a/c/enc/hash_composite_inc.h b/c/enc/hash_composite_inc.h
new file mode 100755
index 0000000..f829a97
--- /dev/null
+++ b/c/enc/hash_composite_inc.h
@@ -0,0 +1,133 @@
+/* NOLINT(build/header_guard) */
+/* Copyright 2018 Google Inc. All Rights Reserved.
+
+ Distributed under MIT license.
+ See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* template parameters: FN, HASHER_A, HASHER_B */
+
+/* Composite hasher: This hasher allows to combine two other hashers, HASHER_A
+ and HASHER_B. */
+
+#define HashComposite HASHER()
+
+#define FN_A(X) EXPAND_CAT(X, HASHER_A)
+#define FN_B(X) EXPAND_CAT(X, HASHER_B)
+
+static BROTLI_INLINE size_t FN(HashTypeLength)(void) {
+ size_t a = FN_A(HashTypeLength)();
+ size_t b = FN_B(HashTypeLength)();
+ return a > b ? a : b;
+}
+
+static BROTLI_INLINE size_t FN(StoreLookahead)(void) {
+ size_t a = FN_A(StoreLookahead)();
+ size_t b = FN_B(StoreLookahead)();
+ return a > b ? a : b;
+}
+
+typedef struct HashComposite {
+ HasherHandle ha;
+ HasherHandle hb;
+ const BrotliEncoderParams* params;
+} HashComposite;
+
+static BROTLI_INLINE HashComposite* FN(Self)(HasherHandle handle) {
+ return (HashComposite*)&(GetHasherCommon(handle)[1]);
+}
+
+static void FN(Initialize)(
+ HasherHandle handle, const BrotliEncoderParams* params) {
+ HashComposite* self = FN(Self)(handle);
+ self->ha = 0;
+ self->hb = 0;
+ self->params = params;
+ /* TODO: Initialize of the hashers is defered to Prepare (and params
+ remembered here) because we don't get the one_shot and input_size params
+ here that are needed to know the memory size of them. Instead provide
+ those params to all hashers FN(Initialize) */
+}
+
+static void FN(Prepare)(HasherHandle handle, BROTLI_BOOL one_shot,
+ size_t input_size, const uint8_t* data) {
+ HashComposite* self = FN(Self)(handle);
+ if (!self->ha) {
+ HasherCommon* common_a;
+ HasherCommon* common_b;
+
+ self->ha = handle + sizeof(HasherCommon) + sizeof(HashComposite);
+ common_a = (HasherCommon*)self->ha;
+ common_a->params = self->params->hasher;
+ common_a->is_prepared_ = BROTLI_FALSE;
+ common_a->dict_num_lookups = 0;
+ common_a->dict_num_matches = 0;
+ FN_A(Initialize)(self->ha, self->params);
+
+ self->hb = self->ha + sizeof(HasherCommon) + FN_A(HashMemAllocInBytes)(
+ self->params, one_shot, input_size);
+ common_b = (HasherCommon*)self->hb;
+ common_b->params = self->params->hasher;
+ common_b->is_prepared_ = BROTLI_FALSE;
+ common_b->dict_num_lookups = 0;
+ common_b->dict_num_matches = 0;
+ FN_B(Initialize)(self->hb, self->params);
+ }
+ FN_A(Prepare)(self->ha, one_shot, input_size, data);
+ FN_B(Prepare)(self->hb, one_shot, input_size, data);
+}
+
+static BROTLI_INLINE size_t FN(HashMemAllocInBytes)(
+ const BrotliEncoderParams* params, BROTLI_BOOL one_shot,
+ size_t input_size) {
+ return sizeof(HashComposite) + 2 * sizeof(HasherCommon) +
+ FN_A(HashMemAllocInBytes)(params, one_shot, input_size) +
+ FN_B(HashMemAllocInBytes)(params, one_shot, input_size);
+}
+
+static BROTLI_INLINE void FN(Store)(HasherHandle BROTLI_RESTRICT handle,
+ const uint8_t* BROTLI_RESTRICT data, const size_t mask, const size_t ix) {
+ HashComposite* self = FN(Self)(handle);
+ FN_A(Store)(self->ha, data, mask, ix);
+ FN_B(Store)(self->hb, data, mask, ix);
+}
+
+static BROTLI_INLINE void FN(StoreRange)(HasherHandle handle,
+ const uint8_t* data, const size_t mask, const size_t ix_start,
+ const size_t ix_end) {
+ HashComposite* self = FN(Self)(handle);
+ FN_A(StoreRange)(self->ha, data, mask, ix_start, ix_end);
+ FN_B(StoreRange)(self->hb, data, mask, ix_start, ix_end);
+}
+
+static BROTLI_INLINE void FN(StitchToPreviousBlock)(HasherHandle handle,
+ size_t num_bytes, size_t position, const uint8_t* ringbuffer,
+ size_t ring_buffer_mask) {
+ HashComposite* self = FN(Self)(handle);
+ FN_A(StitchToPreviousBlock)(self->ha, num_bytes, position, ringbuffer,
+ ring_buffer_mask);
+ FN_B(StitchToPreviousBlock)(self->hb, num_bytes, position, ringbuffer,
+ ring_buffer_mask);
+}
+
+static BROTLI_INLINE void FN(PrepareDistanceCache)(
+ HasherHandle handle, int* BROTLI_RESTRICT distance_cache) {
+ HashComposite* self = FN(Self)(handle);
+ FN_A(PrepareDistanceCache)(self->ha, distance_cache);
+ FN_B(PrepareDistanceCache)(self->hb, distance_cache);
+}
+
+static BROTLI_INLINE void FN(FindLongestMatch)(HasherHandle handle,
+ const BrotliEncoderDictionary* dictionary,
+ const uint8_t* BROTLI_RESTRICT data, const size_t ring_buffer_mask,
+ const int* BROTLI_RESTRICT distance_cache, const size_t cur_ix,
+ const size_t max_length, const size_t max_backward, const size_t gap,
+ const size_t max_distance, HasherSearchResult* BROTLI_RESTRICT out) {
+ HashComposite* self = FN(Self)(handle);
+ FN_A(FindLongestMatch)(self->ha, dictionary, data, ring_buffer_mask,
+ distance_cache, cur_ix, max_length, max_backward, gap, max_distance, out);
+ FN_B(FindLongestMatch)(self->hb, dictionary, data, ring_buffer_mask,
+ distance_cache, cur_ix, max_length, max_backward, gap, max_distance, out);
+}
+
+#undef HashComposite
diff --git a/c/enc/hash_rolling_inc.h b/c/enc/hash_rolling_inc.h
new file mode 100755
index 0000000..4d5d14a
--- /dev/null
+++ b/c/enc/hash_rolling_inc.h
@@ -0,0 +1,215 @@
+/* NOLINT(build/header_guard) */
+/* Copyright 2018 Google Inc. All Rights Reserved.
+
+ Distributed under MIT license.
+ See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* template parameters: FN, JUMP, NUMBUCKETS, MASK, CHUNKLEN */
+/* NUMBUCKETS / (MASK + 1) = probability of storing and using hash code. */
+/* JUMP = skip bytes for speedup */
+
+/* Rolling hash for long distance long string matches. Stores one position
+ per bucket, bucket key is computed over a long region. */
+
+#define HashRolling HASHER()
+
+static const uint32_t FN(kRollingHashMul32) = 69069;
+static const uint32_t FN(kInvalidPos) = 0xffffffff;
+
+/* This hasher uses a longer forward length, but returning a higher value here
+ will hurt compression by the main hasher when combined with a composite
+ hasher. The hasher tests for forward itself instead. */
+static BROTLI_INLINE size_t FN(HashTypeLength)(void) { return 4; }
+static BROTLI_INLINE size_t FN(StoreLookahead)(void) { return 4; }
+
+/* Computes a code from a single byte. A lookup table of 256 values could be
+ used, but simply adding 1 works about as good. */
+static uint32_t FN(HashByte)(uint8_t byte) {
+ return (uint32_t)byte + 1u;
+}
+
+static uint32_t FN(HashRollingFunctionInitial)(uint32_t state, uint8_t add,
+ uint32_t factor) {
+ return (uint32_t)(factor * state + FN(HashByte)(add));
+}
+
+static uint32_t FN(HashRollingFunction)(uint32_t state, uint8_t add,
+ uint8_t rem, uint32_t factor,
+ uint32_t factor_remove) {
+ return (uint32_t)(factor * state +
+ FN(HashByte)(add) - factor_remove * FN(HashByte)(rem));
+}
+
+typedef struct HashRolling {
+ uint32_t state;
+ uint32_t* table;
+ size_t next_ix;
+
+ uint32_t chunk_len;
+ uint32_t factor;
+ uint32_t factor_remove;
+} HashRolling;
+
+static BROTLI_INLINE HashRolling* FN(Self)(HasherHandle handle) {
+ return (HashRolling*)&(GetHasherCommon(handle)[1]);
+}
+
+static void FN(Initialize)(
+ HasherHandle handle, const BrotliEncoderParams* params) {
+ HashRolling* self = FN(Self)(handle);
+ size_t i;
+ self->state = 0;
+ self->next_ix = 0;
+
+ self->factor = FN(kRollingHashMul32);
+
+ /* Compute the factor of the oldest byte to remove: factor**steps modulo
+ 0xffffffff (the multiplications rely on 32-bit overflow) */
+ self->factor_remove = 1;
+ for (i = 0; i < CHUNKLEN; i += JUMP) {
+ self->factor_remove *= self->factor;
+ }
+
+ self->table = (uint32_t*)((HasherHandle)self + sizeof(HashRolling));
+ for (i = 0; i < NUMBUCKETS; i++) {
+ self->table[i] = FN(kInvalidPos);
+ }
+
+ BROTLI_UNUSED(params);
+}
+
+static void FN(Prepare)(HasherHandle handle, BROTLI_BOOL one_shot,
+ size_t input_size, const uint8_t* data) {
+ HashRolling* self = FN(Self)(handle);
+ size_t i;
+ /* Too small size, cannot use this hasher. */
+ if (input_size < CHUNKLEN) return;
+ self->state = 0;
+ for (i = 0; i < CHUNKLEN; i += JUMP) {
+ self->state = FN(HashRollingFunctionInitial)(
+ self->state, data[i], self->factor);
+ }
+ BROTLI_UNUSED(one_shot);
+}
+
+static BROTLI_INLINE size_t FN(HashMemAllocInBytes)(
+ const BrotliEncoderParams* params, BROTLI_BOOL one_shot,
+ size_t input_size) {
+ return sizeof(HashRolling) + NUMBUCKETS * sizeof(uint32_t);
+ BROTLI_UNUSED(params);
+ BROTLI_UNUSED(one_shot);
+ BROTLI_UNUSED(input_size);
+}
+
+static BROTLI_INLINE void FN(Store)(HasherHandle BROTLI_RESTRICT handle,
+ const uint8_t* BROTLI_RESTRICT data, const size_t mask, const size_t ix) {
+ BROTLI_UNUSED(handle);
+ BROTLI_UNUSED(data);
+ BROTLI_UNUSED(mask);
+ BROTLI_UNUSED(ix);
+}
+
+static BROTLI_INLINE void FN(StoreRange)(HasherHandle handle,
+ const uint8_t* data, const size_t mask, const size_t ix_start,
+ const size_t ix_end) {
+ BROTLI_UNUSED(handle);
+ BROTLI_UNUSED(data);
+ BROTLI_UNUSED(mask);
+ BROTLI_UNUSED(ix_start);
+ BROTLI_UNUSED(ix_end);
+}
+
+static BROTLI_INLINE void FN(StitchToPreviousBlock)(HasherHandle handle,
+ size_t num_bytes, size_t position, const uint8_t* ringbuffer,
+ size_t ring_buffer_mask) {
+ /* In this case we must re-initialize the hasher from scratch from the
+ current position. */
+ HashRolling* self = FN(Self)(handle);
+ size_t position_masked;
+ size_t available = num_bytes;
+ if ((position & (JUMP - 1)) != 0) {
+ size_t diff = JUMP - (position & (JUMP - 1));
+ available = (diff > available) ? 0 : (available - diff);
+ position += diff;
+ }
+ position_masked = position & ring_buffer_mask;
+ /* wrapping around ringbuffer not handled. */
+ if (available > ring_buffer_mask - position_masked) {
+ available = ring_buffer_mask - position_masked;
+ }
+
+ FN(Prepare)(handle, BROTLI_FALSE, available,
+ ringbuffer + (position & ring_buffer_mask));
+ self->next_ix = position;
+ BROTLI_UNUSED(num_bytes);
+}
+
+static BROTLI_INLINE void FN(PrepareDistanceCache)(
+ HasherHandle handle, int* BROTLI_RESTRICT distance_cache) {
+ BROTLI_UNUSED(handle);
+ BROTLI_UNUSED(distance_cache);
+}
+
+static BROTLI_INLINE void FN(FindLongestMatch)(HasherHandle handle,
+ const BrotliEncoderDictionary* dictionary,
+ const uint8_t* BROTLI_RESTRICT data, const size_t ring_buffer_mask,
+ const int* BROTLI_RESTRICT distance_cache, const size_t cur_ix,
+ const size_t max_length, const size_t max_backward, const size_t gap,
+ const size_t max_distance, HasherSearchResult* BROTLI_RESTRICT out) {
+ HashRolling* self = FN(Self)(handle);
+ const size_t cur_ix_masked = cur_ix & ring_buffer_mask;
+ size_t pos = self->next_ix;
+
+ if ((cur_ix & (JUMP - 1)) != 0) return;
+
+ /* Not enough lookahead */
+ if (max_length < CHUNKLEN) return;
+
+ for (pos = self->next_ix; pos <= cur_ix; pos += JUMP) {
+ uint32_t code = self->state & MASK;
+
+ uint8_t rem = data[pos & ring_buffer_mask];
+ uint8_t add = data[(pos + CHUNKLEN) & ring_buffer_mask];
+ size_t found_ix = FN(kInvalidPos);
+
+ self->state = FN(HashRollingFunction)(
+ self->state, add, rem, self->factor, self->factor_remove);
+
+ if (code < NUMBUCKETS) {
+ found_ix = self->table[code];
+ self->table[code] = (uint32_t)pos;
+ if (pos == cur_ix && found_ix != FN(kInvalidPos)) {
+ /* The cast to 32-bit makes backward distances up to 4GB work even
+ if cur_ix is above 4GB, despite using 32-bit values in the table. */
+ size_t backward = (uint32_t)(cur_ix - found_ix);
+ if (backward <= max_backward) {
+ const size_t found_ix_masked = found_ix & ring_buffer_mask;
+ const size_t len = FindMatchLengthWithLimit(&data[found_ix_masked],
+ &data[cur_ix_masked],
+ max_length);
+ if (len >= 4 && len > out->len) {
+ score_t score = BackwardReferenceScore(len, backward);
+ if (score > out->score) {
+ out->len = len;
+ out->distance = backward;
+ out->score = score;
+ out->len_code_delta = 0;
+ }
+ }
+ }
+ }
+ }
+ }
+
+ self->next_ix = cur_ix + JUMP;
+
+ /* NOTE: this hasher does not search in the dictionary. It is used as
+ backup-hasher, the main hasher already searches in it. */
+ BROTLI_UNUSED(dictionary);
+ BROTLI_UNUSED(distance_cache);
+ BROTLI_UNUSED(gap);
+ BROTLI_UNUSED(max_distance);
+}
+
+#undef HashRolling
diff --git a/c/enc/quality.h b/c/enc/quality.h
index aa7ba0d..5f4d034 100644
--- a/c/enc/quality.h
+++ b/c/enc/quality.h
@@ -142,6 +142,24 @@ static BROTLI_INLINE void ChooseHasher(const BrotliEncoderParams* params,
hparams->num_last_distances_to_check =
params->quality < 7 ? 4 : params->quality < 9 ? 10 : 16;
}
+
+ if (params->lgwin > 24) {
+ /* Different hashers for large window brotli: not for qualities <= 2,
+ these are too fast for large window. Not for qualities >= 10: their
+ hasher already works well with large window. So the changes are:
+ H3 --> H35: for quality 3.
+ H54 --> H55: for quality 4 with size hint > 1MB
+ H6 --> H65: for qualities 5, 6, 7, 8, 9. */
+ if (hparams->type == 3) {
+ hparams->type = 35;
+ }
+ if (hparams->type == 54) {
+ hparams->type = 55;
+ }
+ if (hparams->type == 6) {
+ hparams->type = 65;
+ }
+ }
}
#endif /* BROTLI_ENC_QUALITY_H_ */