diff options
Diffstat (limited to 'c/enc/static_dict.c')
-rw-r--r-- | c/enc/static_dict.c | 80 |
1 files changed, 43 insertions, 37 deletions
diff --git a/c/enc/static_dict.c b/c/enc/static_dict.c index 36caa61..758ef80 100644 --- a/c/enc/static_dict.c +++ b/c/enc/static_dict.c @@ -8,19 +8,20 @@ #include "../common/dictionary.h" #include "../common/platform.h" +#include "../common/transform.h" +#include "./encoder_dict.h" #include "./find_match_length.h" -#include "./static_dict_lut.h" #if defined(__cplusplus) || defined(c_plusplus) extern "C" { #endif -static const uint8_t kUppercaseFirst = 10; +/* TODO: use BrotliTransforms.cutOffTransforms instead. */ static const uint8_t kOmitLastNTransforms[10] = { 0, 12, 27, 23, 42, 63, 56, 48, 59, 64, }; -static BROTLI_INLINE uint32_t Hash(const uint8_t *data) { +static BROTLI_INLINE uint32_t Hash(const uint8_t* data) { uint32_t h = BROTLI_UNALIGNED_LOAD32LE(data) * kDictHashMul32; /* The higher bits contain more mixture from the multiplication, so we take our results from there. */ @@ -79,32 +80,33 @@ static BROTLI_INLINE BROTLI_BOOL IsMatch(const BrotliDictionary* dictionary, } BROTLI_BOOL BrotliFindAllStaticDictionaryMatches( - const BrotliDictionary* dictionary, const uint8_t* data, + const BrotliEncoderDictionary* dictionary, const uint8_t* data, size_t min_length, size_t max_length, uint32_t* matches) { BROTLI_BOOL has_found_match = BROTLI_FALSE; { - size_t offset = kStaticDictionaryBuckets[Hash(data)]; + size_t offset = dictionary->buckets[Hash(data)]; BROTLI_BOOL end = !offset; while (!end) { - DictWord w = kStaticDictionaryWords[offset++]; + DictWord w = dictionary->dict_words[offset++]; const size_t l = w.len & 0x1F; - const size_t n = (size_t)1 << dictionary->size_bits_by_length[l]; + const size_t n = (size_t)1 << dictionary->words->size_bits_by_length[l]; const size_t id = w.idx; end = !!(w.len & 0x80); w.len = (uint8_t)l; if (w.transform == 0) { const size_t matchlen = - DictMatchLength(dictionary, data, id, l, max_length); + DictMatchLength(dictionary->words, data, id, l, max_length); const uint8_t* s; size_t minlen; size_t maxlen; size_t len; - /* Transform "" + kIdentity + "" */ + /* Transform "" + BROTLI_TRANSFORM_IDENTITY + "" */ if (matchlen == l) { AddMatch(id, l, l, matches); has_found_match = BROTLI_TRUE; } - /* Transforms "" + kOmitLast1 + "" and "" + kOmitLast1 + "ing " */ + /* Transforms "" + BROTLI_TRANSFORM_OMIT_LAST_1 + "" and + "" + BROTLI_TRANSFORM_OMIT_LAST_1 + "ing " */ if (matchlen >= l - 1) { AddMatch(id + 12 * n, l - 1, l, matches); if (l + 2 < max_length && @@ -114,7 +116,7 @@ BROTLI_BOOL BrotliFindAllStaticDictionaryMatches( } has_found_match = BROTLI_TRUE; } - /* Transform "" + kOmitLastN + "" (N = 2 .. 9) */ + /* Transform "" + BROTLI_TRANSFORM_OMIT_LAST_# + "" (# = 2 .. 9) */ minlen = min_length; if (l > 9) minlen = BROTLI_MAX(size_t, minlen, l - 9); maxlen = BROTLI_MIN(size_t, matchlen, l - 2); @@ -126,7 +128,7 @@ BROTLI_BOOL BrotliFindAllStaticDictionaryMatches( continue; } s = &data[l]; - /* Transforms "" + kIdentity + <suffix> */ + /* Transforms "" + BROTLI_TRANSFORM_IDENTITY + <suffix> */ if (s[0] == ' ') { AddMatch(id + n, l + 1, l, matches); if (s[1] == 'a') { @@ -273,12 +275,13 @@ BROTLI_BOOL BrotliFindAllStaticDictionaryMatches( } } } else { - /* Set is_all_caps=0 for kUppercaseFirst and - is_all_caps=1 otherwise (kUppercaseAll) transform. */ + /* Set is_all_caps=0 for BROTLI_TRANSFORM_UPPERCASE_FIRST and + is_all_caps=1 otherwise (BROTLI_TRANSFORM_UPPERCASE_ALL) + transform. */ const BROTLI_BOOL is_all_caps = - TO_BROTLI_BOOL(w.transform != kUppercaseFirst); + TO_BROTLI_BOOL(w.transform != BROTLI_TRANSFORM_UPPERCASE_FIRST); const uint8_t* s; - if (!IsMatch(dictionary, w, data, max_length)) { + if (!IsMatch(dictionary->words, w, data, max_length)) { continue; } /* Transform "" + kUppercase{First,All} + "" */ @@ -323,27 +326,29 @@ BROTLI_BOOL BrotliFindAllStaticDictionaryMatches( /* Transforms with prefixes " " and "." */ if (max_length >= 5 && (data[0] == ' ' || data[0] == '.')) { BROTLI_BOOL is_space = TO_BROTLI_BOOL(data[0] == ' '); - size_t offset = kStaticDictionaryBuckets[Hash(&data[1])]; + size_t offset = dictionary->buckets[Hash(&data[1])]; BROTLI_BOOL end = !offset; while (!end) { - DictWord w = kStaticDictionaryWords[offset++]; + DictWord w = dictionary->dict_words[offset++]; const size_t l = w.len & 0x1F; - const size_t n = (size_t)1 << dictionary->size_bits_by_length[l]; + const size_t n = (size_t)1 << dictionary->words->size_bits_by_length[l]; const size_t id = w.idx; end = !!(w.len & 0x80); w.len = (uint8_t)l; if (w.transform == 0) { const uint8_t* s; - if (!IsMatch(dictionary, w, &data[1], max_length - 1)) { + if (!IsMatch(dictionary->words, w, &data[1], max_length - 1)) { continue; } - /* Transforms " " + kIdentity + "" and "." + kIdentity + "" */ + /* Transforms " " + BROTLI_TRANSFORM_IDENTITY + "" and + "." + BROTLI_TRANSFORM_IDENTITY + "" */ AddMatch(id + (is_space ? 6 : 32) * n, l + 1, l, matches); has_found_match = BROTLI_TRUE; if (l + 2 >= max_length) { continue; } - /* Transforms " " + kIdentity + <suffix> and "." + kIdentity + <suffix> + /* Transforms " " + BROTLI_TRANSFORM_IDENTITY + <suffix> and + "." + BROTLI_TRANSFORM_IDENTITY + <suffix> */ s = &data[l + 1]; if (s[0] == ' ') { @@ -370,12 +375,13 @@ BROTLI_BOOL BrotliFindAllStaticDictionaryMatches( } } } else if (is_space) { - /* Set is_all_caps=0 for kUppercaseFirst and - is_all_caps=1 otherwise (kUppercaseAll) transform. */ + /* Set is_all_caps=0 for BROTLI_TRANSFORM_UPPERCASE_FIRST and + is_all_caps=1 otherwise (BROTLI_TRANSFORM_UPPERCASE_ALL) + transform. */ const BROTLI_BOOL is_all_caps = - TO_BROTLI_BOOL(w.transform != kUppercaseFirst); + TO_BROTLI_BOOL(w.transform != BROTLI_TRANSFORM_UPPERCASE_FIRST); const uint8_t* s; - if (!IsMatch(dictionary, w, &data[1], max_length - 1)) { + if (!IsMatch(dictionary->words, w, &data[1], max_length - 1)) { continue; } /* Transforms " " + kUppercase{First,All} + "" */ @@ -411,22 +417,22 @@ BROTLI_BOOL BrotliFindAllStaticDictionaryMatches( } } if (max_length >= 6) { - /* Transforms with prefixes "e ", "s ", ", " and "\xc2\xa0" */ + /* Transforms with prefixes "e ", "s ", ", " and "\xC2\xA0" */ if ((data[1] == ' ' && (data[0] == 'e' || data[0] == 's' || data[0] == ',')) || - (data[0] == 0xc2 && data[1] == 0xa0)) { - size_t offset = kStaticDictionaryBuckets[Hash(&data[2])]; + (data[0] == 0xC2 && data[1] == 0xA0)) { + size_t offset = dictionary->buckets[Hash(&data[2])]; BROTLI_BOOL end = !offset; while (!end) { - DictWord w = kStaticDictionaryWords[offset++]; + DictWord w = dictionary->dict_words[offset++]; const size_t l = w.len & 0x1F; - const size_t n = (size_t)1 << dictionary->size_bits_by_length[l]; + const size_t n = (size_t)1 << dictionary->words->size_bits_by_length[l]; const size_t id = w.idx; end = !!(w.len & 0x80); w.len = (uint8_t)l; if (w.transform == 0 && - IsMatch(dictionary, w, &data[2], max_length - 2)) { - if (data[0] == 0xc2) { + IsMatch(dictionary->words, w, &data[2], max_length - 2)) { + if (data[0] == 0xC2) { AddMatch(id + 102 * n, l + 2, l, matches); has_found_match = BROTLI_TRUE; } else if (l + 2 < max_length && data[l + 2] == ' ') { @@ -444,17 +450,17 @@ BROTLI_BOOL BrotliFindAllStaticDictionaryMatches( data[3] == 'e' && data[4] == ' ') || (data[0] == '.' && data[1] == 'c' && data[2] == 'o' && data[3] == 'm' && data[4] == '/')) { - size_t offset = kStaticDictionaryBuckets[Hash(&data[5])]; + size_t offset = dictionary->buckets[Hash(&data[5])]; BROTLI_BOOL end = !offset; while (!end) { - DictWord w = kStaticDictionaryWords[offset++]; + DictWord w = dictionary->dict_words[offset++]; const size_t l = w.len & 0x1F; - const size_t n = (size_t)1 << dictionary->size_bits_by_length[l]; + const size_t n = (size_t)1 << dictionary->words->size_bits_by_length[l]; const size_t id = w.idx; end = !!(w.len & 0x80); w.len = (uint8_t)l; if (w.transform == 0 && - IsMatch(dictionary, w, &data[5], max_length - 5)) { + IsMatch(dictionary->words, w, &data[5], max_length - 5)) { AddMatch(id + (data[0] == ' ' ? 41 : 72) * n, l + 5, l, matches); has_found_match = BROTLI_TRUE; if (l + 5 < max_length) { |