diff options
author | Eugene Kliuchnikov <eustas@google.com> | 2018-04-13 11:44:34 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2018-04-13 11:44:34 +0200 |
commit | 68db5c0272d46f316efab575b67dae5b9ce4378d (patch) | |
tree | 836070b9443dca7035b82bbed145ac8dc08fc006 | |
parent | c6333e1e79fb62ea088443f192293f964409b04e (diff) | |
download | brotli-68db5c0272d46f316efab575b67dae5b9ce4378d.zip brotli-68db5c0272d46f316efab575b67dae5b9ce4378d.tar.gz brotli-68db5c0272d46f316efab575b67dae5b9ce4378d.tar.bz2 |
Update (#660)
* Update
* improve q=1 compression on small files
* fix "left shift before promotion"
* fix osx Travis builds
-rw-r--r-- | .travis.yml | 6 | ||||
-rw-r--r-- | c/enc/compress_fragment_two_pass.c | 146 | ||||
-rw-r--r-- | c/enc/encode.c | 3 | ||||
-rw-r--r-- | c/tools/brotli.c | 2 | ||||
-rwxr-xr-x | scripts/.travis.sh | 2 |
5 files changed, 98 insertions, 61 deletions
diff --git a/.travis.yml b/.travis.yml index 78495d0..6ec786c 100644 --- a/.travis.yml +++ b/.travis.yml @@ -117,8 +117,10 @@ matrix: - os: osx env: BUILD_SYSTEM=cmake C_COMPILER=gcc-6 CXX_COMPILER=g++-6 - os: osx - osx_image: beta-xcode6.2 - env: BUILD_SYSTEM=cmake C_COMPILER=gcc-4.4 CXX_COMPILER=g++-4.4 + env: BUILD_SYSTEM=cmake C_COMPILER=gcc-4.9 CXX_COMPILER=g++-4.9 + - os: osx + osx_image: xcode9.3 + env: BUILD_SYSTEM=cmake ### ## Python 2.7 OS X build (using the system /usr/bin/python) diff --git a/c/enc/compress_fragment_two_pass.c b/c/enc/compress_fragment_two_pass.c index b8bd6e8..f8a5606 100644 --- a/c/enc/compress_fragment_two_pass.c +++ b/c/enc/compress_fragment_two_pass.c @@ -39,26 +39,29 @@ extern "C" { * The number has been tuned heuristically against compression benchmarks. */ static const uint32_t kHashMul32 = 0x1E35A7BD; -static BROTLI_INLINE uint32_t Hash(const uint8_t* p, size_t shift) { - const uint64_t h = (BROTLI_UNALIGNED_LOAD64LE(p) << 16) * kHashMul32; +static BROTLI_INLINE uint32_t Hash(const uint8_t* p, + size_t shift, size_t length) { + const uint64_t h = + (BROTLI_UNALIGNED_LOAD64LE(p) << ((8 - length) * 8)) * kHashMul32; return (uint32_t)(h >> shift); } -static BROTLI_INLINE uint32_t HashBytesAtOffset( - uint64_t v, int offset, size_t shift) { - BROTLI_DCHECK(offset >= 0); - BROTLI_DCHECK(offset <= 2); +static BROTLI_INLINE uint32_t HashBytesAtOffset(uint64_t v, size_t offset, + size_t shift, size_t length) { + BROTLI_DCHECK(offset <= 8 - length); { - const uint64_t h = ((v >> (8 * offset)) << 16) * kHashMul32; + const uint64_t h = ((v >> (8 * offset)) << ((8 - length) * 8)) * kHashMul32; return (uint32_t)(h >> shift); } } -static BROTLI_INLINE BROTLI_BOOL IsMatch(const uint8_t* p1, const uint8_t* p2) { - return TO_BROTLI_BOOL( - BrotliUnalignedRead32(p1) == BrotliUnalignedRead32(p2) && - p1[4] == p2[4] && - p1[5] == p2[5]); +static BROTLI_INLINE BROTLI_BOOL IsMatch(const uint8_t* p1, const uint8_t* p2, + size_t length) { + if (BrotliUnalignedRead32(p1) == BrotliUnalignedRead32(p2)) { + if (length == 4) return BROTLI_TRUE; + return TO_BROTLI_BOOL(p1[4] == p2[4] && p1[5] == p2[5]); + } + return BROTLI_FALSE; } /* Builds a command and distance prefix code (each 64 symbols) into "depth" and @@ -235,7 +238,8 @@ static void BrotliStoreMetaBlockHeader( static BROTLI_INLINE void CreateCommands(const uint8_t* input, size_t block_size, size_t input_size, const uint8_t* base_ip, int* table, - size_t table_bits, uint8_t** literals, uint32_t** commands) { + size_t table_bits, size_t min_match, + uint8_t** literals, uint32_t** commands) { /* "ip" is the input pointer. */ const uint8_t* ip = input; const size_t shift = 64u - table_bits; @@ -247,19 +251,18 @@ static BROTLI_INLINE void CreateCommands(const uint8_t* input, int last_distance = -1; const size_t kInputMarginBytes = BROTLI_WINDOW_GAP; - const size_t kMinMatchLen = 6; if (BROTLI_PREDICT_TRUE(block_size >= kInputMarginBytes)) { /* For the last block, we need to keep a 16 bytes margin so that we can be sure that all distances are at most window size - 16. For all other blocks, we only need to keep a margin of 5 bytes so that we don't go over the block size with a copy. */ - const size_t len_limit = BROTLI_MIN(size_t, block_size - kMinMatchLen, + const size_t len_limit = BROTLI_MIN(size_t, block_size - min_match, input_size - kInputMarginBytes); const uint8_t* ip_limit = input + len_limit; uint32_t next_hash; - for (next_hash = Hash(++ip, shift); ; ) { + for (next_hash = Hash(++ip, shift, min_match); ; ) { /* Step 1: Scan forward in the input looking for a 6-byte-long match. If we get close to exhausting the input then goto emit_remainder. @@ -286,14 +289,14 @@ trawl: uint32_t hash = next_hash; uint32_t bytes_between_hash_lookups = skip++ >> 5; ip = next_ip; - BROTLI_DCHECK(hash == Hash(ip, shift)); + BROTLI_DCHECK(hash == Hash(ip, shift, min_match)); next_ip = ip + bytes_between_hash_lookups; if (BROTLI_PREDICT_FALSE(next_ip > ip_limit)) { goto emit_remainder; } - next_hash = Hash(next_ip, shift); + next_hash = Hash(next_ip, shift, min_match); candidate = ip - last_distance; - if (IsMatch(ip, candidate)) { + if (IsMatch(ip, candidate, min_match)) { if (BROTLI_PREDICT_TRUE(candidate < ip)) { table[hash] = (int)(ip - base_ip); break; @@ -304,7 +307,7 @@ trawl: BROTLI_DCHECK(candidate < ip); table[hash] = (int)(ip - base_ip); - } while (BROTLI_PREDICT_TRUE(!IsMatch(ip, candidate))); + } while (BROTLI_PREDICT_TRUE(!IsMatch(ip, candidate, min_match))); /* Check copy distance. If candidate is not feasible, continue search. Checking is done outside of hot loop to reduce overhead. */ @@ -319,8 +322,9 @@ trawl: /* We have a 6-byte match at ip, and we need to emit bytes in [next_emit, ip). */ const uint8_t* base = ip; - size_t matched = 6 + FindMatchLengthWithLimit( - candidate + 6, ip + 6, (size_t)(ip_end - ip) - 6); + size_t matched = min_match + FindMatchLengthWithLimit( + candidate + min_match, ip + min_match, + (size_t)(ip_end - ip) - min_match); int distance = (int)(base - candidate); /* > 0 */ int insert = (int)(base - next_emit); ip += matched; @@ -345,32 +349,47 @@ trawl: /* We could immediately start working at ip now, but to improve compression we first update "table" with the hashes of some positions within the last copy. */ - uint64_t input_bytes = BROTLI_UNALIGNED_LOAD64LE(ip - 5); - uint32_t prev_hash = HashBytesAtOffset(input_bytes, 0, shift); + uint64_t input_bytes; uint32_t cur_hash; - table[prev_hash] = (int)(ip - base_ip - 5); - prev_hash = HashBytesAtOffset(input_bytes, 1, shift); - table[prev_hash] = (int)(ip - base_ip - 4); - prev_hash = HashBytesAtOffset(input_bytes, 2, shift); - table[prev_hash] = (int)(ip - base_ip - 3); - input_bytes = BROTLI_UNALIGNED_LOAD64LE(ip - 2); - cur_hash = HashBytesAtOffset(input_bytes, 2, shift); - prev_hash = HashBytesAtOffset(input_bytes, 0, shift); - table[prev_hash] = (int)(ip - base_ip - 2); - prev_hash = HashBytesAtOffset(input_bytes, 1, shift); - table[prev_hash] = (int)(ip - base_ip - 1); + uint32_t prev_hash; + if (min_match == 4) { + input_bytes = BROTLI_UNALIGNED_LOAD64LE(ip - 3); + cur_hash = HashBytesAtOffset(input_bytes, 3, shift, min_match); + prev_hash = HashBytesAtOffset(input_bytes, 0, shift, min_match); + table[prev_hash] = (int)(ip - base_ip - 3); + prev_hash = HashBytesAtOffset(input_bytes, 1, shift, min_match); + table[prev_hash] = (int)(ip - base_ip - 2); + prev_hash = HashBytesAtOffset(input_bytes, 0, shift, min_match); + table[prev_hash] = (int)(ip - base_ip - 1); + } else { + input_bytes = BROTLI_UNALIGNED_LOAD64LE(ip - 5); + prev_hash = HashBytesAtOffset(input_bytes, 0, shift, min_match); + table[prev_hash] = (int)(ip - base_ip - 5); + prev_hash = HashBytesAtOffset(input_bytes, 1, shift, min_match); + table[prev_hash] = (int)(ip - base_ip - 4); + prev_hash = HashBytesAtOffset(input_bytes, 2, shift, min_match); + table[prev_hash] = (int)(ip - base_ip - 3); + input_bytes = BROTLI_UNALIGNED_LOAD64LE(ip - 2); + cur_hash = HashBytesAtOffset(input_bytes, 2, shift, min_match); + prev_hash = HashBytesAtOffset(input_bytes, 0, shift, min_match); + table[prev_hash] = (int)(ip - base_ip - 2); + prev_hash = HashBytesAtOffset(input_bytes, 1, shift, min_match); + table[prev_hash] = (int)(ip - base_ip - 1); + } candidate = base_ip + table[cur_hash]; table[cur_hash] = (int)(ip - base_ip); } } - while (ip - candidate <= MAX_DISTANCE && IsMatch(ip, candidate)) { + while (ip - candidate <= MAX_DISTANCE && + IsMatch(ip, candidate, min_match)) { /* We have a 6-byte match at ip, and no need to emit any literal bytes prior to ip. */ const uint8_t* base = ip; - size_t matched = 6 + FindMatchLengthWithLimit( - candidate + 6, ip + 6, (size_t)(ip_end - ip) - 6); + size_t matched = min_match + FindMatchLengthWithLimit( + candidate + min_match, ip + min_match, + (size_t)(ip_end - ip) - min_match); ip += matched; last_distance = (int)(base - candidate); /* > 0 */ BROTLI_DCHECK(0 == memcmp(base, candidate, matched)); @@ -385,27 +404,40 @@ trawl: /* We could immediately start working at ip now, but to improve compression we first update "table" with the hashes of some positions within the last copy. */ - uint64_t input_bytes = BROTLI_UNALIGNED_LOAD64LE(ip - 5); - uint32_t prev_hash = HashBytesAtOffset(input_bytes, 0, shift); + uint64_t input_bytes; uint32_t cur_hash; - table[prev_hash] = (int)(ip - base_ip - 5); - prev_hash = HashBytesAtOffset(input_bytes, 1, shift); - table[prev_hash] = (int)(ip - base_ip - 4); - prev_hash = HashBytesAtOffset(input_bytes, 2, shift); - table[prev_hash] = (int)(ip - base_ip - 3); - input_bytes = BROTLI_UNALIGNED_LOAD64LE(ip - 2); - cur_hash = HashBytesAtOffset(input_bytes, 2, shift); - prev_hash = HashBytesAtOffset(input_bytes, 0, shift); - table[prev_hash] = (int)(ip - base_ip - 2); - prev_hash = HashBytesAtOffset(input_bytes, 1, shift); - table[prev_hash] = (int)(ip - base_ip - 1); + uint32_t prev_hash; + if (min_match == 4) { + input_bytes = BROTLI_UNALIGNED_LOAD64LE(ip - 3); + cur_hash = HashBytesAtOffset(input_bytes, 3, shift, min_match); + prev_hash = HashBytesAtOffset(input_bytes, 0, shift, min_match); + table[prev_hash] = (int)(ip - base_ip - 3); + prev_hash = HashBytesAtOffset(input_bytes, 1, shift, min_match); + table[prev_hash] = (int)(ip - base_ip - 2); + prev_hash = HashBytesAtOffset(input_bytes, 2, shift, min_match); + table[prev_hash] = (int)(ip - base_ip - 1); + } else { + input_bytes = BROTLI_UNALIGNED_LOAD64LE(ip - 5); + prev_hash = HashBytesAtOffset(input_bytes, 0, shift, min_match); + table[prev_hash] = (int)(ip - base_ip - 5); + prev_hash = HashBytesAtOffset(input_bytes, 1, shift, min_match); + table[prev_hash] = (int)(ip - base_ip - 4); + prev_hash = HashBytesAtOffset(input_bytes, 2, shift, min_match); + table[prev_hash] = (int)(ip - base_ip - 3); + input_bytes = BROTLI_UNALIGNED_LOAD64LE(ip - 2); + cur_hash = HashBytesAtOffset(input_bytes, 2, shift, min_match); + prev_hash = HashBytesAtOffset(input_bytes, 0, shift, min_match); + table[prev_hash] = (int)(ip - base_ip - 2); + prev_hash = HashBytesAtOffset(input_bytes, 1, shift, min_match); + table[prev_hash] = (int)(ip - base_ip - 1); + } candidate = base_ip + table[cur_hash]; table[cur_hash] = (int)(ip - base_ip); } } - next_hash = Hash(++ip, shift); + next_hash = Hash(++ip, shift, min_match); } } @@ -525,7 +557,8 @@ static void EmitUncompressedMetaBlock(const uint8_t* input, size_t input_size, static BROTLI_INLINE void BrotliCompressFragmentTwoPassImpl( MemoryManager* m, const uint8_t* input, size_t input_size, BROTLI_BOOL is_last, uint32_t* command_buf, uint8_t* literal_buf, - int* table, size_t table_bits, size_t* storage_ix, uint8_t* storage) { + int* table, size_t table_bits, size_t min_match, + size_t* storage_ix, uint8_t* storage) { /* Save the start of the first block for position and distance computations. */ const uint8_t* base_ip = input; @@ -537,8 +570,8 @@ static BROTLI_INLINE void BrotliCompressFragmentTwoPassImpl( uint32_t* commands = command_buf; uint8_t* literals = literal_buf; size_t num_literals; - CreateCommands(input, block_size, input_size, base_ip, table, table_bits, - &literals, &commands); + CreateCommands(input, block_size, input_size, base_ip, table, + table_bits, min_match, &literals, &commands); num_literals = (size_t)(literals - literal_buf); if (ShouldCompress(input, block_size, num_literals)) { const size_t num_commands = (size_t)(commands - command_buf); @@ -567,8 +600,9 @@ static BROTLI_NOINLINE void BrotliCompressFragmentTwoPassImpl ## B( \ MemoryManager* m, const uint8_t* input, size_t input_size, \ BROTLI_BOOL is_last, uint32_t* command_buf, uint8_t* literal_buf, \ int* table, size_t* storage_ix, uint8_t* storage) { \ + size_t min_match = (B <= 15) ? 4 : 6; \ BrotliCompressFragmentTwoPassImpl(m, input, input_size, is_last, command_buf,\ - literal_buf, table, B, storage_ix, storage); \ + literal_buf, table, B, min_match, storage_ix, storage); \ } FOR_TABLE_BITS_(BAKE_METHOD_PARAM_) #undef BAKE_METHOD_PARAM_ diff --git a/c/enc/encode.c b/c/enc/encode.c index 4bf0cb4..7b02401 100644 --- a/c/enc/encode.c +++ b/c/enc/encode.c @@ -882,7 +882,8 @@ static void ExtendLastCommand(BrotliEncoderState* s, uint32_t* bytes, Command* last_command = &s->commands_[s->num_commands_ - 1]; const uint8_t* data = s->ringbuffer_.buffer_; const uint32_t mask = s->ringbuffer_.mask_; - uint64_t max_backward_distance = (1u << s->params.lgwin) - BROTLI_WINDOW_GAP; + uint64_t max_backward_distance = + (((uint64_t)1) << s->params.lgwin) - BROTLI_WINDOW_GAP; uint64_t last_copy_len = last_command->copy_len_ & 0x1FFFFFF; uint64_t last_processed_pos = s->last_processed_pos_ - last_copy_len; uint64_t max_distance = last_processed_pos < max_backward_distance ? diff --git a/c/tools/brotli.c b/c/tools/brotli.c index a122080..ea4fdac 100644 --- a/c/tools/brotli.c +++ b/c/tools/brotli.c @@ -780,7 +780,7 @@ static BROTLI_BOOL CloseFiles(Context* context, BROTLI_BOOL success) { return is_ok; } -static const size_t kFileBufferSize = 1 << 16; +static const size_t kFileBufferSize = 1 << 19; static void InitializeBuffers(Context* context) { context->available_in = 0; diff --git a/scripts/.travis.sh b/scripts/.travis.sh index 18cd7b0..579856a 100755 --- a/scripts/.travis.sh +++ b/scripts/.travis.sh @@ -11,7 +11,7 @@ case "$1" in case "${CC}" in "gcc-"*) - which ${CC} || brew install homebrew/versions/gcc$(echo "${CC#*-}" | sed 's/\.//') + which ${CC} || brew install $(echo "${CC}" | sed 's/\-/@/') || brew link --overwrite $(echo "${CC}" | sed 's/\-/@/') ;; esac |