From 533843e3546cd24c8344eaa899c6b0b681c8d222 Mon Sep 17 00:00:00 2001 From: Eugene Kliuchnikov Date: Fri, 2 Mar 2018 15:49:58 +0100 Subject: Update (#643) Update * make the zopflification aware of `NDIRECT`, `NPOSTFIX` (better compression in `font` mode) * add small and simple decoder tool * fix typo * Java: wrapper: make decoder channel more async-friendly Ramp up version to 1.0.3 / 1.0.3 --- c/common/transform.h | 2 +- c/common/version.h | 4 +- c/enc/backward_references_hq.c | 31 +++++--- c/enc/backward_references_inc.h | 4 +- c/enc/command.h | 32 +++++--- c/enc/encode.c | 37 ++------- .../brotli/wrapper/dec/BrotliDecoderChannel.java | 4 +- research/BUILD | 7 ++ research/brotli_decoder.c | 93 ++++++++++++++++++++++ 9 files changed, 150 insertions(+), 64 deletions(-) create mode 100644 research/brotli_decoder.c diff --git a/c/common/transform.h b/c/common/transform.h index b279c04..456c12d 100755 --- a/c/common/transform.h +++ b/c/common/transform.h @@ -1,4 +1,4 @@ -/* transforms is a part of ABI, nut not API. +/* transforms is a part of ABI, but not API. It means that there are some functions that are supposed to be in "common" library, but header itself is not placed into include/brotli. This way, diff --git a/c/common/version.h b/c/common/version.h index 38946e6..435371c 100644 --- a/c/common/version.h +++ b/c/common/version.h @@ -14,13 +14,13 @@ BrotliEncoderVersion methods. */ /* Semantic version, calculated as (MAJOR << 24) | (MINOR << 12) | PATCH */ -#define BROTLI_VERSION 0x1000002 +#define BROTLI_VERSION 0x1000003 /* This macro is used by build system to produce Libtool-friendly soname. See https://www.gnu.org/software/libtool/manual/html_node/Libtool-versioning.html */ /* ABI version, calculated as (CURRENT << 24) | (REVISION << 12) | AGE */ -#define BROTLI_ABI_VERSION 0x1002000 +#define BROTLI_ABI_VERSION 0x1003000 #endif /* BROTLI_COMMON_VERSION_H_ */ diff --git a/c/enc/backward_references_hq.c b/c/enc/backward_references_hq.c index f2f9918..62c6ba2 100644 --- a/c/enc/backward_references_hq.c +++ b/c/enc/backward_references_hq.c @@ -18,6 +18,7 @@ #include "./find_match_length.h" #include "./literal_cost.h" #include "./memory.h" +#include "./params.h" #include "./prefix.h" #include "./quality.h" @@ -25,9 +26,7 @@ extern "C" { #endif -#define BROTLI_SIMPLE_DISTANCE_ALPHABET_SIZE ( \ - BROTLI_NUM_DISTANCE_SHORT_CODES + (2 * BROTLI_LARGE_MAX_DISTANCE_BITS)) -/* BROTLI_SIMPLE_DISTANCE_ALPHABET_SIZE == 74 */ +#define BROTLI_MAX_EFFECTIVE_DISTANCE_ALPHABET_SIZE 544 static const float kInfinity = 1.7e38f; /* ~= 2 ^ 127 */ @@ -76,7 +75,8 @@ static BROTLI_INLINE uint32_t ZopfliNodeCommandLength(const ZopfliNode* self) { typedef struct ZopfliCostModel { /* The insert and copy length symbols. */ float cost_cmd_[BROTLI_NUM_COMMAND_SYMBOLS]; - float cost_dist_[BROTLI_SIMPLE_DISTANCE_ALPHABET_SIZE]; + float* cost_dist_; + uint32_t distance_alphabet_size; /* Cumulative costs of literals per position in the stream. */ float* literal_costs_; float min_cost_cmd_; @@ -84,14 +84,18 @@ typedef struct ZopfliCostModel { } ZopfliCostModel; static void InitZopfliCostModel( - MemoryManager* m, ZopfliCostModel* self, size_t num_bytes) { + MemoryManager* m, ZopfliCostModel* self, const BrotliDistanceParams* dist, + size_t num_bytes) { self->num_bytes_ = num_bytes; self->literal_costs_ = BROTLI_ALLOC(m, float, num_bytes + 2); + self->cost_dist_ = BROTLI_ALLOC(m, float, dist->alphabet_size); + self->distance_alphabet_size = dist->alphabet_size; if (BROTLI_IS_OOM(m)) return; } static void CleanupZopfliCostModel(MemoryManager* m, ZopfliCostModel* self) { BROTLI_FREE(m, self->literal_costs_); + BROTLI_FREE(m, self->cost_dist_); } static void SetCost(const uint32_t* histogram, size_t histogram_size, @@ -135,7 +139,7 @@ static void ZopfliCostModelSetFromCommands(ZopfliCostModel* self, size_t last_insert_len) { uint32_t histogram_literal[BROTLI_NUM_LITERAL_SYMBOLS]; uint32_t histogram_cmd[BROTLI_NUM_COMMAND_SYMBOLS]; - uint32_t histogram_dist[BROTLI_SIMPLE_DISTANCE_ALPHABET_SIZE]; + uint32_t histogram_dist[BROTLI_MAX_EFFECTIVE_DISTANCE_ALPHABET_SIZE]; float cost_literal[BROTLI_NUM_LITERAL_SYMBOLS]; size_t pos = position - last_insert_len; float min_cost_cmd = kInfinity; @@ -167,7 +171,7 @@ static void ZopfliCostModelSetFromCommands(ZopfliCostModel* self, cost_literal); SetCost(histogram_cmd, BROTLI_NUM_COMMAND_SYMBOLS, BROTLI_FALSE, cost_cmd); - SetCost(histogram_dist, BROTLI_SIMPLE_DISTANCE_ALPHABET_SIZE, BROTLI_FALSE, + SetCost(histogram_dist, self->distance_alphabet_size, BROTLI_FALSE, self->cost_dist_); for (i = 0; i < BROTLI_NUM_COMMAND_SYMBOLS; ++i) { @@ -210,7 +214,7 @@ static void ZopfliCostModelSetFromLiteralCosts(ZopfliCostModel* self, for (i = 0; i < BROTLI_NUM_COMMAND_SYMBOLS; ++i) { cost_cmd[i] = (float)FastLog2(11 + (uint32_t)i); } - for (i = 0; i < BROTLI_SIMPLE_DISTANCE_ALPHABET_SIZE; ++i) { + for (i = 0; i < self->distance_alphabet_size; ++i) { cost_dist[i] = (float)FastLog2(20 + (uint32_t)i); } self->min_cost_cmd_ = (float)FastLog2(11); @@ -505,7 +509,9 @@ static size_t UpdateNodes( uint32_t distnumextra; float dist_cost; size_t max_match_len; - PrefixEncodeCopyDistance(dist_code, 0, 0, &dist_symbol, &distextra); + PrefixEncodeCopyDistance( + dist_code, params->dist.num_direct_distance_codes, + params->dist.distance_postfix_bits, &dist_symbol, &distextra); distnumextra = dist_symbol >> 10; dist_cost = base_cost + (float)distnumextra + ZopfliCostModelGetDistanceCost(model, dist_symbol & 0x3FF); @@ -566,7 +572,6 @@ void BrotliZopfliCreateCommands(const size_t num_bytes, uint32_t offset = nodes[0].u.next; size_t i; size_t gap = 0; - BROTLI_UNUSED(params); for (i = 0; offset != BROTLI_UINT32_MAX; i++) { const ZopfliNode* next = &nodes[pos + offset]; size_t copy_length = ZopfliNodeCopyLength(next); @@ -584,7 +589,7 @@ void BrotliZopfliCreateCommands(const size_t num_bytes, BROTLI_MIN(size_t, block_start + pos, max_backward_limit); BROTLI_BOOL is_dictionary = TO_BROTLI_BOOL(distance > max_distance + gap); size_t dist_code = ZopfliNodeDistanceCode(next); - InitCommand(&commands[i], insert_length, + InitCommand(&commands[i], ¶ms->dist, insert_length, copy_length, (int)len_code - (int)copy_length, dist_code); if (!is_dictionary && dist_code > 0) { @@ -663,7 +668,7 @@ size_t BrotliZopfliComputeShortestPath(MemoryManager* m, size_t lz_matches_offset = 0; nodes[0].length = 0; nodes[0].u.cost = 0; - InitZopfliCostModel(m, &model, num_bytes); + InitZopfliCostModel(m, &model, ¶ms->dist, num_bytes); if (BROTLI_IS_OOM(m)) return 0; ZopfliCostModelSetFromLiteralCosts( &model, position, ringbuffer, ringbuffer_mask); @@ -788,7 +793,7 @@ void BrotliCreateHqZopfliBackwardReferences(MemoryManager* m, orig_num_commands = *num_commands; nodes = BROTLI_ALLOC(m, ZopfliNode, num_bytes + 1); if (BROTLI_IS_OOM(m)) return; - InitZopfliCostModel(m, &model, num_bytes); + InitZopfliCostModel(m, &model, ¶ms->dist, num_bytes); if (BROTLI_IS_OOM(m)) return; for (i = 0; i < 2; i++) { BrotliInitZopfliNodes(nodes, num_bytes + 1); diff --git a/c/enc/backward_references_inc.h b/c/enc/backward_references_inc.h index 967545d..38a48d3 100644 --- a/c/enc/backward_references_inc.h +++ b/c/enc/backward_references_inc.h @@ -89,8 +89,8 @@ static BROTLI_NOINLINE void EXPORT_FN(CreateBackwardReferences)( dist_cache[0] = (int)sr.distance; FN(PrepareDistanceCache)(hasher, dist_cache); } - InitCommand(commands++, insert_length, sr.len, sr.len_code_delta, - distance_code); + InitCommand(commands++, ¶ms->dist, insert_length, + sr.len, sr.len_code_delta, distance_code); } *num_literals += insert_length; insert_length = 0; diff --git a/c/enc/command.h b/c/enc/command.h index 0526815..6075dfe 100644 --- a/c/enc/command.h +++ b/c/enc/command.h @@ -13,6 +13,7 @@ #include "../common/platform.h" #include #include "./fast_log.h" +#include "./params.h" #include "./prefix.h" #if defined(__cplusplus) || defined(c_plusplus) @@ -116,7 +117,8 @@ typedef struct Command { } Command; /* distance_code is e.g. 0 for same-as-last short code, or 16 for offset 1. */ -static BROTLI_INLINE void InitCommand(Command* self, size_t insertlen, +static BROTLI_INLINE void InitCommand(Command* self, + const BrotliDistanceParams* dist, size_t insertlen, size_t copylen, int copylen_code_delta, size_t distance_code) { /* Don't rely on signed int representation, use honest casts. */ uint32_t delta = (uint8_t)((int8_t)copylen_code_delta); @@ -126,7 +128,8 @@ static BROTLI_INLINE void InitCommand(Command* self, size_t insertlen, npostfix and ndirect were 0, they are only recomputed later after the clustering if needed. */ PrefixEncodeCopyDistance( - distance_code, 0, 0, &self->dist_prefix_, &self->dist_extra_); + distance_code, dist->num_direct_distance_codes, + dist->distance_postfix_bits, &self->dist_prefix_, &self->dist_extra_); GetLengthCode( insertlen, (size_t)((int)copylen + copylen_code_delta), TO_BROTLI_BOOL((self->dist_prefix_ & 0x3FF) == 0), &self->cmd_prefix_); @@ -140,21 +143,24 @@ static BROTLI_INLINE void InitInsertCommand(Command* self, size_t insertlen) { GetLengthCode(insertlen, 4, BROTLI_FALSE, &self->cmd_prefix_); } -static BROTLI_INLINE uint32_t CommandRestoreDistanceCode(const Command* self) { - if ((self->dist_prefix_ & 0x3FF) < BROTLI_NUM_DISTANCE_SHORT_CODES) { +static BROTLI_INLINE uint32_t CommandRestoreDistanceCode( + const Command* self, const BrotliDistanceParams* dist) { + if ((self->dist_prefix_ & 0x3FF) < + BROTLI_NUM_DISTANCE_SHORT_CODES + dist->num_direct_distance_codes) { return self->dist_prefix_ & 0x3FF; } else { + uint32_t dcode = self->dist_prefix_ & 0x3FF; uint32_t nbits = self->dist_prefix_ >> 10; uint32_t extra = self->dist_extra_; - /* It is assumed that the distance was first encoded with NPOSTFIX = 0 and - NDIRECT = 0, so the code itself is of this form: - BROTLI_NUM_DISTANCE_SHORT_CODES + 2 * (nbits - 1) + prefix_bit - Therefore, the following expression results in (2 + prefix_bit). */ - uint32_t prefix = (self->dist_prefix_ & 0x3FF) + 4u - - BROTLI_NUM_DISTANCE_SHORT_CODES - 2u * nbits; - /* Subtract 4 for offset (Chapter 4.) and - increase by BROTLI_NUM_DISTANCE_SHORT_CODES - 1 */ - return (prefix << nbits) + extra + BROTLI_NUM_DISTANCE_SHORT_CODES - 4u; + uint32_t postfix_mask = (1U << dist->distance_postfix_bits) - 1U; + uint32_t hcode = (dcode - dist->num_direct_distance_codes - + BROTLI_NUM_DISTANCE_SHORT_CODES) >> + dist->distance_postfix_bits; + uint32_t lcode = (dcode - dist->num_direct_distance_codes - + BROTLI_NUM_DISTANCE_SHORT_CODES) & postfix_mask; + uint32_t offset = ((2U + (hcode & 1U)) << nbits) - 4U; + return ((offset + extra) << dist->distance_postfix_bits) + lcode + + dist->num_direct_distance_codes + BROTLI_NUM_DISTANCE_SHORT_CODES; } } diff --git a/c/enc/encode.c b/c/enc/encode.c index 4fd28d0..563c827 100644 --- a/c/enc/encode.c +++ b/c/enc/encode.c @@ -171,26 +171,6 @@ BROTLI_BOOL BrotliEncoderSetParameter( } } -static void RecomputeDistancePrefixes(Command* cmds, - size_t num_commands, - uint32_t num_direct_distance_codes, - uint32_t distance_postfix_bits) { - size_t i; - if (num_direct_distance_codes == 0 && distance_postfix_bits == 0) { - return; - } - for (i = 0; i < num_commands; ++i) { - Command* cmd = &cmds[i]; - if (CommandCopyLen(cmd) && cmd->cmd_prefix_ >= 128) { - PrefixEncodeCopyDistance(CommandRestoreDistanceCode(cmd), - num_direct_distance_codes, - distance_postfix_bits, - &cmd->dist_prefix_, - &cmd->dist_extra_); - } - } -} - /* Wraps 64-bit input position to 32-bit ring-buffer position preserving "not-a-first-lap" feature. */ static uint32_t WrapPosition(uint64_t position) { @@ -587,13 +567,6 @@ static void WriteMetaBlockInternal(MemoryManager* m, BROTLI_DCHECK(*storage_ix <= 14); last_bytes = (uint16_t)((storage[1] << 8) | storage[0]); last_bytes_bits = (uint8_t)(*storage_ix); - if (params->dist.num_direct_distance_codes != 0 || - params->dist.distance_postfix_bits != 0) { - RecomputeDistancePrefixes(commands, - num_commands, - params->dist.num_direct_distance_codes, - params->dist.distance_postfix_bits); - } if (params->quality <= MAX_QUALITY_FOR_STATIC_ENTROPY_CODES) { BrotliStoreMetaBlockFast(m, data, wrapped_last_flush_pos, bytes, mask, is_last, params, @@ -663,6 +636,8 @@ static void WriteMetaBlockInternal(MemoryManager* m, } static void ChooseDistanceParams(BrotliEncoderParams* params) { + /* NDIRECT, NPOSTFIX must be both zero for qualities + up to MIN_QUALITY_FOR_BLOCK_SPLIT == 3. */ uint32_t num_direct_distance_codes = 0; uint32_t distance_postfix_bits = 0; uint32_t alphabet_size; @@ -782,9 +757,8 @@ static void BrotliEncoderInitState(BrotliEncoderState* s) { memcpy(s->saved_dist_cache_, s->dist_cache_, sizeof(s->saved_dist_cache_)); } -BrotliEncoderState* BrotliEncoderCreateInstance(brotli_alloc_func alloc_func, - brotli_free_func free_func, - void* opaque) { +BrotliEncoderState* BrotliEncoderCreateInstance( + brotli_alloc_func alloc_func, brotli_free_func free_func, void* opaque) { BrotliEncoderState* state = 0; if (!alloc_func && !free_func) { state = (BrotliEncoderState*)malloc(sizeof(BrotliEncoderState)); @@ -912,7 +886,8 @@ static void ExtendLastCommand(BrotliEncoderState* s, uint32_t* bytes, uint64_t max_distance = last_processed_pos < max_backward_distance ? last_processed_pos : max_backward_distance; uint64_t cmd_dist = (uint64_t)s->dist_cache_[0]; - uint32_t distance_code = CommandRestoreDistanceCode(last_command); + uint32_t distance_code = CommandRestoreDistanceCode(last_command, + &s->params.dist); if (distance_code < BROTLI_NUM_DISTANCE_SHORT_CODES || distance_code - (BROTLI_NUM_DISTANCE_SHORT_CODES - 1) == cmd_dist) { if (cmd_dist <= max_distance) { diff --git a/java/org/brotli/wrapper/dec/BrotliDecoderChannel.java b/java/org/brotli/wrapper/dec/BrotliDecoderChannel.java index c9a752a..6dfe8f2 100644 --- a/java/org/brotli/wrapper/dec/BrotliDecoderChannel.java +++ b/java/org/brotli/wrapper/dec/BrotliDecoderChannel.java @@ -58,8 +58,8 @@ public class BrotliDecoderChannel extends Decoder implements ReadableByteChannel int result = 0; while (dst.hasRemaining()) { int outputSize = decode(); - if (outputSize == -1) { - return result == 0 ? -1 : result; + if (outputSize <= 0) { + return result == 0 ? outputSize : result; } result += consume(dst); } diff --git a/research/BUILD b/research/BUILD index 211b3e7..9da08c2 100755 --- a/research/BUILD +++ b/research/BUILD @@ -35,3 +35,10 @@ cc_binary( ":sieve", ], ) + +cc_binary( + name = "brotli_decoder", + srcs = ["brotli_decoder.c"], + linkstatic = 1, + deps = ["//:brotlidec"], +) diff --git a/research/brotli_decoder.c b/research/brotli_decoder.c new file mode 100644 index 0000000..b1d556d --- /dev/null +++ b/research/brotli_decoder.c @@ -0,0 +1,93 @@ +/* Copyright 2018 Google Inc. All Rights Reserved. + + Distributed under MIT license. + See file LICENSE for detail or copy at https://opensource.org/licenses/MIT +*/ + +#include +#include +#include + +#include + +#define BUFFER_SIZE (1u << 20) + +typedef struct Context { + FILE* fin; + FILE* fout; + uint8_t* input_buffer; + uint8_t* output_buffer; + BrotliDecoderState* decoder; +} Context; + +void init(Context* ctx) { + ctx->fin = 0; + ctx->fout = 0; + ctx->input_buffer = 0; + ctx->output_buffer = 0; + ctx->decoder = 0; +} + +void cleanup(Context* ctx) { + if (ctx->decoder) BrotliDecoderDestroyInstance(ctx->decoder); + if (ctx->output_buffer) free(ctx->output_buffer); + if (ctx->input_buffer) free(ctx->input_buffer); + if (ctx->fout) fclose(ctx->fout); + if (ctx->fin) fclose(ctx->fin); +} + +void fail(Context* ctx, const char* message) { + fprintf(stderr, "%s\n", message); + exit(1); +} + +int main(int argc, char** argv) { + Context ctx; + BrotliDecoderResult result = BROTLI_DECODER_RESULT_NEEDS_MORE_INPUT; + size_t available_in; + const uint8_t* next_in; + size_t available_out = BUFFER_SIZE; + uint8_t* next_out; + init(&ctx); + + ctx.fin = fdopen(STDIN_FILENO, "rb"); + if (!ctx.fin) fail(&ctx, "can't open input file"); + ctx.fout = fdopen(STDOUT_FILENO, "wb"); + if (!ctx.fout) fail(&ctx, "can't open output file"); + ctx.input_buffer = (uint8_t*)malloc(BUFFER_SIZE); + if (!ctx.input_buffer) fail(&ctx, "out of memory / input buffer"); + ctx.output_buffer = (uint8_t*)malloc(BUFFER_SIZE); + if (!ctx.output_buffer) fail(&ctx, "out of memory / output buffer"); + ctx.decoder = BrotliDecoderCreateInstance(0, 0, 0); + if (!ctx.decoder) fail(&ctx, "out of memory / decoder"); + BrotliDecoderSetParameter(ctx.decoder, BROTLI_DECODER_PARAM_LARGE_WINDOW, 1); + + next_out = ctx.output_buffer; + while (1) { + if (result == BROTLI_DECODER_RESULT_NEEDS_MORE_INPUT) { + if (feof(ctx.fin)) break; + available_in = fread(ctx.input_buffer, 1, BUFFER_SIZE, ctx.fin); + next_in = ctx.input_buffer; + if (ferror(ctx.fin)) break; + } else if (result == BROTLI_DECODER_RESULT_NEEDS_MORE_OUTPUT) { + fwrite(ctx.output_buffer, 1, BUFFER_SIZE, ctx.fout); + if (ferror(ctx.fout)) break; + available_out = BUFFER_SIZE; + next_out = ctx.output_buffer; + } else { + break; + } + result = BrotliDecoderDecompressStream( + ctx.decoder, &available_in, &next_in, &available_out, &next_out, 0); + } + if (next_out != ctx.output_buffer) { + fwrite(ctx.output_buffer, 1, next_out - ctx.output_buffer, ctx.fout); + } + if ((result == BROTLI_DECODER_RESULT_NEEDS_MORE_OUTPUT) || ferror(ctx.fout)) { + fail(&ctx, "failed to write output"); + } else if (result != BROTLI_DECODER_RESULT_SUCCESS) { + fail(&ctx, "corrupt input"); + } + cleanup(&ctx); + return 0; +} -- cgit v1.1