From 3396c67fea14aef349905b90dfef0ff4ada1be8c Mon Sep 17 00:00:00 2001 From: Evgenii Kliuchnikov Date: Mon, 15 Jan 2024 12:49:21 -0800 Subject: add brcat alias + flag to decompress concatenated streams PiperOrigin-RevId: 598652401 --- c/tools/brotli.c | 168 ++++++++++++++++++++++++++++++++++++++---------------- c/tools/brotli.md | 10 +++- docs/brotli.1 | 12 +++- tests/cli_test.sh | 12 ++++ 4 files changed, 151 insertions(+), 51 deletions(-) diff --git a/c/tools/brotli.c b/c/tools/brotli.c index 56c60af..0dc99bd 100644 --- a/c/tools/brotli.c +++ b/c/tools/brotli.c @@ -112,7 +112,7 @@ typedef enum { #define DEFAULT_LGWIN 24 #define DEFAULT_SUFFIX ".br" -#define MAX_OPTIONS 20 +#define MAX_OPTIONS 24 #define MAX_COMMENT_LEN 80 typedef struct { @@ -128,6 +128,7 @@ typedef struct { BROTLI_BOOL test_integrity; BROTLI_BOOL decompress; BROTLI_BOOL large_window; + BROTLI_BOOL allow_concatenated; const char* output_path; const char* dictionary_path; const char* suffix; @@ -145,6 +146,7 @@ typedef struct { uint8_t* dictionary; size_t dictionary_size; BrotliEncoderPreparedDictionary* prepared_dictionary; + BrotliDecoderState* decoder; char* modified_path; /* Storage for path with appended / cut suffix */ int iterator; int ignore; @@ -187,7 +189,7 @@ static BROTLI_BOOL ParseBase64(const char* str, uint8_t* out, size_t* out_len) { size_t octet_count = 0; for (i = 0; i < in_len; ++i) { char c = str[i]; - uint32_t sextet = 0; + int sextet = 0; if (c == 9 || c == 10 || c == 13 || c == ' ') { continue; } @@ -209,7 +211,7 @@ static BROTLI_BOOL ParseBase64(const char* str, uint8_t* out, size_t* out_len) { } else { return BROTLI_FALSE; } - bits = (bits << 6) | sextet; + bits = (bits << 6) | (uint32_t)sextet; bit_count += 6; if (bit_count >= 8) { if (octet_count == max_out_len) return BROTLI_FALSE; @@ -250,17 +252,16 @@ static const char* FileName(const char* path) { } /* Detect if the program name is a special alias that infers a command type. */ -static Command ParseAlias(const char* name) { +static BROTLI_BOOL CheckAlias(const char* name, const char* alias) { /* TODO: cast name to lower case? */ - const char* unbrotli = "unbrotli"; - size_t unbrotli_len = strlen(unbrotli); + size_t alias_len = strlen(alias); name = FileName(name); /* Partial comparison. On Windows there could be ".exe" suffix. */ - if (strncmp(name, unbrotli, unbrotli_len) == 0) { - char terminator = name[unbrotli_len]; - if (terminator == 0 || terminator == '.') return COMMAND_DECOMPRESS; + if (strncmp(name, alias, alias_len) == 0) { + char terminator = name[alias_len]; + if (terminator == 0 || terminator == '.') return BROTLI_TRUE; } - return COMMAND_COMPRESS; + return BROTLI_FALSE; } static Command ParseParams(Context* params) { @@ -279,7 +280,20 @@ static Command ParseParams(Context* params) { BROTLI_BOOL suffix_set = BROTLI_FALSE; BROTLI_BOOL after_dash_dash = BROTLI_FALSE; BROTLI_BOOL comment_set = BROTLI_FALSE; - Command command = ParseAlias(argv[0]); + BROTLI_BOOL concatenated_set = BROTLI_FALSE; + Command command = COMMAND_COMPRESS; + + if (CheckAlias(argv[0], "brcat")) { + command_set = BROTLI_TRUE; + command = COMMAND_DECOMPRESS; + concatenated_set = BROTLI_TRUE; + params->allow_concatenated = BROTLI_TRUE; + output_set = BROTLI_TRUE; + params->write_to_stdout = BROTLI_TRUE; + } else if (CheckAlias(argv[0], "unbrotli")) { + command_set = BROTLI_TRUE; + command = COMMAND_DECOMPRESS; + } for (i = 1; i < argc; ++i) { const char* arg = argv[i]; @@ -293,7 +307,7 @@ static Command ParseParams(Context* params) { } /* Too many options. The expected longest option list is: - "-q 0 -w 10 -o f -D d -S b -d -f -k -n -v --", i.e. 16 items in total. + "-q 0 -w 10 -o f -D d -S b -d -f -k -n -v -K --", i.e. 17 items in total. This check is an additional guard that is never triggered, but provides a guard for future changes. */ if (next_option_index > (MAX_OPTIONS - 2)) { @@ -394,6 +408,14 @@ static Command ParseParams(Context* params) { } params->verbosity = 1; continue; + } else if (c == 'K') { + if (concatenated_set) { + fprintf(stderr, "argument -K / --concatenated already set\n"); + return COMMAND_INVALID; + } + concatenated_set = BROTLI_TRUE; + params->allow_concatenated = BROTLI_TRUE; + continue; } else if (c == 'V') { /* Don't parse further. */ return COMMAND_VERSION; @@ -491,6 +513,14 @@ static Command ParseParams(Context* params) { } quality_set = BROTLI_TRUE; params->quality = 11; + } else if (strcmp("concatenated", arg) == 0) { + if (concatenated_set) { + fprintf(stderr, "argument -K / --concatenated already set\n"); + return COMMAND_INVALID; + } + concatenated_set = BROTLI_TRUE; + params->allow_concatenated = BROTLI_TRUE; + continue; } else if (strcmp("decompress", arg) == 0) { if (command_set) { fprintf(stderr, "command already set when parsing --decompress\n"); @@ -669,6 +699,12 @@ static Command ParseParams(Context* params) { if (strchr(params->suffix, '/') || strchr(params->suffix, '\\')) { return COMMAND_INVALID; } + if (!params->decompress && params->allow_concatenated) { + return COMMAND_INVALID; + } + if (params->allow_concatenated && params->comment_len) { + return COMMAND_INVALID; + } return command; } @@ -724,7 +760,8 @@ static void PrintHelp(const char* name, BROTLI_BOOL error) { " when encoding: embed comment (fingerprint)\n", MAX_COMMENT_LEN); fprintf(media, -" -D FILE, --dictionary=FILE use FILE as raw (LZ77) dictionary\n"); +" -D FILE, --dictionary=FILE use FILE as raw (LZ77) dictionary\n" +" -K, --concatenated allows concatenated brotli streams as input\n"); fprintf(media, " -S SUF, --suffix=SUF output file suffix (default:'%s')\n", DEFAULT_SUFFIX); @@ -1086,6 +1123,7 @@ static BROTLI_BOOL ProvideOutput(Context* context) { static BROTLI_BOOL FlushOutput(Context* context) { if (!WriteOutput(context)) return BROTLI_FALSE; context->available_out = 0; + context->next_out = context->output; return BROTLI_TRUE; } @@ -1157,7 +1195,27 @@ static void OnMetadataChunk(void* opaque, const uint8_t* data, size_t size) { } } -static BROTLI_BOOL DecompressFile(Context* context, BrotliDecoderState* s) { +static BROTLI_BOOL InitDecoder(Context* context) { + context->decoder = BrotliDecoderCreateInstance(NULL, NULL, NULL); + if (!context->decoder) { + fprintf(stderr, "out of memory\n"); + return BROTLI_FALSE; + } + /* This allows decoding "large-window" streams. Though it creates + fragmentation (new builds decode streams that old builds don't), + it is better from used experience perspective. */ + BrotliDecoderSetParameter( + context->decoder, BROTLI_DECODER_PARAM_LARGE_WINDOW, 1u); + if (context->dictionary) { + BrotliDecoderAttachDictionary(context->decoder, + BROTLI_SHARED_DICTIONARY_RAW, context->dictionary_size, + context->dictionary); + } + return BROTLI_TRUE; +} + +static BROTLI_BOOL DecompressFile(Context* context) { + BrotliDecoderState* s = context->decoder; BrotliDecoderResult result = BROTLI_DECODER_RESULT_NEEDS_MORE_INPUT; if (context->comment_len) { context->comment_state = COMMENT_INIT; @@ -1192,31 +1250,52 @@ static BROTLI_BOOL DecompressFile(Context* context, BrotliDecoderState* s) { if (!ProvideOutput(context)) return BROTLI_FALSE; } else if (result == BROTLI_DECODER_RESULT_SUCCESS) { if (!FlushOutput(context)) return BROTLI_FALSE; - int has_more_input = - (context->available_in != 0) || (fgetc(context->fin) != EOF); - if (has_more_input) { - fprintf(stderr, "corrupt input [%s]\n", - PrintablePath(context->current_input_path)); - if (context->verbosity > 0) { - fprintf(stderr, "reason: extra input\n"); + BROTLI_BOOL has_more_input = (context->available_in != 0); + int extra_char = EOF; + if (!has_more_input) { + extra_char = fgetc(context->fin); + if (extra_char != EOF) { + has_more_input = BROTLI_TRUE; + context->input[0] = (uint8_t)extra_char; + context->next_in = context->input; + context->available_in = 1; } - return BROTLI_FALSE; - } - if (context->verbosity > 0) { - context->end_time = clock(); - fprintf(stderr, "Decompressed "); - PrintFileProcessingProgress(context); - fprintf(stderr, "\n"); } - /* Final check */ - if (context->comment_state != COMMENT_OK) { - fprintf(stderr, "corrupt input [%s]\n", - PrintablePath(context->current_input_path)); + if (has_more_input) { + if (context->allow_concatenated) { + if (context->verbosity > 0) { + fprintf(stderr, "extra input\n"); + } + if (!ProvideOutput(context)) return BROTLI_FALSE; + BrotliDecoderDestroyInstance(context->decoder); + context->decoder = NULL; + if (!InitDecoder(context)) return BROTLI_FALSE; + s = context->decoder; + } else { + fprintf(stderr, "corrupt input [%s]\n", + PrintablePath(context->current_input_path)); + if (context->verbosity > 0) { + fprintf(stderr, "reason: extra input\n"); + } + return BROTLI_FALSE; + } + } else { if (context->verbosity > 0) { - fprintf(stderr, "reason: comment mismatch\n"); + context->end_time = clock(); + fprintf(stderr, "Decompressed "); + PrintFileProcessingProgress(context); + fprintf(stderr, "\n"); + } + /* Final check */ + if (context->comment_state != COMMENT_OK) { + fprintf(stderr, "corrupt input [%s]\n", + PrintablePath(context->current_input_path)); + if (context->verbosity > 0) { + fprintf(stderr, "reason: comment mismatch\n"); + } } + return BROTLI_TRUE; } - return BROTLI_TRUE; } else { /* result == BROTLI_DECODER_RESULT_ERROR */ fprintf(stderr, "corrupt input [%s]\n", PrintablePath(context->current_input_path)); @@ -1238,27 +1317,16 @@ static BROTLI_BOOL DecompressFiles(Context* context) { BROTLI_BOOL is_ok = BROTLI_TRUE; BROTLI_BOOL rm_input = BROTLI_FALSE; BROTLI_BOOL rm_output = BROTLI_TRUE; - BrotliDecoderState* s = BrotliDecoderCreateInstance(NULL, NULL, NULL); - if (!s) { - fprintf(stderr, "out of memory\n"); - return BROTLI_FALSE; - } - /* This allows decoding "large-window" streams. Though it creates - fragmentation (new builds decode streams that old builds don't), - it is better from used experience perspective. */ - BrotliDecoderSetParameter(s, BROTLI_DECODER_PARAM_LARGE_WINDOW, 1u); - if (context->dictionary) { - BrotliDecoderAttachDictionary(s, BROTLI_SHARED_DICTIONARY_RAW, - context->dictionary_size, context->dictionary); - } + if (!InitDecoder(context)) return BROTLI_FALSE; is_ok = OpenFiles(context); if (is_ok && !context->current_input_path && !context->force_overwrite && isatty(STDIN_FILENO)) { fprintf(stderr, "Use -h help. Use -f to force input from a terminal.\n"); is_ok = BROTLI_FALSE; } - if (is_ok) is_ok = DecompressFile(context, s); - BrotliDecoderDestroyInstance(s); + if (is_ok) is_ok = DecompressFile(context); + if (context->decoder) BrotliDecoderDestroyInstance(context->decoder); + context->decoder = NULL; rm_output = !is_ok; rm_input = !rm_output && context->junk_source; if (!CloseFiles(context, rm_input, rm_output)) is_ok = BROTLI_FALSE; @@ -1408,6 +1476,7 @@ int main(int argc, char** argv) { context.write_to_stdout = BROTLI_FALSE; context.decompress = BROTLI_FALSE; context.large_window = BROTLI_FALSE; + context.allow_concatenated = BROTLI_FALSE; context.output_path = NULL; context.dictionary_path = NULL; context.suffix = DEFAULT_SUFFIX; @@ -1419,6 +1488,7 @@ int main(int argc, char** argv) { context.argv = argv; context.dictionary = NULL; context.dictionary_size = 0; + context.decoder = NULL; context.prepared_dictionary = NULL; context.modified_path = NULL; context.iterator = 0; diff --git a/c/tools/brotli.md b/c/tools/brotli.md index cb6d6f3..8792314 100644 --- a/c/tools/brotli.md +++ b/c/tools/brotli.md @@ -1,11 +1,13 @@ # NAME -brotli(1) -- brotli, unbrotli - compress or decompress files +brotli(1) -- brotli, brcat, unbrotli - compress or decompress files # SYNOPSIS `brotli` [*OPTION|FILE*]... +`brcat` is equivalent to `brotli --decompress --concatenated --stdout` + `unbrotli` is equivalent to `brotli --decompress` # DESCRIPTION @@ -83,9 +85,15 @@ Conflicting or duplicate _options_ are not allowed. `(pow(2, NUM) - 16)`; 0 lets compressor decide over the optimal value; bigger windows size improve density; decoder might require up to window size memory to operate +* `-C B64`, `--comment=B64`: + set comment; argument is base64-decoded first; + when decoding: check stream comment; + when encoding: embed comment (fingerprint) * `-D FILE`, `--dictionary=FILE`: use FILE as raw (LZ77) dictionary; same dictionary MUST be used both for compression and decompression +* `-K`, `--concatenated`: + when decoding, allow concatenated brotli streams as input * `-S SUF`, `--suffix=SUF`: output file suffix (default: `.br`) * `-V`, `--version`: diff --git a/docs/brotli.1 b/docs/brotli.1 index 7ca1355..14a4de1 100644 --- a/docs/brotli.1 +++ b/docs/brotli.1 @@ -4,11 +4,14 @@ .hy .SH NAME .PP -brotli(1) -- brotli, unbrotli - compress or decompress files +brotli(1) -- brotli, brcat, unbrotli - compress or decompress files .SH SYNOPSIS .PP \f[B]brotli\f[R] [\f[I]OPTION|FILE\f[R]]\&... .PP +\f[B]brcat\f[R] is equivalent to \f[B]brotli --decompress --concatenated +--stdout\f[R] +.PP \f[B]unbrotli\f[R] is equivalent to \f[B]brotli --decompress\f[R] .SH DESCRIPTION .PP @@ -104,10 +107,17 @@ bigger values cause denser, but slower compression compressor decide over the optimal value; bigger windows size improve density; decoder might require up to window size memory to operate .IP \[bu] 2 +\f[B]-C B64\f[R], \f[B]--comment=B64\f[R]: set comment; argument is +base64-decoded first; when decoding: check stream comment; when +encoding: embed comment (fingerprint) +.IP \[bu] 2 \f[B]-D FILE\f[R], \f[B]--dictionary=FILE\f[R]: use FILE as raw (LZ77) dictionary; same dictionary MUST be used both for compression and decompression .IP \[bu] 2 +\f[B]-K\f[R], \f[B]--concatenated\f[R]: when decoding, allow +concatenated brotli streams as input +.IP \[bu] 2 \f[B]-S SUF\f[R], \f[B]--suffix=SUF\f[R]: output file suffix (default: \f[B].br\f[R]) .IP \[bu] 2 diff --git a/tests/cli_test.sh b/tests/cli_test.sh index b6c563e..a8e0208 100755 --- a/tests/cli_test.sh +++ b/tests/cli_test.sh @@ -13,6 +13,7 @@ function test::brotli_cli::setup() { BROTLI="${BROTLI_PKG}/tools/brotli" cd ${TEMP_DIR} echo "Kot lomom kolol slona" > text.orig + echo "Lorem ipsum dolor sit amet. " > ipsum.orig } function test::brotli_cli::teardown() { @@ -81,4 +82,15 @@ function test::brotli_cli::comment_invalid_chars() { EXPECT_FAIL "${BROTLI} -Zfk -C S.GVsbG8= text.orig -o text.br" } +function test::brotli_cli::concatenated() { + ${BROTLI} -Zfk ipsum.orig -o one.br + ${BROTLI} -Zfk text.orig -o two.br + cat one.br two.br > full.br + EXPECT_FAIL "${BROTLI} -dc full.br > full.unbr" + EXPECT_SUCCEED "${BROTLI} -dKc full.br > full.unbr" + EXPECT_SUCCEED "${BROTLI} -dc --concatenated full.br > full.unbr" + cat ipsum.orig text.orig > full.orig + EXPECT_FILE_CONTENT_EQ full.orig full.unbr +} + gbash::unit::main "$@" -- cgit v1.1