about summary refs log tree commit diff
diff options
context:
space:
mode:
author Evgenii Kliuchnikov <eustas@google.com> 2024-01-15 12:49:21 -0800
committer Copybara-Service <copybara-worker@google.com> 2024-01-15 12:49:56 -0800
commit 3396c67fea14aef349905b90dfef0ff4ada1be8c (patch)
tree 892dd1089c544dd4c45583814c8f90f0b0c2ee7c
parent 033940f97cfa5e708c46961de7e85d5e4976ee40 (diff)
download brotli-3396c67fea14aef349905b90dfef0ff4ada1be8c.zip
brotli-3396c67fea14aef349905b90dfef0ff4ada1be8c.tar.gz
brotli-3396c67fea14aef349905b90dfef0ff4ada1be8c.tar.bz2
add brcat alias + flag to decompress concatenated streams
PiperOrigin-RevId: 598652401
-rw-r--r-- c/tools/brotli.c 168
-rw-r--r-- c/tools/brotli.md 10
-rw-r--r-- docs/brotli.1 12
-rwxr-xr-x tests/cli_test.sh 12
4 files changed, 151 insertions, 51 deletions
diff --git a/c/tools/brotli.c b/c/tools/brotli.c
index 56c60af..0dc99bd 100644
--- a/c/tools/brotli.c
+++ b/c/tools/brotli.c
@@ -112,7 +112,7 @@ typedef enum {
#define DEFAULT_LGWIN 24
#define DEFAULT_SUFFIX ".br"
-#define MAX_OPTIONS 20
+#define MAX_OPTIONS 24
#define MAX_COMMENT_LEN 80
typedef struct {
@@ -128,6 +128,7 @@ typedef struct {
BROTLI_BOOL test_integrity;
BROTLI_BOOL decompress;
BROTLI_BOOL large_window;
+ BROTLI_BOOL allow_concatenated;
const char* output_path;
const char* dictionary_path;
const char* suffix;
@@ -145,6 +146,7 @@ typedef struct {
uint8_t* dictionary;
size_t dictionary_size;
BrotliEncoderPreparedDictionary* prepared_dictionary;
+ BrotliDecoderState* decoder;
char* modified_path; /* Storage for path with appended / cut suffix */
int iterator;
int ignore;
@@ -187,7 +189,7 @@ static BROTLI_BOOL ParseBase64(const char* str, uint8_t* out, size_t* out_len) {
size_t octet_count = 0;
for (i = 0; i < in_len; ++i) {
char c = str[i];
- uint32_t sextet = 0;
+ int sextet = 0;
if (c == 9 || c == 10 || c == 13 || c == ' ') {
continue;
}
@@ -209,7 +211,7 @@ static BROTLI_BOOL ParseBase64(const char* str, uint8_t* out, size_t* out_len) {
} else {
return BROTLI_FALSE;
}
- bits = (bits << 6) | sextet;
+ bits = (bits << 6) | (uint32_t)sextet;
bit_count += 6;
if (bit_count >= 8) {
if (octet_count == max_out_len) return BROTLI_FALSE;
@@ -250,17 +252,16 @@ static const char* FileName(const char* path) {
}
/* Detect if the program name is a special alias that infers a command type. */
-static Command ParseAlias(const char* name) {
+static BROTLI_BOOL CheckAlias(const char* name, const char* alias) {
/* TODO: cast name to lower case? */
- const char* unbrotli = "unbrotli";
- size_t unbrotli_len = strlen(unbrotli);
+ size_t alias_len = strlen(alias);
name = FileName(name);
/* Partial comparison. On Windows there could be ".exe" suffix. */
- if (strncmp(name, unbrotli, unbrotli_len) == 0) {
- char terminator = name[unbrotli_len];
- if (terminator == 0 || terminator == '.') return COMMAND_DECOMPRESS;
+ if (strncmp(name, alias, alias_len) == 0) {
+ char terminator = name[alias_len];
+ if (terminator == 0 || terminator == '.') return BROTLI_TRUE;
}
- return COMMAND_COMPRESS;
+ return BROTLI_FALSE;
}
static Command ParseParams(Context* params) {
@@ -279,7 +280,20 @@ static Command ParseParams(Context* params) {
BROTLI_BOOL suffix_set = BROTLI_FALSE;
BROTLI_BOOL after_dash_dash = BROTLI_FALSE;
BROTLI_BOOL comment_set = BROTLI_FALSE;
- Command command = ParseAlias(argv[0]);
+ BROTLI_BOOL concatenated_set = BROTLI_FALSE;
+ Command command = COMMAND_COMPRESS;
+
+ if (CheckAlias(argv[0], "brcat")) {
+ command_set = BROTLI_TRUE;
+ command = COMMAND_DECOMPRESS;
+ concatenated_set = BROTLI_TRUE;
+ params->allow_concatenated = BROTLI_TRUE;
+ output_set = BROTLI_TRUE;
+ params->write_to_stdout = BROTLI_TRUE;
+ } else if (CheckAlias(argv[0], "unbrotli")) {
+ command_set = BROTLI_TRUE;
+ command = COMMAND_DECOMPRESS;
+ }
for (i = 1; i < argc; ++i) {
const char* arg = argv[i];
@@ -293,7 +307,7 @@ static Command ParseParams(Context* params) {
}
/* Too many options. The expected longest option list is:
- "-q 0 -w 10 -o f -D d -S b -d -f -k -n -v --", i.e. 16 items in total.
+ "-q 0 -w 10 -o f -D d -S b -d -f -k -n -v -K --", i.e. 17 items in total.
This check is an additional guard that is never triggered, but provides
a guard for future changes. */
if (next_option_index > (MAX_OPTIONS - 2)) {
@@ -394,6 +408,14 @@ static Command ParseParams(Context* params) {
}
params->verbosity = 1;
continue;
+ } else if (c == 'K') {
+ if (concatenated_set) {
+ fprintf(stderr, "argument -K / --concatenated already set\n");
+ return COMMAND_INVALID;
+ }
+ concatenated_set = BROTLI_TRUE;
+ params->allow_concatenated = BROTLI_TRUE;
+ continue;
} else if (c == 'V') {
/* Don't parse further. */
return COMMAND_VERSION;
@@ -491,6 +513,14 @@ static Command ParseParams(Context* params) {
}
quality_set = BROTLI_TRUE;
params->quality = 11;
+ } else if (strcmp("concatenated", arg) == 0) {
+ if (concatenated_set) {
+ fprintf(stderr, "argument -K / --concatenated already set\n");
+ return COMMAND_INVALID;
+ }
+ concatenated_set = BROTLI_TRUE;
+ params->allow_concatenated = BROTLI_TRUE;
+ continue;
} else if (strcmp("decompress", arg) == 0) {
if (command_set) {
fprintf(stderr, "command already set when parsing --decompress\n");
@@ -669,6 +699,12 @@ static Command ParseParams(Context* params) {
if (strchr(params->suffix, '/') || strchr(params->suffix, '\\')) {
return COMMAND_INVALID;
}
+ if (!params->decompress && params->allow_concatenated) {
+ return COMMAND_INVALID;
+ }
+ if (params->allow_concatenated && params->comment_len) {
+ return COMMAND_INVALID;
+ }
return command;
}
@@ -724,7 +760,8 @@ static void PrintHelp(const char* name, BROTLI_BOOL error) {
" when encoding: embed comment (fingerprint)\n",
MAX_COMMENT_LEN);
fprintf(media,
-" -D FILE, --dictionary=FILE use FILE as raw (LZ77) dictionary\n");
+" -D FILE, --dictionary=FILE use FILE as raw (LZ77) dictionary\n"
+" -K, --concatenated allows concatenated brotli streams as input\n");
fprintf(media,
" -S SUF, --suffix=SUF output file suffix (default:'%s')\n",
DEFAULT_SUFFIX);
@@ -1086,6 +1123,7 @@ static BROTLI_BOOL ProvideOutput(Context* context) {
static BROTLI_BOOL FlushOutput(Context* context) {
if (!WriteOutput(context)) return BROTLI_FALSE;
context->available_out = 0;
+ context->next_out = context->output;
return BROTLI_TRUE;
}
@@ -1157,7 +1195,27 @@ static void OnMetadataChunk(void* opaque, const uint8_t* data, size_t size) {
}
}
-static BROTLI_BOOL DecompressFile(Context* context, BrotliDecoderState* s) {
+static BROTLI_BOOL InitDecoder(Context* context) {
+ context->decoder = BrotliDecoderCreateInstance(NULL, NULL, NULL);
+ if (!context->decoder) {
+ fprintf(stderr, "out of memory\n");
+ return BROTLI_FALSE;
+ }
+ /* This allows decoding "large-window" streams. Though it creates
+ fragmentation (new builds decode streams that old builds don't),
+ it is better from user experience perspective. */
+ BrotliDecoderSetParameter(
+ context->decoder, BROTLI_DECODER_PARAM_LARGE_WINDOW, 1u);
+ if (context->dictionary) {
+ BrotliDecoderAttachDictionary(context->decoder,
+ BROTLI_SHARED_DICTIONARY_RAW, context->dictionary_size,
+ context->dictionary);
+ }
+ return BROTLI_TRUE;
+}
+
+static BROTLI_BOOL DecompressFile(Context* context) {
+ BrotliDecoderState* s = context->decoder;
BrotliDecoderResult result = BROTLI_DECODER_RESULT_NEEDS_MORE_INPUT;
if (context->comment_len) {
context->comment_state = COMMENT_INIT;
@@ -1192,31 +1250,52 @@ static BROTLI_BOOL DecompressFile(Context* context, BrotliDecoderState* s) {
if (!ProvideOutput(context)) return BROTLI_FALSE;
} else if (result == BROTLI_DECODER_RESULT_SUCCESS) {
if (!FlushOutput(context)) return BROTLI_FALSE;
- int has_more_input =
- (context->available_in != 0) || (fgetc(context->fin) != EOF);
- if (has_more_input) {
- fprintf(stderr, "corrupt input [%s]\n",
- PrintablePath(context->current_input_path));
- if (context->verbosity > 0) {
- fprintf(stderr, "reason: extra input\n");
+ BROTLI_BOOL has_more_input = (context->available_in != 0);
+ int extra_char = EOF;
+ if (!has_more_input) {
+ extra_char = fgetc(context->fin);
+ if (extra_char != EOF) {
+ has_more_input = BROTLI_TRUE;
+ context->input[0] = (uint8_t)extra_char;
+ context->next_in = context->input;
+ context->available_in = 1;
}
- return BROTLI_FALSE;
- }
- if (context->verbosity > 0) {
- context->end_time = clock();
- fprintf(stderr, "Decompressed ");
- PrintFileProcessingProgress(context);
- fprintf(stderr, "\n");
}
- /* Final check */
- if (context->comment_state != COMMENT_OK) {
- fprintf(stderr, "corrupt input [%s]\n",
- PrintablePath(context->current_input_path));
+ if (has_more_input) {
+ if (context->allow_concatenated) {
+ if (context->verbosity > 0) {
+ fprintf(stderr, "extra input\n");
+ }
+ if (!ProvideOutput(context)) return BROTLI_FALSE;
+ BrotliDecoderDestroyInstance(context->decoder);
+ context->decoder = NULL;
+ if (!InitDecoder(context)) return BROTLI_FALSE;
+ s = context->decoder;
+ } else {
+ fprintf(stderr, "corrupt input [%s]\n",
+ PrintablePath(context->current_input_path));
+ if (context->verbosity > 0) {
+ fprintf(stderr, "reason: extra input\n");
+ }
+ return BROTLI_FALSE;
+ }
+ } else {
if (context->verbosity > 0) {
- fprintf(stderr, "reason: comment mismatch\n");
+ context->end_time = clock();
+ fprintf(stderr, "Decompressed ");
+ PrintFileProcessingProgress(context);
+ fprintf(stderr, "\n");
+ }
+ /* Final check */
+ if (context->comment_state != COMMENT_OK) {
+ fprintf(stderr, "corrupt input [%s]\n",
+ PrintablePath(context->current_input_path));
+ if (context->verbosity > 0) {
+ fprintf(stderr, "reason: comment mismatch\n");
+ }
}
+ return BROTLI_TRUE;
}
- return BROTLI_TRUE;
} else { /* result == BROTLI_DECODER_RESULT_ERROR */
fprintf(stderr, "corrupt input [%s]\n",
PrintablePath(context->current_input_path));
@@ -1238,27 +1317,16 @@ static BROTLI_BOOL DecompressFiles(Context* context) {
BROTLI_BOOL is_ok = BROTLI_TRUE;
BROTLI_BOOL rm_input = BROTLI_FALSE;
BROTLI_BOOL rm_output = BROTLI_TRUE;
- BrotliDecoderState* s = BrotliDecoderCreateInstance(NULL, NULL, NULL);
- if (!s) {
- fprintf(stderr, "out of memory\n");
- return BROTLI_FALSE;
- }
- /* This allows decoding "large-window" streams. Though it creates
- fragmentation (new builds decode streams that old builds don't),
- it is better from used experience perspective. */
- BrotliDecoderSetParameter(s, BROTLI_DECODER_PARAM_LARGE_WINDOW, 1u);
- if (context->dictionary) {
- BrotliDecoderAttachDictionary(s, BROTLI_SHARED_DICTIONARY_RAW,
- context->dictionary_size, context->dictionary);
- }
+ if (!InitDecoder(context)) return BROTLI_FALSE;
is_ok = OpenFiles(context);
if (is_ok && !context->current_input_path &&
!context->force_overwrite && isatty(STDIN_FILENO)) {
fprintf(stderr, "Use -h help. Use -f to force input from a terminal.\n");
is_ok = BROTLI_FALSE;
}
- if (is_ok) is_ok = DecompressFile(context, s);
- BrotliDecoderDestroyInstance(s);
+ if (is_ok) is_ok = DecompressFile(context);
+ if (context->decoder) BrotliDecoderDestroyInstance(context->decoder);
+ context->decoder = NULL;
rm_output = !is_ok;
rm_input = !rm_output && context->junk_source;
if (!CloseFiles(context, rm_input, rm_output)) is_ok = BROTLI_FALSE;
@@ -1408,6 +1476,7 @@ int main(int argc, char** argv) {
context.write_to_stdout = BROTLI_FALSE;
context.decompress = BROTLI_FALSE;
context.large_window = BROTLI_FALSE;
+ context.allow_concatenated = BROTLI_FALSE;
context.output_path = NULL;
context.dictionary_path = NULL;
context.suffix = DEFAULT_SUFFIX;
@@ -1419,6 +1488,7 @@ int main(int argc, char** argv) {
context.argv = argv;
context.dictionary = NULL;
context.dictionary_size = 0;
+ context.decoder = NULL;
context.prepared_dictionary = NULL;
context.modified_path = NULL;
context.iterator = 0;
diff --git a/c/tools/brotli.md b/c/tools/brotli.md
index cb6d6f3..8792314 100644
--- a/c/tools/brotli.md
+++ b/c/tools/brotli.md
@@ -1,11 +1,13 @@
# NAME
-brotli(1) -- brotli, unbrotli - compress or decompress files
+brotli(1) -- brotli, brcat, unbrotli - compress or decompress files
# SYNOPSIS
`brotli` [*OPTION|FILE*]...
+`brcat` is equivalent to `brotli --decompress --concatenated --stdout`
+
`unbrotli` is equivalent to `brotli --decompress`
# DESCRIPTION
@@ -83,9 +85,15 @@ Conflicting or duplicate _options_ are not allowed.
`(pow(2, NUM) - 16)`; 0 lets compressor decide over the optimal value;
bigger windows size improve density; decoder might require up to window size
memory to operate
+* `-C B64`, `--comment=B64`:
+ set comment; argument is base64-decoded first;
+ when decoding: check stream comment;
+ when encoding: embed comment (fingerprint)
* `-D FILE`, `--dictionary=FILE`:
use FILE as raw (LZ77) dictionary; same dictionary MUST be used both for
compression and decompression
+* `-K`, `--concatenated`:
+ when decoding, allow concatenated brotli streams as input
* `-S SUF`, `--suffix=SUF`:
output file suffix (default: `.br`)
* `-V`, `--version`:
diff --git a/docs/brotli.1 b/docs/brotli.1
index 7ca1355..14a4de1 100644
--- a/docs/brotli.1
+++ b/docs/brotli.1
@@ -4,11 +4,14 @@
.hy
.SH NAME
.PP
-brotli(1) -- brotli, unbrotli - compress or decompress files
+brotli(1) -- brotli, brcat, unbrotli - compress or decompress files
.SH SYNOPSIS
.PP
\f[B]brotli\f[R] [\f[I]OPTION|FILE\f[R]]\&...
.PP
+\f[B]brcat\f[R] is equivalent to \f[B]brotli --decompress --concatenated
+--stdout\f[R]
+.PP
\f[B]unbrotli\f[R] is equivalent to \f[B]brotli --decompress\f[R]
.SH DESCRIPTION
.PP
@@ -104,10 +107,17 @@ bigger values cause denser, but slower compression
compressor decide over the optimal value; bigger windows size improve
density; decoder might require up to window size memory to operate
.IP \[bu] 2
+\f[B]-C B64\f[R], \f[B]--comment=B64\f[R]: set comment; argument is
+base64-decoded first; when decoding: check stream comment; when
+encoding: embed comment (fingerprint)
+.IP \[bu] 2
\f[B]-D FILE\f[R], \f[B]--dictionary=FILE\f[R]: use FILE as raw (LZ77)
dictionary; same dictionary MUST be used both for compression and
decompression
.IP \[bu] 2
+\f[B]-K\f[R], \f[B]--concatenated\f[R]: when decoding, allow
+concatenated brotli streams as input
+.IP \[bu] 2
\f[B]-S SUF\f[R], \f[B]--suffix=SUF\f[R]: output file suffix (default:
\f[B].br\f[R])
.IP \[bu] 2
diff --git a/tests/cli_test.sh b/tests/cli_test.sh
index b6c563e..a8e0208 100755
--- a/tests/cli_test.sh
+++ b/tests/cli_test.sh
@@ -13,6 +13,7 @@ function test::brotli_cli::setup() {
BROTLI="${BROTLI_PKG}/tools/brotli"
cd ${TEMP_DIR}
echo "Kot lomom kolol slona" > text.orig
+ echo "Lorem ipsum dolor sit amet. " > ipsum.orig
}
function test::brotli_cli::teardown() {
@@ -81,4 +82,15 @@ function test::brotli_cli::comment_invalid_chars() {
EXPECT_FAIL "${BROTLI} -Zfk -C S.GVsbG8= text.orig -o text.br"
}
+function test::brotli_cli::concatenated() {
+ ${BROTLI} -Zfk ipsum.orig -o one.br
+ ${BROTLI} -Zfk text.orig -o two.br
+ cat one.br two.br > full.br
+ EXPECT_FAIL "${BROTLI} -dc full.br > full.unbr"
+ EXPECT_SUCCEED "${BROTLI} -dKc full.br > full.unbr"
+ EXPECT_SUCCEED "${BROTLI} -dc --concatenated full.br > full.unbr"
+ cat ipsum.orig text.orig > full.orig
+ EXPECT_FILE_CONTENT_EQ full.orig full.unbr
+}
+
gbash::unit::main "$@"