aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorEvgenii Kliuchnikov <eustas@google.com>2024-01-11 02:03:59 -0800
committerCopybara-Service <copybara-worker@google.com>2024-01-11 02:04:37 -0800
commit033940f97cfa5e708c46961de7e85d5e4976ee40 (patch)
tree014a59b581b8109495a5da5eaeafa9fbcd7e22c9
parent2ad58d8603294f5ee33d23bb725e0e6a17c1de50 (diff)
downloadbrotli-033940f97cfa5e708c46961de7e85d5e4976ee40.zip
brotli-033940f97cfa5e708c46961de7e85d5e4976ee40.tar.gz
brotli-033940f97cfa5e708c46961de7e85d5e4976ee40.tar.bz2
add comment (fingerprint) CLI feature
PiperOrigin-RevId: 597489910
-rw-r--r--c/tools/brotli.c194
-rwxr-xr-xtests/cli_test.sh84
2 files changed, 266 insertions, 12 deletions
diff --git a/c/tools/brotli.c b/c/tools/brotli.c
index 03f2d06..56c60af 100644
--- a/c/tools/brotli.c
+++ b/c/tools/brotli.c
@@ -103,9 +103,17 @@ typedef enum {
COMMAND_VERSION
} Command;
+typedef enum {
+ COMMENT_INIT,
+ COMMENT_READ,
+ COMMENT_OK,
+ COMMENT_BAD,
+} CommentState;
+
#define DEFAULT_LGWIN 24
#define DEFAULT_SUFFIX ".br"
#define MAX_OPTIONS 20
+#define MAX_COMMENT_LEN 80
typedef struct {
/* Parameters */
@@ -123,6 +131,10 @@ typedef struct {
const char* output_path;
const char* dictionary_path;
const char* suffix;
+ uint8_t comment[MAX_COMMENT_LEN];
+ size_t comment_len;
+ size_t comment_pos;
+ CommentState comment_state;
int not_input_indices[MAX_OPTIONS];
size_t longest_path_len;
size_t input_count;
@@ -161,6 +173,55 @@ typedef struct {
clock_t end_time;
} Context;
+/* Parse base 64 encoded string to buffer. Not performance-centric.
+ |out_len| as input is buffer size; |out_len| as output is decoded length.
+ Returns BROTLI_FALSE if either input is not (relaxed) base 64 conformant,
+ or output does not fit buffer. */
+static BROTLI_BOOL ParseBase64(const char* str, uint8_t* out, size_t* out_len) {
+ size_t in_len = strlen(str);
+ size_t max_out_len = *out_len;
+ size_t i;
+ size_t bit_count = 0;
+ uint32_t bits = 0;
+ size_t padding_count = 0;
+ size_t octet_count = 0;
+ for (i = 0; i < in_len; ++i) {
+ char c = str[i];
+ uint32_t sextet = 0;
+ if (c == 9 || c == 10 || c == 13 || c == ' ') {
+ continue;
+ }
+ if (c == '=') {
+ padding_count++;
+ continue;
+ }
+ if (padding_count) return BROTLI_FALSE;
+ if (c == '+' || c == '-') {
+ sextet = 62;
+ } else if (c == '/' || c == '_') {
+ sextet = 63;
+ } else if (c >= 'A' && c <= 'Z') {
+ sextet = c - 'A';
+ } else if (c >= 'a' && c <= 'z') {
+ sextet = c - 'a' + 26;
+ } else if (c >= '0' && c <= '9') {
+ sextet = c - '0' + 52;
+ } else {
+ return BROTLI_FALSE;
+ }
+ bits = (bits << 6) | sextet;
+ bit_count += 6;
+ if (bit_count >= 8) {
+ if (octet_count == max_out_len) return BROTLI_FALSE;
+ bit_count -= 8;
+ out[octet_count++] = (bits >> bit_count) & 0xFF;
+ }
+ }
+ if (padding_count > 2) return BROTLI_FALSE;
+ *out_len = octet_count;
+ return BROTLI_TRUE;
+}
+
/* Parse up to 5 decimal digits. */
static BROTLI_BOOL ParseInt(const char* s, int low, int high, int* result) {
int value = 0;
@@ -217,6 +278,7 @@ static Command ParseParams(Context* params) {
BROTLI_BOOL lgwin_set = BROTLI_FALSE;
BROTLI_BOOL suffix_set = BROTLI_FALSE;
BROTLI_BOOL after_dash_dash = BROTLI_FALSE;
+ BROTLI_BOOL comment_set = BROTLI_FALSE;
Command command = ParseAlias(argv[0]);
for (i = 1; i < argc; ++i) {
@@ -344,8 +406,9 @@ static Command ParseParams(Context* params) {
params->quality = 11;
continue;
}
- /* o/q/w/D/S with parameter is expected */
- if (c != 'o' && c != 'q' && c != 'w' && c != 'D' && c != 'S') {
+ /* o/q/w/C/D/S with parameter is expected */
+ if (c != 'o' && c != 'q' && c != 'w' && c != 'C' && c != 'D' &&
+ c != 'S') {
fprintf(stderr, "invalid argument -%c\n", c);
return COMMAND_INVALID;
}
@@ -393,6 +456,17 @@ static Command ParseParams(Context* params) {
params->lgwin, BROTLI_MIN_WINDOW_BITS);
return COMMAND_INVALID;
}
+ } else if (c == 'C') {
+ if (comment_set) {
+ fprintf(stderr, "comment already set\n");
+ return COMMAND_INVALID;
+ }
+ params->comment_len = MAX_COMMENT_LEN;
+ if (!ParseBase64(argv[i], params->comment, &params->comment_len)) {
+ fprintf(stderr, "invalid base64-encoded comment\n");
+ return COMMAND_INVALID;
+ }
+ comment_set = BROTLI_TRUE;
} else if (c == 'D') {
if (params->dictionary_path) {
fprintf(stderr, "dictionary path already set\n");
@@ -486,7 +560,7 @@ static Command ParseParams(Context* params) {
return COMMAND_VERSION;
} else {
/* key=value */
- const char* value = strrchr(arg, '=');
+ const char* value = strchr(arg, '=');
size_t key_len;
if (!value || value[1] == 0) {
fprintf(stderr, "must pass the parameter as --%s=value\n", arg);
@@ -494,7 +568,18 @@ static Command ParseParams(Context* params) {
}
key_len = (size_t)(value - arg);
value++;
- if (strncmp("dictionary", arg, key_len) == 0) {
+ if (strncmp("comment", arg, key_len) == 0) {
+ if (comment_set) {
+ fprintf(stderr, "comment already set\n");
+ return COMMAND_INVALID;
+ }
+ params->comment_len = MAX_COMMENT_LEN;
+ if (!ParseBase64(value, params->comment, &params->comment_len)) {
+ fprintf(stderr, "invalid base64-encoded comment\n");
+ return COMMAND_INVALID;
+ }
+ comment_set = BROTLI_TRUE;
+ } else if (strncmp("dictionary", arg, key_len) == 0) {
if (params->dictionary_path) {
fprintf(stderr, "dictionary path already set\n");
return COMMAND_INVALID;
@@ -633,6 +718,12 @@ static void PrintHelp(const char* name, BROTLI_BOOL error) {
" decodable with regular brotli decoders\n",
BROTLI_MIN_WINDOW_BITS, BROTLI_LARGE_MAX_WINDOW_BITS);
fprintf(media,
+" -C B64, --comment=B64 set comment; argument is base64-decoded first;\n"
+" (maximal decoded length: %d)\n"
+" when decoding: check stream comment;\n"
+" when encoding: embed comment (fingerprint)\n",
+ MAX_COMMENT_LEN);
+ fprintf(media,
" -D FILE, --dictionary=FILE use FILE as raw (LZ77) dictionary\n");
fprintf(media,
" -S SUF, --suffix=SUF output file suffix (default:'%s')\n",
@@ -1034,10 +1125,59 @@ static const char* PrettyDecoderErrorString(BrotliDecoderErrorCode code) {
return error_str;
}
+static void OnMetadataStart(void* opaque, size_t size) {
+ Context* context = (Context*) opaque;
+ if (context->comment_state == COMMENT_INIT) {
+ if (context->comment_len != size) {
+ context->comment_state = COMMENT_BAD;
+ return;
+ }
+ context->comment_pos = 0;
+ context->comment_state = COMMENT_READ;
+ }
+}
+
+static void OnMetadataChunk(void* opaque, const uint8_t* data, size_t size) {
+ Context* context = (Context*) opaque;
+ if (context->comment_state == COMMENT_READ) {
+ size_t i;
+ for (i = 0; i < size; ++i) {
+ if (context->comment_pos >= context->comment_len) {
+ context->comment_state = COMMENT_BAD;
+ return;
+ }
+ if (context->comment[context->comment_pos++] != data[i]) {
+ context->comment_state = COMMENT_BAD;
+ return;
+ }
+ }
+ if (context->comment_pos == context->comment_len) {
+ context->comment_state = COMMENT_OK;
+ }
+ }
+}
+
static BROTLI_BOOL DecompressFile(Context* context, BrotliDecoderState* s) {
BrotliDecoderResult result = BROTLI_DECODER_RESULT_NEEDS_MORE_INPUT;
+ if (context->comment_len) {
+ context->comment_state = COMMENT_INIT;
+ BrotliDecoderSetMetadataCallbacks(s, &OnMetadataStart, &OnMetadataChunk,
+ (void*)context);
+ } else {
+ context->comment_state = COMMENT_OK;
+ }
+
InitializeBuffers(context);
for (;;) {
+ /* Early check */
+ if (context->comment_state == COMMENT_BAD) {
+ fprintf(stderr, "corrupt input [%s]\n",
+ PrintablePath(context->current_input_path));
+ if (context->verbosity > 0) {
+ fprintf(stderr, "reason: comment mismatch\n");
+ }
+ return BROTLI_FALSE;
+ }
if (result == BROTLI_DECODER_RESULT_NEEDS_MORE_INPUT) {
if (!HasMoreInput(context)) {
fprintf(stderr, "corrupt input [%s]\n",
@@ -1068,6 +1208,14 @@ static BROTLI_BOOL DecompressFile(Context* context, BrotliDecoderState* s) {
PrintFileProcessingProgress(context);
fprintf(stderr, "\n");
}
+ /* Final check */
+ if (context->comment_state != COMMENT_OK) {
+ fprintf(stderr, "corrupt input [%s]\n",
+ PrintablePath(context->current_input_path));
+ if (context->verbosity > 0) {
+ fprintf(stderr, "reason: comment mismatch\n");
+ }
+ }
return BROTLI_TRUE;
} else { /* result == BROTLI_DECODER_RESULT_ERROR */
fprintf(stderr, "corrupt input [%s]\n",
@@ -1121,6 +1269,7 @@ static BROTLI_BOOL DecompressFiles(Context* context) {
static BROTLI_BOOL CompressFile(Context* context, BrotliEncoderState* s) {
BROTLI_BOOL is_eof = BROTLI_FALSE;
+ BROTLI_BOOL prologue = !!context->comment_len;
InitializeBuffers(context);
for (;;) {
if (context->available_in == 0 && !is_eof) {
@@ -1128,14 +1277,34 @@ static BROTLI_BOOL CompressFile(Context* context, BrotliEncoderState* s) {
is_eof = !HasMoreInput(context);
}
- if (!BrotliEncoderCompressStream(s,
- is_eof ? BROTLI_OPERATION_FINISH : BROTLI_OPERATION_PROCESS,
- &context->available_in, &context->next_in,
- &context->available_out, &context->next_out, NULL)) {
- /* Should detect OOM? */
- fprintf(stderr, "failed to compress data [%s]\n",
- PrintablePath(context->current_input_path));
- return BROTLI_FALSE;
+ if (prologue) {
+ prologue = BROTLI_FALSE;
+ const uint8_t* next_meta = context->comment;
+ size_t available_meta = context->comment_len;
+ if (!BrotliEncoderCompressStream(s,
+ BROTLI_OPERATION_EMIT_METADATA,
+ &available_meta, &next_meta,
+ &context->available_out, &context->next_out, NULL)) {
+ /* Should detect OOM? */
+ fprintf(stderr, "failed to emit metadata [%s]\n",
+ PrintablePath(context->current_input_path));
+ return BROTLI_FALSE;
+ }
+ if (available_meta != 0) {
+ fprintf(stderr, "failed to emit metadata [%s]\n",
+ PrintablePath(context->current_input_path));
+ return BROTLI_FALSE;
+ }
+ } else {
+ if (!BrotliEncoderCompressStream(s,
+ is_eof ? BROTLI_OPERATION_FINISH : BROTLI_OPERATION_PROCESS,
+ &context->available_in, &context->next_in,
+ &context->available_out, &context->next_out, NULL)) {
+ /* Should detect OOM? */
+ fprintf(stderr, "failed to compress data [%s]\n",
+ PrintablePath(context->current_input_path));
+ return BROTLI_FALSE;
+ }
}
if (context->available_out == 0) {
@@ -1230,6 +1399,7 @@ int main(int argc, char** argv) {
context.quality = 11;
context.lgwin = -1;
context.verbosity = 0;
+ context.comment_len = 0;
context.force_overwrite = BROTLI_FALSE;
context.junk_source = BROTLI_FALSE;
context.reject_uncompressible = BROTLI_FALSE;
diff --git a/tests/cli_test.sh b/tests/cli_test.sh
new file mode 100755
index 0000000..b6c563e
--- /dev/null
+++ b/tests/cli_test.sh
@@ -0,0 +1,84 @@
+#!/bin/bash
+
+source gbash.sh || exit
+source module gbash_unit.sh
+
+readonly MKTEMP='/bin/mktemp'
+readonly RM='/bin/rm'
+
+function test::brotli_cli::setup() {
+ TEMP_DIR=$(${MKTEMP} -d)
+ LOG INFO ${TEMP_DIR}
+ BROTLI_PKG="${RUNFILES}/google3/third_party/brotli"
+ BROTLI="${BROTLI_PKG}/tools/brotli"
+ cd ${TEMP_DIR}
+ echo "Kot lomom kolol slona" > text.orig
+}
+
+function test::brotli_cli::teardown() {
+ unset BROTLI
+ unset BROTLI_PKG
+ ${RM} -rf ${TEMP_DIR}
+ unset TEMP_DIR
+}
+
+function test::brotli_cli::roundtrip() {
+ ${BROTLI} -Zfk text.orig -o text.br
+ ${BROTLI} -d text.br -o text.unbr
+ EXPECT_FILE_CONTENT_EQ text.orig text.unbr
+}
+
+# 'SGVsbG8=' == $(echo -n "Hello" | base64)
+
+function test::brotli_cli::comment_ok() {
+ ${BROTLI} -Zfk -C SGVsbG8= text.orig -o text.br
+ ${BROTLI} -d --comment=SGVsbG8= text.br -o text.unbr
+ EXPECT_FILE_CONTENT_EQ text.orig text.unbr
+}
+
+function test::brotli_cli::comment_no_padding() {
+ ${BROTLI} -Zfk -C SGVsbG8 text.orig -o text.br
+ ${BROTLI} -d --comment=SGVsbG8= text.br -o text.unbr
+ EXPECT_FILE_CONTENT_EQ text.orig text.unbr
+}
+
+function test::brotli_cli::comment_extra_padding() {
+ ${BROTLI} -Zfk -C SGVsbG8 text.orig -o text.br
+ ${BROTLI} -d --comment=SGVsbG8== text.br -o text.unbr
+ EXPECT_FILE_CONTENT_EQ text.orig text.unbr
+}
+
+function test::brotli_cli::comment_ignored() {
+ ${BROTLI} -Zfk -C SGVsbG8= text.orig -o text.br
+ EXPECT_SUCCEED "${BROTLI} -d text.br -o text.unbr"
+}
+
+function test::brotli_cli::comment_mismatch_content() {
+ ${BROTLI} -Zfk --comment=SGVsbG8= text.orig -o text.br
+ EXPECT_FAIL "${BROTLI} -dC SGVsbG7= text.br -o text.unbr"
+ EXPECT_FAIL "${BROTLI} -tC SGVsbG7= text.br"
+}
+
+function test::brotli_cli::comment_mismatch_length() {
+ ${BROTLI} -Zfk --comment=SGVsbG8= text.orig -o text.br
+ EXPECT_FAIL "${BROTLI} -tC SGVsbA== text.br"
+}
+
+function test::brotli_cli::comment_too_much_padding() {
+ EXPECT_FAIL "${BROTLI} -Zfk -C SGVsbG8=== text.orig -o text.br"
+}
+
+function test::brotli_cli::comment_padding_in_the_middle() {
+ EXPECT_FAIL "${BROTLI} -Zfk -C SGVsbG=8 text.orig -o text.br"
+}
+
+function test::brotli_cli::comment_ignore_tab_cr_lf_sp() {
+ COMMENT=$'S\tG\rV\ns bG8='
+ EXPECT_SUCCEED "${BROTLI} -Zfk -C \"${COMMENT}\" text.orig -o text.br"
+}
+
+function test::brotli_cli::comment_invalid_chars() {
+ EXPECT_FAIL "${BROTLI} -Zfk -C S.GVsbG8= text.orig -o text.br"
+}
+
+gbash::unit::main "$@"