diff options
author | eustas <eustas.ru@gmail.com> | 2016-02-24 16:54:54 +0100 |
---|---|---|
committer | eustas <eustas.ru@gmail.com> | 2016-02-24 16:54:54 +0100 |
commit | 7e5bbd5f9b7575ed09e324286c7d7c0e36a2bcdd (patch) | |
tree | d7367ebb99b70c50519c669fa3731fc74f6a7472 /python | |
parent | 32a6b67c2922227ac4892614a405a6639e785997 (diff) | |
parent | 7d25e6b6feebae1f44d32c31290d576055e21122 (diff) | |
download | brotli-7e5bbd5f9b7575ed09e324286c7d7c0e36a2bcdd.zip brotli-7e5bbd5f9b7575ed09e324286c7d7c0e36a2bcdd.tar.gz brotli-7e5bbd5f9b7575ed09e324286c7d7c0e36a2bcdd.tar.bz2 |
Merge pull request #321 from eustas/master
Add custom dictionary feature binding
Diffstat (limited to 'python')
-rwxr-xr-x | python/bro.py | 15 | ||||
-rw-r--r-- | python/brotlimodule.cc | 64 | ||||
-rw-r--r-- | python/tests/custom_dictionary_test.py | 36 |
3 files changed, 98 insertions, 17 deletions
diff --git a/python/bro.py b/python/bro.py index c4cf7e2..c6f74ce 100755 --- a/python/bro.py +++ b/python/bro.py @@ -79,6 +79,8 @@ def main(args=None): help='Base 2 logarithm of the maximum input block size. ' 'Range is 16 to 24. If set to 0, the value will be set based ' 'on the quality. Defaults to 0.') + params.add_argument('--custom-dictionary', metavar="FILE", type=str, dest='dictfile', + help='Custom dictionary file.', default = None) # set default values using global DEFAULT_PARAMS dictionary parser.set_defaults(**DEFAULT_PARAMS) @@ -103,13 +105,22 @@ def main(args=None): else: outfile = get_binary_stdio('stdout') + if options.dictfile: + if not os.path.isfile(options.dictfile): + parser.error('file "%s" not found' % options.dictfile) + with open(options.dictfile, "rb") as dictfile: + custom_dictionary = dictfile.read() + else: + custom_dictionary = '' + + try: if options.decompress: - data = brotli.decompress(data) + data = brotli.decompress(data, dictionary=custom_dictionary) else: data = brotli.compress( data, mode=options.mode, quality=options.quality, - lgwin=options.lgwin, lgblock=options.lgblock) + lgwin=options.lgwin, lgblock=options.lgblock, dictionary=custom_dictionary) except brotli.error as e: parser.exit(1,'bro: error: %s: %s' % (e, options.infile or 'sys.stdin')) diff --git a/python/brotlimodule.cc b/python/brotlimodule.cc index 66535b2..936d1a4 100644 --- a/python/brotlimodule.cc +++ b/python/brotlimodule.cc @@ -91,7 +91,7 @@ PyDoc_STRVAR(compress__doc__, "Compress a byte string.\n" "\n" "Signature:\n" -" compress(string, mode=MODE_GENERIC, quality=11, lgwin=22, lgblock=0)\n" +" compress(string, mode=MODE_GENERIC, quality=11, lgwin=22, lgblock=0, dictionary='')\n" "\n" "Args:\n" " string (bytes): The input data.\n" @@ -105,6 +105,8 @@ PyDoc_STRVAR(compress__doc__, " lgblock (int, optional): Base 2 logarithm of the maximum input block size.\n" " Range is 16 to 24. If set to 0, the value will be set based on the\n" " quality. Defaults to 0.\n" +" dictionary (bytes, optional): Custom dictionary. Only last sliding window\n" +" size bytes will be used.\n" "\n" "Returns:\n" " The compressed byte string.\n" @@ -114,24 +116,28 @@ PyDoc_STRVAR(compress__doc__, static PyObject* brotli_compress(PyObject *self, PyObject *args, PyObject *keywds) { PyObject *ret = NULL; - uint8_t *input, *output; - size_t length, output_length; + uint8_t *input, *output, *custom_dictionary; + size_t length, output_length, custom_dictionary_length; BrotliParams::Mode mode = (BrotliParams::Mode) -1; int quality = -1; int lgwin = -1; int lgblock = -1; int ok; - static const char *kwlist[] = {"string", "mode", "quality", "lgwin", "lgblock", NULL}; + static const char *kwlist[] = { + "string", "mode", "quality", "lgwin", "lgblock", "dictionary", NULL}; - ok = PyArg_ParseTupleAndKeywords(args, keywds, "s#|O&O&O&O&:compress", + custom_dictionary = NULL; + custom_dictionary_length = 0; + + ok = PyArg_ParseTupleAndKeywords(args, keywds, "s#|O&O&O&O&s#:compress", const_cast<char **>(kwlist), &input, &length, &mode_convertor, &mode, &quality_convertor, &quality, &lgwin_convertor, &lgwin, - &lgblock_convertor, &lgblock); - + &lgblock_convertor, &lgblock, + &custom_dictionary, &custom_dictionary_length); if (!ok) return NULL; @@ -148,8 +154,23 @@ static PyObject* brotli_compress(PyObject *self, PyObject *args, PyObject *keywd if (lgblock != -1) params.lgblock = lgblock; - ok = BrotliCompressBuffer(params, length, input, - &output_length, output); + if (custom_dictionary_length == 0) { + ok = BrotliCompressBuffer(params, length, input, + &output_length, output); + } else { + uint8_t *custom_dictionary_start = custom_dictionary; + BrotliMemIn in(input, length); + BrotliMemOut out(output, output_length); + size_t sliding_window_size = ((size_t)1) << params.lgwin; + if (custom_dictionary_length > sliding_window_size) { + custom_dictionary_start += custom_dictionary_length - sliding_window_size; + custom_dictionary_length = sliding_window_size; + } + ok = BrotliCompressWithCustomDictionary(custom_dictionary_length, + custom_dictionary_start, params, &in, &out); + output_length = out.position(); + } + if (ok) { ret = PyBytes_FromStringAndSize((char*)output, output_length); } else { @@ -169,6 +190,8 @@ PyDoc_STRVAR(decompress__doc__, "\n" "Args:\n" " string (bytes): The compressed input data.\n" +" dictionary (bytes, optional): Custom dictionary. MUST be the same data\n" +" as passed to compress method.\n" "\n" "Returns:\n" " The decompressed byte string.\n" @@ -176,13 +199,21 @@ PyDoc_STRVAR(decompress__doc__, "Raises:\n" " brotli.error: If decompressor fails.\n"); -static PyObject* brotli_decompress(PyObject *self, PyObject *args) { +static PyObject* brotli_decompress(PyObject *self, PyObject *args, PyObject *keywds) { PyObject *ret = NULL; - const uint8_t *input; - size_t length; + const uint8_t *input, *custom_dictionary; + size_t length, custom_dictionary_length; int ok; - ok = PyArg_ParseTuple(args, "s#:decompress", &input, &length); + static const char *kwlist[] = {"string", "dictionary", NULL}; + + custom_dictionary = NULL; + custom_dictionary_length = 0; + + ok = PyArg_ParseTupleAndKeywords(args, keywds, "s#|s#:decompress", + const_cast<char **>(kwlist), + &input, &length, + &custom_dictionary, &custom_dictionary_length); if (!ok) return NULL; @@ -191,7 +222,10 @@ static PyObject* brotli_decompress(PyObject *self, PyObject *args) { uint8_t* buffer = new uint8_t[kBufferSize]; BrotliState state; BrotliStateInit(&state); - + if (custom_dictionary_length != 0) { + BrotliSetCustomDictionary(custom_dictionary_length, custom_dictionary, &state); + } + BrotliResult result = BROTLI_RESULT_NEEDS_MORE_OUTPUT; while (result == BROTLI_RESULT_NEEDS_MORE_OUTPUT) { size_t available_out = kBufferSize; @@ -219,7 +253,7 @@ static PyObject* brotli_decompress(PyObject *self, PyObject *args) { static PyMethodDef brotli_methods[] = { {"compress", (PyCFunction)brotli_compress, METH_VARARGS | METH_KEYWORDS, compress__doc__}, - {"decompress", brotli_decompress, METH_VARARGS, decompress__doc__}, + {"decompress", (PyCFunction)brotli_decompress, METH_VARARGS | METH_KEYWORDS, decompress__doc__}, {NULL, NULL, 0, NULL} }; diff --git a/python/tests/custom_dictionary_test.py b/python/tests/custom_dictionary_test.py new file mode 100644 index 0000000..afbf07a --- /dev/null +++ b/python/tests/custom_dictionary_test.py @@ -0,0 +1,36 @@ +#!/usr/bin/env python +from __future__ import print_function +import sys +import os +from subprocess import check_call, Popen, PIPE + +from test_utils import PYTHON, BRO, TEST_ENV, diff_q + + +INPUTS = """\ +testdata/alice29.txt +testdata/asyoulik.txt +testdata/lcet10.txt +testdata/plrabn12.txt +../enc/encode.cc +../enc/dictionary.h +../dec/decode.c +%s +""" % BRO + +os.chdir(os.path.abspath("../../tests")) +for filename in INPUTS.splitlines(): + for quality in (1, 6, 9, 11): + for lgwin in (10, 15, 20, 24): + filename = os.path.abspath(filename) + print('Roundtrip testing file "%s" at quality %d with lg(win)=%d and auto-custom-dictionary' % + (os.path.basename(filename), quality, lgwin)) + compressed = os.path.splitext(filename)[0] + ".custom_bro" + uncompressed = os.path.splitext(filename)[0] + ".custom_unbro" + check_call([PYTHON, BRO, "-f", "-q", str(quality), "-i", filename, + "-o", compressed, "--lgwin", str(lgwin), + "--custom-dictionary", filename], env=TEST_ENV) + check_call([PYTHON, BRO, "-f", "-d", "-i", compressed, "-o", + uncompressed, "--custom-dictionary", filename], env=TEST_ENV) + if diff_q(filename, uncompressed) != 0: + sys.exit(1) |