about | summary | refs | log | tree | commit | diff
path: root/python
diff options
context:
space:
mode:
author: Eugene Kliuchnikov <eustas@google.com>, 2016-02-23 17:42:55 +0100
committer: Eugene Kliuchnikov <eustas@google.com>, 2016-02-23 17:42:55 +0100
commit: 30612e3a64689dfc4aa8b79b563f6bca202135a8 (patch)
tree: 4ffd5cbdae86d4b28c885016adac621dd203bc10 /python
parent: 5cdfb5cf507389a185cd9ddb5471a75ff093bcf7 (diff)
download: brotli-30612e3a64689dfc4aa8b79b563f6bca202135a8.zip
brotli-30612e3a64689dfc4aa8b79b563f6bca202135a8.tar.gz
brotli-30612e3a64689dfc4aa8b79b563f6bca202135a8.tar.bz2
Add binding for custom_dictionary.
Diffstat (limited to 'python')
-rwxr-xr-x python/bro.py 15
-rw-r--r-- python/brotlimodule.cc 58
-rw-r--r-- python/tests/custom_dictionary_test.py 44
3 files changed, 100 insertions, 17 deletions
diff --git a/python/bro.py b/python/bro.py
index c4cf7e2..c6f74ce 100755
--- a/python/bro.py
+++ b/python/bro.py
@@ -79,6 +79,8 @@ def main(args=None):
help='Base 2 logarithm of the maximum input block size. '
'Range is 16 to 24. If set to 0, the value will be set based '
'on the quality. Defaults to 0.')
+ params.add_argument('--custom-dictionary', metavar="FILE", type=str, dest='dictfile',
+ help='Custom dictionary file.', default = None)
# set default values using global DEFAULT_PARAMS dictionary
parser.set_defaults(**DEFAULT_PARAMS)
@@ -103,13 +105,22 @@ def main(args=None):
else:
outfile = get_binary_stdio('stdout')
+ if options.dictfile:
+ if not os.path.isfile(options.dictfile):
+ parser.error('file "%s" not found' % options.dictfile)
+ with open(options.dictfile, "rb") as dictfile:
+ custom_dictionary = dictfile.read()
+ else:
+ custom_dictionary = ''
+
+
try:
if options.decompress:
- data = brotli.decompress(data)
+ data = brotli.decompress(data, dictionary=custom_dictionary)
else:
data = brotli.compress(
data, mode=options.mode, quality=options.quality,
- lgwin=options.lgwin, lgblock=options.lgblock)
+ lgwin=options.lgwin, lgblock=options.lgblock, dictionary=custom_dictionary)
except brotli.error as e:
parser.exit(1,'bro: error: %s: %s' % (e, options.infile or 'sys.stdin'))
diff --git a/python/brotlimodule.cc b/python/brotlimodule.cc
index 66535b2..99a4f8d 100644
--- a/python/brotlimodule.cc
+++ b/python/brotlimodule.cc
@@ -91,7 +91,7 @@ PyDoc_STRVAR(compress__doc__,
"Compress a byte string.\n"
"\n"
"Signature:\n"
-" compress(string, mode=MODE_GENERIC, quality=11, lgwin=22, lgblock=0)\n"
+" compress(string, mode=MODE_GENERIC, quality=11, lgwin=22, lgblock=0, dictionary='')\n"
"\n"
"Args:\n"
" string (bytes): The input data.\n"
@@ -105,6 +105,8 @@ PyDoc_STRVAR(compress__doc__,
" lgblock (int, optional): Base 2 logarithm of the maximum input block size.\n"
" Range is 16 to 24. If set to 0, the value will be set based on the\n"
" quality. Defaults to 0.\n"
+" dictionary (bytes, optional): Custom dictionary. Should be shorter than\n"
+" sliding window size.\n"
"\n"
"Returns:\n"
" The compressed byte string.\n"
@@ -114,24 +116,28 @@ PyDoc_STRVAR(compress__doc__,
static PyObject* brotli_compress(PyObject *self, PyObject *args, PyObject *keywds) {
PyObject *ret = NULL;
- uint8_t *input, *output;
- size_t length, output_length;
+ uint8_t *input, *output, *custom_dictionary;
+ size_t length, output_length, custom_dictionary_length;
BrotliParams::Mode mode = (BrotliParams::Mode) -1;
int quality = -1;
int lgwin = -1;
int lgblock = -1;
int ok;
- static const char *kwlist[] = {"string", "mode", "quality", "lgwin", "lgblock", NULL};
+ static const char *kwlist[] = {
+ "string", "mode", "quality", "lgwin", "lgblock", "dictionary", NULL};
- ok = PyArg_ParseTupleAndKeywords(args, keywds, "s#|O&O&O&O&:compress",
+ custom_dictionary = NULL;
+ custom_dictionary_length = 0;
+
+ ok = PyArg_ParseTupleAndKeywords(args, keywds, "s#|O&O&O&O&s#:compress",
const_cast<char **>(kwlist),
&input, &length,
&mode_convertor, &mode,
&quality_convertor, &quality,
&lgwin_convertor, &lgwin,
- &lgblock_convertor, &lgblock);
-
+ &lgblock_convertor, &lgblock,
+ &custom_dictionary, &custom_dictionary_length);
if (!ok)
return NULL;
@@ -148,8 +154,17 @@ static PyObject* brotli_compress(PyObject *self, PyObject *args, PyObject *keywd
if (lgblock != -1)
params.lgblock = lgblock;
- ok = BrotliCompressBuffer(params, length, input,
- &output_length, output);
+ if (custom_dictionary_length == 0) {
+ ok = BrotliCompressBuffer(params, length, input,
+ &output_length, output);
+ } else {
+ BrotliMemIn in(input, length);
+ BrotliMemOut out(output, output_length);
+ ok = BrotliCompressWithCustomDictionary(custom_dictionary_length,
+ custom_dictionary, params, &in, &out);
+ output_length = out.position();
+ }
+
if (ok) {
ret = PyBytes_FromStringAndSize((char*)output, output_length);
} else {
@@ -169,6 +184,8 @@ PyDoc_STRVAR(decompress__doc__,
"\n"
"Args:\n"
" string (bytes): The compressed input data.\n"
+" dictionary (bytes, optional): Custom dictionary. MUST be the same data\n"
+" as passed to compress method.\n"
"\n"
"Returns:\n"
" The decompressed byte string.\n"
@@ -176,13 +193,21 @@ PyDoc_STRVAR(decompress__doc__,
"Raises:\n"
" brotli.error: If decompressor fails.\n");
-static PyObject* brotli_decompress(PyObject *self, PyObject *args) {
+static PyObject* brotli_decompress(PyObject *self, PyObject *args, PyObject *keywds) {
PyObject *ret = NULL;
- const uint8_t *input;
- size_t length;
+ const uint8_t *input, *custom_dictionary;
+ size_t length, custom_dictionary_length;
int ok;
- ok = PyArg_ParseTuple(args, "s#:decompress", &input, &length);
+ static const char *kwlist[] = {"string", "dictionary", NULL};
+
+ custom_dictionary = NULL;
+ custom_dictionary_length = 0;
+
+ ok = PyArg_ParseTupleAndKeywords(args, keywds, "s#|s#:decompress",
+ const_cast<char **>(kwlist),
+ &input, &length,
+ &custom_dictionary, &custom_dictionary_length);
if (!ok)
return NULL;
@@ -191,7 +216,10 @@ static PyObject* brotli_decompress(PyObject *self, PyObject *args) {
uint8_t* buffer = new uint8_t[kBufferSize];
BrotliState state;
BrotliStateInit(&state);
-
+ if (custom_dictionary_length != 0) {
+ BrotliSetCustomDictionary(custom_dictionary_length, custom_dictionary, &state);
+ }
+
BrotliResult result = BROTLI_RESULT_NEEDS_MORE_OUTPUT;
while (result == BROTLI_RESULT_NEEDS_MORE_OUTPUT) {
size_t available_out = kBufferSize;
@@ -219,7 +247,7 @@ static PyObject* brotli_decompress(PyObject *self, PyObject *args) {
static PyMethodDef brotli_methods[] = {
{"compress", (PyCFunction)brotli_compress, METH_VARARGS | METH_KEYWORDS, compress__doc__},
- {"decompress", brotli_decompress, METH_VARARGS, decompress__doc__},
+ {"decompress", (PyCFunction)brotli_decompress, METH_VARARGS | METH_KEYWORDS, decompress__doc__},
{NULL, NULL, 0, NULL}
};
diff --git a/python/tests/custom_dictionary_test.py b/python/tests/custom_dictionary_test.py
new file mode 100644
index 0000000..b5a65d4
--- /dev/null
+++ b/python/tests/custom_dictionary_test.py
@@ -0,0 +1,44 @@
+#!/usr/bin/env python
+from __future__ import print_function
+import sys
+import os
+from subprocess import check_call, Popen, PIPE
+
+from test_utils import PYTHON, BRO, TEST_ENV, diff_q
+
+
+INPUTS = """\
+testdata/alice29.txt
+testdata/asyoulik.txt
+testdata/lcet10.txt
+testdata/plrabn12.txt
+../enc/encode.cc
+../enc/dictionary.h
+../dec/decode.c
+%s
+""" % BRO
+
+os.chdir(os.path.abspath("../../tests"))
+for filename in INPUTS.splitlines():
+ for quality in (1, 6, 9, 11):
+ filename = os.path.abspath(filename)
+ print('Roundtrip testing file "%s" at quality %d with auto-custom-dictionary' %
+ (os.path.basename(filename), quality))
+ compressed = os.path.splitext(filename)[0] + ".custom_bro"
+ uncompressed = os.path.splitext(filename)[0] + ".custom_unbro"
+ check_call([PYTHON, BRO, "-f", "-q", str(quality), "-i", filename,
+ "-o", compressed, "--lgwin", "24",
+ "--custom-dictionary", filename], env=TEST_ENV)
+ check_call([PYTHON, BRO, "-f", "-d", "-i", compressed, "-o",
+ uncompressed, "--custom-dictionary", filename], env=TEST_ENV)
+ if diff_q(filename, uncompressed) != 0:
+ sys.exit(1)
+ # Test the streaming version
+ with open(filename, "rb") as infile, \
+ open(uncompressed, "wb") as outfile:
+ p = Popen([PYTHON, BRO, "-q", str(quality)], stdin=infile,
+ stdout=PIPE, env=TEST_ENV)
+ check_call([PYTHON, BRO, "-d"], stdin=p.stdout, stdout=outfile,
+ env=TEST_ENV)
+ if diff_q(filename, uncompressed) != 0:
+ sys.exit(1)