aboutsummaryrefslogtreecommitdiff
path: root/python
diff options
context:
space:
mode:
authorAlex Nicksay <nicksay@gmail.com>2016-10-24 07:28:56 -0400
committerEugene Kliuchnikov <eustas@google.com>2016-10-24 13:28:56 +0200
commit5632315d3568b4bba3966a69c195adeabaf2fc0b (patch)
tree245b4508711d453be23f6b9b512c6c6af122a859 /python
parent678f8627d3d2d6cdc92050a23041269b33febb0a (diff)
downloadbrotli-5632315d3568b4bba3966a69c195adeabaf2fc0b.zip
brotli-5632315d3568b4bba3966a69c195adeabaf2fc0b.tar.gz
brotli-5632315d3568b4bba3966a69c195adeabaf2fc0b.tar.bz2
Python: Support streamed compression with the Compressor object (#448)
This adds `flush` and `finish` methods to the `Compressor` object in the extension module, renames the `compress` method to `process`, and updates that method to only process data. Now, one or more `process` calls followed by a `finish` call will be equivalent to a module-level `compress` call. Note: To maximize the compression efficiency (and match underlying Brotli behavior), the `Compressor` object `process` method does not guarantee all input is immediately written to output. To ensure immediate output, call `flush` to manually flush the compression buffer. Extraneous flushing can increase the size, but may be required when processing streaming data. Progress on #191
Diffstat (limited to 'python')
-rw-r--r--python/_brotli.cc164
-rw-r--r--python/brotli.py4
2 files changed, 135 insertions, 33 deletions
diff --git a/python/_brotli.cc b/python/_brotli.cc
index 2b84eba..c084e8b 100644
--- a/python/_brotli.cc
+++ b/python/_brotli.cc
@@ -2,7 +2,6 @@
#include <Python.h>
#include <bytesobject.h>
#include <structmember.h>
-#include <cstdio>
#include <vector>
#include "../common/version.h"
#include <brotli/decode.h>
@@ -88,6 +87,38 @@ static int lgblock_convertor(PyObject *o, int *lgblock) {
return 1;
}
+static BROTLI_BOOL compress_stream(BrotliEncoderState* enc, BrotliEncoderOperation op,
+ std::vector<uint8_t>* output, uint8_t* input, size_t input_length) {
+ BROTLI_BOOL ok = BROTLI_TRUE;
+
+ size_t available_in = input_length;
+ const uint8_t* next_in = input;
+ size_t available_out = 0;
+ uint8_t* next_out = NULL;
+
+ while (ok) {
+ ok = BrotliEncoderCompressStream(enc, op,
+ &available_in, &next_in,
+ &available_out, &next_out, NULL);
+ if (!ok)
+ break;
+
+ size_t buffer_length = 0; // Request all available output.
+ const uint8_t* buffer = BrotliEncoderTakeOutput(enc, &buffer_length);
+ if (buffer_length) {
+ (*output).insert((*output).end(), buffer, buffer + buffer_length);
+ }
+
+ if (available_in || BrotliEncoderHasMoreOutput(enc)) {
+ continue;
+ }
+
+ break;
+ }
+
+ return ok;
+}
+
PyDoc_STRVAR(brotli_Compressor_doc,
"An object to compress a byte string.\n"
"\n"
@@ -177,63 +208,132 @@ static int brotli_Compressor_init(brotli_Compressor *self, PyObject *args, PyObj
return 0;
}
-PyDoc_STRVAR(brotli_Compressor_compress_doc,
-"Compress a byte string.\n"
+PyDoc_STRVAR(brotli_Compressor_process_doc,
+"Process \"string\" for compression, returning a string that contains \n"
+"compressed output data. This data should be concatenated to the output \n"
+"produced by any preceding calls to the \"process()\" or flush()\" methods. \n"
+"Some or all of the input may be kept in internal buffers for later \n"
+"processing, and the compressed output data may be empty until enough input \n"
+"has been accumulated.\n"
"\n"
"Signature:\n"
" compress(string)\n"
"\n"
"Args:\n"
-" string (bytes): The input data.\n"
+" string (bytes): The input data\n"
"\n"
"Returns:\n"
-" The compressed byte string.\n"
+" The compressed output data (bytes)\n"
"\n"
"Raises:\n"
-" brotli.error: If compression fails.\n");
+" brotli.error: If compression fails\n");
-static PyObject* brotli_Compressor_compress(brotli_Compressor *self, PyObject *args) {
+static PyObject* brotli_Compressor_process(brotli_Compressor *self, PyObject *args) {
PyObject* ret = NULL;
+ std::vector<uint8_t> output;
uint8_t* input;
- uint8_t* output = NULL;
- uint8_t* next_out;
- const uint8_t *next_in;
size_t input_length;
- size_t output_length;
- size_t available_in;
- size_t available_out;
- int ok;
+ BROTLI_BOOL ok = BROTLI_TRUE;
- ok = PyArg_ParseTuple(args, "s#:compress", &input, &input_length);
+ ok = (BROTLI_BOOL)PyArg_ParseTuple(args, "s#:process", &input, &input_length);
if (!ok)
return NULL;
- output_length = input_length + (input_length >> 2) + 10240;
-
if (!self->enc) {
- ok = false;
+ ok = BROTLI_FALSE;
goto end;
}
- output = new uint8_t[output_length];
- available_out = output_length;
- next_out = output;
- available_in = input_length;
- next_in = input;
+ ok = compress_stream(self->enc, BROTLI_OPERATION_PROCESS,
+ &output, input, input_length);
- BrotliEncoderCompressStream(self->enc, BROTLI_OPERATION_FINISH,
- &available_in, &next_in,
- &available_out, &next_out, 0);
- ok = BrotliEncoderIsFinished(self->enc);
+end:
+ if (ok) {
+ ret = PyBytes_FromStringAndSize((char*)(output.size() ? &output[0] : NULL), output.size());
+ } else {
+ PyErr_SetString(BrotliError, "BrotliEncoderCompressStream failed while processing the stream");
+ }
+
+ return ret;
+}
+
+PyDoc_STRVAR(brotli_Compressor_flush_doc,
+"Process all pending input, returning a string containing the remaining\n"
+"compressed data. This data should be concatenated to the output produced by\n"
+"any preceding calls to the \"process()\" or \"flush()\" methods.\n"
+"\n"
+"Signature:\n"
+" flush()\n"
+"\n"
+"Returns:\n"
+" The compressed output data (bytes)\n"
+"\n"
+"Raises:\n"
+" brotli.error: If compression fails\n");
+
+static PyObject* brotli_Compressor_flush(brotli_Compressor *self) {
+ PyObject *ret = NULL;
+ std::vector<uint8_t> output;
+ BROTLI_BOOL ok = BROTLI_TRUE;
+
+ if (!self->enc) {
+ ok = BROTLI_FALSE;
+ goto end;
+ }
+
+ ok = compress_stream(self->enc, BROTLI_OPERATION_FLUSH,
+ &output, NULL, 0);
end:
if (ok) {
- ret = PyBytes_FromStringAndSize((char*)output, output_length - available_out);
+ ret = PyBytes_FromStringAndSize((char*)(output.size() ? &output[0] : NULL), output.size());
} else {
- PyErr_SetString(BrotliError, "BrotliCompressBuffer failed");
+ PyErr_SetString(BrotliError, "BrotliEncoderCompressStream failed while flushing the stream");
+ }
+
+ return ret;
+}
+
+PyDoc_STRVAR(brotli_Compressor_finish_doc,
+"Process all pending input and complete all compression, returning a string\n"
+"containing the remaining compressed data. This data should be concatenated\n"
+"to the output produced by any preceding calls to the \"process()\" or\n"
+"\"flush()\" methods.\n"
+"After calling \"finish()\", the \"process()\" and \"flush()\" methods\n"
+"cannot be called again, and a new \"Compressor\" object should be created.\n"
+"\n"
+"Signature:\n"
+" finish(string)\n"
+"\n"
+"Returns:\n"
+" The compressed output data (bytes)\n"
+"\n"
+"Raises:\n"
+" brotli.error: If compression fails\n");
+
+static PyObject* brotli_Compressor_finish(brotli_Compressor *self) {
+ PyObject *ret = NULL;
+ std::vector<uint8_t> output;
+ BROTLI_BOOL ok = BROTLI_TRUE;
+
+ if (!self->enc) {
+ ok = BROTLI_FALSE;
+ goto end;
+ }
+
+ ok = compress_stream(self->enc, BROTLI_OPERATION_FINISH,
+ &output, NULL, 0);
+
+ if (ok) {
+ ok = BrotliEncoderIsFinished(self->enc);
}
- delete[] output;
+end:
+ if (ok) {
+ ret = PyBytes_FromStringAndSize((char*)(output.size() ? &output[0] : NULL), output.size());
+ } else {
+ PyErr_SetString(BrotliError, "BrotliEncoderCompressStream failed while finishing the stream");
+ }
return ret;
}
@@ -243,7 +343,9 @@ static PyMemberDef brotli_Compressor_members[] = {
};
static PyMethodDef brotli_Compressor_methods[] = {
- {"compress", (PyCFunction)brotli_Compressor_compress, METH_VARARGS, brotli_Compressor_compress_doc},
+ {"process", (PyCFunction)brotli_Compressor_process, METH_VARARGS, brotli_Compressor_process_doc},
+ {"flush", (PyCFunction)brotli_Compressor_flush, METH_NOARGS, brotli_Compressor_flush_doc},
+ {"finish", (PyCFunction)brotli_Compressor_finish, METH_NOARGS, brotli_Compressor_finish_doc},
{NULL} /* Sentinel */
};
diff --git a/python/brotli.py b/python/brotli.py
index f3f56b8..a88616e 100644
--- a/python/brotli.py
+++ b/python/brotli.py
@@ -34,7 +34,7 @@ def compress(string, mode=MODE_GENERIC, quality=11, lgwin=22, lgblock=0,
Range is 16 to 24. If set to 0, the value will be set based on the
quality. Defaults to 0.
dictionary (bytes, optional): Custom dictionary. Only last sliding window
- size bytes will be used.
+ size bytes will be used.
Returns:
The compressed byte string.
@@ -44,7 +44,7 @@ def compress(string, mode=MODE_GENERIC, quality=11, lgwin=22, lgblock=0,
"""
compressor = _brotli.Compressor(mode=mode, quality=quality, lgwin=lgwin,
lgblock=lgblock, dictionary=dictionary)
- return compressor.compress(string)
+ return compressor.process(string) + compressor.finish()
# Decompress a compressed byte string.
decompress = _brotli.decompress