aboutsummaryrefslogtreecommitdiff
path: root/zlib/examples/gzjoin.c
diff options
context:
space:
mode:
Diffstat (limited to 'zlib/examples/gzjoin.c')
-rw-r--r--zlib/examples/gzjoin.c448
1 files changed, 448 insertions, 0 deletions
diff --git a/zlib/examples/gzjoin.c b/zlib/examples/gzjoin.c
new file mode 100644
index 0000000..129347c
--- /dev/null
+++ b/zlib/examples/gzjoin.c
@@ -0,0 +1,448 @@
+/* gzjoin -- command to join gzip files into one gzip file
+
+ Copyright (C) 2004 Mark Adler, all rights reserved
+ version 1.0, 11 Dec 2004
+
+ This software is provided 'as-is', without any express or implied
+ warranty. In no event will the author be held liable for any damages
+ arising from the use of this software.
+
+ Permission is granted to anyone to use this software for any purpose,
+ including commercial applications, and to alter it and redistribute it
+ freely, subject to the following restrictions:
+
+ 1. The origin of this software must not be misrepresented; you must not
+ claim that you wrote the original software. If you use this software
+ in a product, an acknowledgment in the product documentation would be
+ appreciated but is not required.
+ 2. Altered source versions must be plainly marked as such, and must not be
+ misrepresented as being the original software.
+ 3. This notice may not be removed or altered from any source distribution.
+
+ Mark Adler madler@alumni.caltech.edu
+ */
+
+/*
+ * Change history:
+ *
+ * 1.0 11 Dec 2004 - First version
+ * 1.1 12 Jun 2005 - Changed ssize_t to long for portability
+ */
+
+/*
+ gzjoin takes one or more gzip files on the command line and writes out a
+ single gzip file that will uncompress to the concatenation of the
+ uncompressed data from the individual gzip files. gzjoin does this without
+ having to recompress any of the data and without having to calculate a new
+ crc32 for the concatenated uncompressed data. gzjoin does however have to
+ decompress all of the input data in order to find the bits in the compressed
+ data that need to be modified to concatenate the streams.
+
+ gzjoin does not do an integrity check on the input gzip files other than
+ checking the gzip header and decompressing the compressed data. They are
+ otherwise assumed to be complete and correct.
+
+ Each joint between gzip files removes at least 18 bytes of previous trailer
+ and subsequent header, and inserts an average of about three bytes to the
+ compressed data in order to connect the streams. The output gzip file
+ has a minimal ten-byte gzip header with no file name or modification time.
+
+ This program was written to illustrate the use of the Z_BLOCK option of
+ inflate() and the crc32_combine() function. gzjoin will not compile with
+ versions of zlib earlier than 1.2.3.
+ */
+
+#include <stdio.h> /* fputs(), fprintf(), fwrite(), putc() */
+#include <stdlib.h> /* exit(), malloc(), free() */
+#include <fcntl.h> /* open() */
+#include <unistd.h> /* close(), read(), lseek() */
+#include "zlib.h"
+ /* crc32(), crc32_combine(), inflateInit2(), inflate(), inflateEnd() */
+
+#define local static
+
+/* exit with an error (return a value to allow use in an expression) */
+local int bail(char *why1, char *why2)
+{
+ fprintf(stderr, "gzjoin error: %s%s, output incomplete\n", why1, why2);
+ exit(1);
+ return 0;
+}
+
+/* -- simple buffered file input with access to the buffer -- */
+
+#define CHUNK 32768 /* must be a power of two and fit in unsigned */
+
+/* bin buffered input file type */
+typedef struct {
+ char *name; /* name of file for error messages */
+ int fd; /* file descriptor */
+ unsigned left; /* bytes remaining at next */
+ unsigned char *next; /* next byte to read */
+ unsigned char *buf; /* allocated buffer of length CHUNK */
+} bin;
+
+/* close a buffered file and free allocated memory */
+local void bclose(bin *in)
+{
+ if (in != NULL) {
+ if (in->fd != -1)
+ close(in->fd);
+ if (in->buf != NULL)
+ free(in->buf);
+ free(in);
+ }
+}
+
+/* open a buffered file for input, return a pointer to type bin, or NULL on
+ failure */
+local bin *bopen(char *name)
+{
+ bin *in;
+
+ in = malloc(sizeof(bin));
+ if (in == NULL)
+ return NULL;
+ in->buf = malloc(CHUNK);
+ in->fd = open(name, O_RDONLY, 0);
+ if (in->buf == NULL || in->fd == -1) {
+ bclose(in);
+ return NULL;
+ }
+ in->left = 0;
+ in->next = in->buf;
+ in->name = name;
+ return in;
+}
+
+/* load buffer from file, return -1 on read error, 0 or 1 on success, with
+ 1 indicating that end-of-file was reached */
+local int bload(bin *in)
+{
+ long len;
+
+ if (in == NULL)
+ return -1;
+ if (in->left != 0)
+ return 0;
+ in->next = in->buf;
+ do {
+ len = (long)read(in->fd, in->buf + in->left, CHUNK - in->left);
+ if (len < 0)
+ return -1;
+ in->left += (unsigned)len;
+ } while (len != 0 && in->left < CHUNK);
+ return len == 0 ? 1 : 0;
+}
+
+/* get a byte from the file, bail if end of file */
+#define bget(in) (in->left ? 0 : bload(in), \
+ in->left ? (in->left--, *(in->next)++) : \
+ bail("unexpected end of file on ", in->name))
+
+/* get a four-byte little-endian unsigned integer from file */
+local unsigned long bget4(bin *in)
+{
+ unsigned long val;
+
+ val = bget(in);
+ val += (unsigned long)(bget(in)) << 8;
+ val += (unsigned long)(bget(in)) << 16;
+ val += (unsigned long)(bget(in)) << 24;
+ return val;
+}
+
+/* skip bytes in file */
+local void bskip(bin *in, unsigned skip)
+{
+ /* check pointer */
+ if (in == NULL)
+ return;
+
+ /* easy case -- skip bytes in buffer */
+ if (skip <= in->left) {
+ in->left -= skip;
+ in->next += skip;
+ return;
+ }
+
+ /* skip what's in buffer, discard buffer contents */
+ skip -= in->left;
+ in->left = 0;
+
+ /* seek past multiples of CHUNK bytes */
+ if (skip > CHUNK) {
+ unsigned left;
+
+ left = skip & (CHUNK - 1);
+ if (left == 0) {
+ /* exact number of chunks: seek all the way minus one byte to check
+ for end-of-file with a read */
+ lseek(in->fd, skip - 1, SEEK_CUR);
+ if (read(in->fd, in->buf, 1) != 1)
+ bail("unexpected end of file on ", in->name);
+ return;
+ }
+
+ /* skip the integral chunks, update skip with remainder */
+ lseek(in->fd, skip - left, SEEK_CUR);
+ skip = left;
+ }
+
+ /* read more input and skip remainder */
+ bload(in);
+ if (skip > in->left)
+ bail("unexpected end of file on ", in->name);
+ in->left -= skip;
+ in->next += skip;
+}
+
+/* -- end of buffered input functions -- */
+
+/* skip the gzip header from file in */
+local void gzhead(bin *in)
+{
+ int flags;
+
+ /* verify gzip magic header and compression method */
+ if (bget(in) != 0x1f || bget(in) != 0x8b || bget(in) != 8)
+ bail(in->name, " is not a valid gzip file");
+
+ /* get and verify flags */
+ flags = bget(in);
+ if ((flags & 0xe0) != 0)
+ bail("unknown reserved bits set in ", in->name);
+
+ /* skip modification time, extra flags, and os */
+ bskip(in, 6);
+
+ /* skip extra field if present */
+ if (flags & 4) {
+ unsigned len;
+
+ len = bget(in);
+ len += (unsigned)(bget(in)) << 8;
+ bskip(in, len);
+ }
+
+ /* skip file name if present */
+ if (flags & 8)
+ while (bget(in) != 0)
+ ;
+
+ /* skip comment if present */
+ if (flags & 16)
+ while (bget(in) != 0)
+ ;
+
+ /* skip header crc if present */
+ if (flags & 2)
+ bskip(in, 2);
+}
+
+/* write a four-byte little-endian unsigned integer to out */
+local void put4(unsigned long val, FILE *out)
+{
+ putc(val & 0xff, out);
+ putc((val >> 8) & 0xff, out);
+ putc((val >> 16) & 0xff, out);
+ putc((val >> 24) & 0xff, out);
+}
+
+/* Load up zlib stream from buffered input, bail if end of file */
+local void zpull(z_streamp strm, bin *in)
+{
+ if (in->left == 0)
+ bload(in);
+ if (in->left == 0)
+ bail("unexpected end of file on ", in->name);
+ strm->avail_in = in->left;
+ strm->next_in = in->next;
+}
+
+/* Write header for gzip file to out and initialize trailer. */
+local void gzinit(unsigned long *crc, unsigned long *tot, FILE *out)
+{
+ fwrite("\x1f\x8b\x08\0\0\0\0\0\0\xff", 1, 10, out);
+ *crc = crc32(0L, Z_NULL, 0);
+ *tot = 0;
+}
+
+/* Copy the compressed data from name, zeroing the last block bit of the last
+ block if clr is true, and adding empty blocks as needed to get to a byte
+ boundary. If clr is false, then the last block becomes the last block of
+ the output, and the gzip trailer is written. crc and tot maintains the
+ crc and length (modulo 2^32) of the output for the trailer. The resulting
+ gzip file is written to out. gzinit() must be called before the first call
+ of gzcopy() to write the gzip header and to initialize crc and tot. */
+local void gzcopy(char *name, int clr, unsigned long *crc, unsigned long *tot,
+ FILE *out)
+{
+ int ret; /* return value from zlib functions */
+ int pos; /* where the "last block" bit is in byte */
+ int last; /* true if processing the last block */
+ bin *in; /* buffered input file */
+ unsigned char *start; /* start of compressed data in buffer */
+ unsigned char *junk; /* buffer for uncompressed data -- discarded */
+ z_off_t len; /* length of uncompressed data (support > 4 GB) */
+ z_stream strm; /* zlib inflate stream */
+
+ /* open gzip file and skip header */
+ in = bopen(name);
+ if (in == NULL)
+ bail("could not open ", name);
+ gzhead(in);
+
+ /* allocate buffer for uncompressed data and initialize raw inflate
+ stream */
+ junk = malloc(CHUNK);
+ strm.zalloc = Z_NULL;
+ strm.zfree = Z_NULL;
+ strm.opaque = Z_NULL;
+ strm.avail_in = 0;
+ strm.next_in = Z_NULL;
+ ret = inflateInit2(&strm, -15);
+ if (junk == NULL || ret != Z_OK)
+ bail("out of memory", "");
+
+ /* inflate and copy compressed data, clear last-block bit if requested */
+ len = 0;
+ zpull(&strm, in);
+ start = strm.next_in;
+ last = start[0] & 1;
+ if (last && clr)
+ start[0] &= ~1;
+ strm.avail_out = 0;
+ for (;;) {
+ /* if input used and output done, write used input and get more */
+ if (strm.avail_in == 0 && strm.avail_out != 0) {
+ fwrite(start, 1, strm.next_in - start, out);
+ start = in->buf;
+ in->left = 0;
+ zpull(&strm, in);
+ }
+
+ /* decompress -- return early when end-of-block reached */
+ strm.avail_out = CHUNK;
+ strm.next_out = junk;
+ ret = inflate(&strm, Z_BLOCK);
+ switch (ret) {
+ case Z_MEM_ERROR:
+ bail("out of memory", "");
+ case Z_DATA_ERROR:
+ bail("invalid compressed data in ", in->name);
+ }
+
+ /* update length of uncompressed data */
+ len += CHUNK - strm.avail_out;
+
+ /* check for block boundary (only get this when block copied out) */
+ if (strm.data_type & 128) {
+ /* if that was the last block, then done */
+ if (last)
+ break;
+
+ /* number of unused bits in last byte */
+ pos = strm.data_type & 7;
+
+ /* find the next last-block bit */
+ if (pos != 0) {
+ /* next last-block bit is in last used byte */
+ pos = 0x100 >> pos;
+ last = strm.next_in[-1] & pos;
+ if (last && clr)
+ strm.next_in[-1] &= ~pos;
+ }
+ else {
+ /* next last-block bit is in next unused byte */
+ if (strm.avail_in == 0) {
+ /* don't have that byte yet -- get it */
+ fwrite(start, 1, strm.next_in - start, out);
+ start = in->buf;
+ in->left = 0;
+ zpull(&strm, in);
+ }
+ last = strm.next_in[0] & 1;
+ if (last && clr)
+ strm.next_in[0] &= ~1;
+ }
+ }
+ }
+
+ /* update buffer with unused input */
+ in->left = strm.avail_in;
+ in->next = strm.next_in;
+
+ /* copy used input, write empty blocks to get to byte boundary */
+ pos = strm.data_type & 7;
+ fwrite(start, 1, in->next - start - 1, out);
+ last = in->next[-1];
+ if (pos == 0 || !clr)
+ /* already at byte boundary, or last file: write last byte */
+ putc(last, out);
+ else {
+ /* append empty blocks to last byte */
+ last &= ((0x100 >> pos) - 1); /* assure unused bits are zero */
+ if (pos & 1) {
+ /* odd -- append an empty stored block */
+ putc(last, out);
+ if (pos == 1)
+ putc(0, out); /* two more bits in block header */
+ fwrite("\0\0\xff\xff", 1, 4, out);
+ }
+ else {
+ /* even -- append 1, 2, or 3 empty fixed blocks */
+ switch (pos) {
+ case 6:
+ putc(last | 8, out);
+ last = 0;
+ case 4:
+ putc(last | 0x20, out);
+ last = 0;
+ case 2:
+ putc(last | 0x80, out);
+ putc(0, out);
+ }
+ }
+ }
+
+ /* update crc and tot */
+ *crc = crc32_combine(*crc, bget4(in), len);
+ *tot += (unsigned long)len;
+
+ /* clean up */
+ inflateEnd(&strm);
+ free(junk);
+ bclose(in);
+
+ /* write trailer if this is the last gzip file */
+ if (!clr) {
+ put4(*crc, out);
+ put4(*tot, out);
+ }
+}
+
+/* join the gzip files on the command line, write result to stdout */
+int main(int argc, char **argv)
+{
+ unsigned long crc, tot; /* running crc and total uncompressed length */
+
+ /* skip command name */
+ argc--;
+ argv++;
+
+ /* show usage if no arguments */
+ if (argc == 0) {
+ fputs("gzjoin usage: gzjoin f1.gz [f2.gz [f3.gz ...]] > fjoin.gz\n",
+ stderr);
+ return 0;
+ }
+
+ /* join gzip files on command line and write to stdout */
+ gzinit(&crc, &tot, stdout);
+ while (argc--)
+ gzcopy(*argv++, argc, &crc, &tot, stdout);
+
+ /* done */
+ return 0;
+}