aboutsummaryrefslogtreecommitdiff
path: root/zlib/examples
diff options
context:
space:
mode:
authorH.J. Lu <hjl.tools@gmail.com>2015-03-13 10:40:33 -0700
committerH.J. Lu <hjl.tools@gmail.com>2015-03-26 08:25:29 -0700
commit5ca28f792883afb409ae145666fc3662c3a3aed5 (patch)
treec7e7f508b52bdf5c6a1162cd30241275bbb89fd2 /zlib/examples
parentb19a8f8545100a08ee2a64c05631aff6f651faa1 (diff)
downloadgdb-5ca28f792883afb409ae145666fc3662c3a3aed5.zip
gdb-5ca28f792883afb409ae145666fc3662c3a3aed5.tar.gz
gdb-5ca28f792883afb409ae145666fc3662c3a3aed5.tar.bz2
Import zlib from GCC
Diffstat (limited to 'zlib/examples')
-rw-r--r--zlib/examples/README.examples49
-rw-r--r--zlib/examples/enough.c569
-rw-r--r--zlib/examples/fitblk.c233
-rw-r--r--zlib/examples/gun.c701
-rw-r--r--zlib/examples/gzappend.c500
-rw-r--r--zlib/examples/gzjoin.c448
-rw-r--r--zlib/examples/gzlog.c1058
-rw-r--r--zlib/examples/gzlog.h89
-rw-r--r--zlib/examples/zlib_how.html545
-rw-r--r--zlib/examples/zpipe.c205
-rw-r--r--zlib/examples/zran.c404
11 files changed, 4801 insertions, 0 deletions
diff --git a/zlib/examples/README.examples b/zlib/examples/README.examples
new file mode 100644
index 0000000..56a3171
--- /dev/null
+++ b/zlib/examples/README.examples
@@ -0,0 +1,49 @@
+This directory contains examples of the use of zlib and other relevant
+programs and documentation.
+
+enough.c
+ calculation and justification of ENOUGH parameter in inftrees.h
+ - calculates the maximum table space used in inflate tree
+ construction over all possible Huffman codes
+
+fitblk.c
+ compress just enough input to nearly fill a requested output size
+ - zlib isn't designed to do this, but fitblk does it anyway
+
+gun.c
+ uncompress a gzip file
+ - illustrates the use of inflateBack() for high speed file-to-file
+ decompression using call-back functions
+ - is approximately twice as fast as gzip -d
+ - also provides Unix uncompress functionality, again twice as fast
+
+gzappend.c
+ append to a gzip file
+ - illustrates the use of the Z_BLOCK flush parameter for inflate()
+ - illustrates the use of deflatePrime() to start at any bit
+
+gzjoin.c
+ join gzip files without recalculating the crc or recompressing
+ - illustrates the use of the Z_BLOCK flush parameter for inflate()
+ - illustrates the use of crc32_combine()
+
+gzlog.c
+gzlog.h
+ efficiently and robustly maintain a message log file in gzip format
+ - illustrates use of raw deflate, Z_PARTIAL_FLUSH, deflatePrime(),
+ and deflateSetDictionary()
+ - illustrates use of a gzip header extra field
+
+zlib_how.html
+ painfully comprehensive description of zpipe.c (see below)
+ - describes in excruciating detail the use of deflate() and inflate()
+
+zpipe.c
+ reads and writes zlib streams from stdin to stdout
+ - illustrates the proper use of deflate() and inflate()
+ - deeply commented in zlib_how.html (see above)
+
+zran.c
+ index a zlib or gzip stream and randomly access it
+ - illustrates the use of Z_BLOCK, inflatePrime(), and
+ inflateSetDictionary() to provide random access
diff --git a/zlib/examples/enough.c b/zlib/examples/enough.c
new file mode 100644
index 0000000..c40410b
--- /dev/null
+++ b/zlib/examples/enough.c
@@ -0,0 +1,569 @@
+/* enough.c -- determine the maximum size of inflate's Huffman code tables over
+ * all possible valid and complete Huffman codes, subject to a length limit.
+ * Copyright (C) 2007, 2008 Mark Adler
+ * Version 1.3 17 February 2008 Mark Adler
+ */
+
+/* Version history:
+ 1.0 3 Jan 2007 First version (derived from codecount.c version 1.4)
+ 1.1 4 Jan 2007 Use faster incremental table usage computation
+ Prune examine() search on previously visited states
+ 1.2 5 Jan 2007 Comments clean up
+ As inflate does, decrease root for short codes
+ Refuse cases where inflate would increase root
+ 1.3 17 Feb 2008 Add argument for initial root table size
+ Fix bug for initial root table size == max - 1
+ Use a macro to compute the history index
+ */
+
+/*
+ Examine all possible Huffman codes for a given number of symbols and a
+ maximum code length in bits to determine the maximum table size for zilb's
+ inflate. Only complete Huffman codes are counted.
+
+ Two codes are considered distinct if the vectors of the number of codes per
+ length are not identical. So permutations of the symbol assignments result
+ in the same code for the counting, as do permutations of the assignments of
+ the bit values to the codes (i.e. only canonical codes are counted).
+
+ We build a code from shorter to longer lengths, determining how many symbols
+ are coded at each length. At each step, we have how many symbols remain to
+ be coded, what the last code length used was, and how many bit patterns of
+ that length remain unused. Then we add one to the code length and double the
+ number of unused patterns to graduate to the next code length. We then
+ assign all portions of the remaining symbols to that code length that
+ preserve the properties of a correct and eventually complete code. Those
+ properties are: we cannot use more bit patterns than are available; and when
+ all the symbols are used, there are exactly zero possible bit patterns
+ remaining.
+
+ The inflate Huffman decoding algorithm uses two-level lookup tables for
+ speed. There is a single first-level table to decode codes up to root bits
+ in length (root == 9 in the current inflate implementation). The table
+ has 1 << root entries and is indexed by the next root bits of input. Codes
+ shorter than root bits have replicated table entries, so that the correct
+ entry is pointed to regardless of the bits that follow the short code. If
+ the code is longer than root bits, then the table entry points to a second-
+ level table. The size of that table is determined by the longest code with
+ that root-bit prefix. If that longest code has length len, then the table
+ has size 1 << (len - root), to index the remaining bits in that set of
+ codes. Each subsequent root-bit prefix then has its own sub-table. The
+ total number of table entries required by the code is calculated
+ incrementally as the number of codes at each bit length is populated. When
+ all of the codes are shorter than root bits, then root is reduced to the
+ longest code length, resulting in a single, smaller, one-level table.
+
+ The inflate algorithm also provides for small values of root (relative to
+ the log2 of the number of symbols), where the shortest code has more bits
+ than root. In that case, root is increased to the length of the shortest
+ code. This program, by design, does not handle that case, so it is verified
+ that the number of symbols is less than 2^(root + 1).
+
+ In order to speed up the examination (by about ten orders of magnitude for
+ the default arguments), the intermediate states in the build-up of a code
+ are remembered and previously visited branches are pruned. The memory
+ required for this will increase rapidly with the total number of symbols and
+ the maximum code length in bits. However this is a very small price to pay
+ for the vast speedup.
+
+ First, all of the possible Huffman codes are counted, and reachable
+ intermediate states are noted by a non-zero count in a saved-results array.
+ Second, the intermediate states that lead to (root + 1) bit or longer codes
+ are used to look at all sub-codes from those junctures for their inflate
+ memory usage. (The amount of memory used is not affected by the number of
+ codes of root bits or less in length.) Third, the visited states in the
+ construction of those sub-codes and the associated calculation of the table
+ size is recalled in order to avoid recalculating from the same juncture.
+ Beginning the code examination at (root + 1) bit codes, which is enabled by
+ identifying the reachable nodes, accounts for about six of the orders of
+ magnitude of improvement for the default arguments. About another four
+ orders of magnitude come from not revisiting previous states. Out of
+ approximately 2x10^16 possible Huffman codes, only about 2x10^6 sub-codes
+ need to be examined to cover all of the possible table memory usage cases
+ for the default arguments of 286 symbols limited to 15-bit codes.
+
+ Note that an unsigned long long type is used for counting. It is quite easy
+ to exceed the capacity of an eight-byte integer with a large number of
+ symbols and a large maximum code length, so multiple-precision arithmetic
+ would need to replace the unsigned long long arithmetic in that case. This
+ program will abort if an overflow occurs. The big_t type identifies where
+ the counting takes place.
+
+ An unsigned long long type is also used for calculating the number of
+ possible codes remaining at the maximum length. This limits the maximum
+ code length to the number of bits in a long long minus the number of bits
+ needed to represent the symbols in a flat code. The code_t type identifies
+ where the bit pattern counting takes place.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <assert.h>
+
+#define local static
+
+/* special data types */
+typedef unsigned long long big_t; /* type for code counting */
+typedef unsigned long long code_t; /* type for bit pattern counting */
+struct tab { /* type for been here check */
+ size_t len; /* length of bit vector in char's */
+ char *vec; /* allocated bit vector */
+};
+
+/* The array for saving results, num[], is indexed with this triplet:
+
+ syms: number of symbols remaining to code
+ left: number of available bit patterns at length len
+ len: number of bits in the codes currently being assigned
+
+ Those indices are constrained thusly when saving results:
+
+ syms: 3..totsym (totsym == total symbols to code)
+ left: 2..syms - 1, but only the evens (so syms == 8 -> 2, 4, 6)
+ len: 1..max - 1 (max == maximum code length in bits)
+
+ syms == 2 is not saved since that immediately leads to a single code. left
+ must be even, since it represents the number of available bit patterns at
+ the current length, which is double the number at the previous length.
+ left ends at syms-1 since left == syms immediately results in a single code.
+ (left > sym is not allowed since that would result in an incomplete code.)
+ len is less than max, since the code completes immediately when len == max.
+
+ The offset into the array is calculated for the three indices with the
+ first one (syms) being outermost, and the last one (len) being innermost.
+ We build the array with length max-1 lists for the len index, with syms-3
+ of those for each symbol. There are totsym-2 of those, with each one
+ varying in length as a function of sym. See the calculation of index in
+ count() for the index, and the calculation of size in main() for the size
+ of the array.
+
+ For the deflate example of 286 symbols limited to 15-bit codes, the array
+ has 284,284 entries, taking up 2.17 MB for an 8-byte big_t. More than
+ half of the space allocated for saved results is actually used -- not all
+ possible triplets are reached in the generation of valid Huffman codes.
+ */
+
+/* The array for tracking visited states, done[], is itself indexed identically
+ to the num[] array as described above for the (syms, left, len) triplet.
+ Each element in the array is further indexed by the (mem, rem) doublet,
+ where mem is the amount of inflate table space used so far, and rem is the
+ remaining unused entries in the current inflate sub-table. Each indexed
+ element is simply one bit indicating whether the state has been visited or
+ not. Since the ranges for mem and rem are not known a priori, each bit
+ vector is of a variable size, and grows as needed to accommodate the visited
+ states. mem and rem are used to calculate a single index in a triangular
+ array. Since the range of mem is expected in the default case to be about
+ ten times larger than the range of rem, the array is skewed to reduce the
+ memory usage, with eight times the range for mem than for rem. See the
+ calculations for offset and bit in beenhere() for the details.
+
+ For the deflate example of 286 symbols limited to 15-bit codes, the bit
+ vectors grow to total approximately 21 MB, in addition to the 4.3 MB done[]
+ array itself.
+ */
+
+/* Globals to avoid propagating constants or constant pointers recursively */
+local int max; /* maximum allowed bit length for the codes */
+local int root; /* size of base code table in bits */
+local int large; /* largest code table so far */
+local size_t size; /* number of elements in num and done */
+local int *code; /* number of symbols assigned to each bit length */
+local big_t *num; /* saved results array for code counting */
+local struct tab *done; /* states already evaluated array */
+
+/* Index function for num[] and done[] */
+#define INDEX(i,j,k) (((size_t)((i-1)>>1)*((i-2)>>1)+(j>>1)-1)*(max-1)+k-1)
+
+/* Free allocated space. Uses globals code, num, and done. */
+local void cleanup(void)
+{
+ size_t n;
+
+ if (done != NULL) {
+ for (n = 0; n < size; n++)
+ if (done[n].len)
+ free(done[n].vec);
+ free(done);
+ }
+ if (num != NULL)
+ free(num);
+ if (code != NULL)
+ free(code);
+}
+
+/* Return the number of possible Huffman codes using bit patterns of lengths
+ len through max inclusive, coding syms symbols, with left bit patterns of
+ length len unused -- return -1 if there is an overflow in the counting.
+ Keep a record of previous results in num to prevent repeating the same
+ calculation. Uses the globals max and num. */
+local big_t count(int syms, int len, int left)
+{
+ big_t sum; /* number of possible codes from this juncture */
+ big_t got; /* value returned from count() */
+ int least; /* least number of syms to use at this juncture */
+ int most; /* most number of syms to use at this juncture */
+ int use; /* number of bit patterns to use in next call */
+ size_t index; /* index of this case in *num */
+
+ /* see if only one possible code */
+ if (syms == left)
+ return 1;
+
+ /* note and verify the expected state */
+ assert(syms > left && left > 0 && len < max);
+
+ /* see if we've done this one already */
+ index = INDEX(syms, left, len);
+ got = num[index];
+ if (got)
+ return got; /* we have -- return the saved result */
+
+ /* we need to use at least this many bit patterns so that the code won't be
+ incomplete at the next length (more bit patterns than symbols) */
+ least = (left << 1) - syms;
+ if (least < 0)
+ least = 0;
+
+ /* we can use at most this many bit patterns, lest there not be enough
+ available for the remaining symbols at the maximum length (if there were
+ no limit to the code length, this would become: most = left - 1) */
+ most = (((code_t)left << (max - len)) - syms) /
+ (((code_t)1 << (max - len)) - 1);
+
+ /* count all possible codes from this juncture and add them up */
+ sum = 0;
+ for (use = least; use <= most; use++) {
+ got = count(syms - use, len + 1, (left - use) << 1);
+ sum += got;
+ if (got == -1 || sum < got) /* overflow */
+ return -1;
+ }
+
+ /* verify that all recursive calls are productive */
+ assert(sum != 0);
+
+ /* save the result and return it */
+ num[index] = sum;
+ return sum;
+}
+
+/* Return true if we've been here before, set to true if not. Set a bit in a
+ bit vector to indicate visiting this state. Each (syms,len,left) state
+ has a variable size bit vector indexed by (mem,rem). The bit vector is
+ lengthened if needed to allow setting the (mem,rem) bit. */
+local int beenhere(int syms, int len, int left, int mem, int rem)
+{
+ size_t index; /* index for this state's bit vector */
+ size_t offset; /* offset in this state's bit vector */
+ int bit; /* mask for this state's bit */
+ size_t length; /* length of the bit vector in bytes */
+ char *vector; /* new or enlarged bit vector */
+
+ /* point to vector for (syms,left,len), bit in vector for (mem,rem) */
+ index = INDEX(syms, left, len);
+ mem -= 1 << root;
+ offset = (mem >> 3) + rem;
+ offset = ((offset * (offset + 1)) >> 1) + rem;
+ bit = 1 << (mem & 7);
+
+ /* see if we've been here */
+ length = done[index].len;
+ if (offset < length && (done[index].vec[offset] & bit) != 0)
+ return 1; /* done this! */
+
+ /* we haven't been here before -- set the bit to show we have now */
+
+ /* see if we need to lengthen the vector in order to set the bit */
+ if (length <= offset) {
+ /* if we have one already, enlarge it, zero out the appended space */
+ if (length) {
+ do {
+ length <<= 1;
+ } while (length <= offset);
+ vector = realloc(done[index].vec, length);
+ if (vector != NULL)
+ memset(vector + done[index].len, 0, length - done[index].len);
+ }
+
+ /* otherwise we need to make a new vector and zero it out */
+ else {
+ length = 1 << (len - root);
+ while (length <= offset)
+ length <<= 1;
+ vector = calloc(length, sizeof(char));
+ }
+
+ /* in either case, bail if we can't get the memory */
+ if (vector == NULL) {
+ fputs("abort: unable to allocate enough memory\n", stderr);
+ cleanup();
+ exit(1);
+ }
+
+ /* install the new vector */
+ done[index].len = length;
+ done[index].vec = vector;
+ }
+
+ /* set the bit */
+ done[index].vec[offset] |= bit;
+ return 0;
+}
+
+/* Examine all possible codes from the given node (syms, len, left). Compute
+ the amount of memory required to build inflate's decoding tables, where the
+ number of code structures used so far is mem, and the number remaining in
+ the current sub-table is rem. Uses the globals max, code, root, large, and
+ done. */
+local void examine(int syms, int len, int left, int mem, int rem)
+{
+ int least; /* least number of syms to use at this juncture */
+ int most; /* most number of syms to use at this juncture */
+ int use; /* number of bit patterns to use in next call */
+
+ /* see if we have a complete code */
+ if (syms == left) {
+ /* set the last code entry */
+ code[len] = left;
+
+ /* complete computation of memory used by this code */
+ while (rem < left) {
+ left -= rem;
+ rem = 1 << (len - root);
+ mem += rem;
+ }
+ assert(rem == left);
+
+ /* if this is a new maximum, show the entries used and the sub-code */
+ if (mem > large) {
+ large = mem;
+ printf("max %d: ", mem);
+ for (use = root + 1; use <= max; use++)
+ if (code[use])
+ printf("%d[%d] ", code[use], use);
+ putchar('\n');
+ fflush(stdout);
+ }
+
+ /* remove entries as we drop back down in the recursion */
+ code[len] = 0;
+ return;
+ }
+
+ /* prune the tree if we can */
+ if (beenhere(syms, len, left, mem, rem))
+ return;
+
+ /* we need to use at least this many bit patterns so that the code won't be
+ incomplete at the next length (more bit patterns than symbols) */
+ least = (left << 1) - syms;
+ if (least < 0)
+ least = 0;
+
+ /* we can use at most this many bit patterns, lest there not be enough
+ available for the remaining symbols at the maximum length (if there were
+ no limit to the code length, this would become: most = left - 1) */
+ most = (((code_t)left << (max - len)) - syms) /
+ (((code_t)1 << (max - len)) - 1);
+
+ /* occupy least table spaces, creating new sub-tables as needed */
+ use = least;
+ while (rem < use) {
+ use -= rem;
+ rem = 1 << (len - root);
+ mem += rem;
+ }
+ rem -= use;
+
+ /* examine codes from here, updating table space as we go */
+ for (use = least; use <= most; use++) {
+ code[len] = use;
+ examine(syms - use, len + 1, (left - use) << 1,
+ mem + (rem ? 1 << (len - root) : 0), rem << 1);
+ if (rem == 0) {
+ rem = 1 << (len - root);
+ mem += rem;
+ }
+ rem--;
+ }
+
+ /* remove entries as we drop back down in the recursion */
+ code[len] = 0;
+}
+
+/* Look at all sub-codes starting with root + 1 bits. Look at only the valid
+ intermediate code states (syms, left, len). For each completed code,
+ calculate the amount of memory required by inflate to build the decoding
+ tables. Find the maximum amount of memory required and show the code that
+ requires that maximum. Uses the globals max, root, and num. */
+local void enough(int syms)
+{
+ int n; /* number of remaing symbols for this node */
+ int left; /* number of unused bit patterns at this length */
+ size_t index; /* index of this case in *num */
+
+ /* clear code */
+ for (n = 0; n <= max; n++)
+ code[n] = 0;
+
+ /* look at all (root + 1) bit and longer codes */
+ large = 1 << root; /* base table */
+ if (root < max) /* otherwise, there's only a base table */
+ for (n = 3; n <= syms; n++)
+ for (left = 2; left < n; left += 2)
+ {
+ /* look at all reachable (root + 1) bit nodes, and the
+ resulting codes (complete at root + 2 or more) */
+ index = INDEX(n, left, root + 1);
+ if (root + 1 < max && num[index]) /* reachable node */
+ examine(n, root + 1, left, 1 << root, 0);
+
+ /* also look at root bit codes with completions at root + 1
+ bits (not saved in num, since complete), just in case */
+ if (num[index - 1] && n <= left << 1)
+ examine((n - left) << 1, root + 1, (n - left) << 1,
+ 1 << root, 0);
+ }
+
+ /* done */
+ printf("done: maximum of %d table entries\n", large);
+}
+
+/*
+ Examine and show the total number of possible Huffman codes for a given
+ maximum number of symbols, initial root table size, and maximum code length
+ in bits -- those are the command arguments in that order. The default
+ values are 286, 9, and 15 respectively, for the deflate literal/length code.
+ The possible codes are counted for each number of coded symbols from two to
+ the maximum. The counts for each of those and the total number of codes are
+ shown. The maximum number of inflate table entires is then calculated
+ across all possible codes. Each new maximum number of table entries and the
+ associated sub-code (starting at root + 1 == 10 bits) is shown.
+
+ To count and examine Huffman codes that are not length-limited, provide a
+ maximum length equal to the number of symbols minus one.
+
+ For the deflate literal/length code, use "enough". For the deflate distance
+ code, use "enough 30 6".
+
+ This uses the %llu printf format to print big_t numbers, which assumes that
+ big_t is an unsigned long long. If the big_t type is changed (for example
+ to a multiple precision type), the method of printing will also need to be
+ updated.
+ */
+int main(int argc, char **argv)
+{
+ int syms; /* total number of symbols to code */
+ int n; /* number of symbols to code for this run */
+ big_t got; /* return value of count() */
+ big_t sum; /* accumulated number of codes over n */
+
+ /* set up globals for cleanup() */
+ code = NULL;
+ num = NULL;
+ done = NULL;
+
+ /* get arguments -- default to the deflate literal/length code */
+ syms = 286;
+ root = 9;
+ max = 15;
+ if (argc > 1) {
+ syms = atoi(argv[1]);
+ if (argc > 2) {
+ root = atoi(argv[2]);
+ if (argc > 3)
+ max = atoi(argv[3]);
+ }
+ }
+ if (argc > 4 || syms < 2 || root < 1 || max < 1) {
+ fputs("invalid arguments, need: [sym >= 2 [root >= 1 [max >= 1]]]\n",
+ stderr);
+ return 1;
+ }
+
+ /* if not restricting the code length, the longest is syms - 1 */
+ if (max > syms - 1)
+ max = syms - 1;
+
+ /* determine the number of bits in a code_t */
+ n = 0;
+ while (((code_t)1 << n) != 0)
+ n++;
+
+ /* make sure that the calculation of most will not overflow */
+ if (max > n || syms - 2 >= (((code_t)0 - 1) >> (max - 1))) {
+ fputs("abort: code length too long for internal types\n", stderr);
+ return 1;
+ }
+
+ /* reject impossible code requests */
+ if (syms - 1 > ((code_t)1 << max) - 1) {
+ fprintf(stderr, "%d symbols cannot be coded in %d bits\n",
+ syms, max);
+ return 1;
+ }
+
+ /* allocate code vector */
+ code = calloc(max + 1, sizeof(int));
+ if (code == NULL) {
+ fputs("abort: unable to allocate enough memory\n", stderr);
+ return 1;
+ }
+
+ /* determine size of saved results array, checking for overflows,
+ allocate and clear the array (set all to zero with calloc()) */
+ if (syms == 2) /* iff max == 1 */
+ num = NULL; /* won't be saving any results */
+ else {
+ size = syms >> 1;
+ if (size > ((size_t)0 - 1) / (n = (syms - 1) >> 1) ||
+ (size *= n, size > ((size_t)0 - 1) / (n = max - 1)) ||
+ (size *= n, size > ((size_t)0 - 1) / sizeof(big_t)) ||
+ (num = calloc(size, sizeof(big_t))) == NULL) {
+ fputs("abort: unable to allocate enough memory\n", stderr);
+ cleanup();
+ return 1;
+ }
+ }
+
+ /* count possible codes for all numbers of symbols, add up counts */
+ sum = 0;
+ for (n = 2; n <= syms; n++) {
+ got = count(n, 1, 2);
+ sum += got;
+ if (got == -1 || sum < got) { /* overflow */
+ fputs("abort: can't count that high!\n", stderr);
+ cleanup();
+ return 1;
+ }
+ printf("%llu %d-codes\n", got, n);
+ }
+ printf("%llu total codes for 2 to %d symbols", sum, syms);
+ if (max < syms - 1)
+ printf(" (%d-bit length limit)\n", max);
+ else
+ puts(" (no length limit)");
+
+ /* allocate and clear done array for beenhere() */
+ if (syms == 2)
+ done = NULL;
+ else if (size > ((size_t)0 - 1) / sizeof(struct tab) ||
+ (done = calloc(size, sizeof(struct tab))) == NULL) {
+ fputs("abort: unable to allocate enough memory\n", stderr);
+ cleanup();
+ return 1;
+ }
+
+ /* find and show maximum inflate table usage */
+ if (root > max) /* reduce root to max length */
+ root = max;
+ if (syms < ((code_t)1 << (root + 1)))
+ enough(syms);
+ else
+ puts("cannot handle minimum code lengths > root");
+
+ /* done */
+ cleanup();
+ return 0;
+}
diff --git a/zlib/examples/fitblk.c b/zlib/examples/fitblk.c
new file mode 100644
index 0000000..c61de5c
--- /dev/null
+++ b/zlib/examples/fitblk.c
@@ -0,0 +1,233 @@
+/* fitblk.c: example of fitting compressed output to a specified size
+ Not copyrighted -- provided to the public domain
+ Version 1.1 25 November 2004 Mark Adler */
+
+/* Version history:
+ 1.0 24 Nov 2004 First version
+ 1.1 25 Nov 2004 Change deflateInit2() to deflateInit()
+ Use fixed-size, stack-allocated raw buffers
+ Simplify code moving compression to subroutines
+ Use assert() for internal errors
+ Add detailed description of approach
+ */
+
+/* Approach to just fitting a requested compressed size:
+
+ fitblk performs three compression passes on a portion of the input
+ data in order to determine how much of that input will compress to
+ nearly the requested output block size. The first pass generates
+ enough deflate blocks to produce output to fill the requested
+ output size plus a specfied excess amount (see the EXCESS define
+ below). The last deflate block may go quite a bit past that, but
+ is discarded. The second pass decompresses and recompresses just
+ the compressed data that fit in the requested plus excess sized
+ buffer. The deflate process is terminated after that amount of
+ input, which is less than the amount consumed on the first pass.
+ The last deflate block of the result will be of a comparable size
+ to the final product, so that the header for that deflate block and
+ the compression ratio for that block will be about the same as in
+ the final product. The third compression pass decompresses the
+ result of the second step, but only the compressed data up to the
+ requested size minus an amount to allow the compressed stream to
+ complete (see the MARGIN define below). That will result in a
+ final compressed stream whose length is less than or equal to the
+ requested size. Assuming sufficient input and a requested size
+ greater than a few hundred bytes, the shortfall will typically be
+ less than ten bytes.
+
+ If the input is short enough that the first compression completes
+ before filling the requested output size, then that compressed
+ stream is return with no recompression.
+
+ EXCESS is chosen to be just greater than the shortfall seen in a
+ two pass approach similar to the above. That shortfall is due to
+ the last deflate block compressing more efficiently with a smaller
+ header on the second pass. EXCESS is set to be large enough so
+ that there is enough uncompressed data for the second pass to fill
+ out the requested size, and small enough so that the final deflate
+ block of the second pass will be close in size to the final deflate
+ block of the third and final pass. MARGIN is chosen to be just
+ large enough to assure that the final compression has enough room
+ to complete in all cases.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <assert.h>
+#include "zlib.h"
+
+#define local static
+
+/* print nastygram and leave */
+local void quit(char *why)
+{
+ fprintf(stderr, "fitblk abort: %s\n", why);
+ exit(1);
+}
+
+#define RAWLEN 4096 /* intermediate uncompressed buffer size */
+
+/* compress from file to def until provided buffer is full or end of
+ input reached; return last deflate() return value, or Z_ERRNO if
+ there was read error on the file */
+local int partcompress(FILE *in, z_streamp def)
+{
+ int ret, flush;
+ unsigned char raw[RAWLEN];
+
+ flush = Z_NO_FLUSH;
+ do {
+ def->avail_in = fread(raw, 1, RAWLEN, in);
+ if (ferror(in))
+ return Z_ERRNO;
+ def->next_in = raw;
+ if (feof(in))
+ flush = Z_FINISH;
+ ret = deflate(def, flush);
+ assert(ret != Z_STREAM_ERROR);
+ } while (def->avail_out != 0 && flush == Z_NO_FLUSH);
+ return ret;
+}
+
+/* recompress from inf's input to def's output; the input for inf and
+ the output for def are set in those structures before calling;
+ return last deflate() return value, or Z_MEM_ERROR if inflate()
+ was not able to allocate enough memory when it needed to */
+local int recompress(z_streamp inf, z_streamp def)
+{
+ int ret, flush;
+ unsigned char raw[RAWLEN];
+
+ flush = Z_NO_FLUSH;
+ do {
+ /* decompress */
+ inf->avail_out = RAWLEN;
+ inf->next_out = raw;
+ ret = inflate(inf, Z_NO_FLUSH);
+ assert(ret != Z_STREAM_ERROR && ret != Z_DATA_ERROR &&
+ ret != Z_NEED_DICT);
+ if (ret == Z_MEM_ERROR)
+ return ret;
+
+ /* compress what was decompresed until done or no room */
+ def->avail_in = RAWLEN - inf->avail_out;
+ def->next_in = raw;
+ if (inf->avail_out != 0)
+ flush = Z_FINISH;
+ ret = deflate(def, flush);
+ assert(ret != Z_STREAM_ERROR);
+ } while (ret != Z_STREAM_END && def->avail_out != 0);
+ return ret;
+}
+
+#define EXCESS 256 /* empirically determined stream overage */
+#define MARGIN 8 /* amount to back off for completion */
+
+/* compress from stdin to fixed-size block on stdout */
+int main(int argc, char **argv)
+{
+ int ret; /* return code */
+ unsigned size; /* requested fixed output block size */
+ unsigned have; /* bytes written by deflate() call */
+ unsigned char *blk; /* intermediate and final stream */
+ unsigned char *tmp; /* close to desired size stream */
+ z_stream def, inf; /* zlib deflate and inflate states */
+
+ /* get requested output size */
+ if (argc != 2)
+ quit("need one argument: size of output block");
+ ret = strtol(argv[1], argv + 1, 10);
+ if (argv[1][0] != 0)
+ quit("argument must be a number");
+ if (ret < 8) /* 8 is minimum zlib stream size */
+ quit("need positive size of 8 or greater");
+ size = (unsigned)ret;
+
+ /* allocate memory for buffers and compression engine */
+ blk = malloc(size + EXCESS);
+ def.zalloc = Z_NULL;
+ def.zfree = Z_NULL;
+ def.opaque = Z_NULL;
+ ret = deflateInit(&def, Z_DEFAULT_COMPRESSION);
+ if (ret != Z_OK || blk == NULL)
+ quit("out of memory");
+
+ /* compress from stdin until output full, or no more input */
+ def.avail_out = size + EXCESS;
+ def.next_out = blk;
+ ret = partcompress(stdin, &def);
+ if (ret == Z_ERRNO)
+ quit("error reading input");
+
+ /* if it all fit, then size was undersubscribed -- done! */
+ if (ret == Z_STREAM_END && def.avail_out >= EXCESS) {
+ /* write block to stdout */
+ have = size + EXCESS - def.avail_out;
+ if (fwrite(blk, 1, have, stdout) != have || ferror(stdout))
+ quit("error writing output");
+
+ /* clean up and print results to stderr */
+ ret = deflateEnd(&def);
+ assert(ret != Z_STREAM_ERROR);
+ free(blk);
+ fprintf(stderr,
+ "%u bytes unused out of %u requested (all input)\n",
+ size - have, size);
+ return 0;
+ }
+
+ /* it didn't all fit -- set up for recompression */
+ inf.zalloc = Z_NULL;
+ inf.zfree = Z_NULL;
+ inf.opaque = Z_NULL;
+ inf.avail_in = 0;
+ inf.next_in = Z_NULL;
+ ret = inflateInit(&inf);
+ tmp = malloc(size + EXCESS);
+ if (ret != Z_OK || tmp == NULL)
+ quit("out of memory");
+ ret = deflateReset(&def);
+ assert(ret != Z_STREAM_ERROR);
+
+ /* do first recompression close to the right amount */
+ inf.avail_in = size + EXCESS;
+ inf.next_in = blk;
+ def.avail_out = size + EXCESS;
+ def.next_out = tmp;
+ ret = recompress(&inf, &def);
+ if (ret == Z_MEM_ERROR)
+ quit("out of memory");
+
+ /* set up for next reocmpression */
+ ret = inflateReset(&inf);
+ assert(ret != Z_STREAM_ERROR);
+ ret = deflateReset(&def);
+ assert(ret != Z_STREAM_ERROR);
+
+ /* do second and final recompression (third compression) */
+ inf.avail_in = size - MARGIN; /* assure stream will complete */
+ inf.next_in = tmp;
+ def.avail_out = size;
+ def.next_out = blk;
+ ret = recompress(&inf, &def);
+ if (ret == Z_MEM_ERROR)
+ quit("out of memory");
+ assert(ret == Z_STREAM_END); /* otherwise MARGIN too small */
+
+ /* done -- write block to stdout */
+ have = size - def.avail_out;
+ if (fwrite(blk, 1, have, stdout) != have || ferror(stdout))
+ quit("error writing output");
+
+ /* clean up and print results to stderr */
+ free(tmp);
+ ret = inflateEnd(&inf);
+ assert(ret != Z_STREAM_ERROR);
+ ret = deflateEnd(&def);
+ assert(ret != Z_STREAM_ERROR);
+ free(blk);
+ fprintf(stderr,
+ "%u bytes unused out of %u requested (%lu input)\n",
+ size - have, size, def.total_in);
+ return 0;
+}
diff --git a/zlib/examples/gun.c b/zlib/examples/gun.c
new file mode 100644
index 0000000..72b0882
--- /dev/null
+++ b/zlib/examples/gun.c
@@ -0,0 +1,701 @@
+/* gun.c -- simple gunzip to give an example of the use of inflateBack()
+ * Copyright (C) 2003, 2005, 2008, 2010 Mark Adler
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ Version 1.6 17 January 2010 Mark Adler */
+
+/* Version history:
+ 1.0 16 Feb 2003 First version for testing of inflateBack()
+ 1.1 21 Feb 2005 Decompress concatenated gzip streams
+ Remove use of "this" variable (C++ keyword)
+ Fix return value for in()
+ Improve allocation failure checking
+ Add typecasting for void * structures
+ Add -h option for command version and usage
+ Add a bunch of comments
+ 1.2 20 Mar 2005 Add Unix compress (LZW) decompression
+ Copy file attributes from input file to output file
+ 1.3 12 Jun 2005 Add casts for error messages [Oberhumer]
+ 1.4 8 Dec 2006 LZW decompression speed improvements
+ 1.5 9 Feb 2008 Avoid warning in latest version of gcc
+ 1.6 17 Jan 2010 Avoid signed/unsigned comparison warnings
+ */
+
+/*
+ gun [ -t ] [ name ... ]
+
+ decompresses the data in the named gzip files. If no arguments are given,
+ gun will decompress from stdin to stdout. The names must end in .gz, -gz,
+ .z, -z, _z, or .Z. The uncompressed data will be written to a file name
+ with the suffix stripped. On success, the original file is deleted. On
+ failure, the output file is deleted. For most failures, the command will
+ continue to process the remaining names on the command line. A memory
+ allocation failure will abort the command. If -t is specified, then the
+ listed files or stdin will be tested as gzip files for integrity (without
+ checking for a proper suffix), no output will be written, and no files
+ will be deleted.
+
+ Like gzip, gun allows concatenated gzip streams and will decompress them,
+ writing all of the uncompressed data to the output. Unlike gzip, gun allows
+ an empty file on input, and will produce no error writing an empty output
+ file.
+
+ gun will also decompress files made by Unix compress, which uses LZW
+ compression. These files are automatically detected by virtue of their
+ magic header bytes. Since the end of Unix compress stream is marked by the
+ end-of-file, they cannot be concantenated. If a Unix compress stream is
+ encountered in an input file, it is the last stream in that file.
+
+ Like gunzip and uncompress, the file attributes of the orignal compressed
+ file are maintained in the final uncompressed file, to the extent that the
+ user permissions allow it.
+
+ On my Mac OS X PowerPC G4, gun is almost twice as fast as gunzip (version
+ 1.2.4) is on the same file, when gun is linked with zlib 1.2.2. Also the
+ LZW decompression provided by gun is about twice as fast as the standard
+ Unix uncompress command.
+ */
+
+/* external functions and related types and constants */
+#include <stdio.h> /* fprintf() */
+#include <stdlib.h> /* malloc(), free() */
+#include <string.h> /* strerror(), strcmp(), strlen(), memcpy() */
+#include <errno.h> /* errno */
+#include <fcntl.h> /* open() */
+#include <unistd.h> /* read(), write(), close(), chown(), unlink() */
+#include <sys/types.h>
+#include <sys/stat.h> /* stat(), chmod() */
+#include <utime.h> /* utime() */
+#include "zlib.h" /* inflateBackInit(), inflateBack(), */
+ /* inflateBackEnd(), crc32() */
+
+/* function declaration */
+#define local static
+
+/* buffer constants */
+#define SIZE 32768U /* input and output buffer sizes */
+#define PIECE 16384 /* limits i/o chunks for 16-bit int case */
+
+/* structure for infback() to pass to input function in() -- it maintains the
+ input file and a buffer of size SIZE */
+struct ind {
+ int infile;
+ unsigned char *inbuf;
+};
+
+/* Load input buffer, assumed to be empty, and return bytes loaded and a
+ pointer to them. read() is called until the buffer is full, or until it
+ returns end-of-file or error. Return 0 on error. */
+local unsigned in(void *in_desc, unsigned char **buf)
+{
+ int ret;
+ unsigned len;
+ unsigned char *next;
+ struct ind *me = (struct ind *)in_desc;
+
+ next = me->inbuf;
+ *buf = next;
+ len = 0;
+ do {
+ ret = PIECE;
+ if ((unsigned)ret > SIZE - len)
+ ret = (int)(SIZE - len);
+ ret = (int)read(me->infile, next, ret);
+ if (ret == -1) {
+ len = 0;
+ break;
+ }
+ next += ret;
+ len += ret;
+ } while (ret != 0 && len < SIZE);
+ return len;
+}
+
+/* structure for infback() to pass to output function out() -- it maintains the
+ output file, a running CRC-32 check on the output and the total number of
+ bytes output, both for checking against the gzip trailer. (The length in
+ the gzip trailer is stored modulo 2^32, so it's ok if a long is 32 bits and
+ the output is greater than 4 GB.) */
+struct outd {
+ int outfile;
+ int check; /* true if checking crc and total */
+ unsigned long crc;
+ unsigned long total;
+};
+
+/* Write output buffer and update the CRC-32 and total bytes written. write()
+ is called until all of the output is written or an error is encountered.
+ On success out() returns 0. For a write failure, out() returns 1. If the
+ output file descriptor is -1, then nothing is written.
+ */
+local int out(void *out_desc, unsigned char *buf, unsigned len)
+{
+ int ret;
+ struct outd *me = (struct outd *)out_desc;
+
+ if (me->check) {
+ me->crc = crc32(me->crc, buf, len);
+ me->total += len;
+ }
+ if (me->outfile != -1)
+ do {
+ ret = PIECE;
+ if ((unsigned)ret > len)
+ ret = (int)len;
+ ret = (int)write(me->outfile, buf, ret);
+ if (ret == -1)
+ return 1;
+ buf += ret;
+ len -= ret;
+ } while (len != 0);
+ return 0;
+}
+
+/* next input byte macro for use inside lunpipe() and gunpipe() */
+#define NEXT() (have ? 0 : (have = in(indp, &next)), \
+ last = have ? (have--, (int)(*next++)) : -1)
+
+/* memory for gunpipe() and lunpipe() --
+ the first 256 entries of prefix[] and suffix[] are never used, could
+ have offset the index, but it's faster to waste the memory */
+unsigned char inbuf[SIZE]; /* input buffer */
+unsigned char outbuf[SIZE]; /* output buffer */
+unsigned short prefix[65536]; /* index to LZW prefix string */
+unsigned char suffix[65536]; /* one-character LZW suffix */
+unsigned char match[65280 + 2]; /* buffer for reversed match or gzip
+ 32K sliding window */
+
+/* throw out what's left in the current bits byte buffer (this is a vestigial
+ aspect of the compressed data format derived from an implementation that
+ made use of a special VAX machine instruction!) */
+#define FLUSHCODE() \
+ do { \
+ left = 0; \
+ rem = 0; \
+ if (chunk > have) { \
+ chunk -= have; \
+ have = 0; \
+ if (NEXT() == -1) \
+ break; \
+ chunk--; \
+ if (chunk > have) { \
+ chunk = have = 0; \
+ break; \
+ } \
+ } \
+ have -= chunk; \
+ next += chunk; \
+ chunk = 0; \
+ } while (0)
+
+/* Decompress a compress (LZW) file from indp to outfile. The compress magic
+ header (two bytes) has already been read and verified. There are have bytes
+ of buffered input at next. strm is used for passing error information back
+ to gunpipe().
+
+ lunpipe() will return Z_OK on success, Z_BUF_ERROR for an unexpected end of
+ file, read error, or write error (a write error indicated by strm->next_in
+ not equal to Z_NULL), or Z_DATA_ERROR for invalid input.
+ */
+local int lunpipe(unsigned have, unsigned char *next, struct ind *indp,
+ int outfile, z_stream *strm)
+{
+ int last; /* last byte read by NEXT(), or -1 if EOF */
+ unsigned chunk; /* bytes left in current chunk */
+ int left; /* bits left in rem */
+ unsigned rem; /* unused bits from input */
+ int bits; /* current bits per code */
+ unsigned code; /* code, table traversal index */
+ unsigned mask; /* mask for current bits codes */
+ int max; /* maximum bits per code for this stream */
+ unsigned flags; /* compress flags, then block compress flag */
+ unsigned end; /* last valid entry in prefix/suffix tables */
+ unsigned temp; /* current code */
+ unsigned prev; /* previous code */
+ unsigned final; /* last character written for previous code */
+ unsigned stack; /* next position for reversed string */
+ unsigned outcnt; /* bytes in output buffer */
+ struct outd outd; /* output structure */
+ unsigned char *p;
+
+ /* set up output */
+ outd.outfile = outfile;
+ outd.check = 0;
+
+ /* process remainder of compress header -- a flags byte */
+ flags = NEXT();
+ if (last == -1)
+ return Z_BUF_ERROR;
+ if (flags & 0x60) {
+ strm->msg = (char *)"unknown lzw flags set";
+ return Z_DATA_ERROR;
+ }
+ max = flags & 0x1f;
+ if (max < 9 || max > 16) {
+ strm->msg = (char *)"lzw bits out of range";
+ return Z_DATA_ERROR;
+ }
+ if (max == 9) /* 9 doesn't really mean 9 */
+ max = 10;
+ flags &= 0x80; /* true if block compress */
+
+ /* clear table */
+ bits = 9;
+ mask = 0x1ff;
+ end = flags ? 256 : 255;
+
+ /* set up: get first 9-bit code, which is the first decompressed byte, but
+ don't create a table entry until the next code */
+ if (NEXT() == -1) /* no compressed data is ok */
+ return Z_OK;
+ final = prev = (unsigned)last; /* low 8 bits of code */
+ if (NEXT() == -1) /* missing a bit */
+ return Z_BUF_ERROR;
+ if (last & 1) { /* code must be < 256 */
+ strm->msg = (char *)"invalid lzw code";
+ return Z_DATA_ERROR;
+ }
+ rem = (unsigned)last >> 1; /* remaining 7 bits */
+ left = 7;
+ chunk = bits - 2; /* 7 bytes left in this chunk */
+ outbuf[0] = (unsigned char)final; /* write first decompressed byte */
+ outcnt = 1;
+
+ /* decode codes */
+ stack = 0;
+ for (;;) {
+ /* if the table will be full after this, increment the code size */
+ if (end >= mask && bits < max) {
+ FLUSHCODE();
+ bits++;
+ mask <<= 1;
+ mask++;
+ }
+
+ /* get a code of length bits */
+ if (chunk == 0) /* decrement chunk modulo bits */
+ chunk = bits;
+ code = rem; /* low bits of code */
+ if (NEXT() == -1) { /* EOF is end of compressed data */
+ /* write remaining buffered output */
+ if (outcnt && out(&outd, outbuf, outcnt)) {
+ strm->next_in = outbuf; /* signal write error */
+ return Z_BUF_ERROR;
+ }
+ return Z_OK;
+ }
+ code += (unsigned)last << left; /* middle (or high) bits of code */
+ left += 8;
+ chunk--;
+ if (bits > left) { /* need more bits */
+ if (NEXT() == -1) /* can't end in middle of code */
+ return Z_BUF_ERROR;
+ code += (unsigned)last << left; /* high bits of code */
+ left += 8;
+ chunk--;
+ }
+ code &= mask; /* mask to current code length */
+ left -= bits; /* number of unused bits */
+ rem = (unsigned)last >> (8 - left); /* unused bits from last byte */
+
+ /* process clear code (256) */
+ if (code == 256 && flags) {
+ FLUSHCODE();
+ bits = 9; /* initialize bits and mask */
+ mask = 0x1ff;
+ end = 255; /* empty table */
+ continue; /* get next code */
+ }
+
+ /* special code to reuse last match */
+ temp = code; /* save the current code */
+ if (code > end) {
+ /* Be picky on the allowed code here, and make sure that the code
+ we drop through (prev) will be a valid index so that random
+ input does not cause an exception. The code != end + 1 check is
+ empirically derived, and not checked in the original uncompress
+ code. If this ever causes a problem, that check could be safely
+ removed. Leaving this check in greatly improves gun's ability
+ to detect random or corrupted input after a compress header.
+ In any case, the prev > end check must be retained. */
+ if (code != end + 1 || prev > end) {
+ strm->msg = (char *)"invalid lzw code";
+ return Z_DATA_ERROR;
+ }
+ match[stack++] = (unsigned char)final;
+ code = prev;
+ }
+
+ /* walk through linked list to generate output in reverse order */
+ p = match + stack;
+ while (code >= 256) {
+ *p++ = suffix[code];
+ code = prefix[code];
+ }
+ stack = p - match;
+ match[stack++] = (unsigned char)code;
+ final = code;
+
+ /* link new table entry */
+ if (end < mask) {
+ end++;
+ prefix[end] = (unsigned short)prev;
+ suffix[end] = (unsigned char)final;
+ }
+
+ /* set previous code for next iteration */
+ prev = temp;
+
+ /* write output in forward order */
+ while (stack > SIZE - outcnt) {
+ while (outcnt < SIZE)
+ outbuf[outcnt++] = match[--stack];
+ if (out(&outd, outbuf, outcnt)) {
+ strm->next_in = outbuf; /* signal write error */
+ return Z_BUF_ERROR;
+ }
+ outcnt = 0;
+ }
+ p = match + stack;
+ do {
+ outbuf[outcnt++] = *--p;
+ } while (p > match);
+ stack = 0;
+
+ /* loop for next code with final and prev as the last match, rem and
+ left provide the first 0..7 bits of the next code, end is the last
+ valid table entry */
+ }
+}
+
+/* Decompress a gzip file from infile to outfile. strm is assumed to have been
+ successfully initialized with inflateBackInit(). The input file may consist
+ of a series of gzip streams, in which case all of them will be decompressed
+ to the output file. If outfile is -1, then the gzip stream(s) integrity is
+ checked and nothing is written.
+
+ The return value is a zlib error code: Z_MEM_ERROR if out of memory,
+ Z_DATA_ERROR if the header or the compressed data is invalid, or if the
+ trailer CRC-32 check or length doesn't match, Z_BUF_ERROR if the input ends
+ prematurely or a write error occurs, or Z_ERRNO if junk (not a another gzip
+ stream) follows a valid gzip stream.
+ */
+local int gunpipe(z_stream *strm, int infile, int outfile)
+{
+ int ret, first, last;
+ unsigned have, flags, len;
+ unsigned char *next = NULL;
+ struct ind ind, *indp;
+ struct outd outd;
+
+ /* setup input buffer */
+ ind.infile = infile;
+ ind.inbuf = inbuf;
+ indp = &ind;
+
+ /* decompress concatenated gzip streams */
+ have = 0; /* no input data read in yet */
+ first = 1; /* looking for first gzip header */
+ strm->next_in = Z_NULL; /* so Z_BUF_ERROR means EOF */
+ for (;;) {
+ /* look for the two magic header bytes for a gzip stream */
+ if (NEXT() == -1) {
+ ret = Z_OK;
+ break; /* empty gzip stream is ok */
+ }
+ if (last != 31 || (NEXT() != 139 && last != 157)) {
+ strm->msg = (char *)"incorrect header check";
+ ret = first ? Z_DATA_ERROR : Z_ERRNO;
+ break; /* not a gzip or compress header */
+ }
+ first = 0; /* next non-header is junk */
+
+ /* process a compress (LZW) file -- can't be concatenated after this */
+ if (last == 157) {
+ ret = lunpipe(have, next, indp, outfile, strm);
+ break;
+ }
+
+ /* process remainder of gzip header */
+ ret = Z_BUF_ERROR;
+ if (NEXT() != 8) { /* only deflate method allowed */
+ if (last == -1) break;
+ strm->msg = (char *)"unknown compression method";
+ ret = Z_DATA_ERROR;
+ break;
+ }
+ flags = NEXT(); /* header flags */
+ NEXT(); /* discard mod time, xflgs, os */
+ NEXT();
+ NEXT();
+ NEXT();
+ NEXT();
+ NEXT();
+ if (last == -1) break;
+ if (flags & 0xe0) {
+ strm->msg = (char *)"unknown header flags set";
+ ret = Z_DATA_ERROR;
+ break;
+ }
+ if (flags & 4) { /* extra field */
+ len = NEXT();
+ len += (unsigned)(NEXT()) << 8;
+ if (last == -1) break;
+ while (len > have) {
+ len -= have;
+ have = 0;
+ if (NEXT() == -1) break;
+ len--;
+ }
+ if (last == -1) break;
+ have -= len;
+ next += len;
+ }
+ if (flags & 8) /* file name */
+ while (NEXT() != 0 && last != -1)
+ ;
+ if (flags & 16) /* comment */
+ while (NEXT() != 0 && last != -1)
+ ;
+ if (flags & 2) { /* header crc */
+ NEXT();
+ NEXT();
+ }
+ if (last == -1) break;
+
+ /* set up output */
+ outd.outfile = outfile;
+ outd.check = 1;
+ outd.crc = crc32(0L, Z_NULL, 0);
+ outd.total = 0;
+
+ /* decompress data to output */
+ strm->next_in = next;
+ strm->avail_in = have;
+ ret = inflateBack(strm, in, indp, out, &outd);
+ if (ret != Z_STREAM_END) break;
+ next = strm->next_in;
+ have = strm->avail_in;
+ strm->next_in = Z_NULL; /* so Z_BUF_ERROR means EOF */
+
+ /* check trailer */
+ ret = Z_BUF_ERROR;
+ if (NEXT() != (int)(outd.crc & 0xff) ||
+ NEXT() != (int)((outd.crc >> 8) & 0xff) ||
+ NEXT() != (int)((outd.crc >> 16) & 0xff) ||
+ NEXT() != (int)((outd.crc >> 24) & 0xff)) {
+ /* crc error */
+ if (last != -1) {
+ strm->msg = (char *)"incorrect data check";
+ ret = Z_DATA_ERROR;
+ }
+ break;
+ }
+ if (NEXT() != (int)(outd.total & 0xff) ||
+ NEXT() != (int)((outd.total >> 8) & 0xff) ||
+ NEXT() != (int)((outd.total >> 16) & 0xff) ||
+ NEXT() != (int)((outd.total >> 24) & 0xff)) {
+ /* length error */
+ if (last != -1) {
+ strm->msg = (char *)"incorrect length check";
+ ret = Z_DATA_ERROR;
+ }
+ break;
+ }
+
+ /* go back and look for another gzip stream */
+ }
+
+ /* clean up and return */
+ return ret;
+}
+
+/* Copy file attributes, from -> to, as best we can. This is best effort, so
+ no errors are reported. The mode bits, including suid, sgid, and the sticky
+ bit are copied (if allowed), the owner's user id and group id are copied
+ (again if allowed), and the access and modify times are copied. */
+local void copymeta(char *from, char *to)
+{
+ struct stat was;
+ struct utimbuf when;
+
+ /* get all of from's Unix meta data, return if not a regular file */
+ if (stat(from, &was) != 0 || (was.st_mode & S_IFMT) != S_IFREG)
+ return;
+
+ /* set to's mode bits, ignore errors */
+ (void)chmod(to, was.st_mode & 07777);
+
+ /* copy owner's user and group, ignore errors */
+ (void)chown(to, was.st_uid, was.st_gid);
+
+ /* copy access and modify times, ignore errors */
+ when.actime = was.st_atime;
+ when.modtime = was.st_mtime;
+ (void)utime(to, &when);
+}
+
+/* Decompress the file inname to the file outnname, of if test is true, just
+ decompress without writing and check the gzip trailer for integrity. If
+ inname is NULL or an empty string, read from stdin. If outname is NULL or
+ an empty string, write to stdout. strm is a pre-initialized inflateBack
+ structure. When appropriate, copy the file attributes from inname to
+ outname.
+
+ gunzip() returns 1 if there is an out-of-memory error or an unexpected
+ return code from gunpipe(). Otherwise it returns 0.
+ */
+local int gunzip(z_stream *strm, char *inname, char *outname, int test)
+{
+ int ret;
+ int infile, outfile;
+
+ /* open files */
+ if (inname == NULL || *inname == 0) {
+ inname = "-";
+ infile = 0; /* stdin */
+ }
+ else {
+ infile = open(inname, O_RDONLY, 0);
+ if (infile == -1) {
+ fprintf(stderr, "gun cannot open %s\n", inname);
+ return 0;
+ }
+ }
+ if (test)
+ outfile = -1;
+ else if (outname == NULL || *outname == 0) {
+ outname = "-";
+ outfile = 1; /* stdout */
+ }
+ else {
+ outfile = open(outname, O_CREAT | O_TRUNC | O_WRONLY, 0666);
+ if (outfile == -1) {
+ close(infile);
+ fprintf(stderr, "gun cannot create %s\n", outname);
+ return 0;
+ }
+ }
+ errno = 0;
+
+ /* decompress */
+ ret = gunpipe(strm, infile, outfile);
+ if (outfile > 2) close(outfile);
+ if (infile > 2) close(infile);
+
+ /* interpret result */
+ switch (ret) {
+ case Z_OK:
+ case Z_ERRNO:
+ if (infile > 2 && outfile > 2) {
+ copymeta(inname, outname); /* copy attributes */
+ unlink(inname);
+ }
+ if (ret == Z_ERRNO)
+ fprintf(stderr, "gun warning: trailing garbage ignored in %s\n",
+ inname);
+ break;
+ case Z_DATA_ERROR:
+ if (outfile > 2) unlink(outname);
+ fprintf(stderr, "gun data error on %s: %s\n", inname, strm->msg);
+ break;
+ case Z_MEM_ERROR:
+ if (outfile > 2) unlink(outname);
+ fprintf(stderr, "gun out of memory error--aborting\n");
+ return 1;
+ case Z_BUF_ERROR:
+ if (outfile > 2) unlink(outname);
+ if (strm->next_in != Z_NULL) {
+ fprintf(stderr, "gun write error on %s: %s\n",
+ outname, strerror(errno));
+ }
+ else if (errno) {
+ fprintf(stderr, "gun read error on %s: %s\n",
+ inname, strerror(errno));
+ }
+ else {
+ fprintf(stderr, "gun unexpected end of file on %s\n",
+ inname);
+ }
+ break;
+ default:
+ if (outfile > 2) unlink(outname);
+ fprintf(stderr, "gun internal error--aborting\n");
+ return 1;
+ }
+ return 0;
+}
+
+/* Process the gun command line arguments. See the command syntax near the
+ beginning of this source file. */
+int main(int argc, char **argv)
+{
+ int ret, len, test;
+ char *outname;
+ unsigned char *window;
+ z_stream strm;
+
+ /* initialize inflateBack state for repeated use */
+ window = match; /* reuse LZW match buffer */
+ strm.zalloc = Z_NULL;
+ strm.zfree = Z_NULL;
+ strm.opaque = Z_NULL;
+ ret = inflateBackInit(&strm, 15, window);
+ if (ret != Z_OK) {
+ fprintf(stderr, "gun out of memory error--aborting\n");
+ return 1;
+ }
+
+ /* decompress each file to the same name with the suffix removed */
+ argc--;
+ argv++;
+ test = 0;
+ if (argc && strcmp(*argv, "-h") == 0) {
+ fprintf(stderr, "gun 1.6 (17 Jan 2010)\n");
+ fprintf(stderr, "Copyright (C) 2003-2010 Mark Adler\n");
+ fprintf(stderr, "usage: gun [-t] [file1.gz [file2.Z ...]]\n");
+ return 0;
+ }
+ if (argc && strcmp(*argv, "-t") == 0) {
+ test = 1;
+ argc--;
+ argv++;
+ }
+ if (argc)
+ do {
+ if (test)
+ outname = NULL;
+ else {
+ len = (int)strlen(*argv);
+ if (strcmp(*argv + len - 3, ".gz") == 0 ||
+ strcmp(*argv + len - 3, "-gz") == 0)
+ len -= 3;
+ else if (strcmp(*argv + len - 2, ".z") == 0 ||
+ strcmp(*argv + len - 2, "-z") == 0 ||
+ strcmp(*argv + len - 2, "_z") == 0 ||
+ strcmp(*argv + len - 2, ".Z") == 0)
+ len -= 2;
+ else {
+ fprintf(stderr, "gun error: no gz type on %s--skipping\n",
+ *argv);
+ continue;
+ }
+ outname = malloc(len + 1);
+ if (outname == NULL) {
+ fprintf(stderr, "gun out of memory error--aborting\n");
+ ret = 1;
+ break;
+ }
+ memcpy(outname, *argv, len);
+ outname[len] = 0;
+ }
+ ret = gunzip(&strm, *argv, outname, test);
+ if (outname != NULL) free(outname);
+ if (ret) break;
+ } while (argv++, --argc);
+ else
+ ret = gunzip(&strm, NULL, NULL, test);
+
+ /* clean up */
+ inflateBackEnd(&strm);
+ return ret;
+}
diff --git a/zlib/examples/gzappend.c b/zlib/examples/gzappend.c
new file mode 100644
index 0000000..e9e878e
--- /dev/null
+++ b/zlib/examples/gzappend.c
@@ -0,0 +1,500 @@
+/* gzappend -- command to append to a gzip file
+
+ Copyright (C) 2003 Mark Adler, all rights reserved
+ version 1.1, 4 Nov 2003
+
+ This software is provided 'as-is', without any express or implied
+ warranty. In no event will the author be held liable for any damages
+ arising from the use of this software.
+
+ Permission is granted to anyone to use this software for any purpose,
+ including commercial applications, and to alter it and redistribute it
+ freely, subject to the following restrictions:
+
+ 1. The origin of this software must not be misrepresented; you must not
+ claim that you wrote the original software. If you use this software
+ in a product, an acknowledgment in the product documentation would be
+ appreciated but is not required.
+ 2. Altered source versions must be plainly marked as such, and must not be
+ misrepresented as being the original software.
+ 3. This notice may not be removed or altered from any source distribution.
+
+ Mark Adler madler@alumni.caltech.edu
+ */
+
+/*
+ * Change history:
+ *
+ * 1.0 19 Oct 2003 - First version
+ * 1.1 4 Nov 2003 - Expand and clarify some comments and notes
+ * - Add version and copyright to help
+ * - Send help to stdout instead of stderr
+ * - Add some preemptive typecasts
+ * - Add L to constants in lseek() calls
+ * - Remove some debugging information in error messages
+ * - Use new data_type definition for zlib 1.2.1
+ * - Simplfy and unify file operations
+ * - Finish off gzip file in gztack()
+ * - Use deflatePrime() instead of adding empty blocks
+ * - Keep gzip file clean on appended file read errors
+ * - Use in-place rotate instead of auxiliary buffer
+ * (Why you ask? Because it was fun to write!)
+ */
+
+/*
+ gzappend takes a gzip file and appends to it, compressing files from the
+ command line or data from stdin. The gzip file is written to directly, to
+ avoid copying that file, in case it's large. Note that this results in the
+ unfriendly behavior that if gzappend fails, the gzip file is corrupted.
+
+ This program was written to illustrate the use of the new Z_BLOCK option of
+ zlib 1.2.x's inflate() function. This option returns from inflate() at each
+ block boundary to facilitate locating and modifying the last block bit at
+ the start of the final deflate block. Also whether using Z_BLOCK or not,
+ another required feature of zlib 1.2.x is that inflate() now provides the
+ number of unusued bits in the last input byte used. gzappend will not work
+ with versions of zlib earlier than 1.2.1.
+
+ gzappend first decompresses the gzip file internally, discarding all but
+ the last 32K of uncompressed data, and noting the location of the last block
+ bit and the number of unused bits in the last byte of the compressed data.
+ The gzip trailer containing the CRC-32 and length of the uncompressed data
+ is verified. This trailer will be later overwritten.
+
+ Then the last block bit is cleared by seeking back in the file and rewriting
+ the byte that contains it. Seeking forward, the last byte of the compressed
+ data is saved along with the number of unused bits to initialize deflate.
+
+ A deflate process is initialized, using the last 32K of the uncompressed
+ data from the gzip file to initialize the dictionary. If the total
+ uncompressed data was less than 32K, then all of it is used to initialize
+ the dictionary. The deflate output bit buffer is also initialized with the
+ last bits from the original deflate stream. From here on, the data to
+ append is simply compressed using deflate, and written to the gzip file.
+ When that is complete, the new CRC-32 and uncompressed length are written
+ as the trailer of the gzip file.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include "zlib.h"
+
+#define local static
+#define LGCHUNK 14
+#define CHUNK (1U << LGCHUNK)
+#define DSIZE 32768U
+
+/* print an error message and terminate with extreme prejudice */
+local void bye(char *msg1, char *msg2)
+{
+ fprintf(stderr, "gzappend error: %s%s\n", msg1, msg2);
+ exit(1);
+}
+
+/* return the greatest common divisor of a and b using Euclid's algorithm,
+ modified to be fast when one argument much greater than the other, and
+ coded to avoid unnecessary swapping */
+local unsigned gcd(unsigned a, unsigned b)
+{
+ unsigned c;
+
+ while (a && b)
+ if (a > b) {
+ c = b;
+ while (a - c >= c)
+ c <<= 1;
+ a -= c;
+ }
+ else {
+ c = a;
+ while (b - c >= c)
+ c <<= 1;
+ b -= c;
+ }
+ return a + b;
+}
+
+/* rotate list[0..len-1] left by rot positions, in place */
+local void rotate(unsigned char *list, unsigned len, unsigned rot)
+{
+ unsigned char tmp;
+ unsigned cycles;
+ unsigned char *start, *last, *to, *from;
+
+ /* normalize rot and handle degenerate cases */
+ if (len < 2) return;
+ if (rot >= len) rot %= len;
+ if (rot == 0) return;
+
+ /* pointer to last entry in list */
+ last = list + (len - 1);
+
+ /* do simple left shift by one */
+ if (rot == 1) {
+ tmp = *list;
+ memcpy(list, list + 1, len - 1);
+ *last = tmp;
+ return;
+ }
+
+ /* do simple right shift by one */
+ if (rot == len - 1) {
+ tmp = *last;
+ memmove(list + 1, list, len - 1);
+ *list = tmp;
+ return;
+ }
+
+ /* otherwise do rotate as a set of cycles in place */
+ cycles = gcd(len, rot); /* number of cycles */
+ do {
+ start = from = list + cycles; /* start index is arbitrary */
+ tmp = *from; /* save entry to be overwritten */
+ for (;;) {
+ to = from; /* next step in cycle */
+ from += rot; /* go right rot positions */
+ if (from > last) from -= len; /* (pointer better not wrap) */
+ if (from == start) break; /* all but one shifted */
+ *to = *from; /* shift left */
+ }
+ *to = tmp; /* complete the circle */
+ } while (--cycles);
+}
+
+/* structure for gzip file read operations */
+typedef struct {
+ int fd; /* file descriptor */
+ int size; /* 1 << size is bytes in buf */
+ unsigned left; /* bytes available at next */
+ unsigned char *buf; /* buffer */
+ unsigned char *next; /* next byte in buffer */
+ char *name; /* file name for error messages */
+} file;
+
+/* reload buffer */
+local int readin(file *in)
+{
+ int len;
+
+ len = read(in->fd, in->buf, 1 << in->size);
+ if (len == -1) bye("error reading ", in->name);
+ in->left = (unsigned)len;
+ in->next = in->buf;
+ return len;
+}
+
+/* read from file in, exit if end-of-file */
+local int readmore(file *in)
+{
+ if (readin(in) == 0) bye("unexpected end of ", in->name);
+ return 0;
+}
+
+#define read1(in) (in->left == 0 ? readmore(in) : 0, \
+ in->left--, *(in->next)++)
+
+/* skip over n bytes of in */
+local void skip(file *in, unsigned n)
+{
+ unsigned bypass;
+
+ if (n > in->left) {
+ n -= in->left;
+ bypass = n & ~((1U << in->size) - 1);
+ if (bypass) {
+ if (lseek(in->fd, (off_t)bypass, SEEK_CUR) == -1)
+ bye("seeking ", in->name);
+ n -= bypass;
+ }
+ readmore(in);
+ if (n > in->left)
+ bye("unexpected end of ", in->name);
+ }
+ in->left -= n;
+ in->next += n;
+}
+
+/* read a four-byte unsigned integer, little-endian, from in */
+unsigned long read4(file *in)
+{
+ unsigned long val;
+
+ val = read1(in);
+ val += (unsigned)read1(in) << 8;
+ val += (unsigned long)read1(in) << 16;
+ val += (unsigned long)read1(in) << 24;
+ return val;
+}
+
+/* skip over gzip header */
+local void gzheader(file *in)
+{
+ int flags;
+ unsigned n;
+
+ if (read1(in) != 31 || read1(in) != 139) bye(in->name, " not a gzip file");
+ if (read1(in) != 8) bye("unknown compression method in", in->name);
+ flags = read1(in);
+ if (flags & 0xe0) bye("unknown header flags set in", in->name);
+ skip(in, 6);
+ if (flags & 4) {
+ n = read1(in);
+ n += (unsigned)(read1(in)) << 8;
+ skip(in, n);
+ }
+ if (flags & 8) while (read1(in) != 0) ;
+ if (flags & 16) while (read1(in) != 0) ;
+ if (flags & 2) skip(in, 2);
+}
+
+/* decompress gzip file "name", return strm with a deflate stream ready to
+ continue compression of the data in the gzip file, and return a file
+ descriptor pointing to where to write the compressed data -- the deflate
+ stream is initialized to compress using level "level" */
+local int gzscan(char *name, z_stream *strm, int level)
+{
+ int ret, lastbit, left, full;
+ unsigned have;
+ unsigned long crc, tot;
+ unsigned char *window;
+ off_t lastoff, end;
+ file gz;
+
+ /* open gzip file */
+ gz.name = name;
+ gz.fd = open(name, O_RDWR, 0);
+ if (gz.fd == -1) bye("cannot open ", name);
+ gz.buf = malloc(CHUNK);
+ if (gz.buf == NULL) bye("out of memory", "");
+ gz.size = LGCHUNK;
+ gz.left = 0;
+
+ /* skip gzip header */
+ gzheader(&gz);
+
+ /* prepare to decompress */
+ window = malloc(DSIZE);
+ if (window == NULL) bye("out of memory", "");
+ strm->zalloc = Z_NULL;
+ strm->zfree = Z_NULL;
+ strm->opaque = Z_NULL;
+ ret = inflateInit2(strm, -15);
+ if (ret != Z_OK) bye("out of memory", " or library mismatch");
+
+ /* decompress the deflate stream, saving append information */
+ lastbit = 0;
+ lastoff = lseek(gz.fd, 0L, SEEK_CUR) - gz.left;
+ left = 0;
+ strm->avail_in = gz.left;
+ strm->next_in = gz.next;
+ crc = crc32(0L, Z_NULL, 0);
+ have = full = 0;
+ do {
+ /* if needed, get more input */
+ if (strm->avail_in == 0) {
+ readmore(&gz);
+ strm->avail_in = gz.left;
+ strm->next_in = gz.next;
+ }
+
+ /* set up output to next available section of sliding window */
+ strm->avail_out = DSIZE - have;
+ strm->next_out = window + have;
+
+ /* inflate and check for errors */
+ ret = inflate(strm, Z_BLOCK);
+ if (ret == Z_STREAM_ERROR) bye("internal stream error!", "");
+ if (ret == Z_MEM_ERROR) bye("out of memory", "");
+ if (ret == Z_DATA_ERROR)
+ bye("invalid compressed data--format violated in", name);
+
+ /* update crc and sliding window pointer */
+ crc = crc32(crc, window + have, DSIZE - have - strm->avail_out);
+ if (strm->avail_out)
+ have = DSIZE - strm->avail_out;
+ else {
+ have = 0;
+ full = 1;
+ }
+
+ /* process end of block */
+ if (strm->data_type & 128) {
+ if (strm->data_type & 64)
+ left = strm->data_type & 0x1f;
+ else {
+ lastbit = strm->data_type & 0x1f;
+ lastoff = lseek(gz.fd, 0L, SEEK_CUR) - strm->avail_in;
+ }
+ }
+ } while (ret != Z_STREAM_END);
+ inflateEnd(strm);
+ gz.left = strm->avail_in;
+ gz.next = strm->next_in;
+
+ /* save the location of the end of the compressed data */
+ end = lseek(gz.fd, 0L, SEEK_CUR) - gz.left;
+
+ /* check gzip trailer and save total for deflate */
+ if (crc != read4(&gz))
+ bye("invalid compressed data--crc mismatch in ", name);
+ tot = strm->total_out;
+ if ((tot & 0xffffffffUL) != read4(&gz))
+ bye("invalid compressed data--length mismatch in", name);
+
+ /* if not at end of file, warn */
+ if (gz.left || readin(&gz))
+ fprintf(stderr,
+ "gzappend warning: junk at end of gzip file overwritten\n");
+
+ /* clear last block bit */
+ lseek(gz.fd, lastoff - (lastbit != 0), SEEK_SET);
+ if (read(gz.fd, gz.buf, 1) != 1) bye("reading after seek on ", name);
+ *gz.buf = (unsigned char)(*gz.buf ^ (1 << ((8 - lastbit) & 7)));
+ lseek(gz.fd, -1L, SEEK_CUR);
+ if (write(gz.fd, gz.buf, 1) != 1) bye("writing after seek to ", name);
+
+ /* if window wrapped, build dictionary from window by rotating */
+ if (full) {
+ rotate(window, DSIZE, have);
+ have = DSIZE;
+ }
+
+ /* set up deflate stream with window, crc, total_in, and leftover bits */
+ ret = deflateInit2(strm, level, Z_DEFLATED, -15, 8, Z_DEFAULT_STRATEGY);
+ if (ret != Z_OK) bye("out of memory", "");
+ deflateSetDictionary(strm, window, have);
+ strm->adler = crc;
+ strm->total_in = tot;
+ if (left) {
+ lseek(gz.fd, --end, SEEK_SET);
+ if (read(gz.fd, gz.buf, 1) != 1) bye("reading after seek on ", name);
+ deflatePrime(strm, 8 - left, *gz.buf);
+ }
+ lseek(gz.fd, end, SEEK_SET);
+
+ /* clean up and return */
+ free(window);
+ free(gz.buf);
+ return gz.fd;
+}
+
+/* append file "name" to gzip file gd using deflate stream strm -- if last
+ is true, then finish off the deflate stream at the end */
+local void gztack(char *name, int gd, z_stream *strm, int last)
+{
+ int fd, len, ret;
+ unsigned left;
+ unsigned char *in, *out;
+
+ /* open file to compress and append */
+ fd = 0;
+ if (name != NULL) {
+ fd = open(name, O_RDONLY, 0);
+ if (fd == -1)
+ fprintf(stderr, "gzappend warning: %s not found, skipping ...\n",
+ name);
+ }
+
+ /* allocate buffers */
+ in = fd == -1 ? NULL : malloc(CHUNK);
+ out = malloc(CHUNK);
+ if (out == NULL) bye("out of memory", "");
+
+ /* compress input file and append to gzip file */
+ do {
+ /* get more input */
+ len = fd == -1 ? 0 : read(fd, in, CHUNK);
+ if (len == -1) {
+ fprintf(stderr,
+ "gzappend warning: error reading %s, skipping rest ...\n",
+ name);
+ len = 0;
+ }
+ strm->avail_in = (unsigned)len;
+ strm->next_in = in;
+ if (len) strm->adler = crc32(strm->adler, in, (unsigned)len);
+
+ /* compress and write all available output */
+ do {
+ strm->avail_out = CHUNK;
+ strm->next_out = out;
+ ret = deflate(strm, last && len == 0 ? Z_FINISH : Z_NO_FLUSH);
+ left = CHUNK - strm->avail_out;
+ while (left) {
+ len = write(gd, out + CHUNK - strm->avail_out - left, left);
+ if (len == -1) bye("writing gzip file", "");
+ left -= (unsigned)len;
+ }
+ } while (strm->avail_out == 0 && ret != Z_STREAM_END);
+ } while (len != 0);
+
+ /* write trailer after last entry */
+ if (last) {
+ deflateEnd(strm);
+ out[0] = (unsigned char)(strm->adler);
+ out[1] = (unsigned char)(strm->adler >> 8);
+ out[2] = (unsigned char)(strm->adler >> 16);
+ out[3] = (unsigned char)(strm->adler >> 24);
+ out[4] = (unsigned char)(strm->total_in);
+ out[5] = (unsigned char)(strm->total_in >> 8);
+ out[6] = (unsigned char)(strm->total_in >> 16);
+ out[7] = (unsigned char)(strm->total_in >> 24);
+ len = 8;
+ do {
+ ret = write(gd, out + 8 - len, len);
+ if (ret == -1) bye("writing gzip file", "");
+ len -= ret;
+ } while (len);
+ close(gd);
+ }
+
+ /* clean up and return */
+ free(out);
+ if (in != NULL) free(in);
+ if (fd > 0) close(fd);
+}
+
+/* process the compression level option if present, scan the gzip file, and
+ append the specified files, or append the data from stdin if no other file
+ names are provided on the command line -- the gzip file must be writable
+ and seekable */
+int main(int argc, char **argv)
+{
+ int gd, level;
+ z_stream strm;
+
+ /* ignore command name */
+ argv++;
+
+ /* provide usage if no arguments */
+ if (*argv == NULL) {
+ printf("gzappend 1.1 (4 Nov 2003) Copyright (C) 2003 Mark Adler\n");
+ printf(
+ "usage: gzappend [-level] file.gz [ addthis [ andthis ... ]]\n");
+ return 0;
+ }
+
+ /* set compression level */
+ level = Z_DEFAULT_COMPRESSION;
+ if (argv[0][0] == '-') {
+ if (argv[0][1] < '0' || argv[0][1] > '9' || argv[0][2] != 0)
+ bye("invalid compression level", "");
+ level = argv[0][1] - '0';
+ if (*++argv == NULL) bye("no gzip file name after options", "");
+ }
+
+ /* prepare to append to gzip file */
+ gd = gzscan(*argv++, &strm, level);
+
+ /* append files on command line, or from stdin if none */
+ if (*argv == NULL)
+ gztack(NULL, gd, &strm, 1);
+ else
+ do {
+ gztack(*argv, gd, &strm, argv[1] == NULL);
+ } while (*++argv != NULL);
+ return 0;
+}
diff --git a/zlib/examples/gzjoin.c b/zlib/examples/gzjoin.c
new file mode 100644
index 0000000..129347c
--- /dev/null
+++ b/zlib/examples/gzjoin.c
@@ -0,0 +1,448 @@
+/* gzjoin -- command to join gzip files into one gzip file
+
+ Copyright (C) 2004 Mark Adler, all rights reserved
+ version 1.0, 11 Dec 2004
+
+ This software is provided 'as-is', without any express or implied
+ warranty. In no event will the author be held liable for any damages
+ arising from the use of this software.
+
+ Permission is granted to anyone to use this software for any purpose,
+ including commercial applications, and to alter it and redistribute it
+ freely, subject to the following restrictions:
+
+ 1. The origin of this software must not be misrepresented; you must not
+ claim that you wrote the original software. If you use this software
+ in a product, an acknowledgment in the product documentation would be
+ appreciated but is not required.
+ 2. Altered source versions must be plainly marked as such, and must not be
+ misrepresented as being the original software.
+ 3. This notice may not be removed or altered from any source distribution.
+
+ Mark Adler madler@alumni.caltech.edu
+ */
+
+/*
+ * Change history:
+ *
+ * 1.0 11 Dec 2004 - First version
+ * 1.1 12 Jun 2005 - Changed ssize_t to long for portability
+ */
+
+/*
+ gzjoin takes one or more gzip files on the command line and writes out a
+ single gzip file that will uncompress to the concatenation of the
+ uncompressed data from the individual gzip files. gzjoin does this without
+ having to recompress any of the data and without having to calculate a new
+ crc32 for the concatenated uncompressed data. gzjoin does however have to
+ decompress all of the input data in order to find the bits in the compressed
+ data that need to be modified to concatenate the streams.
+
+ gzjoin does not do an integrity check on the input gzip files other than
+ checking the gzip header and decompressing the compressed data. They are
+ otherwise assumed to be complete and correct.
+
+ Each joint between gzip files removes at least 18 bytes of previous trailer
+ and subsequent header, and inserts an average of about three bytes to the
+ compressed data in order to connect the streams. The output gzip file
+ has a minimal ten-byte gzip header with no file name or modification time.
+
+ This program was written to illustrate the use of the Z_BLOCK option of
+ inflate() and the crc32_combine() function. gzjoin will not compile with
+ versions of zlib earlier than 1.2.3.
+ */
+
+#include <stdio.h> /* fputs(), fprintf(), fwrite(), putc() */
+#include <stdlib.h> /* exit(), malloc(), free() */
+#include <fcntl.h> /* open() */
+#include <unistd.h> /* close(), read(), lseek() */
+#include "zlib.h"
+ /* crc32(), crc32_combine(), inflateInit2(), inflate(), inflateEnd() */
+
+#define local static
+
+/* exit with an error (return a value to allow use in an expression) */
+local int bail(char *why1, char *why2)
+{
+ fprintf(stderr, "gzjoin error: %s%s, output incomplete\n", why1, why2);
+ exit(1);
+ return 0;
+}
+
+/* -- simple buffered file input with access to the buffer -- */
+
+#define CHUNK 32768 /* must be a power of two and fit in unsigned */
+
+/* bin buffered input file type */
+typedef struct {
+ char *name; /* name of file for error messages */
+ int fd; /* file descriptor */
+ unsigned left; /* bytes remaining at next */
+ unsigned char *next; /* next byte to read */
+ unsigned char *buf; /* allocated buffer of length CHUNK */
+} bin;
+
+/* close a buffered file and free allocated memory */
+local void bclose(bin *in)
+{
+ if (in != NULL) {
+ if (in->fd != -1)
+ close(in->fd);
+ if (in->buf != NULL)
+ free(in->buf);
+ free(in);
+ }
+}
+
+/* open a buffered file for input, return a pointer to type bin, or NULL on
+ failure */
+local bin *bopen(char *name)
+{
+ bin *in;
+
+ in = malloc(sizeof(bin));
+ if (in == NULL)
+ return NULL;
+ in->buf = malloc(CHUNK);
+ in->fd = open(name, O_RDONLY, 0);
+ if (in->buf == NULL || in->fd == -1) {
+ bclose(in);
+ return NULL;
+ }
+ in->left = 0;
+ in->next = in->buf;
+ in->name = name;
+ return in;
+}
+
+/* load buffer from file, return -1 on read error, 0 or 1 on success, with
+ 1 indicating that end-of-file was reached */
+local int bload(bin *in)
+{
+ long len;
+
+ if (in == NULL)
+ return -1;
+ if (in->left != 0)
+ return 0;
+ in->next = in->buf;
+ do {
+ len = (long)read(in->fd, in->buf + in->left, CHUNK - in->left);
+ if (len < 0)
+ return -1;
+ in->left += (unsigned)len;
+ } while (len != 0 && in->left < CHUNK);
+ return len == 0 ? 1 : 0;
+}
+
+/* get a byte from the file, bail if end of file */
+#define bget(in) (in->left ? 0 : bload(in), \
+ in->left ? (in->left--, *(in->next)++) : \
+ bail("unexpected end of file on ", in->name))
+
+/* get a four-byte little-endian unsigned integer from file */
+local unsigned long bget4(bin *in)
+{
+ unsigned long val;
+
+ val = bget(in);
+ val += (unsigned long)(bget(in)) << 8;
+ val += (unsigned long)(bget(in)) << 16;
+ val += (unsigned long)(bget(in)) << 24;
+ return val;
+}
+
+/* skip bytes in file */
+local void bskip(bin *in, unsigned skip)
+{
+ /* check pointer */
+ if (in == NULL)
+ return;
+
+ /* easy case -- skip bytes in buffer */
+ if (skip <= in->left) {
+ in->left -= skip;
+ in->next += skip;
+ return;
+ }
+
+ /* skip what's in buffer, discard buffer contents */
+ skip -= in->left;
+ in->left = 0;
+
+ /* seek past multiples of CHUNK bytes */
+ if (skip > CHUNK) {
+ unsigned left;
+
+ left = skip & (CHUNK - 1);
+ if (left == 0) {
+ /* exact number of chunks: seek all the way minus one byte to check
+ for end-of-file with a read */
+ lseek(in->fd, skip - 1, SEEK_CUR);
+ if (read(in->fd, in->buf, 1) != 1)
+ bail("unexpected end of file on ", in->name);
+ return;
+ }
+
+ /* skip the integral chunks, update skip with remainder */
+ lseek(in->fd, skip - left, SEEK_CUR);
+ skip = left;
+ }
+
+ /* read more input and skip remainder */
+ bload(in);
+ if (skip > in->left)
+ bail("unexpected end of file on ", in->name);
+ in->left -= skip;
+ in->next += skip;
+}
+
+/* -- end of buffered input functions -- */
+
+/* skip the gzip header from file in */
+local void gzhead(bin *in)
+{
+ int flags;
+
+ /* verify gzip magic header and compression method */
+ if (bget(in) != 0x1f || bget(in) != 0x8b || bget(in) != 8)
+ bail(in->name, " is not a valid gzip file");
+
+ /* get and verify flags */
+ flags = bget(in);
+ if ((flags & 0xe0) != 0)
+ bail("unknown reserved bits set in ", in->name);
+
+ /* skip modification time, extra flags, and os */
+ bskip(in, 6);
+
+ /* skip extra field if present */
+ if (flags & 4) {
+ unsigned len;
+
+ len = bget(in);
+ len += (unsigned)(bget(in)) << 8;
+ bskip(in, len);
+ }
+
+ /* skip file name if present */
+ if (flags & 8)
+ while (bget(in) != 0)
+ ;
+
+ /* skip comment if present */
+ if (flags & 16)
+ while (bget(in) != 0)
+ ;
+
+ /* skip header crc if present */
+ if (flags & 2)
+ bskip(in, 2);
+}
+
+/* write a four-byte little-endian unsigned integer to out */
+local void put4(unsigned long val, FILE *out)
+{
+ putc(val & 0xff, out);
+ putc((val >> 8) & 0xff, out);
+ putc((val >> 16) & 0xff, out);
+ putc((val >> 24) & 0xff, out);
+}
+
+/* Load up zlib stream from buffered input, bail if end of file */
+local void zpull(z_streamp strm, bin *in)
+{
+ if (in->left == 0)
+ bload(in);
+ if (in->left == 0)
+ bail("unexpected end of file on ", in->name);
+ strm->avail_in = in->left;
+ strm->next_in = in->next;
+}
+
+/* Write header for gzip file to out and initialize trailer. */
+local void gzinit(unsigned long *crc, unsigned long *tot, FILE *out)
+{
+ fwrite("\x1f\x8b\x08\0\0\0\0\0\0\xff", 1, 10, out);
+ *crc = crc32(0L, Z_NULL, 0);
+ *tot = 0;
+}
+
+/* Copy the compressed data from name, zeroing the last block bit of the last
+ block if clr is true, and adding empty blocks as needed to get to a byte
+ boundary. If clr is false, then the last block becomes the last block of
+ the output, and the gzip trailer is written. crc and tot maintains the
+ crc and length (modulo 2^32) of the output for the trailer. The resulting
+ gzip file is written to out. gzinit() must be called before the first call
+ of gzcopy() to write the gzip header and to initialize crc and tot. */
+local void gzcopy(char *name, int clr, unsigned long *crc, unsigned long *tot,
+ FILE *out)
+{
+ int ret; /* return value from zlib functions */
+ int pos; /* where the "last block" bit is in byte */
+ int last; /* true if processing the last block */
+ bin *in; /* buffered input file */
+ unsigned char *start; /* start of compressed data in buffer */
+ unsigned char *junk; /* buffer for uncompressed data -- discarded */
+ z_off_t len; /* length of uncompressed data (support > 4 GB) */
+ z_stream strm; /* zlib inflate stream */
+
+ /* open gzip file and skip header */
+ in = bopen(name);
+ if (in == NULL)
+ bail("could not open ", name);
+ gzhead(in);
+
+ /* allocate buffer for uncompressed data and initialize raw inflate
+ stream */
+ junk = malloc(CHUNK);
+ strm.zalloc = Z_NULL;
+ strm.zfree = Z_NULL;
+ strm.opaque = Z_NULL;
+ strm.avail_in = 0;
+ strm.next_in = Z_NULL;
+ ret = inflateInit2(&strm, -15);
+ if (junk == NULL || ret != Z_OK)
+ bail("out of memory", "");
+
+ /* inflate and copy compressed data, clear last-block bit if requested */
+ len = 0;
+ zpull(&strm, in);
+ start = strm.next_in;
+ last = start[0] & 1;
+ if (last && clr)
+ start[0] &= ~1;
+ strm.avail_out = 0;
+ for (;;) {
+ /* if input used and output done, write used input and get more */
+ if (strm.avail_in == 0 && strm.avail_out != 0) {
+ fwrite(start, 1, strm.next_in - start, out);
+ start = in->buf;
+ in->left = 0;
+ zpull(&strm, in);
+ }
+
+ /* decompress -- return early when end-of-block reached */
+ strm.avail_out = CHUNK;
+ strm.next_out = junk;
+ ret = inflate(&strm, Z_BLOCK);
+ switch (ret) {
+ case Z_MEM_ERROR:
+ bail("out of memory", "");
+ case Z_DATA_ERROR:
+ bail("invalid compressed data in ", in->name);
+ }
+
+ /* update length of uncompressed data */
+ len += CHUNK - strm.avail_out;
+
+ /* check for block boundary (only get this when block copied out) */
+ if (strm.data_type & 128) {
+ /* if that was the last block, then done */
+ if (last)
+ break;
+
+ /* number of unused bits in last byte */
+ pos = strm.data_type & 7;
+
+ /* find the next last-block bit */
+ if (pos != 0) {
+ /* next last-block bit is in last used byte */
+ pos = 0x100 >> pos;
+ last = strm.next_in[-1] & pos;
+ if (last && clr)
+ strm.next_in[-1] &= ~pos;
+ }
+ else {
+ /* next last-block bit is in next unused byte */
+ if (strm.avail_in == 0) {
+ /* don't have that byte yet -- get it */
+ fwrite(start, 1, strm.next_in - start, out);
+ start = in->buf;
+ in->left = 0;
+ zpull(&strm, in);
+ }
+ last = strm.next_in[0] & 1;
+ if (last && clr)
+ strm.next_in[0] &= ~1;
+ }
+ }
+ }
+
+ /* update buffer with unused input */
+ in->left = strm.avail_in;
+ in->next = strm.next_in;
+
+ /* copy used input, write empty blocks to get to byte boundary */
+ pos = strm.data_type & 7;
+ fwrite(start, 1, in->next - start - 1, out);
+ last = in->next[-1];
+ if (pos == 0 || !clr)
+ /* already at byte boundary, or last file: write last byte */
+ putc(last, out);
+ else {
+ /* append empty blocks to last byte */
+ last &= ((0x100 >> pos) - 1); /* assure unused bits are zero */
+ if (pos & 1) {
+ /* odd -- append an empty stored block */
+ putc(last, out);
+ if (pos == 1)
+ putc(0, out); /* two more bits in block header */
+ fwrite("\0\0\xff\xff", 1, 4, out);
+ }
+ else {
+ /* even -- append 1, 2, or 3 empty fixed blocks */
+ switch (pos) {
+ case 6:
+ putc(last | 8, out);
+ last = 0;
+ case 4:
+ putc(last | 0x20, out);
+ last = 0;
+ case 2:
+ putc(last | 0x80, out);
+ putc(0, out);
+ }
+ }
+ }
+
+ /* update crc and tot */
+ *crc = crc32_combine(*crc, bget4(in), len);
+ *tot += (unsigned long)len;
+
+ /* clean up */
+ inflateEnd(&strm);
+ free(junk);
+ bclose(in);
+
+ /* write trailer if this is the last gzip file */
+ if (!clr) {
+ put4(*crc, out);
+ put4(*tot, out);
+ }
+}
+
+/* join the gzip files on the command line, write result to stdout */
+int main(int argc, char **argv)
+{
+ unsigned long crc, tot; /* running crc and total uncompressed length */
+
+ /* skip command name */
+ argc--;
+ argv++;
+
+ /* show usage if no arguments */
+ if (argc == 0) {
+ fputs("gzjoin usage: gzjoin f1.gz [f2.gz [f3.gz ...]] > fjoin.gz\n",
+ stderr);
+ return 0;
+ }
+
+ /* join gzip files on command line and write to stdout */
+ gzinit(&crc, &tot, stdout);
+ while (argc--)
+ gzcopy(*argv++, argc, &crc, &tot, stdout);
+
+ /* done */
+ return 0;
+}
diff --git a/zlib/examples/gzlog.c b/zlib/examples/gzlog.c
new file mode 100644
index 0000000..d70aaca
--- /dev/null
+++ b/zlib/examples/gzlog.c
@@ -0,0 +1,1058 @@
+/*
+ * gzlog.c
+ * Copyright (C) 2004, 2008 Mark Adler, all rights reserved
+ * For conditions of distribution and use, see copyright notice in gzlog.h
+ * version 2.0, 25 Apr 2008
+ */
+
+/*
+ gzlog provides a mechanism for frequently appending short strings to a gzip
+ file that is efficient both in execution time and compression ratio. The
+ strategy is to write the short strings in an uncompressed form to the end of
+ the gzip file, only compressing when the amount of uncompressed data has
+ reached a given threshold.
+
+ gzlog also provides protection against interruptions in the process due to
+ system crashes. The status of the operation is recorded in an extra field
+ in the gzip file, and is only updated once the gzip file is brought to a
+ valid state. The last data to be appended or compressed is saved in an
+ auxiliary file, so that if the operation is interrupted, it can be completed
+ the next time an append operation is attempted.
+
+ gzlog maintains another auxiliary file with the last 32K of data from the
+ compressed portion, which is preloaded for the compression of the subsequent
+ data. This minimizes the impact to the compression ratio of appending.
+ */
+
+/*
+ Operations Concept:
+
+ Files (log name "foo"):
+ foo.gz -- gzip file with the complete log
+ foo.add -- last message to append or last data to compress
+ foo.dict -- dictionary of the last 32K of data for next compression
+ foo.temp -- temporary dictionary file for compression after this one
+ foo.lock -- lock file for reading and writing the other files
+ foo.repairs -- log file for log file recovery operations (not compressed)
+
+ gzip file structure:
+ - fixed-length (no file name) header with extra field (see below)
+ - compressed data ending initially with empty stored block
+ - uncompressed data filling out originally empty stored block and
+ subsequent stored blocks as needed (16K max each)
+ - gzip trailer
+ - no junk at end (no other gzip streams)
+
+ When appending data, the information in the first three items above plus the
+ foo.add file are sufficient to recover an interrupted append operation. The
+ extra field has the necessary information to restore the start of the last
+ stored block and determine where to append the data in the foo.add file, as
+ well as the crc and length of the gzip data before the append operation.
+
+ The foo.add file is created before the gzip file is marked for append, and
+ deleted after the gzip file is marked as complete. So if the append
+ operation is interrupted, the data to add will still be there. If due to
+ some external force, the foo.add file gets deleted between when the append
+ operation was interrupted and when recovery is attempted, the gzip file will
+ still be restored, but without the appended data.
+
+ When compressing data, the information in the first two items above plus the
+ foo.add file are sufficient to recover an interrupted compress operation.
+ The extra field has the necessary information to find the end of the
+ compressed data, and contains both the crc and length of just the compressed
+ data and of the complete set of data including the contents of the foo.add
+ file.
+
+ Again, the foo.add file is maintained during the compress operation in case
+ of an interruption. If in the unlikely event the foo.add file with the data
+ to be compressed is missing due to some external force, a gzip file with
+ just the previous compressed data will be reconstructed. In this case, all
+ of the data that was to be compressed is lost (approximately one megabyte).
+ This will not occur if all that happened was an interruption of the compress
+ operation.
+
+ The third state that is marked is the replacement of the old dictionary with
+ the new dictionary after a compress operation. Once compression is
+ complete, the gzip file is marked as being in the replace state. This
+ completes the gzip file, so an interrupt after being so marked does not
+ result in recompression. Then the dictionary file is replaced, and the gzip
+ file is marked as completed. This state prevents the possibility of
+ restarting compression with the wrong dictionary file.
+
+ All three operations are wrapped by a lock/unlock procedure. In order to
+ gain exclusive access to the log files, first a foo.lock file must be
+ exclusively created. When all operations are complete, the lock is
+ released by deleting the foo.lock file. If when attempting to create the
+ lock file, it already exists and the modify time of the lock file is more
+ than five minutes old (set by the PATIENCE define below), then the old
+ lock file is considered stale and deleted, and the exclusive creation of
+ the lock file is retried. To assure that there are no false assessments
+ of the staleness of the lock file, the operations periodically touch the
+ lock file to update the modified date.
+
+ Following is the definition of the extra field with all of the information
+ required to enable the above append and compress operations and their
+ recovery if interrupted. Multi-byte values are stored little endian
+ (consistent with the gzip format). File pointers are eight bytes long.
+ The crc's and lengths for the gzip trailer are four bytes long. (Note that
+ the length at the end of a gzip file is used for error checking only, and
+ for large files is actually the length modulo 2^32.) The stored block
+ length is two bytes long. The gzip extra field two-byte identification is
+ "ap" for append. It is assumed that writing the extra field to the file is
+ an "atomic" operation. That is, either all of the extra field is written
+ to the file, or none of it is, if the operation is interrupted right at the
+ point of updating the extra field. This is a reasonable assumption, since
+ the extra field is within the first 52 bytes of the file, which is smaller
+ than any expected block size for a mass storage device (usually 512 bytes or
+ larger).
+
+ Extra field (35 bytes):
+ - Pointer to first stored block length -- this points to the two-byte length
+ of the first stored block, which is followed by the two-byte, one's
+ complement of that length. The stored block length is preceded by the
+ three-bit header of the stored block, which is the actual start of the
+ stored block in the deflate format. See the bit offset field below.
+ - Pointer to the last stored block length. This is the same as above, but
+ for the last stored block of the uncompressed data in the gzip file.
+ Initially this is the same as the first stored block length pointer.
+ When the stored block gets to 16K (see the MAX_STORE define), then a new
+ stored block as added, at which point the last stored block length pointer
+ is different from the first stored block length pointer. When they are
+ different, the first bit of the last stored block header is eight bits, or
+ one byte back from the block length.
+ - Compressed data crc and length. This is the crc and length of the data
+ that is in the compressed portion of the deflate stream. These are used
+ only in the event that the foo.add file containing the data to compress is
+ lost after a compress operation is interrupted.
+ - Total data crc and length. This is the crc and length of all of the data
+ stored in the gzip file, compressed and uncompressed. It is used to
+ reconstruct the gzip trailer when compressing, as well as when recovering
+ interrupted operations.
+ - Final stored block length. This is used to quickly find where to append,
+ and allows the restoration of the original final stored block state when
+ an append operation is interrupted.
+ - First stored block start as the number of bits back from the final stored
+ block first length byte. This value is in the range of 3..10, and is
+ stored as the low three bits of the final byte of the extra field after
+ subtracting three (0..7). This allows the last-block bit of the stored
+ block header to be updated when a new stored block is added, for the case
+ when the first stored block and the last stored block are the same. (When
+ they are different, the numbers of bits back is known to be eight.) This
+ also allows for new compressed data to be appended to the old compressed
+ data in the compress operation, overwriting the previous first stored
+ block, or for the compressed data to be terminated and a valid gzip file
+ reconstructed on the off chance that a compression operation was
+ interrupted and the data to compress in the foo.add file was deleted.
+ - The operation in process. This is the next two bits in the last byte (the
+ bits under the mask 0x18). The are interpreted as 0: nothing in process,
+ 1: append in process, 2: compress in process, 3: replace in process.
+ - The top three bits of the last byte in the extra field are reserved and
+ are currently set to zero.
+
+ Main procedure:
+ - Exclusively create the foo.lock file using the O_CREAT and O_EXCL modes of
+ the system open() call. If the modify time of an existing lock file is
+ more than PATIENCE seconds old, then the lock file is deleted and the
+ exclusive create is retried.
+ - Load the extra field from the foo.gz file, and see if an operation was in
+ progress but not completed. If so, apply the recovery procedure below.
+ - Perform the append procedure with the provided data.
+ - If the uncompressed data in the foo.gz file is 1MB or more, apply the
+ compress procedure.
+ - Delete the foo.lock file.
+
+ Append procedure:
+ - Put what to append in the foo.add file so that the operation can be
+ restarted if this procedure is interrupted.
+ - Mark the foo.gz extra field with the append operation in progress.
+ + Restore the original last-block bit and stored block length of the last
+ stored block from the information in the extra field, in case a previous
+ append operation was interrupted.
+ - Append the provided data to the last stored block, creating new stored
+ blocks as needed and updating the stored blocks last-block bits and
+ lengths.
+ - Update the crc and length with the new data, and write the gzip trailer.
+ - Write over the extra field (with a single write operation) with the new
+ pointers, lengths, and crc's, and mark the gzip file as not in process.
+ Though there is still a foo.add file, it will be ignored since nothing
+ is in process. If a foo.add file is leftover from a previously
+ completed operation, it is truncated when writing new data to it.
+ - Delete the foo.add file.
+
+ Compress and replace procedures:
+ - Read all of the uncompressed data in the stored blocks in foo.gz and write
+ it to foo.add. Also write foo.temp with the last 32K of that data to
+ provide a dictionary for the next invocation of this procedure.
+ - Rewrite the extra field marking foo.gz with a compression in process.
+ * If there is no data provided to compress (due to a missing foo.add file
+ when recovering), reconstruct and truncate the foo.gz file to contain
+ only the previous compressed data and proceed to the step after the next
+ one. Otherwise ...
+ - Compress the data with the dictionary in foo.dict, and write to the
+ foo.gz file starting at the bit immediately following the last previously
+ compressed block. If there is no foo.dict, proceed anyway with the
+ compression at slightly reduced efficiency. (For the foo.dict file to be
+ missing requires some external failure beyond simply the interruption of
+ a compress operation.) During this process, the foo.lock file is
+ periodically touched to assure that that file is not considered stale by
+ another process before we're done. The deflation is terminated with a
+ non-last empty static block (10 bits long), that is then located and
+ written over by a last-bit-set empty stored block.
+ - Append the crc and length of the data in the gzip file (previously
+ calculated during the append operations).
+ - Write over the extra field with the updated stored block offsets, bits
+ back, crc's, and lengths, and mark foo.gz as in process for a replacement
+ of the dictionary.
+ @ Delete the foo.add file.
+ - Replace foo.dict with foo.temp.
+ - Write over the extra field, marking foo.gz as complete.
+
+ Recovery procedure:
+ - If not a replace recovery, read in the foo.add file, and provide that data
+ to the appropriate recovery below. If there is no foo.add file, provide
+ a zero data length to the recovery. In that case, the append recovery
+ restores the foo.gz to the previous compressed + uncompressed data state.
+ For the the compress recovery, a missing foo.add file results in foo.gz
+ being restored to the previous compressed-only data state.
+ - Append recovery:
+ - Pick up append at + step above
+ - Compress recovery:
+ - Pick up compress at * step above
+ - Replace recovery:
+ - Pick up compress at @ step above
+ - Log the repair with a date stamp in foo.repairs
+ */
+
+#include <sys/types.h>
+#include <stdio.h> /* rename, fopen, fprintf, fclose */
+#include <stdlib.h> /* malloc, free */
+#include <string.h> /* strlen, strrchr, strcpy, strncpy, strcmp */
+#include <fcntl.h> /* open */
+#include <unistd.h> /* lseek, read, write, close, unlink, sleep, */
+ /* ftruncate, fsync */
+#include <errno.h> /* errno */
+#include <time.h> /* time, ctime */
+#include <sys/stat.h> /* stat */
+#include <sys/time.h> /* utimes */
+#include "zlib.h" /* crc32 */
+
+#include "gzlog.h" /* header for external access */
+
+#define local static
+typedef unsigned int uint;
+typedef unsigned long ulong;
+
+/* Macro for debugging to deterministically force recovery operations */
+#ifdef DEBUG
+ #include <setjmp.h> /* longjmp */
+ jmp_buf gzlog_jump; /* where to go back to */
+ int gzlog_bail = 0; /* which point to bail at (1..8) */
+ int gzlog_count = -1; /* number of times through to wait */
+# define BAIL(n) do { if (n == gzlog_bail && gzlog_count-- == 0) \
+ longjmp(gzlog_jump, gzlog_bail); } while (0)
+#else
+# define BAIL(n)
+#endif
+
+/* how old the lock file can be in seconds before considering it stale */
+#define PATIENCE 300
+
+/* maximum stored block size in Kbytes -- must be in 1..63 */
+#define MAX_STORE 16
+
+/* number of stored Kbytes to trigger compression (must be >= 32 to allow
+ dictionary construction, and <= 204 * MAX_STORE, in order for >> 10 to
+ discard the stored block headers contribution of five bytes each) */
+#define TRIGGER 1024
+
+/* size of a deflate dictionary (this cannot be changed) */
+#define DICT 32768U
+
+/* values for the operation (2 bits) */
+#define NO_OP 0
+#define APPEND_OP 1
+#define COMPRESS_OP 2
+#define REPLACE_OP 3
+
+/* macros to extract little-endian integers from an unsigned byte buffer */
+#define PULL2(p) ((p)[0]+((uint)((p)[1])<<8))
+#define PULL4(p) (PULL2(p)+((ulong)PULL2(p+2)<<16))
+#define PULL8(p) (PULL4(p)+((off_t)PULL4(p+4)<<32))
+
+/* macros to store integers into a byte buffer in little-endian order */
+#define PUT2(p,a) do {(p)[0]=a;(p)[1]=(a)>>8;} while(0)
+#define PUT4(p,a) do {PUT2(p,a);PUT2(p+2,a>>16);} while(0)
+#define PUT8(p,a) do {PUT4(p,a);PUT4(p+4,a>>32);} while(0)
+
+/* internal structure for log information */
+#define LOGID "\106\035\172" /* should be three non-zero characters */
+struct log {
+ char id[4]; /* contains LOGID to detect inadvertent overwrites */
+ int fd; /* file descriptor for .gz file, opened read/write */
+ char *path; /* allocated path, e.g. "/var/log/foo" or "foo" */
+ char *end; /* end of path, for appending suffices such as ".gz" */
+ off_t first; /* offset of first stored block first length byte */
+ int back; /* location of first block id in bits back from first */
+ uint stored; /* bytes currently in last stored block */
+ off_t last; /* offset of last stored block first length byte */
+ ulong ccrc; /* crc of compressed data */
+ ulong clen; /* length (modulo 2^32) of compressed data */
+ ulong tcrc; /* crc of total data */
+ ulong tlen; /* length (modulo 2^32) of total data */
+ time_t lock; /* last modify time of our lock file */
+};
+
+/* gzip header for gzlog */
+local unsigned char log_gzhead[] = {
+ 0x1f, 0x8b, /* magic gzip id */
+ 8, /* compression method is deflate */
+ 4, /* there is an extra field (no file name) */
+ 0, 0, 0, 0, /* no modification time provided */
+ 0, 0xff, /* no extra flags, no OS specified */
+ 39, 0, 'a', 'p', 35, 0 /* extra field with "ap" subfield */
+ /* 35 is EXTRA, 39 is EXTRA + 4 */
+};
+
+#define HEAD sizeof(log_gzhead) /* should be 16 */
+
+/* initial gzip extra field content (52 == HEAD + EXTRA + 1) */
+local unsigned char log_gzext[] = {
+ 52, 0, 0, 0, 0, 0, 0, 0, /* offset of first stored block length */
+ 52, 0, 0, 0, 0, 0, 0, 0, /* offset of last stored block length */
+ 0, 0, 0, 0, 0, 0, 0, 0, /* compressed data crc and length */
+ 0, 0, 0, 0, 0, 0, 0, 0, /* total data crc and length */
+ 0, 0, /* final stored block data length */
+ 5 /* op is NO_OP, last bit 8 bits back */
+};
+
+#define EXTRA sizeof(log_gzext) /* should be 35 */
+
+/* initial gzip data and trailer */
+local unsigned char log_gzbody[] = {
+ 1, 0, 0, 0xff, 0xff, /* empty stored block (last) */
+ 0, 0, 0, 0, /* crc */
+ 0, 0, 0, 0 /* uncompressed length */
+};
+
+#define BODY sizeof(log_gzbody)
+
+/* Exclusively create foo.lock in order to negotiate exclusive access to the
+ foo.* files. If the modify time of an existing lock file is greater than
+ PATIENCE seconds in the past, then consider the lock file to have been
+ abandoned, delete it, and try the exclusive create again. Save the lock
+ file modify time for verification of ownership. Return 0 on success, or -1
+ on failure, usually due to an access restriction or invalid path. Note that
+ if stat() or unlink() fails, it may be due to another process noticing the
+ abandoned lock file a smidge sooner and deleting it, so those are not
+ flagged as an error. */
+local int log_lock(struct log *log)
+{
+ int fd;
+ struct stat st;
+
+ strcpy(log->end, ".lock");
+ while ((fd = open(log->path, O_CREAT | O_EXCL, 0644)) < 0) {
+ if (errno != EEXIST)
+ return -1;
+ if (stat(log->path, &st) == 0 && time(NULL) - st.st_mtime > PATIENCE) {
+ unlink(log->path);
+ continue;
+ }
+ sleep(2); /* relinquish the CPU for two seconds while waiting */
+ }
+ close(fd);
+ if (stat(log->path, &st) == 0)
+ log->lock = st.st_mtime;
+ return 0;
+}
+
+/* Update the modify time of the lock file to now, in order to prevent another
+ task from thinking that the lock is stale. Save the lock file modify time
+ for verification of ownership. */
+local void log_touch(struct log *log)
+{
+ struct stat st;
+
+ strcpy(log->end, ".lock");
+ utimes(log->path, NULL);
+ if (stat(log->path, &st) == 0)
+ log->lock = st.st_mtime;
+}
+
+/* Check the log file modify time against what is expected. Return true if
+ this is not our lock. If it is our lock, touch it to keep it. */
+local int log_check(struct log *log)
+{
+ struct stat st;
+
+ strcpy(log->end, ".lock");
+ if (stat(log->path, &st) || st.st_mtime != log->lock)
+ return 1;
+ log_touch(log);
+ return 0;
+}
+
+/* Unlock a previously acquired lock, but only if it's ours. */
+local void log_unlock(struct log *log)
+{
+ if (log_check(log))
+ return;
+ strcpy(log->end, ".lock");
+ unlink(log->path);
+ log->lock = 0;
+}
+
+/* Check the gzip header and read in the extra field, filling in the values in
+ the log structure. Return op on success or -1 if the gzip header was not as
+ expected. op is the current operation in progress last written to the extra
+ field. This assumes that the gzip file has already been opened, with the
+ file descriptor log->fd. */
+local int log_head(struct log *log)
+{
+ int op;
+ unsigned char buf[HEAD + EXTRA];
+
+ if (lseek(log->fd, 0, SEEK_SET) < 0 ||
+ read(log->fd, buf, HEAD + EXTRA) != HEAD + EXTRA ||
+ memcmp(buf, log_gzhead, HEAD)) {
+ return -1;
+ }
+ log->first = PULL8(buf + HEAD);
+ log->last = PULL8(buf + HEAD + 8);
+ log->ccrc = PULL4(buf + HEAD + 16);
+ log->clen = PULL4(buf + HEAD + 20);
+ log->tcrc = PULL4(buf + HEAD + 24);
+ log->tlen = PULL4(buf + HEAD + 28);
+ log->stored = PULL2(buf + HEAD + 32);
+ log->back = 3 + (buf[HEAD + 34] & 7);
+ op = (buf[HEAD + 34] >> 3) & 3;
+ return op;
+}
+
+/* Write over the extra field contents, marking the operation as op. Use fsync
+ to assure that the device is written to, and in the requested order. This
+ operation, and only this operation, is assumed to be atomic in order to
+ assure that the log is recoverable in the event of an interruption at any
+ point in the process. Return -1 if the write to foo.gz failed. */
+local int log_mark(struct log *log, int op)
+{
+ int ret;
+ unsigned char ext[EXTRA];
+
+ PUT8(ext, log->first);
+ PUT8(ext + 8, log->last);
+ PUT4(ext + 16, log->ccrc);
+ PUT4(ext + 20, log->clen);
+ PUT4(ext + 24, log->tcrc);
+ PUT4(ext + 28, log->tlen);
+ PUT2(ext + 32, log->stored);
+ ext[34] = log->back - 3 + (op << 3);
+ fsync(log->fd);
+ ret = lseek(log->fd, HEAD, SEEK_SET) < 0 ||
+ write(log->fd, ext, EXTRA) != EXTRA ? -1 : 0;
+ fsync(log->fd);
+ return ret;
+}
+
+/* Rewrite the last block header bits and subsequent zero bits to get to a byte
+ boundary, setting the last block bit if last is true, and then write the
+ remainder of the stored block header (length and one's complement). Leave
+ the file pointer after the end of the last stored block data. Return -1 if
+ there is a read or write failure on the foo.gz file */
+local int log_last(struct log *log, int last)
+{
+ int back, len, mask;
+ unsigned char buf[6];
+
+ /* determine the locations of the bytes and bits to modify */
+ back = log->last == log->first ? log->back : 8;
+ len = back > 8 ? 2 : 1; /* bytes back from log->last */
+ mask = 0x80 >> ((back - 1) & 7); /* mask for block last-bit */
+
+ /* get the byte to modify (one or two back) into buf[0] -- don't need to
+ read the byte if the last-bit is eight bits back, since in that case
+ the entire byte will be modified */
+ buf[0] = 0;
+ if (back != 8 && (lseek(log->fd, log->last - len, SEEK_SET) < 0 ||
+ read(log->fd, buf, 1) != 1))
+ return -1;
+
+ /* change the last-bit of the last stored block as requested -- note
+ that all bits above the last-bit are set to zero, per the type bits
+ of a stored block being 00 and per the convention that the bits to
+ bring the stream to a byte boundary are also zeros */
+ buf[1] = 0;
+ buf[2 - len] = (*buf & (mask - 1)) + (last ? mask : 0);
+
+ /* write the modified stored block header and lengths, move the file
+ pointer to after the last stored block data */
+ PUT2(buf + 2, log->stored);
+ PUT2(buf + 4, log->stored ^ 0xffff);
+ return lseek(log->fd, log->last - len, SEEK_SET) < 0 ||
+ write(log->fd, buf + 2 - len, len + 4) != len + 4 ||
+ lseek(log->fd, log->stored, SEEK_CUR) < 0 ? -1 : 0;
+}
+
+/* Append len bytes from data to the locked and open log file. len may be zero
+ if recovering and no .add file was found. In that case, the previous state
+ of the foo.gz file is restored. The data is appended uncompressed in
+ deflate stored blocks. Return -1 if there was an error reading or writing
+ the foo.gz file. */
+local int log_append(struct log *log, unsigned char *data, size_t len)
+{
+ uint put;
+ off_t end;
+ unsigned char buf[8];
+
+ /* set the last block last-bit and length, in case recovering an
+ interrupted append, then position the file pointer to append to the
+ block */
+ if (log_last(log, 1))
+ return -1;
+
+ /* append, adding stored blocks and updating the offset of the last stored
+ block as needed, and update the total crc and length */
+ while (len) {
+ /* append as much as we can to the last block */
+ put = (MAX_STORE << 10) - log->stored;
+ if (put > len)
+ put = (uint)len;
+ if (put) {
+ if (write(log->fd, data, put) != put)
+ return -1;
+ BAIL(1);
+ log->tcrc = crc32(log->tcrc, data, put);
+ log->tlen += put;
+ log->stored += put;
+ data += put;
+ len -= put;
+ }
+
+ /* if we need to, add a new empty stored block */
+ if (len) {
+ /* mark current block as not last */
+ if (log_last(log, 0))
+ return -1;
+
+ /* point to new, empty stored block */
+ log->last += 4 + log->stored + 1;
+ log->stored = 0;
+ }
+
+ /* mark last block as last, update its length */
+ if (log_last(log, 1))
+ return -1;
+ BAIL(2);
+ }
+
+ /* write the new crc and length trailer, and truncate just in case (could
+ be recovering from partial append with a missing foo.add file) */
+ PUT4(buf, log->tcrc);
+ PUT4(buf + 4, log->tlen);
+ if (write(log->fd, buf, 8) != 8 ||
+ (end = lseek(log->fd, 0, SEEK_CUR)) < 0 || ftruncate(log->fd, end))
+ return -1;
+
+ /* write the extra field, marking the log file as done, delete .add file */
+ if (log_mark(log, NO_OP))
+ return -1;
+ strcpy(log->end, ".add");
+ unlink(log->path); /* ignore error, since may not exist */
+ return 0;
+}
+
+/* Replace the foo.dict file with the foo.temp file. Also delete the foo.add
+ file, since the compress operation may have been interrupted before that was
+ done. Returns 1 if memory could not be allocated, or -1 if reading or
+ writing foo.gz fails, or if the rename fails for some reason other than
+ foo.temp not existing. foo.temp not existing is a permitted error, since
+ the replace operation may have been interrupted after the rename is done,
+ but before foo.gz is marked as complete. */
+local int log_replace(struct log *log)
+{
+ int ret;
+ char *dest;
+
+ /* delete foo.add file */
+ strcpy(log->end, ".add");
+ unlink(log->path); /* ignore error, since may not exist */
+ BAIL(3);
+
+ /* rename foo.name to foo.dict, replacing foo.dict if it exists */
+ strcpy(log->end, ".dict");
+ dest = malloc(strlen(log->path) + 1);
+ if (dest == NULL)
+ return -2;
+ strcpy(dest, log->path);
+ strcpy(log->end, ".temp");
+ ret = rename(log->path, dest);
+ free(dest);
+ if (ret && errno != ENOENT)
+ return -1;
+ BAIL(4);
+
+ /* mark the foo.gz file as done */
+ return log_mark(log, NO_OP);
+}
+
+/* Compress the len bytes at data and append the compressed data to the
+ foo.gz deflate data immediately after the previous compressed data. This
+ overwrites the previous uncompressed data, which was stored in foo.add
+ and is the data provided in data[0..len-1]. If this operation is
+ interrupted, it picks up at the start of this routine, with the foo.add
+ file read in again. If there is no data to compress (len == 0), then we
+ simply terminate the foo.gz file after the previously compressed data,
+ appending a final empty stored block and the gzip trailer. Return -1 if
+ reading or writing the log.gz file failed, or -2 if there was a memory
+ allocation failure. */
+local int log_compress(struct log *log, unsigned char *data, size_t len)
+{
+ int fd;
+ uint got, max;
+ ssize_t dict;
+ off_t end;
+ z_stream strm;
+ unsigned char buf[DICT];
+
+ /* compress and append compressed data */
+ if (len) {
+ /* set up for deflate, allocating memory */
+ strm.zalloc = Z_NULL;
+ strm.zfree = Z_NULL;
+ strm.opaque = Z_NULL;
+ if (deflateInit2(&strm, Z_DEFAULT_COMPRESSION, Z_DEFLATED, -15, 8,
+ Z_DEFAULT_STRATEGY) != Z_OK)
+ return -2;
+
+ /* read in dictionary (last 32K of data that was compressed) */
+ strcpy(log->end, ".dict");
+ fd = open(log->path, O_RDONLY, 0);
+ if (fd >= 0) {
+ dict = read(fd, buf, DICT);
+ close(fd);
+ if (dict < 0) {
+ deflateEnd(&strm);
+ return -1;
+ }
+ if (dict)
+ deflateSetDictionary(&strm, buf, (uint)dict);
+ }
+ log_touch(log);
+
+ /* prime deflate with last bits of previous block, position write
+ pointer to write those bits and overwrite what follows */
+ if (lseek(log->fd, log->first - (log->back > 8 ? 2 : 1),
+ SEEK_SET) < 0 ||
+ read(log->fd, buf, 1) != 1 || lseek(log->fd, -1, SEEK_CUR) < 0) {
+ deflateEnd(&strm);
+ return -1;
+ }
+ deflatePrime(&strm, (8 - log->back) & 7, *buf);
+
+ /* compress, finishing with a partial non-last empty static block */
+ strm.next_in = data;
+ max = (((uint)0 - 1) >> 1) + 1; /* in case int smaller than size_t */
+ do {
+ strm.avail_in = len > max ? max : (uint)len;
+ len -= strm.avail_in;
+ do {
+ strm.avail_out = DICT;
+ strm.next_out = buf;
+ deflate(&strm, len ? Z_NO_FLUSH : Z_PARTIAL_FLUSH);
+ got = DICT - strm.avail_out;
+ if (got && write(log->fd, buf, got) != got) {
+ deflateEnd(&strm);
+ return -1;
+ }
+ log_touch(log);
+ } while (strm.avail_out == 0);
+ } while (len);
+ deflateEnd(&strm);
+ BAIL(5);
+
+ /* find start of empty static block -- scanning backwards the first one
+ bit is the second bit of the block, if the last byte is zero, then
+ we know the byte before that has a one in the top bit, since an
+ empty static block is ten bits long */
+ if ((log->first = lseek(log->fd, -1, SEEK_CUR)) < 0 ||
+ read(log->fd, buf, 1) != 1)
+ return -1;
+ log->first++;
+ if (*buf) {
+ log->back = 1;
+ while ((*buf & ((uint)1 << (8 - log->back++))) == 0)
+ ; /* guaranteed to terminate, since *buf != 0 */
+ }
+ else
+ log->back = 10;
+
+ /* update compressed crc and length */
+ log->ccrc = log->tcrc;
+ log->clen = log->tlen;
+ }
+ else {
+ /* no data to compress -- fix up existing gzip stream */
+ log->tcrc = log->ccrc;
+ log->tlen = log->clen;
+ }
+
+ /* complete and truncate gzip stream */
+ log->last = log->first;
+ log->stored = 0;
+ PUT4(buf, log->tcrc);
+ PUT4(buf + 4, log->tlen);
+ if (log_last(log, 1) || write(log->fd, buf, 8) != 8 ||
+ (end = lseek(log->fd, 0, SEEK_CUR)) < 0 || ftruncate(log->fd, end))
+ return -1;
+ BAIL(6);
+
+ /* mark as being in the replace operation */
+ if (log_mark(log, REPLACE_OP))
+ return -1;
+
+ /* execute the replace operation and mark the file as done */
+ return log_replace(log);
+}
+
+/* log a repair record to the .repairs file */
+local void log_log(struct log *log, int op, char *record)
+{
+ time_t now;
+ FILE *rec;
+
+ now = time(NULL);
+ strcpy(log->end, ".repairs");
+ rec = fopen(log->path, "a");
+ if (rec == NULL)
+ return;
+ fprintf(rec, "%.24s %s recovery: %s\n", ctime(&now), op == APPEND_OP ?
+ "append" : (op == COMPRESS_OP ? "compress" : "replace"), record);
+ fclose(rec);
+ return;
+}
+
+/* Recover the interrupted operation op. First read foo.add for recovering an
+ append or compress operation. Return -1 if there was an error reading or
+ writing foo.gz or reading an existing foo.add, or -2 if there was a memory
+ allocation failure. */
+local int log_recover(struct log *log, int op)
+{
+ int fd, ret = 0;
+ unsigned char *data = NULL;
+ size_t len = 0;
+ struct stat st;
+
+ /* log recovery */
+ log_log(log, op, "start");
+
+ /* load foo.add file if expected and present */
+ if (op == APPEND_OP || op == COMPRESS_OP) {
+ strcpy(log->end, ".add");
+ if (stat(log->path, &st) == 0 && st.st_size) {
+ len = (size_t)(st.st_size);
+ if (len != st.st_size || (data = malloc(st.st_size)) == NULL) {
+ log_log(log, op, "allocation failure");
+ return -2;
+ }
+ if ((fd = open(log->path, O_RDONLY, 0)) < 0) {
+ log_log(log, op, ".add file read failure");
+ return -1;
+ }
+ ret = read(fd, data, len) != len;
+ close(fd);
+ if (ret) {
+ log_log(log, op, ".add file read failure");
+ return -1;
+ }
+ log_log(log, op, "loaded .add file");
+ }
+ else
+ log_log(log, op, "missing .add file!");
+ }
+
+ /* recover the interrupted operation */
+ switch (op) {
+ case APPEND_OP:
+ ret = log_append(log, data, len);
+ break;
+ case COMPRESS_OP:
+ ret = log_compress(log, data, len);
+ break;
+ case REPLACE_OP:
+ ret = log_replace(log);
+ }
+
+ /* log status */
+ log_log(log, op, ret ? "failure" : "complete");
+
+ /* clean up */
+ if (data != NULL)
+ free(data);
+ return ret;
+}
+
+/* Close the foo.gz file (if open) and release the lock. */
+local void log_close(struct log *log)
+{
+ if (log->fd >= 0)
+ close(log->fd);
+ log->fd = -1;
+ log_unlock(log);
+}
+
+/* Open foo.gz, verify the header, and load the extra field contents, after
+ first creating the foo.lock file to gain exclusive access to the foo.*
+ files. If foo.gz does not exist or is empty, then write the initial header,
+ extra, and body content of an empty foo.gz log file. If there is an error
+ creating the lock file due to access restrictions, or an error reading or
+ writing the foo.gz file, or if the foo.gz file is not a proper log file for
+ this object (e.g. not a gzip file or does not contain the expected extra
+ field), then return true. If there is an error, the lock is released.
+ Otherwise, the lock is left in place. */
+local int log_open(struct log *log)
+{
+ int op;
+
+ /* release open file resource if left over -- can occur if lock lost
+ between gzlog_open() and gzlog_write() */
+ if (log->fd >= 0)
+ close(log->fd);
+ log->fd = -1;
+
+ /* negotiate exclusive access */
+ if (log_lock(log) < 0)
+ return -1;
+
+ /* open the log file, foo.gz */
+ strcpy(log->end, ".gz");
+ log->fd = open(log->path, O_RDWR | O_CREAT, 0644);
+ if (log->fd < 0) {
+ log_close(log);
+ return -1;
+ }
+
+ /* if new, initialize foo.gz with an empty log, delete old dictionary */
+ if (lseek(log->fd, 0, SEEK_END) == 0) {
+ if (write(log->fd, log_gzhead, HEAD) != HEAD ||
+ write(log->fd, log_gzext, EXTRA) != EXTRA ||
+ write(log->fd, log_gzbody, BODY) != BODY) {
+ log_close(log);
+ return -1;
+ }
+ strcpy(log->end, ".dict");
+ unlink(log->path);
+ }
+
+ /* verify log file and load extra field information */
+ if ((op = log_head(log)) < 0) {
+ log_close(log);
+ return -1;
+ }
+
+ /* check for interrupted process and if so, recover */
+ if (op != NO_OP && log_recover(log, op)) {
+ log_close(log);
+ return -1;
+ }
+
+ /* touch the lock file to prevent another process from grabbing it */
+ log_touch(log);
+ return 0;
+}
+
+/* See gzlog.h for the description of the external methods below */
+gzlog *gzlog_open(char *path)
+{
+ size_t n;
+ struct log *log;
+
+ /* check arguments */
+ if (path == NULL || *path == 0)
+ return NULL;
+
+ /* allocate and initialize log structure */
+ log = malloc(sizeof(struct log));
+ if (log == NULL)
+ return NULL;
+ strcpy(log->id, LOGID);
+ log->fd = -1;
+
+ /* save path and end of path for name construction */
+ n = strlen(path);
+ log->path = malloc(n + 9); /* allow for ".repairs" */
+ if (log->path == NULL) {
+ free(log);
+ return NULL;
+ }
+ strcpy(log->path, path);
+ log->end = log->path + n;
+
+ /* gain exclusive access and verify log file -- may perform a
+ recovery operation if needed */
+ if (log_open(log)) {
+ free(log->path);
+ free(log);
+ return NULL;
+ }
+
+ /* return pointer to log structure */
+ return log;
+}
+
+/* gzlog_compress() return values:
+ 0: all good
+ -1: file i/o error (usually access issue)
+ -2: memory allocation failure
+ -3: invalid log pointer argument */
+int gzlog_compress(gzlog *logd)
+{
+ int fd, ret;
+ uint block;
+ size_t len, next;
+ unsigned char *data, buf[5];
+ struct log *log = logd;
+
+ /* check arguments */
+ if (log == NULL || strcmp(log->id, LOGID) || len < 0)
+ return -3;
+
+ /* see if we lost the lock -- if so get it again and reload the extra
+ field information (it probably changed), recover last operation if
+ necessary */
+ if (log_check(log) && log_open(log))
+ return -1;
+
+ /* create space for uncompressed data */
+ len = ((size_t)(log->last - log->first) & ~(((size_t)1 << 10) - 1)) +
+ log->stored;
+ if ((data = malloc(len)) == NULL)
+ return -2;
+
+ /* do statement here is just a cheap trick for error handling */
+ do {
+ /* read in the uncompressed data */
+ if (lseek(log->fd, log->first - 1, SEEK_SET) < 0)
+ break;
+ next = 0;
+ while (next < len) {
+ if (read(log->fd, buf, 5) != 5)
+ break;
+ block = PULL2(buf + 1);
+ if (next + block > len ||
+ read(log->fd, (char *)data + next, block) != block)
+ break;
+ next += block;
+ }
+ if (lseek(log->fd, 0, SEEK_CUR) != log->last + 4 + log->stored)
+ break;
+ log_touch(log);
+
+ /* write the uncompressed data to the .add file */
+ strcpy(log->end, ".add");
+ fd = open(log->path, O_WRONLY | O_CREAT | O_TRUNC, 0644);
+ if (fd < 0)
+ break;
+ ret = write(fd, data, len) != len;
+ if (ret | close(fd))
+ break;
+ log_touch(log);
+
+ /* write the dictionary for the next compress to the .temp file */
+ strcpy(log->end, ".temp");
+ fd = open(log->path, O_WRONLY | O_CREAT | O_TRUNC, 0644);
+ if (fd < 0)
+ break;
+ next = DICT > len ? len : DICT;
+ ret = write(fd, (char *)data + len - next, next) != next;
+ if (ret | close(fd))
+ break;
+ log_touch(log);
+
+ /* roll back to compressed data, mark the compress in progress */
+ log->last = log->first;
+ log->stored = 0;
+ if (log_mark(log, COMPRESS_OP))
+ break;
+ BAIL(7);
+
+ /* compress and append the data (clears mark) */
+ ret = log_compress(log, data, len);
+ free(data);
+ return ret;
+ } while (0);
+
+ /* broke out of do above on i/o error */
+ free(data);
+ return -1;
+}
+
+/* gzlog_write() return values:
+ 0: all good
+ -1: file i/o error (usually access issue)
+ -2: memory allocation failure
+ -3: invalid log pointer argument */
+int gzlog_write(gzlog *logd, void *data, size_t len)
+{
+ int fd, ret;
+ struct log *log = logd;
+
+ /* check arguments */
+ if (log == NULL || strcmp(log->id, LOGID) || len < 0)
+ return -3;
+ if (data == NULL || len == 0)
+ return 0;
+
+ /* see if we lost the lock -- if so get it again and reload the extra
+ field information (it probably changed), recover last operation if
+ necessary */
+ if (log_check(log) && log_open(log))
+ return -1;
+
+ /* create and write .add file */
+ strcpy(log->end, ".add");
+ fd = open(log->path, O_WRONLY | O_CREAT | O_TRUNC, 0644);
+ if (fd < 0)
+ return -1;
+ ret = write(fd, data, len) != len;
+ if (ret | close(fd))
+ return -1;
+ log_touch(log);
+
+ /* mark log file with append in progress */
+ if (log_mark(log, APPEND_OP))
+ return -1;
+ BAIL(8);
+
+ /* append data (clears mark) */
+ if (log_append(log, data, len))
+ return -1;
+
+ /* check to see if it's time to compress -- if not, then done */
+ if (((log->last - log->first) >> 10) + (log->stored >> 10) < TRIGGER)
+ return 0;
+
+ /* time to compress */
+ return gzlog_compress(log);
+}
+
+/* gzlog_close() return values:
+ 0: ok
+ -3: invalid log pointer argument */
+int gzlog_close(gzlog *logd)
+{
+ struct log *log = logd;
+
+ /* check arguments */
+ if (log == NULL || strcmp(log->id, LOGID))
+ return -3;
+
+ /* close the log file and release the lock */
+ log_close(log);
+
+ /* free structure and return */
+ if (log->path != NULL)
+ free(log->path);
+ strcpy(log->id, "bad");
+ free(log);
+ return 0;
+}
diff --git a/zlib/examples/gzlog.h b/zlib/examples/gzlog.h
new file mode 100644
index 0000000..c461426
--- /dev/null
+++ b/zlib/examples/gzlog.h
@@ -0,0 +1,89 @@
+/* gzlog.h
+ Copyright (C) 2004, 2008 Mark Adler, all rights reserved
+ version 2.0, 25 Apr 2008
+
+ This software is provided 'as-is', without any express or implied
+ warranty. In no event will the author be held liable for any damages
+ arising from the use of this software.
+
+ Permission is granted to anyone to use this software for any purpose,
+ including commercial applications, and to alter it and redistribute it
+ freely, subject to the following restrictions:
+
+ 1. The origin of this software must not be misrepresented; you must not
+ claim that you wrote the original software. If you use this software
+ in a product, an acknowledgment in the product documentation would be
+ appreciated but is not required.
+ 2. Altered source versions must be plainly marked as such, and must not be
+ misrepresented as being the original software.
+ 3. This notice may not be removed or altered from any source distribution.
+
+ Mark Adler madler@alumni.caltech.edu
+ */
+
+/* Version History:
+ 1.0 26 Nov 2004 First version
+ 2.0 25 Apr 2008 Complete redesign for recovery of interrupted operations
+ Interface changed slightly in that now path is a prefix
+ Compression now occurs as needed during gzlog_write()
+ gzlog_write() now always leaves the log file as valid gzip
+ */
+
+/*
+ The gzlog object allows writing short messages to a gzipped log file,
+ opening the log file locked for small bursts, and then closing it. The log
+ object works by appending stored (uncompressed) data to the gzip file until
+ 1 MB has been accumulated. At that time, the stored data is compressed, and
+ replaces the uncompressed data in the file. The log file is truncated to
+ its new size at that time. After each write operation, the log file is a
+ valid gzip file that can decompressed to recover what was written.
+
+ The gzlog operations can be interupted at any point due to an application or
+ system crash, and the log file will be recovered the next time the log is
+ opened with gzlog_open().
+ */
+
+#ifndef GZLOG_H
+#define GZLOG_H
+
+/* gzlog object type */
+typedef void gzlog;
+
+/* Open a gzlog object, creating the log file if it does not exist. Return
+ NULL on error. Note that gzlog_open() could take a while to complete if it
+ has to wait to verify that a lock is stale (possibly for five minutes), or
+ if there is significant contention with other instantiations of this object
+ when locking the resource. path is the prefix of the file names created by
+ this object. If path is "foo", then the log file will be "foo.gz", and
+ other auxiliary files will be created and destroyed during the process:
+ "foo.dict" for a compression dictionary, "foo.temp" for a temporary (next)
+ dictionary, "foo.add" for data being added or compressed, "foo.lock" for the
+ lock file, and "foo.repairs" to log recovery operations performed due to
+ interrupted gzlog operations. A gzlog_open() followed by a gzlog_close()
+ will recover a previously interrupted operation, if any. */
+gzlog *gzlog_open(char *path);
+
+/* Write to a gzlog object. Return zero on success, -1 if there is a file i/o
+ error on any of the gzlog files (this should not happen if gzlog_open()
+ succeeded, unless the device has run out of space or leftover auxiliary
+ files have permissions or ownership that prevent their use), -2 if there is
+ a memory allocation failure, or -3 if the log argument is invalid (e.g. if
+ it was not created by gzlog_open()). This function will write data to the
+ file uncompressed, until 1 MB has been accumulated, at which time that data
+ will be compressed. The log file will be a valid gzip file upon successful
+ return. */
+int gzlog_write(gzlog *log, void *data, size_t len);
+
+/* Force compression of any uncompressed data in the log. This should be used
+ sparingly, if at all. The main application would be when a log file will
+ not be appended to again. If this is used to compress frequently while
+ appending, it will both significantly increase the execution time and
+ reduce the compression ratio. The return codes are the same as for
+ gzlog_write(). */
+int gzlog_compress(gzlog *log);
+
+/* Close a gzlog object. Return zero on success, -3 if the log argument is
+ invalid. The log object is freed, and so cannot be referenced again. */
+int gzlog_close(gzlog *log);
+
+#endif
diff --git a/zlib/examples/zlib_how.html b/zlib/examples/zlib_how.html
new file mode 100644
index 0000000..444ff1c
--- /dev/null
+++ b/zlib/examples/zlib_how.html
@@ -0,0 +1,545 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN"
+ "http://www.w3.org/TR/REC-html40/loose.dtd">
+<html>
+<head>
+<meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1">
+<title>zlib Usage Example</title>
+<!-- Copyright (c) 2004, 2005 Mark Adler. -->
+</head>
+<body bgcolor="#FFFFFF" text="#000000" link="#0000FF" vlink="#00A000">
+<h2 align="center"> zlib Usage Example </h2>
+We often get questions about how the <tt>deflate()</tt> and <tt>inflate()</tt> functions should be used.
+Users wonder when they should provide more input, when they should use more output,
+what to do with a <tt>Z_BUF_ERROR</tt>, how to make sure the process terminates properly, and
+so on. So for those who have read <tt>zlib.h</tt> (a few times), and
+would like further edification, below is an annotated example in C of simple routines to compress and decompress
+from an input file to an output file using <tt>deflate()</tt> and <tt>inflate()</tt> respectively. The
+annotations are interspersed between lines of the code. So please read between the lines.
+We hope this helps explain some of the intricacies of <em>zlib</em>.
+<p>
+Without further adieu, here is the program <a href="zpipe.c"><tt>zpipe.c</tt></a>:
+<pre><b>
+/* zpipe.c: example of proper use of zlib's inflate() and deflate()
+ Not copyrighted -- provided to the public domain
+ Version 1.4 11 December 2005 Mark Adler */
+
+/* Version history:
+ 1.0 30 Oct 2004 First version
+ 1.1 8 Nov 2004 Add void casting for unused return values
+ Use switch statement for inflate() return values
+ 1.2 9 Nov 2004 Add assertions to document zlib guarantees
+ 1.3 6 Apr 2005 Remove incorrect assertion in inf()
+ 1.4 11 Dec 2005 Add hack to avoid MSDOS end-of-line conversions
+ Avoid some compiler warnings for input and output buffers
+ */
+</b></pre><!-- -->
+We now include the header files for the required definitions. From
+<tt>stdio.h</tt> we use <tt>fopen()</tt>, <tt>fread()</tt>, <tt>fwrite()</tt>,
+<tt>feof()</tt>, <tt>ferror()</tt>, and <tt>fclose()</tt> for file i/o, and
+<tt>fputs()</tt> for error messages. From <tt>string.h</tt> we use
+<tt>strcmp()</tt> for command line argument processing.
+From <tt>assert.h</tt> we use the <tt>assert()</tt> macro.
+From <tt>zlib.h</tt>
+we use the basic compression functions <tt>deflateInit()</tt>,
+<tt>deflate()</tt>, and <tt>deflateEnd()</tt>, and the basic decompression
+functions <tt>inflateInit()</tt>, <tt>inflate()</tt>, and
+<tt>inflateEnd()</tt>.
+<pre><b>
+#include &lt;stdio.h&gt;
+#include &lt;string.h&gt;
+#include &lt;assert.h&gt;
+#include "zlib.h"
+</b></pre><!-- -->
+This is an ugly hack required to avoid corruption of the input and output data on
+Windows/MS-DOS systems. Without this, those systems would assume that the input and output
+files are text, and try to convert the end-of-line characters from one standard to
+another. That would corrupt binary data, and in particular would render the compressed data unusable.
+This sets the input and output to binary which suppresses the end-of-line conversions.
+<tt>SET_BINARY_MODE()</tt> will be used later on <tt>stdin</tt> and <tt>stdout</tt>, at the beginning of <tt>main()</tt>.
+<pre><b>
+#if defined(MSDOS) || defined(OS2) || defined(WIN32) || defined(__CYGWIN__)
+# include &lt;fcntl.h&gt;
+# include &lt;io.h&gt;
+# define SET_BINARY_MODE(file) setmode(fileno(file), O_BINARY)
+#else
+# define SET_BINARY_MODE(file)
+#endif
+</b></pre><!-- -->
+<tt>CHUNK</tt> is simply the buffer size for feeding data to and pulling data
+from the <em>zlib</em> routines. Larger buffer sizes would be more efficient,
+especially for <tt>inflate()</tt>. If the memory is available, buffers sizes
+on the order of 128K or 256K bytes should be used.
+<pre><b>
+#define CHUNK 16384
+</b></pre><!-- -->
+The <tt>def()</tt> routine compresses data from an input file to an output file. The output data
+will be in the <em>zlib</em> format, which is different from the <em>gzip</em> or <em>zip</em>
+formats. The <em>zlib</em> format has a very small header of only two bytes to identify it as
+a <em>zlib</em> stream and to provide decoding information, and a four-byte trailer with a fast
+check value to verify the integrity of the uncompressed data after decoding.
+<pre><b>
+/* Compress from file source to file dest until EOF on source.
+ def() returns Z_OK on success, Z_MEM_ERROR if memory could not be
+ allocated for processing, Z_STREAM_ERROR if an invalid compression
+ level is supplied, Z_VERSION_ERROR if the version of zlib.h and the
+ version of the library linked do not match, or Z_ERRNO if there is
+ an error reading or writing the files. */
+int def(FILE *source, FILE *dest, int level)
+{
+</b></pre>
+Here are the local variables for <tt>def()</tt>. <tt>ret</tt> will be used for <em>zlib</em>
+return codes. <tt>flush</tt> will keep track of the current flushing state for <tt>deflate()</tt>,
+which is either no flushing, or flush to completion after the end of the input file is reached.
+<tt>have</tt> is the amount of data returned from <tt>deflate()</tt>. The <tt>strm</tt> structure
+is used to pass information to and from the <em>zlib</em> routines, and to maintain the
+<tt>deflate()</tt> state. <tt>in</tt> and <tt>out</tt> are the input and output buffers for
+<tt>deflate()</tt>.
+<pre><b>
+ int ret, flush;
+ unsigned have;
+ z_stream strm;
+ unsigned char in[CHUNK];
+ unsigned char out[CHUNK];
+</b></pre><!-- -->
+The first thing we do is to initialize the <em>zlib</em> state for compression using
+<tt>deflateInit()</tt>. This must be done before the first use of <tt>deflate()</tt>.
+The <tt>zalloc</tt>, <tt>zfree</tt>, and <tt>opaque</tt> fields in the <tt>strm</tt>
+structure must be initialized before calling <tt>deflateInit()</tt>. Here they are
+set to the <em>zlib</em> constant <tt>Z_NULL</tt> to request that <em>zlib</em> use
+the default memory allocation routines. An application may also choose to provide
+custom memory allocation routines here. <tt>deflateInit()</tt> will allocate on the
+order of 256K bytes for the internal state.
+(See <a href="zlib_tech.html"><em>zlib Technical Details</em></a>.)
+<p>
+<tt>deflateInit()</tt> is called with a pointer to the structure to be initialized and
+the compression level, which is an integer in the range of -1 to 9. Lower compression
+levels result in faster execution, but less compression. Higher levels result in
+greater compression, but slower execution. The <em>zlib</em> constant Z_DEFAULT_COMPRESSION,
+equal to -1,
+provides a good compromise between compression and speed and is equivalent to level 6.
+Level 0 actually does no compression at all, and in fact expands the data slightly to produce
+the <em>zlib</em> format (it is not a byte-for-byte copy of the input).
+More advanced applications of <em>zlib</em>
+may use <tt>deflateInit2()</tt> here instead. Such an application may want to reduce how
+much memory will be used, at some price in compression. Or it may need to request a
+<em>gzip</em> header and trailer instead of a <em>zlib</em> header and trailer, or raw
+encoding with no header or trailer at all.
+<p>
+We must check the return value of <tt>deflateInit()</tt> against the <em>zlib</em> constant
+<tt>Z_OK</tt> to make sure that it was able to
+allocate memory for the internal state, and that the provided arguments were valid.
+<tt>deflateInit()</tt> will also check that the version of <em>zlib</em> that the <tt>zlib.h</tt>
+file came from matches the version of <em>zlib</em> actually linked with the program. This
+is especially important for environments in which <em>zlib</em> is a shared library.
+<p>
+Note that an application can initialize multiple, independent <em>zlib</em> streams, which can
+operate in parallel. The state information maintained in the structure allows the <em>zlib</em>
+routines to be reentrant.
+<pre><b>
+ /* allocate deflate state */
+ strm.zalloc = Z_NULL;
+ strm.zfree = Z_NULL;
+ strm.opaque = Z_NULL;
+ ret = deflateInit(&amp;strm, level);
+ if (ret != Z_OK)
+ return ret;
+</b></pre><!-- -->
+With the pleasantries out of the way, now we can get down to business. The outer <tt>do</tt>-loop
+reads all of the input file and exits at the bottom of the loop once end-of-file is reached.
+This loop contains the only call of <tt>deflate()</tt>. So we must make sure that all of the
+input data has been processed and that all of the output data has been generated and consumed
+before we fall out of the loop at the bottom.
+<pre><b>
+ /* compress until end of file */
+ do {
+</b></pre>
+We start off by reading data from the input file. The number of bytes read is put directly
+into <tt>avail_in</tt>, and a pointer to those bytes is put into <tt>next_in</tt>. We also
+check to see if end-of-file on the input has been reached. If we are at the end of file, then <tt>flush</tt> is set to the
+<em>zlib</em> constant <tt>Z_FINISH</tt>, which is later passed to <tt>deflate()</tt> to
+indicate that this is the last chunk of input data to compress. We need to use <tt>feof()</tt>
+to check for end-of-file as opposed to seeing if fewer than <tt>CHUNK</tt> bytes have been read. The
+reason is that if the input file length is an exact multiple of <tt>CHUNK</tt>, we will miss
+the fact that we got to the end-of-file, and not know to tell <tt>deflate()</tt> to finish
+up the compressed stream. If we are not yet at the end of the input, then the <em>zlib</em>
+constant <tt>Z_NO_FLUSH</tt> will be passed to <tt>deflate</tt> to indicate that we are still
+in the middle of the uncompressed data.
+<p>
+If there is an error in reading from the input file, the process is aborted with
+<tt>deflateEnd()</tt> being called to free the allocated <em>zlib</em> state before returning
+the error. We wouldn't want a memory leak, now would we? <tt>deflateEnd()</tt> can be called
+at any time after the state has been initialized. Once that's done, <tt>deflateInit()</tt> (or
+<tt>deflateInit2()</tt>) would have to be called to start a new compression process. There is
+no point here in checking the <tt>deflateEnd()</tt> return code. The deallocation can't fail.
+<pre><b>
+ strm.avail_in = fread(in, 1, CHUNK, source);
+ if (ferror(source)) {
+ (void)deflateEnd(&amp;strm);
+ return Z_ERRNO;
+ }
+ flush = feof(source) ? Z_FINISH : Z_NO_FLUSH;
+ strm.next_in = in;
+</b></pre><!-- -->
+The inner <tt>do</tt>-loop passes our chunk of input data to <tt>deflate()</tt>, and then
+keeps calling <tt>deflate()</tt> until it is done producing output. Once there is no more
+new output, <tt>deflate()</tt> is guaranteed to have consumed all of the input, i.e.,
+<tt>avail_in</tt> will be zero.
+<pre><b>
+ /* run deflate() on input until output buffer not full, finish
+ compression if all of source has been read in */
+ do {
+</b></pre>
+Output space is provided to <tt>deflate()</tt> by setting <tt>avail_out</tt> to the number
+of available output bytes and <tt>next_out</tt> to a pointer to that space.
+<pre><b>
+ strm.avail_out = CHUNK;
+ strm.next_out = out;
+</b></pre>
+Now we call the compression engine itself, <tt>deflate()</tt>. It takes as many of the
+<tt>avail_in</tt> bytes at <tt>next_in</tt> as it can process, and writes as many as
+<tt>avail_out</tt> bytes to <tt>next_out</tt>. Those counters and pointers are then
+updated past the input data consumed and the output data written. It is the amount of
+output space available that may limit how much input is consumed.
+Hence the inner loop to make sure that
+all of the input is consumed by providing more output space each time. Since <tt>avail_in</tt>
+and <tt>next_in</tt> are updated by <tt>deflate()</tt>, we don't have to mess with those
+between <tt>deflate()</tt> calls until it's all used up.
+<p>
+The parameters to <tt>deflate()</tt> are a pointer to the <tt>strm</tt> structure containing
+the input and output information and the internal compression engine state, and a parameter
+indicating whether and how to flush data to the output. Normally <tt>deflate</tt> will consume
+several K bytes of input data before producing any output (except for the header), in order
+to accumulate statistics on the data for optimum compression. It will then put out a burst of
+compressed data, and proceed to consume more input before the next burst. Eventually,
+<tt>deflate()</tt>
+must be told to terminate the stream, complete the compression with provided input data, and
+write out the trailer check value. <tt>deflate()</tt> will continue to compress normally as long
+as the flush parameter is <tt>Z_NO_FLUSH</tt>. Once the <tt>Z_FINISH</tt> parameter is provided,
+<tt>deflate()</tt> will begin to complete the compressed output stream. However depending on how
+much output space is provided, <tt>deflate()</tt> may have to be called several times until it
+has provided the complete compressed stream, even after it has consumed all of the input. The flush
+parameter must continue to be <tt>Z_FINISH</tt> for those subsequent calls.
+<p>
+There are other values of the flush parameter that are used in more advanced applications. You can
+force <tt>deflate()</tt> to produce a burst of output that encodes all of the input data provided
+so far, even if it wouldn't have otherwise, for example to control data latency on a link with
+compressed data. You can also ask that <tt>deflate()</tt> do that as well as erase any history up to
+that point so that what follows can be decompressed independently, for example for random access
+applications. Both requests will degrade compression by an amount depending on how often such
+requests are made.
+<p>
+<tt>deflate()</tt> has a return value that can indicate errors, yet we do not check it here. Why
+not? Well, it turns out that <tt>deflate()</tt> can do no wrong here. Let's go through
+<tt>deflate()</tt>'s return values and dispense with them one by one. The possible values are
+<tt>Z_OK</tt>, <tt>Z_STREAM_END</tt>, <tt>Z_STREAM_ERROR</tt>, or <tt>Z_BUF_ERROR</tt>. <tt>Z_OK</tt>
+is, well, ok. <tt>Z_STREAM_END</tt> is also ok and will be returned for the last call of
+<tt>deflate()</tt>. This is already guaranteed by calling <tt>deflate()</tt> with <tt>Z_FINISH</tt>
+until it has no more output. <tt>Z_STREAM_ERROR</tt> is only possible if the stream is not
+initialized properly, but we did initialize it properly. There is no harm in checking for
+<tt>Z_STREAM_ERROR</tt> here, for example to check for the possibility that some
+other part of the application inadvertently clobbered the memory containing the <em>zlib</em> state.
+<tt>Z_BUF_ERROR</tt> will be explained further below, but
+suffice it to say that this is simply an indication that <tt>deflate()</tt> could not consume
+more input or produce more output. <tt>deflate()</tt> can be called again with more output space
+or more available input, which it will be in this code.
+<pre><b>
+ ret = deflate(&amp;strm, flush); /* no bad return value */
+ assert(ret != Z_STREAM_ERROR); /* state not clobbered */
+</b></pre>
+Now we compute how much output <tt>deflate()</tt> provided on the last call, which is the
+difference between how much space was provided before the call, and how much output space
+is still available after the call. Then that data, if any, is written to the output file.
+We can then reuse the output buffer for the next call of <tt>deflate()</tt>. Again if there
+is a file i/o error, we call <tt>deflateEnd()</tt> before returning to avoid a memory leak.
+<pre><b>
+ have = CHUNK - strm.avail_out;
+ if (fwrite(out, 1, have, dest) != have || ferror(dest)) {
+ (void)deflateEnd(&amp;strm);
+ return Z_ERRNO;
+ }
+</b></pre>
+The inner <tt>do</tt>-loop is repeated until the last <tt>deflate()</tt> call fails to fill the
+provided output buffer. Then we know that <tt>deflate()</tt> has done as much as it can with
+the provided input, and that all of that input has been consumed. We can then fall out of this
+loop and reuse the input buffer.
+<p>
+The way we tell that <tt>deflate()</tt> has no more output is by seeing that it did not fill
+the output buffer, leaving <tt>avail_out</tt> greater than zero. However suppose that
+<tt>deflate()</tt> has no more output, but just so happened to exactly fill the output buffer!
+<tt>avail_out</tt> is zero, and we can't tell that <tt>deflate()</tt> has done all it can.
+As far as we know, <tt>deflate()</tt>
+has more output for us. So we call it again. But now <tt>deflate()</tt> produces no output
+at all, and <tt>avail_out</tt> remains unchanged as <tt>CHUNK</tt>. That <tt>deflate()</tt> call
+wasn't able to do anything, either consume input or produce output, and so it returns
+<tt>Z_BUF_ERROR</tt>. (See, I told you I'd cover this later.) However this is not a problem at
+all. Now we finally have the desired indication that <tt>deflate()</tt> is really done,
+and so we drop out of the inner loop to provide more input to <tt>deflate()</tt>.
+<p>
+With <tt>flush</tt> set to <tt>Z_FINISH</tt>, this final set of <tt>deflate()</tt> calls will
+complete the output stream. Once that is done, subsequent calls of <tt>deflate()</tt> would return
+<tt>Z_STREAM_ERROR</tt> if the flush parameter is not <tt>Z_FINISH</tt>, and do no more processing
+until the state is reinitialized.
+<p>
+Some applications of <em>zlib</em> have two loops that call <tt>deflate()</tt>
+instead of the single inner loop we have here. The first loop would call
+without flushing and feed all of the data to <tt>deflate()</tt>. The second loop would call
+<tt>deflate()</tt> with no more
+data and the <tt>Z_FINISH</tt> parameter to complete the process. As you can see from this
+example, that can be avoided by simply keeping track of the current flush state.
+<pre><b>
+ } while (strm.avail_out == 0);
+ assert(strm.avail_in == 0); /* all input will be used */
+</b></pre><!-- -->
+Now we check to see if we have already processed all of the input file. That information was
+saved in the <tt>flush</tt> variable, so we see if that was set to <tt>Z_FINISH</tt>. If so,
+then we're done and we fall out of the outer loop. We're guaranteed to get <tt>Z_STREAM_END</tt>
+from the last <tt>deflate()</tt> call, since we ran it until the last chunk of input was
+consumed and all of the output was generated.
+<pre><b>
+ /* done when last data in file processed */
+ } while (flush != Z_FINISH);
+ assert(ret == Z_STREAM_END); /* stream will be complete */
+</b></pre><!-- -->
+The process is complete, but we still need to deallocate the state to avoid a memory leak
+(or rather more like a memory hemorrhage if you didn't do this). Then
+finally we can return with a happy return value.
+<pre><b>
+ /* clean up and return */
+ (void)deflateEnd(&amp;strm);
+ return Z_OK;
+}
+</b></pre><!-- -->
+Now we do the same thing for decompression in the <tt>inf()</tt> routine. <tt>inf()</tt>
+decompresses what is hopefully a valid <em>zlib</em> stream from the input file and writes the
+uncompressed data to the output file. Much of the discussion above for <tt>def()</tt>
+applies to <tt>inf()</tt> as well, so the discussion here will focus on the differences between
+the two.
+<pre><b>
+/* Decompress from file source to file dest until stream ends or EOF.
+ inf() returns Z_OK on success, Z_MEM_ERROR if memory could not be
+ allocated for processing, Z_DATA_ERROR if the deflate data is
+ invalid or incomplete, Z_VERSION_ERROR if the version of zlib.h and
+ the version of the library linked do not match, or Z_ERRNO if there
+ is an error reading or writing the files. */
+int inf(FILE *source, FILE *dest)
+{
+</b></pre>
+The local variables have the same functionality as they do for <tt>def()</tt>. The
+only difference is that there is no <tt>flush</tt> variable, since <tt>inflate()</tt>
+can tell from the <em>zlib</em> stream itself when the stream is complete.
+<pre><b>
+ int ret;
+ unsigned have;
+ z_stream strm;
+ unsigned char in[CHUNK];
+ unsigned char out[CHUNK];
+</b></pre><!-- -->
+The initialization of the state is the same, except that there is no compression level,
+of course, and two more elements of the structure are initialized. <tt>avail_in</tt>
+and <tt>next_in</tt> must be initialized before calling <tt>inflateInit()</tt>. This
+is because the application has the option to provide the start of the zlib stream in
+order for <tt>inflateInit()</tt> to have access to information about the compression
+method to aid in memory allocation. In the current implementation of <em>zlib</em>
+(up through versions 1.2.x), the method-dependent memory allocations are deferred to the first call of
+<tt>inflate()</tt> anyway. However those fields must be initialized since later versions
+of <em>zlib</em> that provide more compression methods may take advantage of this interface.
+In any case, no decompression is performed by <tt>inflateInit()</tt>, so the
+<tt>avail_out</tt> and <tt>next_out</tt> fields do not need to be initialized before calling.
+<p>
+Here <tt>avail_in</tt> is set to zero and <tt>next_in</tt> is set to <tt>Z_NULL</tt> to
+indicate that no input data is being provided.
+<pre><b>
+ /* allocate inflate state */
+ strm.zalloc = Z_NULL;
+ strm.zfree = Z_NULL;
+ strm.opaque = Z_NULL;
+ strm.avail_in = 0;
+ strm.next_in = Z_NULL;
+ ret = inflateInit(&amp;strm);
+ if (ret != Z_OK)
+ return ret;
+</b></pre><!-- -->
+The outer <tt>do</tt>-loop decompresses input until <tt>inflate()</tt> indicates
+that it has reached the end of the compressed data and has produced all of the uncompressed
+output. This is in contrast to <tt>def()</tt> which processes all of the input file.
+If end-of-file is reached before the compressed data self-terminates, then the compressed
+data is incomplete and an error is returned.
+<pre><b>
+ /* decompress until deflate stream ends or end of file */
+ do {
+</b></pre>
+We read input data and set the <tt>strm</tt> structure accordingly. If we've reached the
+end of the input file, then we leave the outer loop and report an error, since the
+compressed data is incomplete. Note that we may read more data than is eventually consumed
+by <tt>inflate()</tt>, if the input file continues past the <em>zlib</em> stream.
+For applications where <em>zlib</em> streams are embedded in other data, this routine would
+need to be modified to return the unused data, or at least indicate how much of the input
+data was not used, so the application would know where to pick up after the <em>zlib</em> stream.
+<pre><b>
+ strm.avail_in = fread(in, 1, CHUNK, source);
+ if (ferror(source)) {
+ (void)inflateEnd(&amp;strm);
+ return Z_ERRNO;
+ }
+ if (strm.avail_in == 0)
+ break;
+ strm.next_in = in;
+</b></pre><!-- -->
+The inner <tt>do</tt>-loop has the same function it did in <tt>def()</tt>, which is to
+keep calling <tt>inflate()</tt> until has generated all of the output it can with the
+provided input.
+<pre><b>
+ /* run inflate() on input until output buffer not full */
+ do {
+</b></pre>
+Just like in <tt>def()</tt>, the same output space is provided for each call of <tt>inflate()</tt>.
+<pre><b>
+ strm.avail_out = CHUNK;
+ strm.next_out = out;
+</b></pre>
+Now we run the decompression engine itself. There is no need to adjust the flush parameter, since
+the <em>zlib</em> format is self-terminating. The main difference here is that there are
+return values that we need to pay attention to. <tt>Z_DATA_ERROR</tt>
+indicates that <tt>inflate()</tt> detected an error in the <em>zlib</em> compressed data format,
+which means that either the data is not a <em>zlib</em> stream to begin with, or that the data was
+corrupted somewhere along the way since it was compressed. The other error to be processed is
+<tt>Z_MEM_ERROR</tt>, which can occur since memory allocation is deferred until <tt>inflate()</tt>
+needs it, unlike <tt>deflate()</tt>, whose memory is allocated at the start by <tt>deflateInit()</tt>.
+<p>
+Advanced applications may use
+<tt>deflateSetDictionary()</tt> to prime <tt>deflate()</tt> with a set of likely data to improve the
+first 32K or so of compression. This is noted in the <em>zlib</em> header, so <tt>inflate()</tt>
+requests that that dictionary be provided before it can start to decompress. Without the dictionary,
+correct decompression is not possible. For this routine, we have no idea what the dictionary is,
+so the <tt>Z_NEED_DICT</tt> indication is converted to a <tt>Z_DATA_ERROR</tt>.
+<p>
+<tt>inflate()</tt> can also return <tt>Z_STREAM_ERROR</tt>, which should not be possible here,
+but could be checked for as noted above for <tt>def()</tt>. <tt>Z_BUF_ERROR</tt> does not need to be
+checked for here, for the same reasons noted for <tt>def()</tt>. <tt>Z_STREAM_END</tt> will be
+checked for later.
+<pre><b>
+ ret = inflate(&amp;strm, Z_NO_FLUSH);
+ assert(ret != Z_STREAM_ERROR); /* state not clobbered */
+ switch (ret) {
+ case Z_NEED_DICT:
+ ret = Z_DATA_ERROR; /* and fall through */
+ case Z_DATA_ERROR:
+ case Z_MEM_ERROR:
+ (void)inflateEnd(&amp;strm);
+ return ret;
+ }
+</b></pre>
+The output of <tt>inflate()</tt> is handled identically to that of <tt>deflate()</tt>.
+<pre><b>
+ have = CHUNK - strm.avail_out;
+ if (fwrite(out, 1, have, dest) != have || ferror(dest)) {
+ (void)inflateEnd(&amp;strm);
+ return Z_ERRNO;
+ }
+</b></pre>
+The inner <tt>do</tt>-loop ends when <tt>inflate()</tt> has no more output as indicated
+by not filling the output buffer, just as for <tt>deflate()</tt>. In this case, we cannot
+assert that <tt>strm.avail_in</tt> will be zero, since the deflate stream may end before the file
+does.
+<pre><b>
+ } while (strm.avail_out == 0);
+</b></pre><!-- -->
+The outer <tt>do</tt>-loop ends when <tt>inflate()</tt> reports that it has reached the
+end of the input <em>zlib</em> stream, has completed the decompression and integrity
+check, and has provided all of the output. This is indicated by the <tt>inflate()</tt>
+return value <tt>Z_STREAM_END</tt>. The inner loop is guaranteed to leave <tt>ret</tt>
+equal to <tt>Z_STREAM_END</tt> if the last chunk of the input file read contained the end
+of the <em>zlib</em> stream. So if the return value is not <tt>Z_STREAM_END</tt>, the
+loop continues to read more input.
+<pre><b>
+ /* done when inflate() says it's done */
+ } while (ret != Z_STREAM_END);
+</b></pre><!-- -->
+At this point, decompression successfully completed, or we broke out of the loop due to no
+more data being available from the input file. If the last <tt>inflate()</tt> return value
+is not <tt>Z_STREAM_END</tt>, then the <em>zlib</em> stream was incomplete and a data error
+is returned. Otherwise, we return with a happy return value. Of course, <tt>inflateEnd()</tt>
+is called first to avoid a memory leak.
+<pre><b>
+ /* clean up and return */
+ (void)inflateEnd(&amp;strm);
+ return ret == Z_STREAM_END ? Z_OK : Z_DATA_ERROR;
+}
+</b></pre><!-- -->
+That ends the routines that directly use <em>zlib</em>. The following routines make this
+a command-line program by running data through the above routines from <tt>stdin</tt> to
+<tt>stdout</tt>, and handling any errors reported by <tt>def()</tt> or <tt>inf()</tt>.
+<p>
+<tt>zerr()</tt> is used to interpret the possible error codes from <tt>def()</tt>
+and <tt>inf()</tt>, as detailed in their comments above, and print out an error message.
+Note that these are only a subset of the possible return values from <tt>deflate()</tt>
+and <tt>inflate()</tt>.
+<pre><b>
+/* report a zlib or i/o error */
+void zerr(int ret)
+{
+ fputs("zpipe: ", stderr);
+ switch (ret) {
+ case Z_ERRNO:
+ if (ferror(stdin))
+ fputs("error reading stdin\n", stderr);
+ if (ferror(stdout))
+ fputs("error writing stdout\n", stderr);
+ break;
+ case Z_STREAM_ERROR:
+ fputs("invalid compression level\n", stderr);
+ break;
+ case Z_DATA_ERROR:
+ fputs("invalid or incomplete deflate data\n", stderr);
+ break;
+ case Z_MEM_ERROR:
+ fputs("out of memory\n", stderr);
+ break;
+ case Z_VERSION_ERROR:
+ fputs("zlib version mismatch!\n", stderr);
+ }
+}
+</b></pre><!-- -->
+Here is the <tt>main()</tt> routine used to test <tt>def()</tt> and <tt>inf()</tt>. The
+<tt>zpipe</tt> command is simply a compression pipe from <tt>stdin</tt> to <tt>stdout</tt>, if
+no arguments are given, or it is a decompression pipe if <tt>zpipe -d</tt> is used. If any other
+arguments are provided, no compression or decompression is performed. Instead a usage
+message is displayed. Examples are <tt>zpipe < foo.txt > foo.txt.z</tt> to compress, and
+<tt>zpipe -d < foo.txt.z > foo.txt</tt> to decompress.
+<pre><b>
+/* compress or decompress from stdin to stdout */
+int main(int argc, char **argv)
+{
+ int ret;
+
+ /* avoid end-of-line conversions */
+ SET_BINARY_MODE(stdin);
+ SET_BINARY_MODE(stdout);
+
+ /* do compression if no arguments */
+ if (argc == 1) {
+ ret = def(stdin, stdout, Z_DEFAULT_COMPRESSION);
+ if (ret != Z_OK)
+ zerr(ret);
+ return ret;
+ }
+
+ /* do decompression if -d specified */
+ else if (argc == 2 &amp;&amp; strcmp(argv[1], "-d") == 0) {
+ ret = inf(stdin, stdout);
+ if (ret != Z_OK)
+ zerr(ret);
+ return ret;
+ }
+
+ /* otherwise, report usage */
+ else {
+ fputs("zpipe usage: zpipe [-d] &lt; source &gt; dest\n", stderr);
+ return 1;
+ }
+}
+</b></pre>
+<hr>
+<i>Copyright (c) 2004, 2005 by Mark Adler<br>Last modified 11 December 2005</i>
+</body>
+</html>
diff --git a/zlib/examples/zpipe.c b/zlib/examples/zpipe.c
new file mode 100644
index 0000000..83535d1
--- /dev/null
+++ b/zlib/examples/zpipe.c
@@ -0,0 +1,205 @@
+/* zpipe.c: example of proper use of zlib's inflate() and deflate()
+ Not copyrighted -- provided to the public domain
+ Version 1.4 11 December 2005 Mark Adler */
+
+/* Version history:
+ 1.0 30 Oct 2004 First version
+ 1.1 8 Nov 2004 Add void casting for unused return values
+ Use switch statement for inflate() return values
+ 1.2 9 Nov 2004 Add assertions to document zlib guarantees
+ 1.3 6 Apr 2005 Remove incorrect assertion in inf()
+ 1.4 11 Dec 2005 Add hack to avoid MSDOS end-of-line conversions
+ Avoid some compiler warnings for input and output buffers
+ */
+
+#include <stdio.h>
+#include <string.h>
+#include <assert.h>
+#include "zlib.h"
+
+#if defined(MSDOS) || defined(OS2) || defined(WIN32) || defined(__CYGWIN__)
+# include <fcntl.h>
+# include <io.h>
+# define SET_BINARY_MODE(file) setmode(fileno(file), O_BINARY)
+#else
+# define SET_BINARY_MODE(file)
+#endif
+
+#define CHUNK 16384
+
+/* Compress from file source to file dest until EOF on source.
+ def() returns Z_OK on success, Z_MEM_ERROR if memory could not be
+ allocated for processing, Z_STREAM_ERROR if an invalid compression
+ level is supplied, Z_VERSION_ERROR if the version of zlib.h and the
+ version of the library linked do not match, or Z_ERRNO if there is
+ an error reading or writing the files. */
+int def(FILE *source, FILE *dest, int level)
+{
+ int ret, flush;
+ unsigned have;
+ z_stream strm;
+ unsigned char in[CHUNK];
+ unsigned char out[CHUNK];
+
+ /* allocate deflate state */
+ strm.zalloc = Z_NULL;
+ strm.zfree = Z_NULL;
+ strm.opaque = Z_NULL;
+ ret = deflateInit(&strm, level);
+ if (ret != Z_OK)
+ return ret;
+
+ /* compress until end of file */
+ do {
+ strm.avail_in = fread(in, 1, CHUNK, source);
+ if (ferror(source)) {
+ (void)deflateEnd(&strm);
+ return Z_ERRNO;
+ }
+ flush = feof(source) ? Z_FINISH : Z_NO_FLUSH;
+ strm.next_in = in;
+
+ /* run deflate() on input until output buffer not full, finish
+ compression if all of source has been read in */
+ do {
+ strm.avail_out = CHUNK;
+ strm.next_out = out;
+ ret = deflate(&strm, flush); /* no bad return value */
+ assert(ret != Z_STREAM_ERROR); /* state not clobbered */
+ have = CHUNK - strm.avail_out;
+ if (fwrite(out, 1, have, dest) != have || ferror(dest)) {
+ (void)deflateEnd(&strm);
+ return Z_ERRNO;
+ }
+ } while (strm.avail_out == 0);
+ assert(strm.avail_in == 0); /* all input will be used */
+
+ /* done when last data in file processed */
+ } while (flush != Z_FINISH);
+ assert(ret == Z_STREAM_END); /* stream will be complete */
+
+ /* clean up and return */
+ (void)deflateEnd(&strm);
+ return Z_OK;
+}
+
+/* Decompress from file source to file dest until stream ends or EOF.
+ inf() returns Z_OK on success, Z_MEM_ERROR if memory could not be
+ allocated for processing, Z_DATA_ERROR if the deflate data is
+ invalid or incomplete, Z_VERSION_ERROR if the version of zlib.h and
+ the version of the library linked do not match, or Z_ERRNO if there
+ is an error reading or writing the files. */
+int inf(FILE *source, FILE *dest)
+{
+ int ret;
+ unsigned have;
+ z_stream strm;
+ unsigned char in[CHUNK];
+ unsigned char out[CHUNK];
+
+ /* allocate inflate state */
+ strm.zalloc = Z_NULL;
+ strm.zfree = Z_NULL;
+ strm.opaque = Z_NULL;
+ strm.avail_in = 0;
+ strm.next_in = Z_NULL;
+ ret = inflateInit(&strm);
+ if (ret != Z_OK)
+ return ret;
+
+ /* decompress until deflate stream ends or end of file */
+ do {
+ strm.avail_in = fread(in, 1, CHUNK, source);
+ if (ferror(source)) {
+ (void)inflateEnd(&strm);
+ return Z_ERRNO;
+ }
+ if (strm.avail_in == 0)
+ break;
+ strm.next_in = in;
+
+ /* run inflate() on input until output buffer not full */
+ do {
+ strm.avail_out = CHUNK;
+ strm.next_out = out;
+ ret = inflate(&strm, Z_NO_FLUSH);
+ assert(ret != Z_STREAM_ERROR); /* state not clobbered */
+ switch (ret) {
+ case Z_NEED_DICT:
+ ret = Z_DATA_ERROR; /* and fall through */
+ case Z_DATA_ERROR:
+ case Z_MEM_ERROR:
+ (void)inflateEnd(&strm);
+ return ret;
+ }
+ have = CHUNK - strm.avail_out;
+ if (fwrite(out, 1, have, dest) != have || ferror(dest)) {
+ (void)inflateEnd(&strm);
+ return Z_ERRNO;
+ }
+ } while (strm.avail_out == 0);
+
+ /* done when inflate() says it's done */
+ } while (ret != Z_STREAM_END);
+
+ /* clean up and return */
+ (void)inflateEnd(&strm);
+ return ret == Z_STREAM_END ? Z_OK : Z_DATA_ERROR;
+}
+
+/* report a zlib or i/o error */
+void zerr(int ret)
+{
+ fputs("zpipe: ", stderr);
+ switch (ret) {
+ case Z_ERRNO:
+ if (ferror(stdin))
+ fputs("error reading stdin\n", stderr);
+ if (ferror(stdout))
+ fputs("error writing stdout\n", stderr);
+ break;
+ case Z_STREAM_ERROR:
+ fputs("invalid compression level\n", stderr);
+ break;
+ case Z_DATA_ERROR:
+ fputs("invalid or incomplete deflate data\n", stderr);
+ break;
+ case Z_MEM_ERROR:
+ fputs("out of memory\n", stderr);
+ break;
+ case Z_VERSION_ERROR:
+ fputs("zlib version mismatch!\n", stderr);
+ }
+}
+
+/* compress or decompress from stdin to stdout */
+int main(int argc, char **argv)
+{
+ int ret;
+
+ /* avoid end-of-line conversions */
+ SET_BINARY_MODE(stdin);
+ SET_BINARY_MODE(stdout);
+
+ /* do compression if no arguments */
+ if (argc == 1) {
+ ret = def(stdin, stdout, Z_DEFAULT_COMPRESSION);
+ if (ret != Z_OK)
+ zerr(ret);
+ return ret;
+ }
+
+ /* do decompression if -d specified */
+ else if (argc == 2 && strcmp(argv[1], "-d") == 0) {
+ ret = inf(stdin, stdout);
+ if (ret != Z_OK)
+ zerr(ret);
+ return ret;
+ }
+
+ /* otherwise, report usage */
+ else {
+ fputs("zpipe usage: zpipe [-d] < source > dest\n", stderr);
+ return 1;
+ }
+}
diff --git a/zlib/examples/zran.c b/zlib/examples/zran.c
new file mode 100644
index 0000000..617a130
--- /dev/null
+++ b/zlib/examples/zran.c
@@ -0,0 +1,404 @@
+/* zran.c -- example of zlib/gzip stream indexing and random access
+ * Copyright (C) 2005 Mark Adler
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ Version 1.0 29 May 2005 Mark Adler */
+
+/* Illustrate the use of Z_BLOCK, inflatePrime(), and inflateSetDictionary()
+ for random access of a compressed file. A file containing a zlib or gzip
+ stream is provided on the command line. The compressed stream is decoded in
+ its entirety, and an index built with access points about every SPAN bytes
+ in the uncompressed output. The compressed file is left open, and can then
+ be read randomly, having to decompress on the average SPAN/2 uncompressed
+ bytes before getting to the desired block of data.
+
+ An access point can be created at the start of any deflate block, by saving
+ the starting file offset and bit of that block, and the 32K bytes of
+ uncompressed data that precede that block. Also the uncompressed offset of
+ that block is saved to provide a referece for locating a desired starting
+ point in the uncompressed stream. build_index() works by decompressing the
+ input zlib or gzip stream a block at a time, and at the end of each block
+ deciding if enough uncompressed data has gone by to justify the creation of
+ a new access point. If so, that point is saved in a data structure that
+ grows as needed to accommodate the points.
+
+ To use the index, an offset in the uncompressed data is provided, for which
+ the latest accees point at or preceding that offset is located in the index.
+ The input file is positioned to the specified location in the index, and if
+ necessary the first few bits of the compressed data is read from the file.
+ inflate is initialized with those bits and the 32K of uncompressed data, and
+ the decompression then proceeds until the desired offset in the file is
+ reached. Then the decompression continues to read the desired uncompressed
+ data from the file.
+
+ Another approach would be to generate the index on demand. In that case,
+ requests for random access reads from the compressed data would try to use
+ the index, but if a read far enough past the end of the index is required,
+ then further index entries would be generated and added.
+
+ There is some fair bit of overhead to starting inflation for the random
+ access, mainly copying the 32K byte dictionary. So if small pieces of the
+ file are being accessed, it would make sense to implement a cache to hold
+ some lookahead and avoid many calls to extract() for small lengths.
+
+ Another way to build an index would be to use inflateCopy(). That would
+ not be constrained to have access points at block boundaries, but requires
+ more memory per access point, and also cannot be saved to file due to the
+ use of pointers in the state. The approach here allows for storage of the
+ index in a file.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "zlib.h"
+
+#define local static
+
+#define SPAN 1048576L /* desired distance between access points */
+#define WINSIZE 32768U /* sliding window size */
+#define CHUNK 16384 /* file input buffer size */
+
+/* access point entry */
+struct point {
+ off_t out; /* corresponding offset in uncompressed data */
+ off_t in; /* offset in input file of first full byte */
+ int bits; /* number of bits (1-7) from byte at in - 1, or 0 */
+ unsigned char window[WINSIZE]; /* preceding 32K of uncompressed data */
+};
+
+/* access point list */
+struct access {
+ int have; /* number of list entries filled in */
+ int size; /* number of list entries allocated */
+ struct point *list; /* allocated list */
+};
+
+/* Deallocate an index built by build_index() */
+local void free_index(struct access *index)
+{
+ if (index != NULL) {
+ free(index->list);
+ free(index);
+ }
+}
+
+/* Add an entry to the access point list. If out of memory, deallocate the
+ existing list and return NULL. */
+local struct access *addpoint(struct access *index, int bits,
+ off_t in, off_t out, unsigned left, unsigned char *window)
+{
+ struct point *next;
+
+ /* if list is empty, create it (start with eight points) */
+ if (index == NULL) {
+ index = malloc(sizeof(struct access));
+ if (index == NULL) return NULL;
+ index->list = malloc(sizeof(struct point) << 3);
+ if (index->list == NULL) {
+ free(index);
+ return NULL;
+ }
+ index->size = 8;
+ index->have = 0;
+ }
+
+ /* if list is full, make it bigger */
+ else if (index->have == index->size) {
+ index->size <<= 1;
+ next = realloc(index->list, sizeof(struct point) * index->size);
+ if (next == NULL) {
+ free_index(index);
+ return NULL;
+ }
+ index->list = next;
+ }
+
+ /* fill in entry and increment how many we have */
+ next = index->list + index->have;
+ next->bits = bits;
+ next->in = in;
+ next->out = out;
+ if (left)
+ memcpy(next->window, window + WINSIZE - left, left);
+ if (left < WINSIZE)
+ memcpy(next->window + left, window, WINSIZE - left);
+ index->have++;
+
+ /* return list, possibly reallocated */
+ return index;
+}
+
+/* Make one entire pass through the compressed stream and build an index, with
+ access points about every span bytes of uncompressed output -- span is
+ chosen to balance the speed of random access against the memory requirements
+ of the list, about 32K bytes per access point. Note that data after the end
+ of the first zlib or gzip stream in the file is ignored. build_index()
+ returns the number of access points on success (>= 1), Z_MEM_ERROR for out
+ of memory, Z_DATA_ERROR for an error in the input file, or Z_ERRNO for a
+ file read error. On success, *built points to the resulting index. */
+local int build_index(FILE *in, off_t span, struct access **built)
+{
+ int ret;
+ off_t totin, totout; /* our own total counters to avoid 4GB limit */
+ off_t last; /* totout value of last access point */
+ struct access *index; /* access points being generated */
+ z_stream strm;
+ unsigned char input[CHUNK];
+ unsigned char window[WINSIZE];
+
+ /* initialize inflate */
+ strm.zalloc = Z_NULL;
+ strm.zfree = Z_NULL;
+ strm.opaque = Z_NULL;
+ strm.avail_in = 0;
+ strm.next_in = Z_NULL;
+ ret = inflateInit2(&strm, 47); /* automatic zlib or gzip decoding */
+ if (ret != Z_OK)
+ return ret;
+
+ /* inflate the input, maintain a sliding window, and build an index -- this
+ also validates the integrity of the compressed data using the check
+ information at the end of the gzip or zlib stream */
+ totin = totout = last = 0;
+ index = NULL; /* will be allocated by first addpoint() */
+ strm.avail_out = 0;
+ do {
+ /* get some compressed data from input file */
+ strm.avail_in = fread(input, 1, CHUNK, in);
+ if (ferror(in)) {
+ ret = Z_ERRNO;
+ goto build_index_error;
+ }
+ if (strm.avail_in == 0) {
+ ret = Z_DATA_ERROR;
+ goto build_index_error;
+ }
+ strm.next_in = input;
+
+ /* process all of that, or until end of stream */
+ do {
+ /* reset sliding window if necessary */
+ if (strm.avail_out == 0) {
+ strm.avail_out = WINSIZE;
+ strm.next_out = window;
+ }
+
+ /* inflate until out of input, output, or at end of block --
+ update the total input and output counters */
+ totin += strm.avail_in;
+ totout += strm.avail_out;
+ ret = inflate(&strm, Z_BLOCK); /* return at end of block */
+ totin -= strm.avail_in;
+ totout -= strm.avail_out;
+ if (ret == Z_NEED_DICT)
+ ret = Z_DATA_ERROR;
+ if (ret == Z_MEM_ERROR || ret == Z_DATA_ERROR)
+ goto build_index_error;
+ if (ret == Z_STREAM_END)
+ break;
+
+ /* if at end of block, consider adding an index entry (note that if
+ data_type indicates an end-of-block, then all of the
+ uncompressed data from that block has been delivered, and none
+ of the compressed data after that block has been consumed,
+ except for up to seven bits) -- the totout == 0 provides an
+ entry point after the zlib or gzip header, and assures that the
+ index always has at least one access point; we avoid creating an
+ access point after the last block by checking bit 6 of data_type
+ */
+ if ((strm.data_type & 128) && !(strm.data_type & 64) &&
+ (totout == 0 || totout - last > span)) {
+ index = addpoint(index, strm.data_type & 7, totin,
+ totout, strm.avail_out, window);
+ if (index == NULL) {
+ ret = Z_MEM_ERROR;
+ goto build_index_error;
+ }
+ last = totout;
+ }
+ } while (strm.avail_in != 0);
+ } while (ret != Z_STREAM_END);
+
+ /* clean up and return index (release unused entries in list) */
+ (void)inflateEnd(&strm);
+ index = realloc(index, sizeof(struct point) * index->have);
+ index->size = index->have;
+ *built = index;
+ return index->size;
+
+ /* return error */
+ build_index_error:
+ (void)inflateEnd(&strm);
+ if (index != NULL)
+ free_index(index);
+ return ret;
+}
+
+/* Use the index to read len bytes from offset into buf, return bytes read or
+ negative for error (Z_DATA_ERROR or Z_MEM_ERROR). If data is requested past
+ the end of the uncompressed data, then extract() will return a value less
+ than len, indicating how much as actually read into buf. This function
+ should not return a data error unless the file was modified since the index
+ was generated. extract() may also return Z_ERRNO if there is an error on
+ reading or seeking the input file. */
+local int extract(FILE *in, struct access *index, off_t offset,
+ unsigned char *buf, int len)
+{
+ int ret, skip;
+ z_stream strm;
+ struct point *here;
+ unsigned char input[CHUNK];
+ unsigned char discard[WINSIZE];
+
+ /* proceed only if something reasonable to do */
+ if (len < 0)
+ return 0;
+
+ /* find where in stream to start */
+ here = index->list;
+ ret = index->have;
+ while (--ret && here[1].out <= offset)
+ here++;
+
+ /* initialize file and inflate state to start there */
+ strm.zalloc = Z_NULL;
+ strm.zfree = Z_NULL;
+ strm.opaque = Z_NULL;
+ strm.avail_in = 0;
+ strm.next_in = Z_NULL;
+ ret = inflateInit2(&strm, -15); /* raw inflate */
+ if (ret != Z_OK)
+ return ret;
+ ret = fseeko(in, here->in - (here->bits ? 1 : 0), SEEK_SET);
+ if (ret == -1)
+ goto extract_ret;
+ if (here->bits) {
+ ret = getc(in);
+ if (ret == -1) {
+ ret = ferror(in) ? Z_ERRNO : Z_DATA_ERROR;
+ goto extract_ret;
+ }
+ (void)inflatePrime(&strm, here->bits, ret >> (8 - here->bits));
+ }
+ (void)inflateSetDictionary(&strm, here->window, WINSIZE);
+
+ /* skip uncompressed bytes until offset reached, then satisfy request */
+ offset -= here->out;
+ strm.avail_in = 0;
+ skip = 1; /* while skipping to offset */
+ do {
+ /* define where to put uncompressed data, and how much */
+ if (offset == 0 && skip) { /* at offset now */
+ strm.avail_out = len;
+ strm.next_out = buf;
+ skip = 0; /* only do this once */
+ }
+ if (offset > WINSIZE) { /* skip WINSIZE bytes */
+ strm.avail_out = WINSIZE;
+ strm.next_out = discard;
+ offset -= WINSIZE;
+ }
+ else if (offset != 0) { /* last skip */
+ strm.avail_out = (unsigned)offset;
+ strm.next_out = discard;
+ offset = 0;
+ }
+
+ /* uncompress until avail_out filled, or end of stream */
+ do {
+ if (strm.avail_in == 0) {
+ strm.avail_in = fread(input, 1, CHUNK, in);
+ if (ferror(in)) {
+ ret = Z_ERRNO;
+ goto extract_ret;
+ }
+ if (strm.avail_in == 0) {
+ ret = Z_DATA_ERROR;
+ goto extract_ret;
+ }
+ strm.next_in = input;
+ }
+ ret = inflate(&strm, Z_NO_FLUSH); /* normal inflate */
+ if (ret == Z_NEED_DICT)
+ ret = Z_DATA_ERROR;
+ if (ret == Z_MEM_ERROR || ret == Z_DATA_ERROR)
+ goto extract_ret;
+ if (ret == Z_STREAM_END)
+ break;
+ } while (strm.avail_out != 0);
+
+ /* if reach end of stream, then don't keep trying to get more */
+ if (ret == Z_STREAM_END)
+ break;
+
+ /* do until offset reached and requested data read, or stream ends */
+ } while (skip);
+
+ /* compute number of uncompressed bytes read after offset */
+ ret = skip ? 0 : len - strm.avail_out;
+
+ /* clean up and return bytes read or error */
+ extract_ret:
+ (void)inflateEnd(&strm);
+ return ret;
+}
+
+/* Demonstrate the use of build_index() and extract() by processing the file
+ provided on the command line, and the extracting 16K from about 2/3rds of
+ the way through the uncompressed output, and writing that to stdout. */
+int main(int argc, char **argv)
+{
+ int len;
+ off_t offset;
+ FILE *in;
+ struct access *index = NULL;
+ unsigned char buf[CHUNK];
+
+ /* open input file */
+ if (argc != 2) {
+ fprintf(stderr, "usage: zran file.gz\n");
+ return 1;
+ }
+ in = fopen(argv[1], "rb");
+ if (in == NULL) {
+ fprintf(stderr, "zran: could not open %s for reading\n", argv[1]);
+ return 1;
+ }
+
+ /* build index */
+ len = build_index(in, SPAN, &index);
+ if (len < 0) {
+ fclose(in);
+ switch (len) {
+ case Z_MEM_ERROR:
+ fprintf(stderr, "zran: out of memory\n");
+ break;
+ case Z_DATA_ERROR:
+ fprintf(stderr, "zran: compressed data error in %s\n", argv[1]);
+ break;
+ case Z_ERRNO:
+ fprintf(stderr, "zran: read error on %s\n", argv[1]);
+ break;
+ default:
+ fprintf(stderr, "zran: error %d while building index\n", len);
+ }
+ return 1;
+ }
+ fprintf(stderr, "zran: built index with %d access points\n", len);
+
+ /* use index by reading some bytes from an arbitrary offset */
+ offset = (index->list[index->have - 1].out << 1) / 3;
+ len = extract(in, index, offset, buf, CHUNK);
+ if (len < 0)
+ fprintf(stderr, "zran: extraction failed: %s error\n",
+ len == Z_MEM_ERROR ? "out of memory" : "input corrupted");
+ else {
+ fwrite(buf, 1, len, stdout);
+ fprintf(stderr, "zran: extracted %d bytes at %llu\n", len, offset);
+ }
+
+ /* clean up and exit */
+ free_index(index);
+ fclose(in);
+ return 0;
+}