Move brotlidump.py to research/ (#487)

author: Eugene Kliuchnikov <eustas@google.com> 2016-12-20 18:00:51 +0100
committer: GitHub <noreply@github.com> 2016-12-20 18:00:51 +0100
commit: fd96151b2a04667dac73242b72bc62e007b2b543 (patch)
tree: 7442df2539cd00495f6553e22ff78b4e2c5ce4d3 /python
parent: 5814438791fb2d4394b46e5682a96b68cd092803 (diff)
download: brotli-fd96151b2a04667dac73242b72bc62e007b2b543.zip
brotli-fd96151b2a04667dac73242b72bc62e007b2b543.tar.gz
brotli-fd96151b2a04667dac73242b72bc62e007b2b543.tar.bz2
1 files changed, 0 insertions, 2361 deletions
diff --git a/python/brotlidump.py b/python/brotlidump.py
deleted file mode 100644
index a625368..0000000
--- a/python/brotlidump.py
+++ /dev/null
@@ -1,2361 +0,0 @@
-#!python3
-"""Program to dump contents of Brotli compressed files showing the compression format.
-Jurjen N.E. Bos, 2016.
-I found the following issues with the Brotli format:
-- The distance alphabet has size 16+(48<<POSTFIX),
-  but the last symbols are useless.
-  It could be lowered to 16+(44-POSTFIX<<POSTFIX), and this could matter.
-- The block type code is useless if NBLTYPES==2, you would only need 1 symbol
-  anyway, so why don't you just switch to "the other" type?
-"""
-import struct
-from operator import itemgetter, methodcaller
-from itertools import accumulate, repeat
-from collections import defaultdict, deque
-from functools import partial
-
-class InvalidStream(Exception): pass
-#lookup table
-L, I, D = "literal", "insert&copy", "distance"
-pL, pI, pD = 'P'+L, 'P'+I, 'P'+D
-
-def outputCharFormatter(c):
-    """Show character in readable format
-    """
-    #TODO 2: allow hex only output
-    if 32<c<127: return chr(c)
-    elif c==10: return '\\n'
-    elif c==13: return '\\r'
-    elif c==32: return '" "'
-    else: return '\\x{:02x}'.format(c)
-
-def outputFormatter(s):
-    """Show string or char.
-    """
-    result = ''
-    def formatSubString(s):
-        for c in s:
-            if c==32: yield ' '
-            else: yield outputCharFormatter(c)
-    if len(result)<200: return ''.join(formatSubString(s))
-    else:
-        return ''.join(formatSubString(s[:100]))+'...'+ \
-               ''.join(formatSubString(s[-100:]))
-
-
-class BitStream:
-    """Represent a bytes object. Can read bits and prefix codes the way
-    Brotli does.
-    """
-    def __init__(self, byteString):
-        self.data = byteString
-        #position in bits: byte pos is pos>>3, bit pos is pos&7
-        self.pos = 0
-
-    def __repr__(self):
-        """Representation
-        >>> olleke
-        BitStream(pos=0:0)
-        """
-        return "BitStream(pos={:x}:{})".format(self.pos>>3, self.pos&7)
-
-    def read(self, n):
-        """Read n bits from the stream and return as an integer.
-        Produces zero bits beyond the stream.
-        >>> olleke.data[0]==27
-        True
-        >>> olleke.read(5)
-        27
-
-        >>> olleke
-        BitStream(pos=0:5)
-        """
-        value = self.peek(n)
-        self.pos += n
-        if self.pos>len(self.data)*8:
-            raise ValueError('Read past end of stream')
-        return value
-
-    def peek(self, n):
-        """Peek an n bit integer from the stream without updating the pointer.
-        It is not an error to read beyond the end of the stream.
-        >>> olleke.data[:2]==b'\x1b\x2e' and 0x2e1b==11803
-        True
-        >>> olleke.peek(15)
-        11803
-        >>> hex(olleke.peek(32))
-        '0x2e1b'
-        """
-        #read bytes that contain the data: self.data[self.pos>>3:self.pos+n+7>>3]
-        #convert to int: int.from_bytes(..., 'little')
-        #shift out the bits from the first byte: >>(self.pos&7)
-        #mask unwanted bits: & (1<<n)-1
-        return int.from_bytes(
-            self.data[self.pos>>3:self.pos+n+7>>3],
-            'little')>>(self.pos&7) & (1<<n)-1
-
-    def readBytes(self, n):
-        """Read n bytes from the stream on a byte boundary.
-        """
-        if self.pos&7: raise ValueError('readBytes: need byte boundary')
-        result = self.data[self.pos>>3:(self.pos>>3)+n]
-        self.pos += 8*n
-        return result
-
-#-----------------------Symbol-------------------------------------------
-class Symbol:
-    """A symbol in a code.
-    Refers back to the code that contains it.
-    Index is the place in the alphabet of the symbol.
-    """
-    __slots__ = 'code', 'index'
-    def __init__(self, code, value):
-        self.code = code
-        self.index = value
-
-    def __repr__(self):
-        return 'Symbol({}, {})'.format(self.code.name, self.index)
-
-    def __len__(self):
-        """Number of bits in the prefix notation of this symbol
-        """
-        return self.code.length(self.index)
-
-    def __int__(self):
-        return self.index
-
-    #these routines call equivalent routine in Code class
-    def bitPattern(self):
-        """Value of the symbol in the stream
-        """
-        return self.code.bitPattern(self.index)
-
-    def extraBits(self):
-        """Number of extra bits to read for this symbol
-        """
-        return self.code.extraBits(self.index)
-
-    def __str__(self):
-        """Short descriptor of the symbol without extra bits.
-        """
-        return self.code.mnemonic(self.index)
-
-    #requiring optional extra bits, if self.code supports them
-    def value(self, extra=None):
-        """The value used for processing. Can be a tuple.
-        with optional extra bits
-        """
-        if isinstance(self.code, WithExtra):
-            if not 0<=extra<1<<self.extraBits():
-                raise ValueError("value: extra value doesn't fit in extraBits")
-            return self.code.value(self.index, extra)
-        if extra is not None:
-            raise ValueError('value: no extra bits for this code')
-        return self.code.value(self.index)
-
-    def explanation(self, extra=None):
-        """Long explanation of the value from the numeric value
-        with optional extra bits
-        Used by Layout.verboseRead when printing the value
-        """
-        if isinstance(self.code, WithExtra):
-            return self.code.callback(self, extra)
-        return self.code.callback(self)
-
-#========================Code definitions==================================
-class RangeDecoder:
-    """A decoder for the Code class that assumes the symbols
-    are encoded consecutively in binary.
-    It all depends on the "alphabetSize" property.
-    The range runs from 0 to alphabetSize-1.
-    This is the default decoder.
-    """
-    def __init__(self, *, alphabetSize=None, bitLength=None, **args):
-        if bitLength is not None: alphabetSize = 1<<bitLength
-        if alphabetSize is not None:
-            self.alphabetSize = alphabetSize
-            self.maxLength = (alphabetSize-1).bit_length()
-
-    def __len__(self):
-        return self.alphabetSize
-
-    def __iter__(self):
-        """Produce all symbols.
-        """
-        return map(partial(Symbol, self), range(len(self)))
-
-    def __getitem__(self, index):
-        if index>=self.alphabetSize: raise ValueError('index out of range')
-        return Symbol(self, index)
-
-    def bitPattern(self, index):
-        return '{:0{}b}'.format(index, self.maxLength)
-
-    def length(self, index):
-        """Encoding length of given symbol.
-        Does not depend on index in this case.
-        """
-        return self.maxLength
-
-    def decodePeek(self, data):
-        """Find which symbol index matches the given data (from peek, as a number)
-        and return the number of bits decoded.
-        Can also be used to figure out length of a symbol.
-        """
-        return self.maxLength, Symbol(self, data&(1<<self.maxLength)-1)
-
-class PrefixDecoder:
-    """A decoder for the Code class that uses a prefix code.
-    The code is determined by encoding:
-    encode[p] gives the index corresponding to bit pattern p.
-    Used setDecode(decodeTable) to switch the decoder from the default
-    to a prefix decoder, or pass decodeTable at init.
-    You can also use setLength(lengthTable)
-    to define the encoding from the lengths.
-    The set of symbol values does not need to be consecutive.
-    """
-    def __init__(self, *, decodeTable=None, **args):
-        if decodeTable is not None: self.setDecode(decodeTable)
-
-    def __len__(self):
-        return len(self.decodeTable)
-
-    def __iter__(self):
-        def revBits(index):
-            return self.bitPattern(index)[::-1]
-        return (
-            Symbol(self, index)
-            for index in sorted(self.decodeTable.values(), key=revBits)
-            )
-
-    def __getitem__(self, index):
-        if index not in self.lengthTable:
-            raise ValueError('No symbol {}[{}]'.format(
-                self.__class__.__name__, index))
-        return Symbol(self, index)
-
-    def bitPattern(self, index):
-        bits = next(b for (b,s) in self.decodeTable.items() if s==index)
-        return '{:0{}b}'.format(bits, self.length(index))
-
-    def length(self, index):
-        """Encoding length of given symbol.
-        """
-        return self.lengthTable[index]
-
-    def decodePeek(self, data):
-        """Find which symbol index matches the given data (from peek, as a number)
-        and return the number of bits decoded.
-        Can also be used to figure out length of a symbol.
-        """
-        #do binary search for word length
-        #invariant: lo<=length<=hi
-        lo, hi = self.minLength, self.maxLength
-        while lo<=hi:
-            mid = lo+hi>>1
-            #note lo<=mid<hi at this point
-            mask = (1<<mid)-1
-            #lets see what happens if we guess length is mid
-            try: index = self.decodeTable[data&mask]
-            except KeyError:
-                #too many bits specified, reduce estimated length
-                hi = mid-1
-                continue
-            #we found a symbol, but there could be a longer match
-            symbolLength = self.lengthTable[index]
-            if symbolLength<=mid:
-                #all bits match, symbol must be right
-                return symbolLength, Symbol(self, index)
-            #there must be more bits to match
-            lo = mid+1
-        return lo, Symbol(self, index)
-
-    #routine to set up the tables
-    def setDecode(self, decodeTable):
-        """Store decodeTable,
-        and compute lengthTable, minLength, maxLength from encodings.
-        """
-        self.decodeTable = decodeTable
-        #set of symbols with unknown length
-        todo = set(decodeTable)
-        #bit size under investigation
-        maskLength = 0
-        lengthTable = {}
-        while todo:
-            mask = (1<<maskLength)-1
-            #split the encodings that we didn't find yet using b bits
-            splitSymbols = defaultdict(list)
-            for s in todo: splitSymbols[s&mask].append(s)
-            #unique encodings have a length of maskLength bits
-            #set length, and remove from todo list
-            for s,subset in splitSymbols.items():
-                if len(subset)==1:
-                    lengthTable[self.decodeTable[s]] = maskLength
-                    todo.remove(s)
-            #now investigate with longer mask
-            maskLength +=1
-        #save result
-        self.lengthTable = lengthTable
-        self.minLength = min(lengthTable.values())
-        self.maxLength = max(lengthTable.values())
-        self.switchToPrefix()
-
-    def setLength(self, lengthTable):
-        """Given the bit pattern lengths for symbols given in lengthTable,
-        set decodeTable, minLength, maxLength
-        """
-        self.lengthTable = lengthTable
-        self.minLength = min(lengthTable.values())
-        self.maxLength = max(lengthTable.values())
-        #compute the backwards codes first; then reverse them
-        #compute (backwards) first code for every separate lengths
-        nextCodes = []
-        #build codes for each length, from right to left
-        code = 0
-        for bits in range(self.maxLength+1):
-            code <<= 1
-            nextCodes.append(code)
-            code += sum(x==bits for x in lengthTable.values())
-        self.decodeTable = {}
-        #count codes for each length, and store reversed in the table
-        for symbol in sorted(lengthTable):
-            bits = lengthTable[symbol]
-            bitpattern = '{:0{}b}'.format(nextCodes[bits], bits)
-            self.decodeTable[int(bitpattern[::-1], 2)] = symbol
-            nextCodes[bits] += 1
-        self.switchToPrefix()
-
-    def switchToPrefix(self):
-        """This routine makes sure the prefix decoder is activated.
-        """
-        self.mode = PrefixDecoder
-
-class Code(RangeDecoder, PrefixDecoder):
-    """An alphabet of symbols, that can be read from a stream.
-    If you use setDecode or setLength, you have a prefix code,
-    otherwise you have a range code.
-    Features:
-    code[index] produces symbol with given index
-    value(index): value of symbol
-    mnemonic(index): short description of symbol
-    explanation(index): show meaning of symbol, shown in Layout.verboseRead
-    iter(code): produce all symbols in some order
-    name: show as context in Layout.verboseRead
-    """
-    name = '?'
-    #callback is a function that gets the symbol and the extra bits
-    #default callback calls explanation
-    def __init__(self, name=None, *, callback=None, description='', **args):
-        """Don't forget to set either alphabetSize or decodeTable
-        """
-        #set name when provided, otherwise take class variable
-        if name is not None: self.name = name
-        if callback is not None: self.callback = callback
-        self.description = description
-        #mode switch
-        if 'bitLength' in args or 'alphabetSize' in args:
-            self.mode = RangeDecoder
-            RangeDecoder.__init__(self, **args)
-        elif 'decodeTable' in args:
-            self.mode = PrefixDecoder
-            PrefixDecoder.__init__(self, **args)
-        else:
-            super().__init__(**args)
-
-    def __repr__(self):
-        return self.__class__.__name__+' '+self.name
-
-    #the routines that get switched between RangeDecoder and PrefixDecoder
-    def __len__(self): return self.mode.__len__(self)
-    def __iter__(self): return self.mode.__iter__(self)
-    def __getitem__(self, index): return self.mode.__getitem__(self, index)
-    def bitPattern(self, index): return self.mode.bitPattern(self, index)
-    def length(self, index): return self.mode.length(self, index)
-    def decodePeek(self, data): return self.mode.decodePeek(self, data)
-    #general routines
-    def value(self, index, extra=None):
-        """Get value of symbol for computations.
-        Override where needed.
-        """
-        if extra is not None:
-            raise ValueError('value: no extra for this symbol')
-        return index
-
-    def mnemonic(self, index):
-        """Give mnemonic of symbol.
-        Override where needed.
-        """
-        return str(self.value(index))
-
-    def callback(self, symbol):
-        return self.explanation(symbol.index)
-
-    def explanation(self, index):
-        """Long explanation of the value from the numeric value
-        This is a default routine.
-        You can customize in three ways:
-        - set description to add some text
-        - override to get more control
-        - set callback to make it dependent on you local variables
-        """
-        value = self.value(index)
-        return '{0}{1}: {2}'.format(
-            self.description and self.description+': ',
-            self.bitPattern(index),
-            value,
-            )
-
-    def extraBits(self, index):
-        return 0
-
-    #Routines that use the decode interface
-    def showCode(self, width=80):
-        """Show all words of the code in a nice format.
-        """
-        #make table of all symbols with binary strings
-        symbolStrings = [
-            (self.bitPattern(s.index), self.mnemonic(s.index))
-            for s in self
-            ]
-        #determine column widths the way Lisp programmers do it
-        leftColWidth, rightColWidth = map(max, map(
-            map,
-            repeat(len),
-            zip(*symbolStrings)
-            ))
-        colwidth = leftColWidth+rightColWidth
-        columns = 81//(colwidth+2)
-        rows = -(-len(symbolStrings)//columns)
-        def justify(bs):
-            b,s = bs
-            return b.rjust(leftColWidth)+':'+s.ljust(rightColWidth)
-        for i in range(rows):
-            print(' '.join(map(justify, symbolStrings[i::rows])).rstrip())
-
-    def readTuple(self, stream):
-        """Read symbol from stream. Returns symbol, length.
-        """
-        length, symbol = self.decodePeek(stream.peek(self.maxLength))
-        stream.pos += length
-        return length, symbol
-
-    def readTupleAndExtra(self, stream):
-        return self.readTuple(stream)+(0, None)
-
-class WithExtra(Code):
-    """Extension for Code so that symbol may have extra bits associated.
-    If you supply an extraTable, you can use extraBits
-    You can define an extraTable,
-    which allows to call extraBits to get the number of extraBits.
-    Otherwise, you can supply extraBits yourself.
-    Routine readTupleAndExtra now reads the extra bits too.
-    Value probably needs to be overridden; see Enumerator.
-    Note: this does not give you an decodeTable.
-    """
-    #redefine these if you don't want to use an extraTable
-    def extraBits(self, index):
-        """Get the number of extra bits for this symbol.
-        """
-        return self.extraTable[index]
-
-    def mnemonic(self, index):
-        """This value must be independent of extra.
-        """
-        return str(index)
-
-    def readTupleAndExtra(self, stream):
-        """Read symbol and extrabits from stream.
-        Returns symbol length, symbol, extraBits, extra
-        >>> olleke.pos = 6
-        >>> MetablockLengthAlphabet().readTupleAndExtra(olleke)
-        (2, Symbol(MLEN, 4), 16, 46)
-        """
-        length, symbol = self.decodePeek(stream.peek(self.maxLength))
-        stream.pos += length
-        extraBits = self.extraBits(symbol.index)
-        return length, symbol, extraBits, stream.read(extraBits)
-
-    def explanation(self, index, extra=None):
-        """Expanded version of Code.explanation supporting extra bits.
-        If you don't supply extra, it is not mentioned.
-        """
-        extraBits = 0 if extra is None else self.extraBits(index)
-        if not hasattr(self, 'extraTable'):
-            formatString = '{0}{3}'
-            lo = hi = value = self.value(index, extra)
-        elif extraBits==0:
-            formatString = '{0}{2}: {3}'
-            lo, hi = self.span(index)
-            value = lo
-        else:
-            formatString = '{0}{1} {2}: {3}-{4}; {3}+{5}={6}'
-            lo, hi = self.span(index)
-            value = lo+extra
-        return formatString.format(
-            self.description and self.description+': ',
-            'x'*extraBits,
-            self.bitPattern(index),
-            lo, hi,
-            extra,
-            value,
-            )
-
-    def callback(self, symbol, extra):
-        return self.explanation(symbol.index, extra)
-
-class BoolCode(Code):
-    """Same as Code(bitLength=1), but shows a boolean.
-    """
-    def __init__(self, name=None, **args):
-        super().__init__(name, bitLength=1, **args)
-
-    def value(self, index, extra=None):
-        return bool(super().value(index, extra))
-
-class Enumerator(WithExtra):
-    """Code that is defined by the ExtraTable.
-    extraTable is a class variable that contains
-    the extraBits of the symbols from 0
-    value0 contains the value of symbol 0
-    encodings is not neccessary, but allowed.
-    Note: place for FixedCode to make sure extraBits works
-    """
-    def __init__(self, name=None, **args):
-        #if there is no decodeTable to determine length, compute it ourselves
-        if 'decodeTable' not in args:
-            args['alphabetSize'] = len(self.extraTable)
-        super().__init__(name, **args)
-
-    def __len__(self):
-        return len(self.extraTable)
-
-    def __getitem__(self, index):
-        """Faster than PrefixDecoder
-        """
-        if index>=len(self.extraTable):
-            raise ValueError("No symbol {}[{}]".format(
-                self.__class__.__name__, index))
-        return Symbol(self, index)
-
-    def value(self, index, extra):
-        """Override if you don't define value0 and extraTable
-        """
-        lower, upper = self.span(index)
-        value = lower+(extra or 0)
-        if value>upper:
-            raise ValueError('value: extra out of range')
-        return value
-
-    def span(self, index):
-        """Give the range of possible values in a tuple
-        Useful for mnemonic and explanation
-        """
-        lower = self.value0+sum(1<<x for x in self.extraTable[:index])
-        upper = lower+(1<<self.extraTable[index])
-        return lower, upper-1
-
-#======================Code subclasses======================================
-#Alphabets used in the metablock header----------------------------------
-#For prefix codes
-class PrefixCodeHeader(WithExtra):
-    """Header of prefix codes.
-    """
-    def __init__(self, codename):
-        super().__init__('PFX', bitLength=2)
-        #this is the name of the code that it describes
-        self.codename = codename
-
-    def extraBits(self, index):
-        return 2 if index==1 else 0
-
-    def value(self, index, extra):
-        """Returns ('Simple', #codewords) or ('Complex', HSKIP)
-        """
-        if index==1:
-            if extra>3:
-                raise ValueError('value: extra out of range')
-            return 'Simple', extra+1
-        if extra:
-            raise ValueError('value: extra out of range')
-        return 'Complex', index
-
-    def explanation(self, index, extra):
-        if index==1:
-            return '{} is simple with {} code word{}'.format(
-                self.codename, extra+1, 's' if extra else '')
-        lengths = [1, 2, 3, 4, 0, 5, 17, 6]
-        return '{} is complex with lengths {}...'.format(
-            self.codename,
-            ','.join(
-                map(str, lengths[index:index+5]))
-            )
-
-class TreeShapeAlhabet(BoolCode):
-    """The bit used to indicate if four word code is "deep" or "wide"
-    """
-    name = 'SHAPE'
-    def value(self, index):
-        return [(2,2,2,2), (1,2,3,3)][index]
-
-    def explanation(self, index):
-        return str(bool(index))+': lengths {},{},{},{}'.format(*self.value(index))
-
-class LengthOfLengthAlphabet(Code):
-    """For use in decoding complex code descriptors.
-    >>> lengthOfLengthAlphabet = LengthOfLengthAlphabet('')
-    >>> print(lengthOfLengthAlphabet[2])
-    coded with 2 bits
-    >>> len(lengthOfLengthAlphabet[0])
-    2
-    >>> [len(lengthOfLengthAlphabet[x]) for x in range(6)]
-    [2, 4, 3, 2, 2, 4]
-    >>> lengthOfLengthAlphabet.showCode()
-      00:skipped             01:coded with 4 bits 0111:coded with 1 bits
-      10:coded with 3 bits  011:coded with 2 bits 1111:coded with 5 bits
-    """
-    decodeTable = {
-         0b00:0,     0b10:3,
-       0b0111:1,     0b01:4,
-        0b011:2,   0b1111:5,
-       }
-
-    def __init__(self, name=None, **args):
-        super().__init__(name, decodeTable=self.decodeTable, **args)
-
-    def mnemonic(self, index):
-        if index==0: return 'skipped'
-        return 'coded with {} bits'.format(index)
-
-    def explanation(self, index, extra=None):
-        return self.description+': '+self.mnemonic(index)
-
-class LengthAlphabet(WithExtra):
-    """Length of symbols
-    Used during construction of a code.
-    """
-    def __init__(self, name):
-        super().__init__(name, alphabetSize=18)
-
-    def extraBits(self, index):
-        return {16:2, 17:3}.get(index, 0)
-
-    def mnemonic(self, index):
-        if index==0: return 'unused'
-        elif index==16: return 'rep xx'
-        elif index==17: return 'zero xxx'
-        else: return 'len {}'.format(index)
-
-    def explanation(self, index, extra):
-        return self.description.format(self[index], extra)
-
-    def value(self, index, extra):
-        #the caller got the length already, so extra is enough
-        return extra
-
-#Stream header
-class WindowSizeAlphabet(Code):
-    """The alphabet used for window size in the stream header.
-    >>> WindowSizeAlphabet()[10].explanation()
-    'windowsize=(1<<10)-16=1008'
-    """
-    decodeTable = {
-        0b0100001: 10,   0b1100001: 14,   0b0011: 18,   0b1011: 22,
-        0b0110001: 11,   0b1110001: 15,   0b0101: 19,   0b1101: 23,
-        0b1000001: 12,         0b0: 16,   0b0111: 20,   0b1111: 24,
-        0b1010001: 13,   0b0000001: 17,   0b1001: 21,
-        0b0010001: None,
-        }
-
-    name = 'WSIZE'
-
-    def __init__(self, name=None):
-        super().__init__(name, decodeTable=self.decodeTable)
-
-    def value(self, index):
-        #missing value gives index None
-        if index is None: return None
-        return (1<<index)-16
-
-    def explanation(self, index):
-        return 'windowsize=(1<<{})-16={}'.format(
-            index, (1<<index)-16)
-
-#Metablock
-class MetablockLengthAlphabet(WithExtra):
-    """Used for the meta block length;
-    also indicates a block with no data
-    >>> metablockLengthAlphabet = MetablockLengthAlphabet()
-    >>> metablockLengthAlphabet[0]; str(metablockLengthAlphabet[0])
-    Symbol(MLEN, 0)
-    'empty'
-    >>> metablockLengthAlphabet[3]
-    Traceback (most recent call last):
-        ...
-    ValueError: No symbol MetablockLengthAlphabet[3]
-    >>> print(metablockLengthAlphabet[4])
-    hhhh00
-    >>> metablockLengthAlphabet[4].value(0x1000)
-    4097
-    >>> metablockLengthAlphabet[5].value(0x1000)
-    Traceback (most recent call last):
-        ...
-    InvalidStream: Zeros in high nibble of MLEN
-    >>> metablockLengthAlphabet[5].explanation(0x12345)
-    'data length: 12345h+1=74566'
-    >>> metablockLengthAlphabet.showCode()
-    00:hhhh00   10:hhhhhh10 01:hhhhh01  11:empty
-    """
-    decodeTable = {0b11:0, 0b00:4, 0b01:5, 0b10:6}
-
-    name = 'MLEN'
-    def __init__(self, name=None):
-        super().__init__(name, decodeTable=self.decodeTable)
-
-    def extraBits(self, index):
-        return index*4
-
-    def mnemonic(self, index):
-        if index==0: return 'empty'
-        return 'h'*(self.extraBits(index)//4)+self.bitPattern(index)
-
-    def value(self, index, extra):
-        extraBits = self.extraBits(index)
-        if not 0<=extra<1<<extraBits:
-            raise ValueError('value: extra out of range')
-        if index==0: return 0
-        if index>4 and extra>>extraBits-4==0: raise InvalidStream(
-            'Zeros in high nibble of MLEN')
-        return extra+1
-
-    def explanation(self, index, extra):
-        if index==0: return '11: empty block'
-        extraBits = self.extraBits(index)
-        return 'data length: {:0{}x}h+1={}'.format(extra, extraBits//4, extra+1)
-
-
-class ReservedAlphabet(BoolCode):
-    """The reserved bit that must be zero.
-    """
-    name = 'RSVD'
-    def value(self, index):
-        if index: raise ValueError('Reserved bit is not zero')
-
-    def explanation(self, index):
-        return 'Reserved (must be zero)'
-
-class FillerAlphabet(Code):
-    def __init__(self, *, streamPos):
-        super().__init__('SKIP', bitLength=(-streamPos)&7)
-
-    def explanation(self, index):
-        return '{} bit{} ignored'.format(
-            self.length(index),
-            '' if self.length(index)==1 else 's',
-            )
-
-class SkipLengthAlphabet(WithExtra):
-    """Used for the skip length in an empty metablock
-    >>> skipLengthAlphabet = SkipLengthAlphabet()
-    >>> skipLengthAlphabet[0]; str(skipLengthAlphabet[0])
-    Symbol(SKIP, 0)
-    'empty'
-    >>> skipLengthAlphabet[4]
-    Traceback (most recent call last):
-        ...
-    ValueError: index out of range
-    >>> print(skipLengthAlphabet[3])
-    hhhhhh11
-    >>> skipLengthAlphabet[2].value(0x1000)
-    4097
-    >>> skipLengthAlphabet[3].value(0x1000)
-    Traceback (most recent call last):
-        ...
-    InvalidStream: Zeros in high byte of SKIPBYTES
-    >>> skipLengthAlphabet[3].explanation(0x12345)
-    'skip length: 12345h+1=74566'
-    >>> skipLengthAlphabet.showCode()
-    00:empty    01:hh01     10:hhhh10   11:hhhhhh11
-    """
-    def __init__(self):
-        super().__init__('SKIP', bitLength=2)
-
-    def extraBits(self, index):
-        return index*8
-
-    def mnemonic(self, index):
-        if index==0: return 'empty'
-        return 'h'*(self.extraBits(index)//4)+self.bitPattern(index)
-
-    def value(self, index, extra):
-        extraBits = self.extraBits(index)
-        if not 0<=extra<1<<extraBits:
-            raise ValueError('value: extra out of range')
-        if index==0: return 0
-        if index>1 and extra>>extraBits-8==0:
-            raise InvalidStream('Zeros in high byte of SKIPBYTES')
-        return extra+1
-
-    def explanation(self, index, extra):
-        if index==0: return '00: no skip'
-        extraBits = self.extraBits(index)
-        return 'skip length: {:{}x}h+1={}'.format(extra, extraBits//8, extra+1)
-
-
-class TypeCountAlphabet(Enumerator):
-    """Used for giving block type counts and tree counts.
-    >>> TypeCountAlphabet(description='').showCode()
-       0:0            0101:xx,0101      1011:xxxxx,1011
-    0001:0001         1101:xxxxxx,1101  0111:xxx,0111
-    1001:xxxx,1001    0011:x,0011       1111:xxxxxxx,1111
-    """
-    decodeTable = {
-             0b0: 0,   0b1001: 5,
-          0b0001: 1,   0b1011: 6,
-          0b0011: 2,   0b1101: 7,
-          0b0101: 3,   0b1111: 8,
-          0b0111: 4,
-          }
-
-    value0 = 1
-    extraTable = [0, 0, 1, 2, 3, 4, 5, 6, 7]
-    name = 'BT#'
-
-    def __init__(self, name=None, *, description):
-        super().__init__(
-            name,
-            decodeTable=self.decodeTable,
-            description=description)
-
-    def mnemonic(self, index):
-        if index==0: return '0'
-        if index==1: return '0001'
-        return 'x'*(self.extraBits(index))+','+self.bitPattern(index)
-
-    def explanation(self, index, extra):
-        value = self.value(index, extra)
-        description = self.description
-        if value==1: description = description[:-1]
-        return '{}: {} {}'.format(
-            self.mnemonic(index),
-            value,
-            description)
-
-class BlockTypeAlphabet(Code):
-    """The block types; this code works for all three kinds.
-    >>> b = BlockTypeAlphabet('T', NBLTYPES=5)
-    >>> print(*(x for x in b))
-    prev +1 #0 #1 #2 #3 #4
-    """
-    def __init__(self, name, NBLTYPES, **args):
-        super().__init__(name, alphabetSize=NBLTYPES+2, **args)
-        self.NBLTYPES = NBLTYPES
-
-    def mnemonic(self, index):
-        if index==0: return 'prev'
-        elif index==1: return '+1'
-        else: return '#'+str(index-2)
-
-    def value(self, index):
-        return index-2
-
-    def explanation(self, index):
-        if index==0: return '0: previous'
-        elif index==1: return '1: increment'
-        else: return 'Set block type to: '+str(index-2)
-
-class BlockCountAlphabet(Enumerator):
-    """Block counts
-    >>> b = BlockCountAlphabet('L')
-    >>> print(b[25])
-    [24*x]: BC16625-16793840
-    """
-
-    value0 = 1
-    extraTable = [2,2,2,2,3, 3,3,3,4,4, 4,4,5,5,5, 5,6,6,7,8, 9,10,11,12,13, 24]
-    def __init__(self, name, **args):
-        super().__init__(name, alphabetSize=26, **args)
-
-    def mnemonic(self, index):
-        extraBits = self.extraBits(index)
-        return '{}: BC{}-{}'.format(
-            'x'*extraBits if index<5 else '[{}*x]'.format(extraBits),
-            *self.span(index))
-
-    def explanation(self, index, extra):
-        return 'Block count: '+super().explanation(index, extra)
-
-class DistanceParamAlphabet(WithExtra):
-    """The distance parameters NPOSTFIX and NDIRECT.
-    Although these are treated as two in the description, this is easier.
-    """
-    def __init__(self):
-        super().__init__('DIST', bitLength=2)
-
-    def extraBits(self, index):
-        return 4
-
-    def value(self, index, extra):
-        """Returns NPOSTFIX and NDIRECT<<NPOSTFIX
-        """
-        if extra>15:
-            raise ValueError('value: extra out of range')
-        return index, extra<<index
-
-    def explanation(self, index, extra):
-        return '{} postfix bits and {:04b}<<{}={} direct codes'.format(
-            index, extra, index, extra<<index)
-
-    def mnemonic(self, index):
-        return 'PF'+str(index)
-
-class LiteralContextMode(Code):
-    """For the literal context modes.
-    >>> LiteralContextMode().showCode()
-    00:LSB6   01:MSB6   10:UTF8   11:Signed
-    >>> LiteralContextMode().explanation(2)
-    'Context mode for type 9: 2(UTF8)'
-    """
-
-    def __init__(self, *, number=9):
-        super().__init__('LC'+str(number), bitLength=2)
-        self.number = number
-
-    def mnemonic(self, index):
-        return ['LSB6', 'MSB6', 'UTF8', 'Signed'][index]
-
-    def explanation(self, index):
-        return 'Context mode for type {}: {}({})'.format(
-            self.number,
-            index,
-            self.mnemonic(index))
-
-class RLEmaxAlphabet(Enumerator):
-    """Used for describing the run length encoding used for describing context maps.
-    >>> RLEmaxAlphabet().showCode()
-    0:1    1:more
-    """
-    value0 = 0
-    extraTable = [0, 4]
-    name = 'RLE#'
-
-    def mnemonic(self, index):
-        return ['1', 'more'][index]
-
-    def explanation(self, index, extra):
-        description = self.description and self.description+': '
-        if index==0: return description+'No RLE coding'
-        return '{}xxxx 1: RLEMAX={}'.format(description, extra+1)
-
-class TreeAlphabet(WithExtra):
-    """The alphabet to enumerate entries (called trees) in the context map.
-    parameters are RLEMAX and NTREES
-    >>> t = TreeAlphabet('', RLEMAX=3, NTREES=5)
-    >>> len(t)
-    8
-    >>> print(t[2])
-    xx+4 zeroes
-    >>> t[3].explanation(2)
-    '8+010=10 zeroes'
-    >>> t[0].value(0)
-    (1, 0)
-    """
-    name = 'CMI'
-    def __init__(self, name=None, *, RLEMAX, NTREES, **args):
-        super().__init__(name, alphabetSize=RLEMAX+NTREES, **args)
-        self.RLEMAX = RLEMAX
-        self.NTREES = NTREES
-
-    def extraBits(self, index):
-        if 0<index<=self.RLEMAX: return index
-        return 0
-
-    def mnemonic(self, index):
-        if index==0: return 'map #0'
-        if index<=self.RLEMAX:
-            return '{}+{} zeroes'.format('x'*index, 1<<index)
-        return 'map #{}'.format(index-self.RLEMAX)
-
-    def value(self, index, extra):
-        """Give count and value."""
-        index = index
-        if index==0: return 1, 0
-        if index<=self.RLEMAX: return (1<<index)+extra, 0
-        return 1, index-self.RLEMAX
-
-    def explanation(self, index, extra):
-        description = self.description and self.description+': '
-        if index==0: return description+'map #0'
-        if index<=self.RLEMAX:
-            return '{}+{:0{}b}={} zeroes'.format(
-                (1<<index),
-                extra, self.extraBits(index),
-                (1<<index)+extra)
-        return '{}map #{}-{}={}'.format(
-            description,
-            index, self.RLEMAX, index-self.RLEMAX)
-
-#Prefix alphabets for the data stream----------------------------------
-class LiteralAlphabet(Code):
-    """Alphabet of symbols.
-    """
-    minLength = maxLength = 8
-    def __init__(self, number):
-        super().__init__('L'+str(number), alphabetSize=1<<8)
-
-    def mnemonic(self, index):
-        return outputCharFormatter(index)
-
-    def value(self, index, extra=None):
-        return index
-
-    def explanation(self, index, extra=None):
-        return self.mnemonic(index)
-
-class InsertLengthAlphabet(Enumerator):
-    """Intern code for insert counts
-    """
-    value0 = 0
-    extraTable = [0,0,0,0,0, 0,1,1,2,2, 3,3,4,4,5, 5,6,7,8,9, 10,12,14,24]
-
-class CopyLengthAlphabet(Enumerator):
-    value0 = 2
-    extraTable = [0,0,0,0,0, 0,0,0,1,1, 2,2,3,3,4, 4,5,5,6,7, 8,9,10,24]
-
-class InsertAndCopyAlphabet(WithExtra):
-    """The insert and copy code
-    >>> for x in range(0,704,704//13):
-    ...    print('{:10b}'.format(x), InsertAndCopyAlphabet()[x])
-             0 I0C2&D=0
-        110110 I6+xC8&D=0
-       1101100 I5C22+xxx&D=0
-      10100010 I4C4
-      11011000 I3C10+x
-     100001110 I14+xxC8
-     101000100 I10+xxC22+xxx
-     101111010 I98+xxxxxC14+xx
-     110110000 I6+xC70+xxxxx
-     111100110 I1090+[10*x]C8
-    1000011100 I26+xxxC326+[8*x]
-    1001010010 I322+[8*x]C14+xx
-    1010001000 I194+[7*x]C70+xxxxx
-    1010111110 I22594+[24*x]C1094+[10*x]
-    """
-    insertLengthAlphabet = InsertLengthAlphabet(None)
-    copyLengthAlphabet = CopyLengthAlphabet(None)
-
-    def __init__(self, number=''):
-        super().__init__('IC'+str(number), bitLength=10)
-
-    def __len__(self):
-        return 704
-
-    def extraBits(self, index):
-        insertSymbol, copySymbol, dist0 = self.splitSymbol(index)
-        return InsertLengthAlphabet.extraTable[insertSymbol.index] + \
-            CopyLengthAlphabet.extraTable[copySymbol.index]
-
-    def splitSymbol(self, index):
-        """Give relevant values for computations:
-        (insertSymbol, copySymbol, dist0flag)
-        """
-        #determine insert and copy upper bits from table
-        row = [0,0,1,1,2,2,1,3,2,3,3][index>>6]
-        col = [0,1,0,1,0,1,2,0,2,1,2][index>>6]
-        #determine inserts and copy sub codes
-        insertLengthCode = row<<3 | index>>3&7
-        if row: insertLengthCode -= 8
-        copyLengthCode = col<<3 | index&7
-        return (
-            Symbol(self.insertLengthAlphabet, insertLengthCode),
-            Symbol(self.copyLengthAlphabet, copyLengthCode),
-            row==0
-            )
-
-    def mnemonic(self, index):
-        """Make a nice mnemonic
-        """
-        i,c,d0 = self.splitSymbol(index)
-        iLower, _ = i.code.span(i.index)
-        iExtra = i.extraBits()
-        cLower, _ = c.code.span(c.index)
-        cExtra = c.extraBits()
-        return 'I{}{}{}C{}{}{}{}'.format(
-            iLower,
-            '+' if iExtra else '',
-            'x'*iExtra if iExtra<6 else '[{}*x]'.format(iExtra),
-            cLower,
-            '+' if cExtra else '',
-            'x'*cExtra if cExtra<6 else '[{}*x]'.format(cExtra),
-            '&D=0' if d0 else '')
-
-    def value(self, index, extra):
-        i,c,d0 = self.splitSymbol(index)
-        iExtra = i.extraBits()
-        ce, ie = extra>>iExtra, extra&(1<<iExtra)-1
-        insert = i.value(ie)
-        copy = c.value(ce)
-        return insert, copy, d0
-
-    def explanation(self, index, extra):
-        insert, copy, d0 = self.value(index, extra)
-        if d0: return 'Literal: {}, copy: {}, same distance'.format(insert, copy)
-        else: return 'Literal: {}, copy: {}'.format(insert, copy)
-
-class DistanceAlphabet(WithExtra):
-    """Represent the distance encoding.
-    Dynamically generated alphabet.
-    This is what the documentation should have said:
-    Ignoring offsets for the moment, the "long" encoding works as follows:
-    Write the distance in binary as follows:
-    1xy..yz..z, then the distance symbol consists of n..nxz..z
-    Where:
-    n is one less than number of bits in y
-    x is a single bit
-    y..y are n+1 extra bits (encoded in the bit stream)
-    z..z is NPOSTFIX bits that are part of the symbol
-    The offsets are so as to start at the lowest useable value:
-    if 1xyyyyz = distance +(4<<POSTFIX)-NDIRECT-1
-    then n..nxz..z is symbol -NDIRECT-16
-    >>> d = DistanceAlphabet('D', NPOSTFIX=2, NDIRECT=10)
-    >>> print(d[4], d[17], d[34])
-    last-1 1 10xx00-5
-    >>> [str(d[x]) for x in range(26, 32)]
-    ['10x00-5', '10x01-5', '10x10-5', '10x11-5', '11x00-5', '11x01-5']
-    """
-    def __init__(self, number, *, NPOSTFIX, NDIRECT):
-        self.NPOSTFIX = NPOSTFIX
-        self.NDIRECT = NDIRECT
-        #set length
-        #Actually, not all symbols are used,
-        #only NDIRECT+16+(44-2*POSTFIX<<NPOSTFIX)
-        super().__init__('D'+str(number),
-            alphabetSize=self.NDIRECT+16+(48<<self.NPOSTFIX))
-
-    def extraBits(self, index):
-        """Indicate how many extra bits are needed to interpret symbol
-        >>> d = DistanceAlphabet('D', NPOSTFIX=2, NDIRECT=10)
-        >>> [d[i].extraBits() for i in range(26)]
-        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
-        >>> [d[i].extraBits() for i in range(26,36)]
-        [1, 1, 1, 1, 1, 1, 1, 1, 2, 2]
-        """
-        if index<16+self.NDIRECT: return 0
-        return 1 + ((index - self.NDIRECT - 16) >> (self.NPOSTFIX + 1))
-
-    def value(self, dcode, dextra):
-        """Decode value of symbol together with the extra bits.
-        >>> d = DistanceAlphabet('D', NPOSTFIX=2, NDIRECT=10)
-        >>> d[34].value(2)
-        (0, 35)
-        """
-        if dcode<16:
-            return [(1,0),(2,0),(3,0),(4,0),
-                    (1,-1),(1,+1),(1,-2),(1,+2),(1,-3),(1,+3),
-                    (2,-1),(2,+1),(2,-2),(2,+2),(2,-3),(2,+3)
-                ][dcode]
-        if dcode<16+self.NDIRECT:
-            return (0,dcode-16)
-        #we use the original formulas, instead of my clear explanation
-        POSTFIX_MASK = (1 << self.NPOSTFIX) - 1
-        ndistbits = 1 + ((dcode - self.NDIRECT - 16) >> (self.NPOSTFIX + 1))
-        hcode = (dcode - self.NDIRECT - 16) >> self.NPOSTFIX
-        lcode = (dcode - self.NDIRECT - 16) & POSTFIX_MASK
-        offset = ((2 + (hcode & 1)) << ndistbits) - 4
-        distance = ((offset + dextra) << self.NPOSTFIX) + lcode + self.NDIRECT + 1
-        return (0,distance)
-
-    def mnemonic(self, index, verbose=False):
-        """Give mnemonic representation of meaning.
-        verbose compresses strings of x's
-        """
-        if index<16:
-            return ['last', '2last', '3last', '4last',
-                'last-1', 'last+1', 'last-2', 'last+2', 'last-3', 'last+3',
-                '2last-1', '2last+1', '2last-2', '2last+2', '2last-3', '2last+3'
-                ][index]
-        if index<16+self.NDIRECT:
-            return str(index-16)
-        #construct strings like "1xx01-15"
-        index -= self.NDIRECT+16
-        hcode = index >> self.NPOSTFIX
-        lcode = index & (1<<self.NPOSTFIX)-1
-        if self.NPOSTFIX: formatString = '1{0}{1}{2:0{3}b}{4:+d}'
-        else: formatString = '1{0}{1}{4:+d}'
-        return formatString.format(
-            hcode&1,
-            'x'*(2+hcode>>1) if hcode<13 or verbose else '[{}*x]'.format(2+hcode>>1),
-            lcode, self.NPOSTFIX,
-            self.NDIRECT+1-(4<<self.NPOSTFIX))
-
-    def explanation(self, index, extra):
-        """
-        >>> d = DistanceAlphabet('D', NPOSTFIX=2, NDIRECT=10)
-        >>> d[55].explanation(13)
-        '11[1101]01-5: [0]+240'
-        """
-        extraBits = self.extraBits(index)
-        extraString = '[{:0{}b}]'.format(extra, extraBits)
-        return '{0}: [{1[0]}]{1[1]:+d}'.format(
-            self.mnemonic(index, True).replace('x'*(extraBits or 1), extraString),
-            self.value(index, extra))
-
-#Classes for doing actual work------------------------------------------
-class ContextModeKeeper:
-    """For computing the literal context mode.
-    You feed it characters, and it computes indices in the context map.
-    """
-    def __init__(self, mode):
-        self.chars = deque([0,0], maxlen=2)
-        self.mode = mode
-
-    def setContextMode(self, mode):
-        """Switch to given context mode (0..3)"""
-        self.mode = mode
-    def getIndex(self):
-        if self.mode==0:  #LSB6
-            return self.chars[1]&0x3f
-        elif self.mode==1: #MSB6
-            return self.chars[1]>>2
-        elif self.mode==2: #UTF8: character class of previous and a bit of the second
-            p2,p1 = self.chars
-            return self.lut0[p1]|self.lut1[p2]
-        elif self.mode==3: #Signed: initial bits of last two bytes
-            p2,p1 = self.chars
-            return self.lut2[p1]<<3|self.lut2[p2]
-
-    def add(self, index):
-        """Adjust the context for output char (as int)."""
-        self.chars.append(index)
-
-    #0: control     #16: quote  #32: ,:;  #48: AEIOU
-    #4: tab/lf/cr   #20: %      #36: .    #52: BC..Z
-    #8: space       #24: (<[{   #40: =    #56: aeiou
-    #12:!#$&*+-/?@| #28: )>]}   #44: 0-9  #60: bc..z
-    lut0 = [0,  0,  0,  0,  0,  0,  0,  0,  0,  4,  4,  0,  0,  4,  0,  0,
-            0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
-            8, 12, 16, 12, 12, 20, 12, 16, 24, 28, 12, 12, 32, 12, 36, 12,
-           44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 32, 32, 24, 40, 28, 12,
-           12, 48, 52, 52, 52, 48, 52, 52, 52, 48, 52, 52, 52, 52, 52, 48,
-           52, 52, 52, 52, 52, 48, 52, 52, 52, 52, 52, 24, 12, 28, 12, 12,
-           12, 56, 60, 60, 60, 56, 60, 60, 60, 56, 60, 60, 60, 60, 60, 56,
-           60, 60, 60, 60, 60, 56, 60, 60, 60, 60, 60, 24, 12, 28, 12,  0
-           ]+[0,1]*32+[2,3]*32
-    #0: space  1:punctuation  2:digit/upper 3:lower
-    lut1 = [ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-             0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-             0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-             2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1,
-             1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
-             2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1,
-             1, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
-             3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 1, 1, 1, 1, 0
-           ]+[0]*96+[2]*32
-    #initial bits: 8*0, 4*0, 2*0, 1*0, 1*1, 2*1, 4*1, 8*1
-    lut2 = [0]+[1]*15+[2]*48+[3]*64+[4]*64+[5]*48+[6]*15+[7]
-    assert len(lut0)==len(lut1)==len(lut2)==256
-
-class WordList:
-    """Word list.
-    >>> WordList().word(7, 35555)
-    b'Program to '
-    """
-    NDBITS = [0,  0,  0,  0, 10, 10, 11, 11, 10, 10,
-             10, 10, 10,  9,  9,  8,  7,  7,  8,  7,
-              7,  6,  6,  5,  5]
-    def __init__(self):
-        self.file = open('dict', 'rb')
-        self.compileActions()
-
-    def word(self, size, dist):
-        """Get word
-        """
-        #split dist in index and action
-        ndbits = self.NDBITS[size]
-        index = dist&(1<<ndbits)-1
-        action = dist>>ndbits
-        #compute position in file
-        position = sum(n<<self.NDBITS[n] for n in range(4,size))+size*index
-        self.file.seek(position)
-        return self.doAction(self.file.read(size), action)
-
-    def upperCase1(self, word):
-        word = word.decode('utf8')
-        word = word[0].upper()+word[1:]
-        return word.encode('utf8')
-
-
-    #Super compact form of action table.
-    #_ means space, .U means UpperCaseAll, U(w) means UpperCaseFirst
-    actionTable = r"""
-        0:w        25:w+_for_     50:w+\n\t       75:w+. This_100:w+ize_
-        1:w+_      26:w[3:]       51:w+:          76:w+,      101:w.U+.
-        2:_+w+_    27:w[:-2]      52:_+w+._       77:.+w+_    102:\xc2\xa0+w
-        3:w[1:]    28:w+_a_       53:w+ed_        78:U(w)+(   103:_+w+,
-        4:U(w)+_   29:w+_that_    54:w[9:]        79:U(w)+.   104:U(w)+="
-        5:w+_the_  30:_+U(w)      55:w[7:]        80:w+_not_  105:w.U+="
-        6:_+w      31:w+._        56:w[:-6]       81:_+w+="   106:w+ous_
-        7:s_+w+_   32:.+w         57:w+(          82:w+er_    107:w.U+,_
-        8:w+_of_   33:_+w+,_      58:U(w)+,_      83:_+w.U+_  108:U(w)+=\'
-        9:U(w)     34:w[4:]       59:w[:-8]       84:w+al_    109:_+U(w)+,
-       10:w+_and_  35:w+_with_    60:w+_at_       85:_+w.U    110:_+w.U+="
-       11:w[2:]    36:w+\'        61:w+ly_        86:w+=\'    111:_+w.U+,_
-       12:w[:-1]   37:w+_from_    62:_the_+w+_of_ 87:w.U+"    112:_+w.U+,
-       13:,_+w+_   38:w+_by_      63:w[:-5]       88:U(w)+._  113:w.U+(
-       14:w+,_     39:w[5:]       64:w[:-9]       89:_+w+(    114:w.U+._
-       15:_+U(w)+_ 40:w[6:]       65:_+U(w)+,_    90:w+ful_   115:_+w.U+.
-       16:w+_in_   41:_the_+w     66:U(w)+"       91:_+U(w)+._116:w.U+=\'
-       17:w+_to_   42:w[:-4]      67:.+w+(        92:w+ive_   117:_+w.U+._
-       18:e_+w+_   43:w+. The_    68:w.U+_        93:w+less_  118:_+U(w)+="
-       19:w+"      44:w.U         69:U(w)+">      94:w.U+\'   119:_+w.U+=\'
-       20:w+.      45:w+_on_      70:w+="         95:w+est_   120:_+U(w)+=\'
-       21:w+">     46:w+_as_      71:_+w+.        96:_+U(w)+.
-       22:w+\n     47:w+_is_      72:.com/+w      97:w.U+">
-       23:w[:-3]   48:w[:-7]                      98:_+w+=\'
-       24:w+]      49:w[:-1]+ing_ 74:U(w)+\'      99:U(w)+,
-        """
-
-    def compileActions(self):
-        """Build the action table from the text above
-        """
-        import re
-        self.actionList = actions = [None]*121
-        #Action 73, which is too long, looks like this when expanded:
-        actions[73] = "b' the '+w+b' of the '"
-        #find out what the columns are
-        actionLines = self.actionTable.splitlines()
-        colonPositions = [m.start()
-            for m in re.finditer(':',actionLines[1])
-            ]+[100]
-        columns = [(colonPositions[i]-3,colonPositions[i+1]-3)
-            for i in range(len(colonPositions)-1)]
-        for line in self.actionTable.splitlines(keepends=False):
-            for start,end in columns:
-                action = line[start:end]
-                #skip empty actions
-                if not action or action.isspace(): continue
-                #chop it up, and check if the colon is properly placed
-                index, colon, action = action[:3], action[3], action[4:]
-                assert colon==':'
-                #remove filler spaces at right
-                action = action.rstrip()
-                #replace space symbols
-                action = action.replace('_', ' ')
-                wPos = action.index('w')
-                #add quotes around left string when present
-                #translation: any pattern from beginning, up to
-                #(but not including) a + following by a w later on
-                action = re.sub(r"^(.*)(?=\+[U(]*w)", r"b'\1'", action)
-                #add quotes around right string when present
-                #translation: anything with a w in it, followed by a +
-                #and a pattern up to the end
-                #(there is no variable lookbehind assertion,
-                #so we have to copy the pattern)
-                action = re.sub(r"(w[[:\-1\]).U]*)\+(.*)$", r"\1+b'\2'", action)
-                #expand shortcut for uppercaseAll
-                action = action.replace(".U", ".upper()")
-                #store action
-                actions[int(index)] = action
-
-    def doAction(self, w, action):
-        """Perform the proper action
-        """
-        #set environment for the UpperCaseFirst
-        U = self.upperCase1
-        return eval(self.actionList[action], locals())
-
-class Layout:
-    """Class to layout the output.
-    """
-    #display width of hexdata+bitdata
-    width = 25
-    #general
-    def __init__(self, stream):
-        self.stream = stream
-        self.bitPtr = self.width
-
-    def makeHexData(self, pos):
-        """Produce hex dump of all data containing the bits
-        from pos to stream.pos
-        """
-        firstAddress = pos+7>>3
-        lastAddress = self.stream.pos+7>>3
-        return ''.join(map('{:02x} '.format,
-            self.stream.data[firstAddress:lastAddress]))
-
-    def formatBitData(self, pos, width1, width2=0):
-        """Show formatted bit data:
-        Bytes are separated by commas
-        whole bytes are displayed in hex
-        >>> Layout(olleke).formatBitData(6, 2, 16)
-        '|00h|2Eh,|00'
-        >>> Layout(olleke).formatBitData(4, 1, 0)
-        '1'
-        """
-        result = []
-        #make empty prefix code explicit
-        if width1==0: result = ['()', ',']
-        for width in width1, width2:
-            #skip empty width2
-            if width==0: continue
-            #build result backwards in a list
-            while width>0:
-                availableBits = 8-(pos&7)
-                if width<availableBits:
-                    #read partial byte, beginning nor ending at boundary
-                    data = self.stream.data[pos>>3] >> (pos&7) & (1<<width)-1
-                    result.append('{:0{}b}'.format(data, width))
-                elif availableBits<8:
-                    #read rest of byte, ending at boundary
-                    data = self.stream.data[pos>>3] >> (pos&7)
-                    result.append('|{:0{}b}'.format(data, availableBits))
-                else:
-                    #read whole byte (in hex), beginning and ending at boundary
-                    data = self.stream.data[pos>>3]
-                    result.append('|{:02X}h'.format(data))
-                width -= availableBits
-                pos += availableBits
-            #if width overshot from the availableBits subtraction, fix it
-            pos += width
-            #add comma to separate fields
-            result.append(',')
-        #concatenate pieces, reversed, skipping the last space
-        return ''.join(result[-2::-1])
-
-    def readPrefixCode(self, alphabet):
-        """give alphabet the prefix code that is read from the stream
-        Called for the following alphabets, in this order:
-        The alphabet in question must have a "logical" order,
-        otherwise the assignment of symbols doesn't work.
-        """
-        mode, numberOfSymbols = self.verboseRead(PrefixCodeHeader(alphabet.name))
-        if mode=='Complex':
-            #for a complex code, numberOfSymbols means hskip
-            self.readComplexCode(numberOfSymbols, alphabet)
-            return alphabet
-        else:
-            table = []
-            #Set table of lengths for mnemonic function
-            lengths = [[0], [1,1], [1,2,2], '????'][numberOfSymbols-1]
-            #adjust mnemonic function of alphabet class
-            def myMnemonic(index):
-                return '{} bit{}: {}'.format(
-                    lengths[i],
-                    '' if lengths[i]==1 else 's',
-                    alphabet.__class__.mnemonic(alphabet, index)
-                    )
-            alphabet.mnemonic = myMnemonic
-            for i in range(numberOfSymbols):
-                table.append(self.verboseRead(alphabet, skipExtra=True).index)
-            #restore mnemonic
-            del alphabet.mnemonic
-            if numberOfSymbols==4:
-                #read tree shape to redefine lengths
-                lengths = self.verboseRead(TreeShapeAlhabet())
-            #construct the alphabet prefix code
-            alphabet.setLength(dict(zip(table, lengths)))
-        return alphabet
-
-    def readComplexCode(self, hskip, alphabet):
-        """Read complex code"""
-        stream = self.stream
-        #read the lengths for the length code
-        lengths = [1,2,3,4,0,5,17,6,16,7,8,9,10,11,12,13,14,15][hskip:]
-        codeLengths = {}
-        total = 0
-        lol = LengthOfLengthAlphabet('##'+alphabet.name)
-        #lengthCode will be used for coding the lengths of the new code
-        #we use it for display until now; definition comes below
-        lengthCode = LengthAlphabet('#'+alphabet.name)
-        lengthIter = iter(lengths)
-        while total<32:
-            newSymbol = next(lengthIter)
-            lol.description = str(lengthCode[newSymbol])
-            length = self.verboseRead(lol)
-            if length:
-                codeLengths[newSymbol] = length
-                total += 32>>length
-            if total>=32:
-                break
-        if total>32: raise ValueError("Stream format")
-        #Now set the encoding of the lengthCode
-        lengthCode.setLength(codeLengths)
-        print("***** Lengths for {} will be coded as:".format(alphabet.name))
-        lengthCode.showCode()
-        #Now determine the symbol lengths with the lengthCode
-        symbolLengths = {}
-        total = 0
-        lastLength = 8
-        alphabetIter = iter(alphabet)
-        while total<32768:
-            #look ahead to see what is going to happen
-            length = lengthCode.decodePeek(
-                self.stream.peek(lengthCode.maxLength))[1].index
-            #in every branch, set lengthCode.description to explanatory text
-            #lengthCode calls format(symbol, extra) with this string
-            if length==0:
-                symbol = next(alphabetIter)
-                lengthCode.description = 'symbol {} unused'.format(symbol)
-                self.verboseRead(lengthCode)
-                #unused symbol
-                continue
-            if length==16:
-                lengthCode.description = \
-                    '{1}+3 symbols of length '+str(lastLength)
-                extra = self.verboseRead(lengthCode)
-                #scan series of 16s (repeat counts)
-                #start with repeat count 2
-                repeat = 2
-                startSymbol = next(alphabetIter)
-                endSymbol = next(alphabetIter)
-                symbolLengths[startSymbol.index] = \
-                    symbolLengths[endSymbol.index] = lastLength
-                #count the two just defined symbols
-                total += 2*32768>>lastLength
-                #note: loop may end because we're there
-                #even if a 16 _appears_ to follow
-                while True:
-                    #determine last symbol
-                    oldRepeat = repeat
-                    repeat = (repeat-2<<2)+extra+3
-                    #read as many symbols as repeat increased
-                    for i in range(oldRepeat, repeat):
-                        endSymbol = next(alphabetIter)
-                        symbolLengths[endSymbol.index] = lastLength
-                    #compute new total; it may be end of loop
-                    total += (repeat-oldRepeat)*32768>>lastLength
-                    if total>=32768: break
-                    #see if there is more to do
-                    length = lengthCode.decodePeek(
-                        self.stream.peek(lengthCode.maxLength))[1].index
-                    if length!=16: break
-                    lengthCode.description = 'total {}+{{1}} symbols'.format(
-                        (repeat-2<<2)+3)
-                    extra = self.verboseRead(lengthCode)
-            elif length==17:
-                #read, and show explanation
-                lengthCode.description = '{1}+3 unused'
-                extra = self.verboseRead(lengthCode)
-                #scan series of 17s (groups of zero counts)
-                #start with repeat count 2
-                repeat = 2
-                startSymbol = next(alphabetIter)
-                endSymbol = next(alphabetIter)
-                #note: loop will not end with total==32768,
-                #since total doesn't change here
-                while True:
-                    #determine last symbol
-                    oldRepeat = repeat
-                    repeat = (repeat-2<<3)+extra+3
-                    #read as many symbols as repeat increases
-                    for i in range(repeat-oldRepeat):
-                        endSymbol = next(alphabetIter)
-                    #see if there is more to do
-                    length = lengthCode.decodePeek(
-                        self.stream.peek(lengthCode.maxLength))[1].index
-                    if length!=17: break
-                    lengthCode.description = 'total {}+{{1}} unused'.format(
-                        (repeat-2<<3)+3)
-                    extra = self.verboseRead(lengthCode)
-            else:
-                symbol = next(alphabetIter)
-                #double braces for format
-                char = str(symbol)
-                if char in '{}': char *= 2
-                lengthCode.description = \
-                    'Length for {} is {{0.index}} bits'.format(char)
-                #output is not needed (will be 0)
-                self.verboseRead(lengthCode)
-                symbolLengths[symbol.index] = length
-                total += 32768>>length
-                lastLength = length
-        assert total==32768
-        alphabet.setLength(symbolLengths)
-        print('End of table. Prefix code '+alphabet.name+':')
-        alphabet.showCode()
-
-    #stream
-    def processStream(self):
-        """Process a brotli stream.
-        """
-        print('addr  hex{:{}s}binary context explanation'.format(
-            '', self.width-10))
-        print('Stream header'.center(60, '-'))
-        self.windowSize = self.verboseRead(WindowSizeAlphabet())
-        print('Metablock header'.center(60, '='))
-        self.ISLAST = False
-        self.output = bytearray()
-        while not self.ISLAST:
-            self.ISLAST = self.verboseRead(
-                BoolCode('LAST', description="Last block"))
-            if self.ISLAST:
-                if self.verboseRead(
-                    BoolCode('EMPTY', description="Empty block")): break
-            if self.metablockLength(): continue
-            if not self.ISLAST and self.uncompressed(): continue
-            print('Block type descriptors'.center(60, '-'))
-            self.numberOfBlockTypes = {}
-            self.currentBlockCounts = {}
-            self.blockTypeCodes = {}
-            self.blockCountCodes = {}
-            for blockType in (L,I,D): self.blockType(blockType)
-            print('Distance code parameters'.center(60, '-'))
-            self.NPOSTFIX, self.NDIRECT = self.verboseRead(DistanceParamAlphabet())
-            self.readLiteralContextModes()
-            print('Context maps'.center(60, '-'))
-            self.cmaps = {}
-            #keep the number of each kind of prefix tree for the last loop
-            numberOfTrees = {I: self.numberOfBlockTypes[I]}
-            for blockType in (L,D):
-                numberOfTrees[blockType] = self.contextMap(blockType)
-            print('Prefix code lists'.center(60, '-'))
-            self.prefixCodes = {}
-            for blockType in (L,I,D):
-                self.readPrefixArray(blockType, numberOfTrees[blockType])
-            self.metablock()
-
-    #metablock header
-    def verboseRead(self, alphabet, context='', skipExtra=False):
-        """Read symbol and extra from stream and explain what happens.
-        Returns the value of the symbol
-        >>> olleke.pos = 0
-        >>> l = Layout(olleke)
-        >>> l.verboseRead(WindowSizeAlphabet())
-        0000  1b                   1011 WSIZE   windowsize=(1<<22)-16=4194288
-        4194288
-        """
-        #TODO 2: verbosity level, e.g. show only codes and maps in header
-        stream = self.stream
-        pos = stream.pos
-        if skipExtra:
-            length, symbol = alphabet.readTuple(stream)
-            extraBits, extra = 0, None
-        else:
-            length, symbol, extraBits, extra = alphabet.readTupleAndExtra(
-                stream)
-        #fields: address, hex data, binary data, name of alphabet, explanation
-        hexdata = self.makeHexData(pos)
-        addressField = '{:04x}'.format(pos+7>>3) if hexdata else ''
-        bitdata = self.formatBitData(pos, length, extraBits)
-        #bitPtr moves bitdata so that the bytes are easier to read
-        #jump back to right if a new byte starts
-        if '|' in bitdata[1:]:
-            #start over on the right side
-            self.bitPtr = self.width
-        fillWidth = self.bitPtr-(len(hexdata)+len(bitdata))
-        if fillWidth<0: fillWidth = 0
-        print('{:<5s} {:<{}s} {:7s} {}'.format(
-            addressField,
-            hexdata+' '*fillWidth+bitdata, self.width,
-            context+alphabet.name,
-            symbol if skipExtra else symbol.explanation(extra),
-            ))
-        #jump to the right if we started with a '|'
-        #because we didn't jump before printing
-        if bitdata.startswith('|'): self.bitPtr = self.width
-        else: self.bitPtr -= len(bitdata)
-        return symbol if skipExtra else symbol.value(extra)
-
-    def metablockLength(self):
-        """Read MNIBBLES and meta block length;
-        if empty block, skip block and return true.
-        """
-        self.MLEN = self.verboseRead(MetablockLengthAlphabet())
-        if self.MLEN:
-            return False
-        #empty block; skip and return False
-        self.verboseRead(ReservedAlphabet())
-        MSKIP = self.verboseRead(SkipLengthAlphabet())
-        self.verboseRead(FillerAlphabet(streamPos=self.stream.pos))
-        self.stream.pos += 8*MSKIP
-        print("Skipping to {:x}".format(self.stream.pos>>3))
-        return True
-
-    def uncompressed(self):
-        """If true, handle uncompressed data
-        """
-        ISUNCOMPRESSED = self.verboseRead(
-            BoolCode('UNCMPR', description='Is uncompressed?'))
-        if ISUNCOMPRESSED:
-            self.verboseRead(FillerAlphabet(streamPos=self.stream.pos))
-            print('Uncompressed data:')
-            self.output += self.stream.readBytes(self.MLEN)
-            print(outputFormatter(self.output[-self.MLEN:]))
-        return ISUNCOMPRESSED
-
-    def blockType(self, kind):
-        """Read block type switch descriptor for given kind of blockType."""
-        NBLTYPES = self.verboseRead(TypeCountAlphabet(
-            'BT#'+kind[0].upper(),
-            description='{} block types'.format(kind),
-            ))
-        self.numberOfBlockTypes[kind] = NBLTYPES
-        if NBLTYPES>=2:
-            self.blockTypeCodes[kind] = self.readPrefixCode(
-                BlockTypeAlphabet('BT'+kind[0].upper(), NBLTYPES))
-            self.blockCountCodes[kind] = self.readPrefixCode(
-                BlockCountAlphabet('BC'+kind[0].upper()))
-            blockCount = self.verboseRead(self.blockCountCodes[kind])
-        else:
-            blockCount = 1<<24
-        self.currentBlockCounts[kind] = blockCount
-
-    def readLiteralContextModes(self):
-        """Read literal context modes.
-        LSB6: lower 6 bits of last char
-        MSB6: upper 6 bits of last char
-        UTF8: rougly dependent on categories:
-            upper 4 bits depend on category of last char:
-                control/whitespace/space/ punctuation/quote/%/open/close/
-                comma/period/=/digits/ VOWEL/CONSONANT/vowel/consonant
-            lower 2 bits depend on category of 2nd last char:
-                space/punctuation/digit or upper/lowercase
-        signed: hamming weight of last 2 chars
-        """
-        print('Context modes'.center(60, '-'))
-        self.literalContextModes = []
-        for i in range(self.numberOfBlockTypes[L]):
-            self.literalContextModes.append(
-                self.verboseRead(LiteralContextMode(number=i)))
-
-    def contextMap(self, kind):
-        """Read context maps
-        Returns the number of differnt values on the context map
-        (In other words, the number of prefix trees)
-        """
-        NTREES = self.verboseRead(TypeCountAlphabet(
-            kind[0].upper()+'T#',
-            description='{} prefix trees'.format(kind)))
-        mapSize = {L:64, D:4}[kind]
-        if NTREES<2:
-            self.cmaps[kind] = [0]*mapSize
-        else:
-            #read CMAPkind
-            RLEMAX = self.verboseRead(RLEmaxAlphabet(
-                'RLE#'+kind[0].upper(),
-                description=kind+' context map'))
-            alphabet = TreeAlphabet('CM'+kind[0].upper(), NTREES=NTREES, RLEMAX=RLEMAX)
-            cmapCode = self.readPrefixCode(alphabet)
-            tableSize = mapSize*self.numberOfBlockTypes[kind]
-            cmap = []
-            while len(cmap)<tableSize:
-                cmapCode.description = 'map {}, entry {}'.format(
-                    *divmod(len(cmap), mapSize))
-                count, value = self.verboseRead(cmapCode)
-                cmap.extend([value]*count)
-            assert len(cmap)==tableSize
-            IMTF = self.verboseRead(BoolCode('IMTF', description='Apply inverse MTF'))
-            if IMTF:
-                self.IMTF(cmap)
-            if kind==L:
-                print('Context maps for literal data:')
-                for i in range(0, len(cmap), 64):
-                    print(*(
-                        ''.join(map(str, cmap[j:j+8]))
-                        for j in range(i, i+64, 8)
-                        ))
-            else:
-                print('Context map for distances:')
-                print(*(
-                    ''.join(map('{:x}'.format, cmap[i:i+4]))
-                    for i in range(0, len(cmap), 4)
-                    ))
-            self.cmaps[kind] = cmap
-        return NTREES
-
-    @staticmethod
-    def IMTF(v):
-        """In place inverse move to front transform.
-        """
-        #mtf is initialized virtually with range(infinity)
-        mtf = []
-        for i, vi in enumerate(v):
-            #get old value from mtf. If never seen, take virtual value
-            try: value = mtf.pop(vi)
-            except IndexError: value = vi
-            #put value at front
-            mtf.insert(0, value)
-            #replace transformed value
-            v[i] = value
-
-    def readPrefixArray(self, kind, numberOfTrees):
-        """Read prefix code array"""
-        prefixes = []
-        for i in range(numberOfTrees):
-            if kind==L: alphabet = LiteralAlphabet(i)
-            elif kind==I: alphabet = InsertAndCopyAlphabet(i)
-            elif kind==D: alphabet = DistanceAlphabet(
-                i, NPOSTFIX=self.NPOSTFIX, NDIRECT=self.NDIRECT)
-            self.readPrefixCode(alphabet)
-            prefixes.append(alphabet)
-        self.prefixCodes[kind] = prefixes
-
-    #metablock data
-    def metablock(self):
-        """Process the data.
-        Relevant variables of self:
-        numberOfBlockTypes[kind]: number of block types
-        currentBlockTypes[kind]: current block types (=0)
-        literalContextModes: the context modes for the literal block types
-        currentBlockCounts[kind]: counters for block types
-        blockTypeCodes[kind]: code for block type
-        blockCountCodes[kind]: code for block count
-        cmaps[kind]: the context maps (not for I)
-        prefixCodes[kind][#]: the prefix codes
-        lastDistances: the last four distances
-        lastChars: the last two chars
-        output: the result
-        """
-        print('Meta block contents'.center(60, '='))
-        self.currentBlockTypes = {L:0, I:0, D:0, pL:1, pI:1, pD:1}
-        self.lastDistances = deque([17,16,11,4], maxlen=4)
-        #the current context mode is for block type 0
-        self.contextMode = ContextModeKeeper(self.literalContextModes[0])
-        wordList = WordList()
-
-        #setup distance callback function
-        def distanceCallback(symbol, extra):
-            "callback function for displaying decoded distance"
-            index, offset = symbol.value(extra)
-            if index:
-                #recent distance
-                distance = self.lastDistances[-index]+offset
-                return 'Distance: {}last{:+d}={}'.format(index, offset, distance)
-            #absolute value
-            if offset<=maxDistance:
-                return 'Absolute value: {} (pos {})'.format(offset, maxDistance-offset)
-            #word list value
-            action, word = divmod(offset-maxDistance, 1<<wordList.NDBITS[copyLen])
-            return '{}-{} gives word {},{} action {}'.format(
-                offset, maxDistance, copyLen, word, action)
-        for dpc in self.prefixCodes[D]: dpc.callback = distanceCallback
-
-        blockLen = 0
-        #there we go
-        while blockLen<self.MLEN:
-            #get insert&copy command
-            litLen, copyLen, dist0Flag = self.verboseRead(
-                self.prefixCodes[I][
-                    self.figureBlockType(I)])
-            #literal data
-            for i in range(litLen):
-                bt = self.figureBlockType(L)
-                cm = self.contextMode.getIndex()
-                ct = self.cmaps[L][bt<<6|cm]
-                char = self.verboseRead(
-                    self.prefixCodes[L][ct],
-                    context='{},{}='.format(bt,cm))
-                self.contextMode.add(char)
-                self.output.append(char)
-            blockLen += litLen
-            #check if we're done
-            if blockLen>=self.MLEN: return
-            #distance
-            #distances are computed relative to output length, at most window size
-            maxDistance = min(len(self.output), self.windowSize)
-            if dist0Flag:
-                distance = self.lastDistances[-1]
-            else:
-                bt = self.figureBlockType(D)
-                cm = {2:0, 3:1, 4:2}.get(copyLen, 3)
-                ct = self.cmaps[D][bt<<2|cm]
-                index, offset = self.verboseRead(
-                    self.prefixCodes[D][ct],
-                    context='{},{}='.format(bt,cm))
-                distance = self.lastDistances[-index]+offset if index else offset
-                if index==1 and offset==0:
-                    #to make sure distance is not put in last distance list
-                    dist0Flag = True
-            if distance<=maxDistance:
-                #copy from output
-                for i in range(
-                        maxDistance-distance,
-                        maxDistance-distance+copyLen):
-                    self.output.append(self.output[i])
-                if not dist0Flag: self.lastDistances.append(distance)
-                comment = 'Seen before'
-            else:
-                #fetch from wordlist
-                newWord = wordList.word(copyLen, distance-maxDistance-1)
-                self.output.extend(newWord)
-                #adjust copyLen to reflect actual new data
-                copyLen = len(newWord)
-                comment = 'From wordlist'
-            blockLen += copyLen
-            print(' '*40,
-                comment,
-                ': "',
-                outputFormatter(self.output[-copyLen:]),
-                '"',
-                sep='')
-            self.contextMode.add(self.output[-2])
-            self.contextMode.add(self.output[-1])
-
-    def figureBlockType(self, kind):
-        counts, types = self.currentBlockCounts, self.currentBlockTypes
-        if counts[kind]==0:
-            newType = self.verboseRead(self.blockTypeCodes[kind])
-            if newType==-2: newType = types['P'+kind]
-            elif newType==-1:
-                newType = (types[kind]+1)%self.numberOfBlockTypes[kind]
-            types['P'+kind] = types[kind]
-            types[kind] = newType
-            counts[kind] = self.verboseRead(self.blockCountCodes[kind])
-        counts[kind] -=1
-        return types[kind]
-
-__test__ = {
-'BitStream': """
-    >>> bs = BitStream(b'Jurjen')
-    >>> bs.readBytes(2)
-    b'Ju'
-    >>> bs.read(6) #r=01110010
-    50
-    >>> bs
-    BitStream(pos=2:6)
-    >>> bs.peek(5)  #j=01101010
-    9
-    >>> bs.readBytes(2)
-    Traceback (most recent call last):
-        ...
-    ValueError: readBytes: need byte boundary
-    """,
-
-'Symbol': """
-    >>> a=Symbol(MetablockLengthAlphabet(),5)
-    >>> len(a)
-    2
-    >>> int(a)
-    5
-    >>> a.bitPattern()
-    '01'
-    >>> a.value(200000)
-    200001
-    >>> a.explanation(300000)
-    'data length: 493e0h+1=300001'
-    """,
-
-'RangeDecoder': """
-    >>> a=RangeDecoder(bitLength=3)
-    >>> len(a)
-    8
-    >>> a.name='t'
-    >>> list(a)
-    [Symbol(t, 0), Symbol(t, 1), Symbol(t, 2), Symbol(t, 3), Symbol(t, 4), Symbol(t, 5), Symbol(t, 6), Symbol(t, 7)]
-    >>> a[2]
-    Symbol(t, 2)
-    >>> a.bitPattern(4)
-    '100'
-    >>> a.length(2)
-    3
-    >>> a.decodePeek(15)
-    (3, Symbol(t, 7))
-    >>>
-
-    """,
-
-'PrefixDecoder': """
-    >>> a=PrefixDecoder(decodeTable={0:1,1:2,3:3,7:4})
-    >>> len(a)
-    4
-    >>> a.name='t'
-    >>> list(a)
-    [Symbol(t, 1), Symbol(t, 2), Symbol(t, 3), Symbol(t, 4)]
-    >>> a.decodePeek(22)
-    (1, Symbol(t, 1))
-    >>> a.decodePeek(27)
-    (3, Symbol(t, 3))
-    >>> a.length(1)
-    1
-    >>> a.length(4)
-    3
-    """,
-
-'Code': """
-    >>> a=Code('t',alphabetSize=10)
-    >>> len(a)
-    10
-    >>> a.showCode()
-    0000:0 0001:1 0010:2 0011:3 0100:4 0101:5 0110:6 0111:7 1000:8 1001:9
-    >>> a.setLength({2:1,3:2,5:3,6:3})
-    >>> a.showCode()
-      0:2  01:3 011:5 111:6
-    >>> len(a)
-    4
-    >>> def callback(i): return 'call{}back'.format(i)
-    >>> a=Code('t',callback=callback,bitLength=3)
-    >>> a[6].explanation()
-    'call6back'
-    """,
-
-'WithExtra': """
-    >>> class A(WithExtra):
-    ...    extraTable = [0,1,1,2,2]
-    >>> a=A('t',alphabetSize=5)
-    >>> a[1]
-    Symbol(t, 1)
-    >>> a.extraBits(2)
-    1
-    >>> a.mnemonic(4)
-    '4'
-    >>> a.readTupleAndExtra(BitStream(b'\x5b'))
-    (3, Symbol(t, 3), 2, 3)
-    """,
-
-'BoolCode': """
-    >>> BoolCode('test')[0].explanation()
-    '0: False'
-    """,
-
-'Enumerator': """
-    >>> class A(Enumerator):
-    ...    extraTable = [0,1,1,2,2]
-    ...    value0=3
-    >>> a=A(alphabetLength=5)
-    >>> a.value(3)
-    Traceback (most recent call last):
-        ...
-    TypeError: value() missing 1 required positional argument: 'extra'
-    >>> a.explanation(3,4)
-    'xx 011: 8-11; 8+4=12'
-    """,
-
-'WindowSizeAlphabet': """
-    >>> windowSizeAlphabet = WindowSizeAlphabet()
-    >>> windowSizeAlphabet[0]
-    Traceback (most recent call last):
-        ...
-    ValueError: No symbol WindowSizeAlphabet[0]
-    >>> len(windowSizeAlphabet)
-    16
-    >>> windowSizeAlphabet[21]
-    Symbol(WSIZE, 21)
-    >>> windowSizeAlphabet[21].bitPattern()
-    '1001'
-    >>> windowSizeAlphabet[21].extraBits()
-    0
-    >>> windowSizeAlphabet[21].index
-    21
-    >>> windowSizeAlphabet[10].value()
-    1008
-    >>> windowSizeAlphabet[10].explanation()
-    'windowsize=(1<<10)-16=1008'
-    >>> windowSizeAlphabet.showCode()
-          0:65520    1100001:16368    1110001:32752       0011:262128
-    0000001:131056   0010001:None        1001:2097136     1011:4194288
-    1000001:4080     1010001:8176        0101:524272      0111:1048560
-    0100001:1008     0110001:2032        1101:8388592     1111:16777200
-    """,
-
-'TypeCountAlphabet': """
-    >>> typeCountAlphabet = TypeCountAlphabet(description='bananas')
-    >>> len(typeCountAlphabet)
-    9
-    >>> typeCountAlphabet[3]
-    Symbol(BT#, 3)
-    >>> typeCountAlphabet[9]
-    Traceback (most recent call last):
-        ...
-    ValueError: No symbol TypeCountAlphabet[9]
-    >>> print(typeCountAlphabet[3])
-    xx,0101
-    >>> typeCountAlphabet[8].value(127)
-    256
-    >>> typeCountAlphabet[4].explanation(2)
-    'xxx,0111: 11 bananas'
-    >>> typeCountAlphabet[0].explanation()
-    '0: 1 banana'
-    """,
-
-'DistanceParamAlphabet': """
-    >>> dpa = DistanceParamAlphabet()
-    >>> dpa.showCode()
-    00:PF0 01:PF1 10:PF2 11:PF3
-    >>> dpa.readTupleAndExtra(BitStream(b'\\x29'))
-    (2, Symbol(DIST, 1), 4, 10)
-    >>> dpa.explanation(2, 5)
-    '2 postfix bits and 0101<<2=20 direct codes'
-    """,
-
-'LiteralAlphabet': """
-    >>> LiteralAlphabet(-1).showCode()   #doctest: +ELLIPSIS
-    00000000:\\x00 00110100:4    01101000:h    10011100:\\x9c 11010000:\\xd0
-    00000001:\\x01 00110101:5    01101001:i    10011101:\\x9d 11010001:\\xd1
-    00000010:\\x02 00110110:6    01101010:j    10011110:\\x9e 11010010:\\xd2
-    ...
-    00101111:/    01100011:c    10010111:\\x97 11001011:\\xcb 11111111:\\xff
-    00110000:0    01100100:d    10011000:\\x98 11001100:\\xcc
-    00110001:1    01100101:e    10011001:\\x99 11001101:\\xcd
-    00110010:2    01100110:f    10011010:\\x9a 11001110:\\xce
-    00110011:3    01100111:g    10011011:\\x9b 11001111:\\xcf
-    """,
-
-'BlockCountAlphabet': """
-    >>> bc=BlockCountAlphabet('BCL')
-    >>> len(bc)
-    26
-    >>> bs=BitStream(b'\\x40\\x83\\xc8\\x59\\12\\x02')
-    >>> x = bc.readTupleAndExtra(bs); x[1].explanation(x[3])
-    'Block count: xx 00000: 1-4; 1+2=3'
-    >>> x = bc.readTupleAndExtra(bs); x[1].explanation(x[3])
-    'Block count: xxx 00110: 33-40; 33+0=33'
-    >>> x = bc.readTupleAndExtra(bs); x[1].explanation(x[3])
-    'Block count: xxxxxx 10001: 305-368; 305+28=333'
-    >>> x = bc.readTupleAndExtra(bs); x[1].explanation(x[3])
-    'Block count: xxxxxxxxxxx 10110: 2289-4336; 2289+1044=3333'
-    """,
-
-'Layout': """
-    >>> olleke.pos = 0
-    >>> l = Layout(olleke)
-    >>> l.verboseRead(WindowSizeAlphabet())
-    0000  1b                   1011 WSIZE   windowsize=(1<<22)-16=4194288
-    4194288
-    >>> l.verboseRead(BoolCode('LAST', description="Last block"))
-                              1     LAST    Last block: 1: True
-    True
-    >>> l.verboseRead(BoolCode('EMPTY', description="Empty block"))
-                             0      EMPTY   Empty block: 0: False
-    False
-    >>> l.verboseRead(MetablockLengthAlphabet())
-    0001  2e 00        |00h|2Eh,|00 MLEN    data length: 002eh+1=47
-    47
-    >>> olleke.pos = 76
-    >>> l = Layout(olleke)
-    >>> x = l.verboseRead(DistanceAlphabet(0,NPOSTFIX=0,NDIRECT=0), skipExtra=True)
-    000a  82                10|1100 D0      10[15*x]-3
-    >>> x.explanation(0x86a3)
-    '10[1000011010100011]-3: [0]+100000'
-    """,
-
-'olleke': """
-    >>> olleke.pos = 0
-    >>> try: Layout(olleke).processStream()
-    ... except NotImplementedError: pass
-    ... #doctest: +REPORT_NDIFF
-    addr  hex               binary context explanation
-    -----------------------Stream header------------------------
-    0000  1b                   1011 WSIZE   windowsize=(1<<22)-16=4194288
-    ======================Metablock header======================
-                              1     LAST    Last block: 1: True
-                             0      EMPTY   Empty block: 0: False
-    0001  2e 00        |00h|2Eh,|00 MLEN    data length: 002eh+1=47
-    -------------------Block type descriptors-------------------
-    0003  00                      0 BT#L    0: 1 literal block type
-                                 0  BT#I    0: 1 insert&copy block type
-                                0   BT#D    0: 1 distance block type
-    ------------------Distance code parameters------------------
-    0004  44               0|000,00 DIST    0 postfix bits and 0000<<0=0 direct codes
-    -----------------------Context modes------------------------
-                         10         LC0     Context mode for type 0: 2(UTF8)
-    ------------------------Context maps------------------------
-                        0           LT#     0: 1 literal prefix tree
-                       0            DT#     0: 1 distance prefix tree
-    ---------------------Prefix code lists----------------------
-                     10             PFX     L0 is complex with lengths 3,4,0,5,17...
-    0005  4f                    1|0 ##L0    len 3: coded with 3 bits
-                            0111    ##L0    len 4: coded with 1 bits
-                          10        ##L0    unused: coded with 3 bits
-    0006  d6                    0|0 ##L0    len 5: skipped
-                             011    ##L0    zero xxx: coded with 2 bits
-    ***** Lengths for L0 will be coded as:
-      0:len 4     01:zero xxx 011:unused   111:len 3
-    0007  95                1|11,01 #L0     7+3 unused
-                           0        #L0     Length for \\n is 4 bits
-                     001,01         #L0     1+3 unused
-    0008  44                010,0|1 #L0     total 19+2 unused
-                           0        #L0     Length for " " is 4 bits
-                          0         #L0     Length for ! is 4 bits
-    0009  cb                011,|01 #L0     3+3 unused
-                     |110,01        #L0     total 35+6 unused
-    000a  82                      0 #L0     Length for K is 4 bits
-                            000,01  #L0     0+3 unused
-                           0        #L0     Length for O is 4 bits
-    000b  4d                   01|1 #L0     symbol P unused
-                            011     #L0     symbol Q unused
-                           0        #L0     Length for R is 4 bits
-    000c  88                000,|01 #L0     0+3 unused
-                     |100,01        #L0     total 11+4 unused
-    000d  b6                      0 #L0     Length for b is 4 bits
-                               011  #L0     symbol c unused
-                            011     #L0     symbol d unused
-    000e  27                   11|1 #L0     Length for e is 3 bits
-                         010,01     #L0     2+3 unused
-                       |0           #L0     Length for k is 4 bits
-    000f  1f                    111 #L0     Length for l is 3 bits
-                             011    #L0     symbol m unused
-                            0       #L0     Length for n is 4 bits
-                          |0        #L0     Length for o is 4 bits
-    0010  c1                 000,01 #L0     0+3 unused
-                            0       #L0     Length for s is 4 bits
-    0011  b4                   0|11 #L0     symbol t unused
-                              0     #L0     Length for u is 4 bits
-    End of table. Prefix code L0:
-     000:e   0010:\\n  0110:!   0001:O   0101:b   0011:n   0111:s
-     100:l   1010:" " 1110:K   1001:R   1101:k   1011:o   1111:u
-                         11,01      PFX     IC0 is simple with 4 code words
-    0012  2a                |2Ah|10 IC0     ? bits: I5C4
-    0013  b5 ec              00|B5h IC0     ? bits: I6+xC7
-    0015  22            0010|111011 IC0     ? bits: I8+xC5
-    0016  8c            001100|0010 IC0     ? bits: I0C14+xx
-                       0            SHAPE   False: lengths 2,2,2,2
-    0017  74                 10,0|1 PFX     D0 is simple with 3 code words
-    0018  a6                0|01110 D0      1 bit: 2last-3
-                      010011        D0      2 bits: 11xx-3
-    0019  aa                01010|1 D0      2 bits: 11xxx-3
-    ====================Meta block contents=====================
-                       |1,01        IC0     Literal: 9, copy: 5
-    001a  41                   0001 0,0=L0  O
-                            100     0,48=L0 l
-    001b  a2                   10|0 0,62=L0 l
-                            000     0,63=L0 e
-    001c  a1                  1|101 0,59=L0 k
-                           000      0,63=L0 e
-                      |1010         0,59=L0 " "
-    001d  b5                   0101 0,11=L0 b
-                          |1011     0,60=L0 o
-    001e  24                      0 0,3=D0  Distance: 2last-3=8
-                                            Seen before: "lleke"
-                              0,10  IC0     Literal: 6, copy: 7
-                         |0010      0,59=L0 \\n
-    001f  89                   1001 0,7=L0  R
-                            000     0,52=L0 e
-    0020  fa                  010|1 0,58=L0 b
-                          1111      0,63=L0 u
-    0021  eb                  011|1 0,59=L0 s
-                         11,01      0,3=D0  Absolute value: 12 (pos 8)
-                                            Seen before: "olleke\\n"
-    0022  db                 01,1|1 IC0     Literal: 0, copy: 15
-                      |110,11       0,3=D0  Absolute value: 27 (pos 0)
-                                            Seen before: "Olleke bolleke\\n"
-    0023  f8                     00 IC0     Literal: 5, copy: 4
-                             1110   0,7=L0  K
-    0024  2c                  00|11 0,52=L0 n
-                          1011      0,62=L0 o
-    0025  0d                   1|00 0,59=L0 l
-                           0110     0,63=L0 !
-    """,
-
-'file': """
-    >>> try: Layout(BitStream(
-    ... open("H:/Downloads/brotli-master/tests/testdata/10x10y.compressed",'rb')
-    ...     .read())).processStream()
-    ... except NotImplementedError: pass
-    addr  hex               binary context explanation
-    -----------------------Stream header------------------------
-    0000  1b                   1011 WSIZE   windowsize=(1<<22)-16=4194288
-    ======================Metablock header======================
-                              1     LAST    Last block: 1: True
-                             0      EMPTY   Empty block: 0: False
-    0001  13 00        |00h|13h,|00 MLEN    data length: 0013h+1=20
-    -------------------Block type descriptors-------------------
-    0003  00                      0 BT#L    0: 1 literal block type
-                                 0  BT#I    0: 1 insert&copy block type
-                                0   BT#D    0: 1 distance block type
-    ------------------Distance code parameters------------------
-    0004  a4               0|000,00 DIST    0 postfix bits and 0000<<0=0 direct codes
-    -----------------------Context modes------------------------
-                         10         LC0     Context mode for type 0: 2(UTF8)
-    ------------------------Context maps------------------------
-                        0           LT#     0: 1 literal prefix tree
-                       0            DT#     0: 1 distance prefix tree
-    ---------------------Prefix code lists----------------------
-    0005  b0                 0|1,01 PFX     L0 is simple with 2 code words
-    0006  b2              0|1011000 L0      1 bit: X
-    0007  ea              0|1011001 L0      1 bit: Y
-                     01,01          PFX     IC0 is simple with 2 code words
-    0008  81            0000001|111 IC0     1 bit: I1C9&D=0
-    0009  47 02             0|47h|1 IC0     1 bit: I1C9
-                       00,01        PFX     D0 is simple with 1 code word
-    000b  8a                010|000 D0      0 bits: 10x-3
-    ====================Meta block contents=====================
-                           1        IC0     Literal: 1, copy: 9
-                          0         0,0=L0  X
-                      0,()          0,3=D0  Absolute value: 1 (pos 0)
-                                            Seen before: "XXXXXXXXX"
-                     0              IC0     Literal: 1, copy: 9, same distance
-                   |1               0,54=L0 Y
-                                            Seen before: "YYYYYYYYY"
-    """,
-
-'XY': """
-    >>> try: Layout(BitStream(brotli.compress('X'*10+'Y'*10))).processStream()
-    ... except NotImplementedError: pass
-    addr  hex               binary context explanation
-    -----------------------Stream header------------------------
-    0000  1b                   1011 WSIZE   windowsize=(1<<22)-16=4194288
-    ======================Metablock header======================
-                              1     LAST    Last block: 1: True
-                             0      EMPTY   Empty block: 0: False
-    0001  13 00        |00h|13h,|00 MLEN    data length: 0013h+1=20
-    -------------------Block type descriptors-------------------
-    0003  00                      0 BT#L    0: 1 literal block type
-                                 0  BT#I    0: 1 insert&copy block type
-                                0   BT#D    0: 1 distance block type
-    ------------------Distance code parameters------------------
-    0004  a4               0|000,00 DIST    0 postfix bits and 0000<<0=0 direct codes
-    -----------------------Context modes------------------------
-                         10         LC0     Context mode for type 0: 2(UTF8)
-    ------------------------Context maps------------------------
-                        0           LT#     0: 1 literal prefix tree
-                       0            DT#     0: 1 distance prefix tree
-    ---------------------Prefix code lists----------------------
-    0005  b0                 0|1,01 PFX     L0 is simple with 2 code words
-    0006  b2              0|1011000 L0      1 bit: X
-    0007  82              0|1011001 L0      1 bit: Y
-                     00,01          PFX     IC0 is simple with 1 code word
-    0008  84            0000100|100 IC0     0 bits: I4C6&D=0
-    0009  00                 00,0|1 PFX     D0 is simple with 1 code word
-    000a  e0                0|00000 D0      0 bits: last
-    ====================Meta block contents=====================
-                          ()        IC0     Literal: 4, copy: 6, same distance
-                         0          0,0=L0  X
-                        0           0,52=L0 X
-                       0            0,54=L0 X
-                      0             0,54=L0 X
-                                            Seen before: "XXXXXX"
-                    ()              IC0     Literal: 4, copy: 6, same distance
-                   1                0,54=L0 Y
-                  1                 0,54=L0 Y
-                |1                  0,54=L0 Y
-    000b  01                      1 0,54=L0 Y
-                                            Seen before: "YYYYYY"
-    """,
-
-'empty': """
-    >>> try: Layout(BitStream(b'\\x81\\x16\\x00\\x58')).processStream()
-    ... except NotImplementedError: pass
-    addr  hex               binary context explanation
-    -----------------------Stream header------------------------
-    0000  81                0000001 WSIZE   windowsize=(1<<17)-16=131056
-    ======================Metablock header======================
-                          |1        LAST    Last block: 1: True
-    0001  16                      0 EMPTY   Empty block: 0: False
-                                11  MLEN    11: empty block
-                               0    RSVD    Reserved (must be zero)
-    0002  00           000000|00,01 SKIP    skip length: 0h+1=1
-                    |00             SKIP    2 bits ignored
-    Skipping to 4
-    """,
-
-}
-
-if __name__=='__main__':
-    import sys
-    if len(sys.argv)>1:
-        l = Layout(BitStream(open(sys.argv[1],'rb').read()))
-        l.processStream()
-    else:
-        sys.path.append("h:/Persoonlijk/bin")
-        try:
-            import brotli
-            open('brotlidump.br', 'wb').write(
-                brotli.compress(
-                    open('brotlidump.py', 'r').read()
-                ))
-            olleke = BitStream(brotli.compress(
-                'Olleke bolleke\nRebusolleke\nOlleke bolleke\nKnol!'))
-        except ImportError: pass
-        import doctest
-        doctest.testmod(optionflags=doctest.REPORT_NDIFF
-            #|doctest.FAIL_FAST
-            )
author	Eugene Kliuchnikov <eustas@google.com>	2016-12-20 18:00:51 +0100
committer	GitHub <noreply@github.com>	2016-12-20 18:00:51 +0100
commit	fd96151b2a04667dac73242b72bc62e007b2b543 (patch)
tree	7442df2539cd00495f6553e22ff78b4e2c5ce4d3 /python
parent	5814438791fb2d4394b46e5682a96b68cd092803 (diff)
download	brotli-fd96151b2a04667dac73242b72bc62e007b2b543.zip brotli-fd96151b2a04667dac73242b72bc62e007b2b543.tar.gz brotli-fd96151b2a04667dac73242b72bc62e007b2b543.tar.bz2