From b5033d0e1e91d5c17e6b1079242e821e1bf3bdeb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20Ch=C3=BDlek?= Date: Thu, 8 Feb 2018 12:48:24 +0100 Subject: Fix brotlidump.py crashing when complex prefix code has exactly 1 non-zero code length (#635) According to the format specification regarding complex prefix codes: > If there are at least two non-zero code lengths, any trailing zero > code lengths are omitted, i.e., the last code length in the > sequence must be non-zero. In this case, the sum of (32 >> code > length) over all the non-zero code lengths must equal to 32. > If the lengths have been read for the entire code length alphabet > and there was only one non-zero code length, then the prefix code > has one symbol whose code has zero length. The script does not handle the case where there is just 1 non-zero code length, in which the sum rule doesn't apply, which causes a StopIteration exception when it attempts to read past the list boundaries. An example of such a file is tests/testdata/mapsdatazrh.compressed. I made sure this change doesn't break anything by processing all *.compressed files from the testdata folder with no thrown exceptions. 
--- research/brotlidump.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'research') diff --git a/research/brotlidump.py b/research/brotlidump.py index a625368..7018934 100644 --- a/research/brotlidump.py +++ b/research/brotlidump.py @@ -1467,16 +1467,17 @@ class Layout: #we use it for display until now; definition comes below lengthCode = LengthAlphabet('#'+alphabet.name) lengthIter = iter(lengths) - while total<32: + lengthsLeft = len(lengths) + while total<32 and lengthsLeft>0: + lengthsLeft -= 1 newSymbol = next(lengthIter) lol.description = str(lengthCode[newSymbol]) length = self.verboseRead(lol) if length: codeLengths[newSymbol] = length total += 32>>length - if total>=32: - break if total>32: raise ValueError("Stream format") + if len(codeLengths)==1: codeLengths[list(codeLengths.keys())[0]] = 0 #Now set the encoding of the lengthCode lengthCode.setLength(codeLengths) print("***** Lengths for {} will be coded as:".format(alphabet.name)) -- cgit v1.1