diff options
Diffstat (limited to 'java/org/brotli/enc/PreparedDictionaryGenerator.java')
-rw-r--r-- | java/org/brotli/enc/PreparedDictionaryGenerator.java | 185 |
1 files changed, 185 insertions, 0 deletions
diff --git a/java/org/brotli/enc/PreparedDictionaryGenerator.java b/java/org/brotli/enc/PreparedDictionaryGenerator.java new file mode 100644 index 0000000..1257b92 --- /dev/null +++ b/java/org/brotli/enc/PreparedDictionaryGenerator.java @@ -0,0 +1,185 @@ +/* Copyright 2017 Google Inc. All Rights Reserved. + + Distributed under MIT license. + See file LICENSE for detail or copy at https://opensource.org/licenses/MIT +*/ + +package org.brotli.enc; + +import java.nio.Buffer; +import java.nio.ByteBuffer; +import java.nio.ByteOrder; +import java.nio.IntBuffer; +import java.nio.ShortBuffer; + +/** + * Java prepared (raw) dictionary producer. + */ +public class PreparedDictionaryGenerator { + + private static final int MAGIC = 0xDEBCEDE0; + private static final long HASH_MULTIPLIER = 0x1fe35a7bd3579bd3L; + + private static class PreparedDictionaryImpl implements PreparedDictionary { + private final ByteBuffer data; + + private PreparedDictionaryImpl(ByteBuffer data) { + this.data = data; + } + + @Override + public ByteBuffer getData() { + return data; + } + } + + // Disallow instantiation. + private PreparedDictionaryGenerator() { } + + public static PreparedDictionary generate(ByteBuffer src) { + return generate(src, 17, 3, 40, 5); + } + + public static PreparedDictionary generate(ByteBuffer src, + int bucketBits, int slotBits, int hashBits, int blockBits) { + ((Buffer) src).clear(); // Just in case... + if (blockBits > 12) { + throw new IllegalArgumentException("blockBits is too big"); + } + if (bucketBits >= 24) { + throw new IllegalArgumentException("bucketBits is too big"); + } + if (bucketBits - slotBits >= 16) { + throw new IllegalArgumentException("slotBits is too small"); + } + int bucketLimit = 1 << blockBits; + int numBuckets = 1 << bucketBits; + int numSlots = 1 << slotBits; + int slotMask = numSlots - 1; + int hashShift = 64 - bucketBits; + long hashMask = (~0L) >>> (64 - hashBits); + int sourceSize = src.capacity(); + if (sourceSize < 8) { + throw new IllegalArgumentException("src is too short"); + } + + /* Step 1: create "bloated" hasher. */ + short[] num = new short[numBuckets]; + int[] bucketHeads = new int[numBuckets]; + int[] nextBucket = new int[sourceSize]; + + long accumulator = 0; + for (int i = 0; i < 7; ++i) { + accumulator |= (src.get(i) & 0xFFL) << (8 * i); + } + accumulator <<= 8; + /* TODO: apply custom "store" order. */ + for (int i = 0; i + 7 < sourceSize; ++i) { + accumulator = (accumulator >>> 8) | ((src.get(i + 7) & 0xFFL) << 56); + long h = (accumulator & hashMask) * HASH_MULTIPLIER; + int key = (int) (h >>> hashShift); + int count = num[key]; + nextBucket[i] = (count == 0) ? -1 : bucketHeads[key]; + bucketHeads[key] = i; + count++; + if (count > bucketLimit) { + count = bucketLimit; + } + num[key] = (short) count; + } + + /* Step 2: find slot limits. */ + int[] slotLimit = new int[numSlots]; + int[] slotSize = new int[numSlots]; + int totalItems = 0; + for (int i = 0; i < numSlots; ++i) { + boolean overflow = false; + slotLimit[i] = bucketLimit; + while (true) { + overflow = false; + int limit = slotLimit[i]; + int count = 0; + for (int j = i; j < numBuckets; j += numSlots) { + int size = num[j]; + /* Last chain may span behind 64K limit; overflow happens only if + we are about to use 0xFFFF+ as item offset. */ + if (count >= 0xFFFF) { + overflow = true; + break; + } + if (size > limit) { + size = limit; + } + count += size; + } + if (!overflow) { + slotSize[i] = count; + totalItems += count; + break; + } + slotLimit[i]--; + } + } + + /* Step 3: transfer data to "slim" hasher. */ + int part0 = 6 * 4; + int part1 = numSlots * 4; + int part2 = numBuckets * 2; + int part3 = totalItems * 4; + int allocSize = part0 + part1 + part2 + part3 + sourceSize; + ByteBuffer flat = ByteBuffer.allocateDirect(allocSize); + ByteBuffer pointer = flat.slice(); + pointer.order(ByteOrder.nativeOrder()); + + IntBuffer struct = pointer.asIntBuffer(); + pointer.position(pointer.position() + part0); + IntBuffer slotOffsets = pointer.asIntBuffer(); + pointer.position(pointer.position() + part1); + ShortBuffer heads = pointer.asShortBuffer(); + pointer.position(pointer.position() + part2); + IntBuffer items = pointer.asIntBuffer(); + pointer.position(pointer.position() + part3); + ByteBuffer sourceCopy = pointer.slice(); + + /* magic */ struct.put(0, MAGIC); + /* source_offset */ struct.put(1, totalItems); + /* source_size */ struct.put(2, sourceSize); + /* hash_bits */ struct.put(3, hashBits); + /* bucket_bits */ struct.put(4, bucketBits); + /* slot_bits */ struct.put(5, slotBits); + + totalItems = 0; + for (int i = 0; i < numSlots; ++i) { + slotOffsets.put(i, totalItems); + totalItems += slotSize[i]; + slotSize[i] = 0; + } + + for (int i = 0; i < numBuckets; ++i) { + int slot = i & slotMask; + int count = num[i]; + if (count > slotLimit[slot]) { + count = slotLimit[slot]; + } + if (count == 0) { + heads.put(i, (short) 0xFFFF); + continue; + } + int cursor = slotSize[slot]; + heads.put(i, (short) cursor); + cursor += slotOffsets.get(slot); + slotSize[slot] += count; + int pos = bucketHeads[i]; + for (int j = 0; j < count; j++) { + items.put(cursor++, pos); + pos = nextBucket[pos]; + } + cursor--; + items.put(cursor, items.get(cursor) | 0x80000000); + } + + sourceCopy.put(src); + + return new PreparedDictionaryImpl(flat); + } +} |