1 files changed, 185 insertions, 0 deletions
diff --git a/java/org/brotli/enc/PreparedDictionaryGenerator.java b/java/org/brotli/enc/PreparedDictionaryGenerator.java
new file mode 100644
index 0000000..1257b92
--- /dev/null
+++ b/java/org/brotli/enc/PreparedDictionaryGenerator.java
@@ -0,0 +1,185 @@
+/* Copyright 2017 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+package org.brotli.enc;
+
+import java.nio.Buffer;
+import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
+import java.nio.IntBuffer;
+import java.nio.ShortBuffer;
+
+/**
+ * Java prepared (raw) dictionary producer.
+ */
+public class PreparedDictionaryGenerator {
+
+  private static final int MAGIC = 0xDEBCEDE0;
+  private static final long HASH_MULTIPLIER = 0x1fe35a7bd3579bd3L;
+
+  private static class PreparedDictionaryImpl implements PreparedDictionary {
+    private final ByteBuffer data;
+
+    private PreparedDictionaryImpl(ByteBuffer data) {
+      this.data = data;
+    }
+
+    @Override
+    public ByteBuffer getData() {
+      return data;
+    }
+  }
+
+  // Disallow instantiation.
+  private PreparedDictionaryGenerator() { }
+
+  public static PreparedDictionary generate(ByteBuffer src) {
+    return generate(src, 17, 3, 40, 5);
+  }
+
+  public static PreparedDictionary generate(ByteBuffer src,
+      int bucketBits, int slotBits, int hashBits, int blockBits) {
+    ((Buffer) src).clear();  // Just in case...
+    if (blockBits > 12) {
+      throw new IllegalArgumentException("blockBits is too big");
+    }
+    if (bucketBits >= 24) {
+      throw new IllegalArgumentException("bucketBits is too big");
+    }
+    if (bucketBits - slotBits >= 16) {
+      throw new IllegalArgumentException("slotBits is too small");
+    }
+    int bucketLimit = 1 << blockBits;
+    int numBuckets = 1 << bucketBits;
+    int numSlots = 1 << slotBits;
+    int slotMask = numSlots - 1;
+    int hashShift = 64 - bucketBits;
+    long hashMask = (~0L) >>> (64 - hashBits);
+    int sourceSize = src.capacity();
+    if (sourceSize < 8) {
+      throw new IllegalArgumentException("src is too short");
+    }
+
+    /* Step 1: create "bloated" hasher. */
+    short[] num = new short[numBuckets];
+    int[] bucketHeads = new int[numBuckets];
+    int[] nextBucket = new int[sourceSize];
+
+    long accumulator = 0;
+    for (int i = 0; i < 7; ++i) {
+      accumulator |= (src.get(i) & 0xFFL) << (8 * i);
+    }
+    accumulator <<= 8;
+    /* TODO: apply custom "store" order. */
+    for (int i = 0; i + 7 < sourceSize; ++i) {
+      accumulator = (accumulator >>> 8) | ((src.get(i + 7) & 0xFFL) << 56);
+      long h = (accumulator & hashMask) * HASH_MULTIPLIER;
+      int key = (int) (h >>> hashShift);
+      int count = num[key];
+      nextBucket[i] = (count == 0) ? -1 : bucketHeads[key];
+      bucketHeads[key] = i;
+      count++;
+      if (count > bucketLimit) {
+        count = bucketLimit;
+      }
+      num[key] = (short) count;
+    }
+
+    /* Step 2: find slot limits. */
+    int[] slotLimit = new int[numSlots];
+    int[] slotSize = new int[numSlots];
+    int totalItems = 0;
+    for (int i = 0; i < numSlots; ++i) {
+      boolean overflow = false;
+      slotLimit[i] = bucketLimit;
+      while (true) {
+        overflow = false;
+        int limit = slotLimit[i];
+        int count = 0;
+        for (int j = i; j < numBuckets; j += numSlots) {
+          int size = num[j];
+          /* Last chain may span behind 64K limit; overflow happens only if
+             we are about to use 0xFFFF+ as item offset. */
+          if (count >= 0xFFFF) {
+            overflow = true;
+            break;
+          }
+          if (size > limit) {
+            size = limit;
+          }
+          count += size;
+        }
+        if (!overflow) {
+          slotSize[i] = count;
+          totalItems += count;
+          break;
+        }
+        slotLimit[i]--;
+      }
+    }
+
+    /* Step 3: transfer data to "slim" hasher. */
+    int part0 = 6 * 4;
+    int part1 = numSlots * 4;
+    int part2 = numBuckets * 2;
+    int part3 = totalItems * 4;
+    int allocSize = part0 + part1 + part2 + part3 + sourceSize;
+    ByteBuffer flat = ByteBuffer.allocateDirect(allocSize);
+    ByteBuffer pointer = flat.slice();
+    pointer.order(ByteOrder.nativeOrder());
+
+    IntBuffer struct = pointer.asIntBuffer();
+    pointer.position(pointer.position() + part0);
+    IntBuffer slotOffsets = pointer.asIntBuffer();
+    pointer.position(pointer.position() + part1);
+    ShortBuffer heads = pointer.asShortBuffer();
+    pointer.position(pointer.position() + part2);
+    IntBuffer items = pointer.asIntBuffer();
+    pointer.position(pointer.position() + part3);
+    ByteBuffer sourceCopy = pointer.slice();
+
+    /* magic         */ struct.put(0, MAGIC);
+    /* source_offset */ struct.put(1, totalItems);
+    /* source_size   */ struct.put(2, sourceSize);
+    /* hash_bits     */ struct.put(3, hashBits);
+    /* bucket_bits   */ struct.put(4, bucketBits);
+    /* slot_bits     */ struct.put(5, slotBits);
+
+    totalItems = 0;
+    for (int i = 0; i < numSlots; ++i) {
+      slotOffsets.put(i, totalItems);
+      totalItems += slotSize[i];
+      slotSize[i] = 0;
+    }
+
+    for (int i = 0; i < numBuckets; ++i) {
+      int slot = i & slotMask;
+      int count = num[i];
+      if (count > slotLimit[slot]) {
+        count = slotLimit[slot];
+      }
+      if (count == 0) {
+        heads.put(i, (short) 0xFFFF);
+        continue;
+      }
+      int cursor = slotSize[slot];
+      heads.put(i, (short) cursor);
+      cursor += slotOffsets.get(slot);
+      slotSize[slot] += count;
+      int pos = bucketHeads[i];
+      for (int j = 0; j < count; j++) {
+        items.put(cursor++, pos);
+        pos = nextBucket[pos];
+      }
+      cursor--;
+      items.put(cursor, items.get(cursor) | 0x80000000);
+    }
+
+    sourceCopy.put(src);
+
+    return new PreparedDictionaryImpl(flat);
+  }
+}