author     Eugene Kliuchnikov <eustas@google.com>  2016-06-13 11:01:04 +0200
committer  Eugene Kliuchnikov <eustas@google.com>  2016-06-13 11:01:04 +0200
commit     b972c67780f03256a3fbf81dc3350a4bf00aa4ad (patch)
tree       908b04861a1be24988db41ca390b0777679c833f
parent     63111b21e8e35df764b0ebb7891533291d77ed18 (diff)
Convert encoder to plain C.
-rw-r--r--  dec/port.h                                2
-rw-r--r--  enc/Makefile                              8
-rw-r--r--  enc/backward_references.c              1184
-rw-r--r--  enc/backward_references.h               130
-rw-r--r--  enc/backward_references_inc.h           156
-rw-r--r--  enc/bit_cost.c                           35
-rw-r--r--  enc/bit_cost.h                          136
-rw-r--r--  enc/bit_cost_inc.h                      127
-rw-r--r--  enc/block_encoder_inc.h                  33
-rw-r--r--  enc/block_splitter.c                    515
-rw-r--r--  enc/block_splitter.h                     81
-rw-r--r--  enc/block_splitter_inc.h                431
-rw-r--r--  enc/brotli_bit_stream.c                1378
-rw-r--r--  enc/brotli_bit_stream.h                 188
-rw-r--r--  enc/cluster.c                            56
-rw-r--r--  enc/cluster.h                           323
-rw-r--r--  enc/cluster_inc.h                       315
-rw-r--r--  enc/command.h                           148
-rw-r--r--  enc/compress_fragment.c                 585
-rw-r--r--  enc/compress_fragment.h                  29
-rw-r--r--  enc/compress_fragment_two_pass.c        404
-rw-r--r--  enc/compress_fragment_two_pass.h         27
-rw-r--r--  enc/compressor.cc                       138
-rw-r--r--  enc/compressor.h                        141
-rw-r--r--  enc/context.h                            21
-rw-r--r--  enc/dictionary_hash.h                     8
-rw-r--r--  enc/encode.c                           1668
-rw-r--r--  enc/encode.h                            410
-rw-r--r--  enc/encode_parallel.cc                  237
-rw-r--r--  enc/encode_parallel.h                     2
-rw-r--r--  enc/entropy_encode.c                    235
-rw-r--r--  enc/entropy_encode.h                    130
-rw-r--r--  enc/entropy_encode_static.h              85
-rw-r--r--  enc/fast_log.h                           20
-rw-r--r--  enc/find_match_length.h                  22
-rw-r--r--  enc/hash.h                             1322
-rw-r--r--  enc/hash_longest_match_inc.h            285
-rw-r--r--  enc/hash_longest_match_quickly_inc.h    268
-rw-r--r--  enc/histogram.c                         106
-rw-r--r--  enc/histogram.h                         121
-rw-r--r--  enc/histogram_inc.h                      51
-rw-r--r--  enc/literal_cost.c                      154
-rw-r--r--  enc/literal_cost.h                       13
-rw-r--r--  enc/memory.c                            181
-rw-r--r--  enc/memory.h                             62
-rw-r--r--  enc/metablock.c                         860
-rw-r--r--  enc/metablock.h                         124
-rw-r--r--  enc/metablock_inc.h                     183
-rw-r--r--  enc/port.h                               61
-rw-r--r--  enc/prefix.h                             87
-rw-r--r--  enc/ringbuffer.h                        196
-rw-r--r--  enc/static_dict.c                       226
-rw-r--r--  enc/static_dict.h                        21
-rw-r--r--  enc/static_dict_lut.h                    16
-rw-r--r--  enc/streams.h                             1
-rw-r--r--  enc/transform.h                         248
-rw-r--r--  enc/utf8_util.c                          25
-rw-r--r--  enc/utf8_util.h                          14
-rw-r--r--  enc/write_bits.h                         33
-rwxr-xr-x  python/tests/roundtrip_test.py            2
-rwxr-xr-x  tests/roundtrip_test.sh                   2
-rw-r--r--  tools/bro.cc                            159
62 files changed, 8121 insertions(+), 6108 deletions(-)
diff --git a/dec/port.h b/dec/port.h
index 0122c55..31e0295 100644
--- a/dec/port.h
+++ b/dec/port.h
@@ -81,7 +81,7 @@
#endif
#if defined(BROTLI_DEBUG) || defined(BROTLI_ENABLE_LOG)
-static inline void BrotliDump(const char* f, int l, const char* fn) {
+static BROTLI_INLINE void BrotliDump(const char* f, int l, const char* fn) {
fprintf(stderr, "%s:%d (%s)\n", f, l, fn);
fflush(stderr);
}
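
The hunk above swaps C++'s `inline` keyword for the BROTLI_INLINE macro so the header stays valid when compiled as plain C. The macro's real definition lives elsewhere in port.h and is not shown in this hunk; the following is only a minimal sketch of how such a portability macro is commonly built, with MY_INLINE and LogLine as illustrative names, not brotli API:

    #include <stdio.h>

    /* Hypothetical stand-in for BROTLI_INLINE; the real definition
       in port.h may differ. */
    #if defined(_MSC_VER)
    #define MY_INLINE __forceinline
    #elif defined(__GNUC__)
    #define MY_INLINE __inline__
    #else
    #define MY_INLINE  /* strict C89: no inline keyword at all */
    #endif

    static MY_INLINE void LogLine(const char* file, int line) {
      fprintf(stderr, "%s:%d\n", file, line);
    }

    int main(void) {
      LogLine(__FILE__, __LINE__);
      return 0;
    }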
diff --git a/enc/Makefile b/enc/Makefile
index 12772aa..7a87e07 100644
--- a/enc/Makefile
+++ b/enc/Makefile
@@ -2,10 +2,10 @@
include ../shared.mk
-OBJS = backward_references.o block_splitter.o brotli_bit_stream.o \
- compress_fragment.o compress_fragment_two_pass.o encode.o \
- encode_parallel.o entropy_encode.o histogram.o literal_cost.o \
- metablock.o static_dict.o streams.o utf8_util.o
+OBJS = backward_references.o bit_cost.o block_splitter.o brotli_bit_stream.o \
+ cluster.o compress_fragment.o compress_fragment_two_pass.o compressor.o \
+ encode.o encode_parallel.o entropy_encode.o histogram.o literal_cost.o \
+ memory.o metablock.o static_dict.o streams.o utf8_util.o
all : $(OBJS)
clean :
diff --git a/enc/backward_references.c b/enc/backward_references.c
index 3900500..a979aad 100644
--- a/enc/backward_references.c
+++ b/enc/backward_references.c
@@ -8,164 +8,229 @@
#include "./backward_references.h"
-#include <algorithm>
-#include <limits>
-#include <vector>
+#include <math.h> /* INFINITY */
+#include <string.h> /* memcpy, memset */
+#include "../common/constants.h"
#include "../common/types.h"
#include "./command.h"
#include "./fast_log.h"
+#include "./find_match_length.h"
#include "./literal_cost.h"
+#include "./memory.h"
+#include "./port.h"
+#include "./prefix.h"
-namespace brotli {
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
/* The maximum length for which the zopflification uses distinct distances. */
-static const uint16_t kMaxZopfliLen = 325;
+static const uint16_t kMaxZopfliLenQuality10 = 150;
+static const uint16_t kMaxZopfliLenQuality11 = 325;
+
+static const float kInfinity = INFINITY;
+
+void BrotliInitZopfliNodes(ZopfliNode* array, size_t length) {
+ ZopfliNode stub;
+ size_t i;
+ stub.length = 1;
+ stub.distance = 0;
+ stub.insert_length = 0;
+ stub.u.cost = kInfinity;
+ for (i = 0; i < length; ++i) array[i] = stub;
+}
+
+static BROTLI_INLINE uint32_t ZopfliNodeCopyLength(const ZopfliNode* self) {
+ return self->length & 0xffffff;
+}
+
+static BROTLI_INLINE uint32_t ZopfliNodeLengthCode(const ZopfliNode* self) {
+ const uint32_t modifier = self->length >> 24;
+ return ZopfliNodeCopyLength(self) + 9u - modifier;
+}
+
+static BROTLI_INLINE uint32_t ZopfliNodeCopyDistance(const ZopfliNode* self) {
+ return self->distance & 0x1ffffff;
+}
+
+static BROTLI_INLINE uint32_t ZopfliNodeDistanceCode(const ZopfliNode* self) {
+ const uint32_t short_code = self->distance >> 25;
+ return short_code == 0 ? ZopfliNodeCopyDistance(self) + 15 : short_code - 1;
+}
+
+static BROTLI_INLINE uint32_t ZopfliNodeCommandLength(const ZopfliNode* self) {
+ return ZopfliNodeCopyLength(self) + self->insert_length;
+}
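
These accessors undo the bit packing performed later by UpdateZopfliNode: the low 24 bits of |length| hold the copy length and the high 8 bits a length-code modifier, while the low 25 bits of |distance| hold the copy distance and the high 7 bits the distance short code plus one. A self-contained round-trip check of those formulas, as a sketch with made-up values:

    #include <assert.h>
    #include <stdint.h>

    int main(void) {
      const uint32_t len = 120, len_code = 117, dist = 4096;
      /* Pack exactly as UpdateZopfliNode does (short code 0 here). */
      const uint32_t length = (uint32_t)(len | ((len + 9u - len_code) << 24));
      const uint32_t distance = (uint32_t)(dist | (0u << 25));
      /* Unpack with the accessors' formulas. */
      assert((length & 0xffffff) == len);                       /* copy length */
      assert((length & 0xffffff) + 9u - (length >> 24) == len_code);
      assert((distance & 0x1ffffff) == dist);                   /* copy distance */
      assert((distance >> 25) == 0);  /* => distance code is dist + 15 */
      return 0;
    }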
+
+static BROTLI_INLINE size_t MaxZopfliLenForQuality(int quality) {
+ return quality <= 10 ? kMaxZopfliLenQuality10 : kMaxZopfliLenQuality11;
+}
/* Histogram based cost model for zopflification. */
-class ZopfliCostModel {
- public:
- ZopfliCostModel(void) : min_cost_cmd_(kInfinity) {}
-
- void SetFromCommands(size_t num_bytes,
- size_t position,
- const uint8_t* ringbuffer,
- size_t ringbuffer_mask,
- const Command* commands,
- size_t num_commands,
- size_t last_insert_len) {
- std::vector<uint32_t> histogram_literal(256, 0);
- std::vector<uint32_t> histogram_cmd(kNumCommandPrefixes, 0);
- std::vector<uint32_t> histogram_dist(kNumDistancePrefixes, 0);
-
- size_t pos = position - last_insert_len;
- for (size_t i = 0; i < num_commands; i++) {
- size_t inslength = commands[i].insert_len_;
- size_t copylength = commands[i].copy_len();
- size_t distcode = commands[i].dist_prefix_;
- size_t cmdcode = commands[i].cmd_prefix_;
-
- histogram_cmd[cmdcode]++;
- if (cmdcode >= 128) histogram_dist[distcode]++;
-
- for (size_t j = 0; j < inslength; j++) {
- histogram_literal[ringbuffer[(pos + j) & ringbuffer_mask]]++;
- }
+typedef struct ZopfliCostModel {
+ /* The insert and copy length symbols. */
+ float cost_cmd_[BROTLI_NUM_COMMAND_SYMBOLS];
+ float cost_dist_[BROTLI_NUM_DISTANCE_SYMBOLS];
+ /* Cumulative costs of literals per position in the stream. */
+ float* literal_costs_;
+ float min_cost_cmd_;
+ size_t num_bytes_;
+} ZopfliCostModel;
+
+static void InitZopfliCostModel(
+ MemoryManager* m, ZopfliCostModel* self, size_t num_bytes) {
+ self->num_bytes_ = num_bytes;
+ self->literal_costs_ = BROTLI_ALLOC(m, float, num_bytes + 2);
+ if (BROTLI_IS_OOM(m)) return;
+}
- pos += inslength + copylength;
+static void CleanupZopfliCostModel(MemoryManager* m, ZopfliCostModel* self) {
+ BROTLI_FREE(m, self->literal_costs_);
+}
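
InitZopfliCostModel and CleanupZopfliCostModel follow the MemoryManager idiom used throughout this conversion: allocate through the manager, test BROTLI_IS_OOM at every call site, and unwind immediately on failure. A minimal sketch of that idiom; the Mini* names are hypothetical, not brotli API:

    #include <stdlib.h>

    typedef struct { int is_oom; } MiniMemoryManager;  /* hypothetical */

    static void* MiniAlloc(MiniMemoryManager* m, size_t size) {
      void* p = malloc(size);
      if (p == NULL) m->is_oom = 1;  /* sticky flag, like BROTLI_IS_OOM */
      return p;
    }

    static int Work(MiniMemoryManager* m) {
      float* costs = (float*)MiniAlloc(m, 1000 * sizeof(float));
      if (m->is_oom) return 0;  /* caller checks the same flag and unwinds */
      /* ... use costs ... */
      free(costs);
      return 1;
    }

    int main(void) {
      MiniMemoryManager m = { 0 };
      return Work(&m) ? 0 : 1;
    }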
+
+static void SetCost(const uint32_t* histogram, size_t histogram_size,
+ float* cost) {
+ size_t sum = 0;
+ float log2sum;
+ size_t i;
+ for (i = 0; i < histogram_size; i++) {
+ sum += histogram[i];
+ }
+ log2sum = (float)FastLog2(sum);
+ for (i = 0; i < histogram_size; i++) {
+ if (histogram[i] == 0) {
+ cost[i] = log2sum + 2;
+ continue;
}
- std::vector<float> cost_literal;
- Set(histogram_literal, &cost_literal);
- Set(histogram_cmd, &cost_cmd_);
- Set(histogram_dist, &cost_dist_);
+ /* Shannon bits for this symbol. */
+ cost[i] = log2sum - (float)FastLog2(histogram[i]);
- for (uint32_t i = 0; i < kNumCommandPrefixes; ++i) {
- min_cost_cmd_ = std::min(min_cost_cmd_, cost_cmd_[i]);
- }
+ /* Cannot be coded with less than 1 bit */
+ if (cost[i] < 1) cost[i] = 1;
+ }
+}
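
SetCost charges each symbol its Shannon cost relative to the histogram total, gives absent symbols log2(sum) + 2 bits, and floors everything at 1 bit. A tiny worked example, using log2() from math.h as a stand-in for FastLog2(): histogram {4, 2, 2, 0} with sum 8 yields costs {1, 2, 2, 5}.

    #include <math.h>
    #include <stdio.h>

    int main(void) {
      const unsigned histogram[4] = { 4, 2, 2, 0 };
      unsigned sum = 0;
      double log2sum;
      int i;
      for (i = 0; i < 4; i++) sum += histogram[i];
      log2sum = log2((double)sum);  /* stand-in for FastLog2 */
      for (i = 0; i < 4; i++) {
        double cost = histogram[i] == 0
            ? log2sum + 2                            /* absent symbol */
            : log2sum - log2((double)histogram[i]);  /* Shannon bits */
        if (cost < 1) cost = 1;                      /* 1-bit floor */
        printf("symbol %d: %.1f bits\n", i, cost);
      }
      return 0;  /* prints 1.0, 2.0, 2.0, 5.0 */
    }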
- literal_costs_.resize(num_bytes + 1);
- literal_costs_[0] = 0.0;
- for (size_t i = 0; i < num_bytes; ++i) {
- literal_costs_[i + 1] = literal_costs_[i] +
- cost_literal[ringbuffer[(position + i) & ringbuffer_mask]];
+static void ZopfliCostModelSetFromCommands(ZopfliCostModel* self,
+ size_t position,
+ const uint8_t* ringbuffer,
+ size_t ringbuffer_mask,
+ const Command* commands,
+ size_t num_commands,
+ size_t last_insert_len) {
+ uint32_t histogram_literal[BROTLI_NUM_LITERAL_SYMBOLS];
+ uint32_t histogram_cmd[BROTLI_NUM_COMMAND_SYMBOLS];
+ uint32_t histogram_dist[BROTLI_NUM_DISTANCE_SYMBOLS];
+ float cost_literal[BROTLI_NUM_LITERAL_SYMBOLS];
+ size_t pos = position - last_insert_len;
+ float min_cost_cmd = kInfinity;
+ size_t i;
+ float* cost_cmd = self->cost_cmd_;
+
+ memset(histogram_literal, 0, sizeof(histogram_literal));
+ memset(histogram_cmd, 0, sizeof(histogram_cmd));
+ memset(histogram_dist, 0, sizeof(histogram_dist));
+
+ for (i = 0; i < num_commands; i++) {
+ size_t inslength = commands[i].insert_len_;
+ size_t copylength = CommandCopyLen(&commands[i]);
+ size_t distcode = commands[i].dist_prefix_;
+ size_t cmdcode = commands[i].cmd_prefix_;
+ size_t j;
+
+ histogram_cmd[cmdcode]++;
+ if (cmdcode >= 128) histogram_dist[distcode]++;
+
+ for (j = 0; j < inslength; j++) {
+ histogram_literal[ringbuffer[(pos + j) & ringbuffer_mask]]++;
}
+
+ pos += inslength + copylength;
}
- void SetFromLiteralCosts(size_t num_bytes,
- size_t position,
- const uint8_t* ringbuffer,
- size_t ringbuffer_mask) {
- literal_costs_.resize(num_bytes + 2);
- EstimateBitCostsForLiterals(position, num_bytes, ringbuffer_mask,
- ringbuffer, &literal_costs_[1]);
- literal_costs_[0] = 0.0;
- for (size_t i = 0; i < num_bytes; ++i) {
- literal_costs_[i + 1] += literal_costs_[i];
- }
- cost_cmd_.resize(kNumCommandPrefixes);
- cost_dist_.resize(kNumDistancePrefixes);
- for (uint32_t i = 0; i < kNumCommandPrefixes; ++i) {
- cost_cmd_[i] = static_cast<float>(FastLog2(11 + i));
- }
- for (uint32_t i = 0; i < kNumDistancePrefixes; ++i) {
- cost_dist_[i] = static_cast<float>(FastLog2(20 + i));
+ SetCost(histogram_literal, BROTLI_NUM_LITERAL_SYMBOLS, cost_literal);
+ SetCost(histogram_cmd, BROTLI_NUM_COMMAND_SYMBOLS, cost_cmd);
+ SetCost(histogram_dist, BROTLI_NUM_DISTANCE_SYMBOLS, self->cost_dist_);
+
+ for (i = 0; i < BROTLI_NUM_COMMAND_SYMBOLS; ++i) {
+ min_cost_cmd = BROTLI_MIN(float, min_cost_cmd, cost_cmd[i]);
+ }
+ self->min_cost_cmd_ = min_cost_cmd;
+
+ {
+ float* literal_costs = self->literal_costs_;
+ size_t num_bytes = self->num_bytes_;
+ literal_costs[0] = 0.0;
+ for (i = 0; i < num_bytes; ++i) {
+ literal_costs[i + 1] = literal_costs[i] +
+ cost_literal[ringbuffer[(position + i) & ringbuffer_mask]];
}
- min_cost_cmd_ = static_cast<float>(FastLog2(11));
}
+}
- float GetCommandCost(
- size_t dist_code, size_t length_code, size_t insert_length) const {
- uint16_t inscode = GetInsertLengthCode(insert_length);
- uint16_t copycode = GetCopyLengthCode(length_code);
- uint16_t cmdcode = CombineLengthCodes(inscode, copycode, dist_code == 0);
- uint16_t dist_symbol;
- uint32_t distextra;
- PrefixEncodeCopyDistance(dist_code, 0, 0, &dist_symbol, &distextra);
- uint32_t distnumextra = distextra >> 24;
-
- float result = static_cast<float>(
- GetInsertExtra(inscode) + GetCopyExtra(copycode) + distnumextra);
- result += cost_cmd_[cmdcode];
- if (cmdcode >= 128) result += cost_dist_[dist_symbol];
- return result;
+static void ZopfliCostModelSetFromLiteralCosts(ZopfliCostModel* self,
+ size_t position,
+ const uint8_t* ringbuffer,
+ size_t ringbuffer_mask) {
+ float* literal_costs = self->literal_costs_;
+ float* cost_dist = self->cost_dist_;
+ float* cost_cmd = self->cost_cmd_;
+ size_t num_bytes = self->num_bytes_;
+ size_t i;
+ BrotliEstimateBitCostsForLiterals(position, num_bytes, ringbuffer_mask,
+ ringbuffer, &literal_costs[1]);
+ literal_costs[0] = 0.0;
+ for (i = 0; i < num_bytes; ++i) {
+ literal_costs[i + 1] += literal_costs[i];
}
-
- float GetLiteralCosts(size_t from, size_t to) const {
- return literal_costs_[to] - literal_costs_[from];
+ for (i = 0; i < BROTLI_NUM_COMMAND_SYMBOLS; ++i) {
+ cost_cmd[i] = (float)FastLog2(11 + (uint32_t)i);
}
-
- float GetMinCostCmd(void) const {
- return min_cost_cmd_;
+ for (i = 0; i < BROTLI_NUM_DISTANCE_SYMBOLS; ++i) {
+ cost_dist[i] = (float)FastLog2(20 + (uint32_t)i);
}
+ self->min_cost_cmd_ = (float)FastLog2(11);
+}
- private:
- void Set(const std::vector<uint32_t>& histogram, std::vector<float>* cost) {
- cost->resize(histogram.size());
- size_t sum = 0;
- for (size_t i = 0; i < histogram.size(); i++) {
- sum += histogram[i];
- }
- float log2sum = static_cast<float>(FastLog2(sum));
- for (size_t i = 0; i < histogram.size(); i++) {
- if (histogram[i] == 0) {
- (*cost)[i] = log2sum + 2;
- continue;
- }
+static BROTLI_INLINE float ZopfliCostModelGetCommandCost(
+ const ZopfliCostModel* self, uint16_t cmdcode) {
+ return self->cost_cmd_[cmdcode];
+}
- // Shannon bits for this symbol.
- (*cost)[i] = log2sum - static_cast<float>(FastLog2(histogram[i]));
+static BROTLI_INLINE float ZopfliCostModelGetDistanceCost(
+ const ZopfliCostModel* self, size_t distcode) {
+ return self->cost_dist_[distcode];
+}
- // Cannot be coded with less than 1 bit
- if ((*cost)[i] < 1) (*cost)[i] = 1;
- }
- }
+static BROTLI_INLINE float ZopfliCostModelGetLiteralCosts(
+ const ZopfliCostModel* self, size_t from, size_t to) {
+ return self->literal_costs_[to] - self->literal_costs_[from];
+}
- std::vector<float> cost_cmd_; // The insert and copy length symbols.
- std::vector<float> cost_dist_;
- // Cumulative costs of literals per position in the stream.
- std::vector<float> literal_costs_;
- float min_cost_cmd_;
-};
+static BROTLI_INLINE float ZopfliCostModelGetMinCostCmd(
+ const ZopfliCostModel* self) {
+ return self->min_cost_cmd_;
+}
-inline size_t ComputeDistanceCode(size_t distance,
- size_t max_distance,
- int quality,
- const int* dist_cache) {
+static BROTLI_INLINE size_t ComputeDistanceCode(size_t distance,
+ size_t max_distance,
+ int quality,
+ const int* dist_cache) {
if (distance <= max_distance) {
- if (distance == static_cast<size_t>(dist_cache[0])) {
+ if (distance == (size_t)dist_cache[0]) {
return 0;
- } else if (distance == static_cast<size_t>(dist_cache[1])) {
+ } else if (distance == (size_t)dist_cache[1]) {
return 1;
- } else if (distance == static_cast<size_t>(dist_cache[2])) {
+ } else if (distance == (size_t)dist_cache[2]) {
return 2;
- } else if (distance == static_cast<size_t>(dist_cache[3])) {
+ } else if (distance == (size_t)dist_cache[3]) {
return 3;
} else if (quality > 3 && distance >= 6) {
- for (size_t k = 4; k < kNumDistanceShortCodes; ++k) {
+ size_t k;
+ for (k = 4; k < BROTLI_NUM_DISTANCE_SHORT_CODES; ++k) {
size_t idx = kDistanceCacheIndex[k];
- size_t candidate =
- static_cast<size_t>(dist_cache[idx] + kDistanceCacheOffset[k]);
+ size_t candidate = (size_t)(dist_cache[idx] + kDistanceCacheOffset[k]);
static const size_t kLimits[16] = { 0, 0, 0, 0,
6, 6, 11, 11,
11, 11, 11, 11,
@@ -182,75 +247,72 @@ inline size_t ComputeDistanceCode(size_t distance,
/* REQUIRES: len >= 2, start_pos <= pos */
/* REQUIRES: cost < kInfinity, nodes[start_pos].cost < kInfinity */
/* Maintains the "ZopfliNode array invariant". */
-inline void UpdateZopfliNode(ZopfliNode* nodes, size_t pos, size_t start_pos,
- size_t len, size_t len_code, size_t dist,
- size_t short_code, float cost) {
- ZopfliNode& next = nodes[pos + len];
- next.length = static_cast<uint32_t>(len | ((len + 9u - len_code) << 24));
- next.distance = static_cast<uint32_t>(dist | (short_code << 25));
- next.insert_length = static_cast<uint32_t>(pos - start_pos);
- next.cost = cost;
+static BROTLI_INLINE void UpdateZopfliNode(ZopfliNode* nodes, size_t pos,
+ size_t start_pos, size_t len, size_t len_code, size_t dist,
+ size_t short_code, float cost) {
+ ZopfliNode* next = &nodes[pos + len];
+ next->length = (uint32_t)(len | ((len + 9u - len_code) << 24));
+ next->distance = (uint32_t)(dist | (short_code << 25));
+ next->insert_length = (uint32_t)(pos - start_pos);
+ next->u.cost = cost;
}
+typedef struct PosData {
+ size_t pos;
+ int distance_cache[4];
+ float costdiff;
+} PosData;
+
/* Maintains the smallest 8 cost differences together with their positions */
-class StartPosQueue {
- public:
- struct PosData {
- size_t pos;
- int distance_cache[4];
- float costdiff;
- };
-
- explicit StartPosQueue(int bits)
- : mask_((1u << bits) - 1), q_(1 << bits), idx_(0) {}
-
- void Clear(void) {
- idx_ = 0;
- }
+typedef struct StartPosQueue {
+ PosData q_[8];
+ size_t idx_;
+} StartPosQueue;
- void Push(const StartPosQueue::PosData& posdata) {
- size_t offset = ~idx_ & mask_;
- ++idx_;
- size_t len = size();
- q_[offset] = posdata;
+static BROTLI_INLINE void InitStartPosQueue(StartPosQueue* self) {
+ self->idx_ = 0;
+}
+
+static size_t StartPosQueueSize(const StartPosQueue* self) {
+ return BROTLI_MIN(size_t, self->idx_, 8);
+}
+
+static void StartPosQueuePush(StartPosQueue* self, const PosData* posdata) {
+ size_t offset = ~(self->idx_++) & 7;
+ size_t len = StartPosQueueSize(self);
+ size_t i;
+ PosData* q = self->q_;
+ q[offset] = *posdata;
/* Restore the sorted order. In the list of |len| items at most |len - 1|
adjacent element comparisons / swaps are required. */
- for (size_t i = 1; i < len; ++i) {
- if (q_[offset & mask_].costdiff > q_[(offset + 1) & mask_].costdiff) {
- std::swap(q_[offset & mask_], q_[(offset + 1) & mask_]);
- }
- ++offset;
+ for (i = 1; i < len; ++i) {
+ if (q[offset & 7].costdiff > q[(offset + 1) & 7].costdiff) {
+ BROTLI_SWAP(PosData, q, offset & 7, (offset + 1) & 7);
}
+ ++offset;
}
+}
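
StartPosQueue keeps at most eight candidate starting positions sorted by costdiff in a fixed ring buffer; each push writes into slot ~idx & 7 and restores order with a single insertion-sort pass. A simplified, self-contained model of the same mechanics; MiniQueue is illustrative and stores bare float keys instead of PosData:

    #include <stddef.h>
    #include <stdio.h>

    typedef struct { float q[8]; size_t idx; } MiniQueue;

    static size_t MiniQueueSize(const MiniQueue* s) {
      return s->idx < 8 ? s->idx : 8;
    }

    static void MiniQueuePush(MiniQueue* s, float key) {
      size_t offset = ~(s->idx++) & 7;
      size_t len = MiniQueueSize(s);
      size_t i;
      s->q[offset] = key;
      for (i = 1; i < len; ++i) {  /* one insertion-sort pass */
        if (s->q[offset & 7] > s->q[(offset + 1) & 7]) {
          float t = s->q[offset & 7];
          s->q[offset & 7] = s->q[(offset + 1) & 7];
          s->q[(offset + 1) & 7] = t;
        }
        ++offset;
      }
    }

    static float MiniQueueAt(const MiniQueue* s, size_t k) {
      return s->q[(k - s->idx) & 7];
    }

    int main(void) {
      MiniQueue s = { { 0 }, 0 };
      size_t k;
      MiniQueuePush(&s, 3.0f);
      MiniQueuePush(&s, 1.0f);
      MiniQueuePush(&s, 2.0f);
      for (k = 0; k < MiniQueueSize(&s); ++k) {
        printf("%.0f ", MiniQueueAt(&s, k));  /* prints: 1 2 3 */
      }
      return 0;
    }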
- size_t size(void) const { return std::min(idx_, mask_ + 1); }
-
- const StartPosQueue::PosData& GetStartPosData(size_t k) const {
- return q_[(k - idx_) & mask_];
- }
-
- private:
- const size_t mask_;
- std::vector<PosData> q_;
- size_t idx_;
-};
+static const PosData* StartPosQueueAt(const StartPosQueue* self, size_t k) {
+ return &self->q_[(k - self->idx_) & 7];
+}
/* Returns the minimum possible copy length that can improve the cost of any */
/* future position. */
-static size_t ComputeMinimumCopyLength(const StartPosQueue& queue,
+static size_t ComputeMinimumCopyLength(const StartPosQueue* queue,
const ZopfliNode* nodes,
- const ZopfliCostModel& model,
+ const ZopfliCostModel* model,
const size_t num_bytes,
const size_t pos) {
/* Compute the minimum possible cost of reaching any future position. */
- const size_t start0 = queue.GetStartPosData(0).pos;
- float min_cost = (nodes[start0].cost +
- model.GetLiteralCosts(start0, pos) +
- model.GetMinCostCmd());
+ const size_t start0 = StartPosQueueAt(queue, 0)->pos;
+ float min_cost = (nodes[start0].u.cost +
+ ZopfliCostModelGetLiteralCosts(model, start0, pos) +
+ ZopfliCostModelGetMinCostCmd(model));
size_t len = 2;
size_t next_len_bucket = 4;
size_t next_len_offset = 10;
- while (pos + len <= num_bytes && nodes[pos + len].cost <= min_cost) {
+ while (pos + len <= num_bytes && nodes[pos + len].u.cost <= min_cost) {
/* We already reached (pos + len) with no more cost than the minimum
possible cost of reaching anything from this pos, so there is no point in
looking for lengths <= len. */
@@ -258,7 +320,7 @@ static size_t ComputeMinimumCopyLength(const StartPosQueue& queue,
if (len == next_len_offset) {
/* We reached the next copy length code bucket, so we add one more
extra bit to the minimum cost. */
- min_cost += static_cast<float>(1.0);
+ min_cost += 1.0f;
next_len_offset += next_len_bucket;
next_len_bucket *= 2;
}
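
The bucket bookkeeping above mirrors the copy-length code layout: starting at length 10 with a bucket width of 4, each bucket doubles in size, and entering a new bucket costs one more extra bit. A small sketch that prints the implied thresholds (10, 14, 22, 38, 70, ...), assuming the same initial values as the loop above:

    #include <stdio.h>

    int main(void) {
      unsigned next_len_bucket = 4, next_len_offset = 10;
      int bits;
      for (bits = 1; bits <= 5; ++bits) {
        printf("+%d extra bit(s) from copy length %u\n",
               bits, next_len_offset);
        next_len_offset += next_len_bucket;
        next_len_bucket *= 2;
      }
      return 0;
    }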
@@ -283,17 +345,17 @@ static void ComputeDistanceCache(const size_t block_start,
size_t p = pos;
/* Because of prerequisite, does at most (pos + 1) / 2 iterations. */
while (idx < 4 && p > 0) {
- const size_t clen = nodes[p].copy_length();
+ const size_t clen = ZopfliNodeCopyLength(&nodes[p]);
const size_t ilen = nodes[p].insert_length;
- const size_t dist = nodes[p].copy_distance();
+ const size_t dist = ZopfliNodeCopyDistance(&nodes[p]);
/* Since block_start + p is the end position of the command, the copy part
starts from block_start + p - clen. Distances that are greater than this
or greater than max_backward are static dictionary references, and do
not update the last distances. Also distance code 0 (last distance)
does not update the last distances. */
if (dist + clen <= block_start + p && dist <= max_backward &&
- nodes[p].distance_code() > 0) {
- dist_cache[idx++] = static_cast<int>(dist);
+ ZopfliNodeDistanceCode(&nodes[p]) > 0) {
+ dist_cache[idx++] = (int)dist;
}
/* Because of prerequisite, p >= clen + ilen >= 2. */
p -= clen + ilen;
@@ -308,6 +370,7 @@ static void UpdateNodes(const size_t num_bytes,
const size_t pos,
const uint8_t* ringbuffer,
const size_t ringbuffer_mask,
+ const int quality,
const size_t max_backward_limit,
const int* starting_dist_cache,
const size_t num_matches,
@@ -315,36 +378,45 @@ static void UpdateNodes(const size_t num_bytes,
const ZopfliCostModel* model,
StartPosQueue* queue,
ZopfliNode* nodes) {
- size_t cur_ix = block_start + pos;
- size_t cur_ix_masked = cur_ix & ringbuffer_mask;
- size_t max_distance = std::min(cur_ix, max_backward_limit);
-
- if (nodes[pos].cost <= model->GetLiteralCosts(0, pos)) {
- StartPosQueue::PosData posdata;
+ const size_t cur_ix = block_start + pos;
+ const size_t cur_ix_masked = cur_ix & ringbuffer_mask;
+ const size_t max_distance = BROTLI_MIN(size_t, cur_ix, max_backward_limit);
+ const size_t max_len = num_bytes - pos;
+ const size_t max_zopfli_len = MaxZopfliLenForQuality(quality);
+ const size_t max_iters = quality <= 10 ? 1 : 5;
+ size_t min_len;
+ size_t k;
+
+ if (nodes[pos].u.cost <= ZopfliCostModelGetLiteralCosts(model, 0, pos)) {
+ PosData posdata;
posdata.pos = pos;
- posdata.costdiff = nodes[pos].cost - model->GetLiteralCosts(0, pos);
+ posdata.costdiff = nodes[pos].u.cost -
+ ZopfliCostModelGetLiteralCosts(model, 0, pos);
ComputeDistanceCache(block_start, pos, max_backward_limit,
starting_dist_cache, nodes, posdata.distance_cache);
- queue->Push(posdata);
+ StartPosQueuePush(queue, &posdata);
}
- const size_t min_len = ComputeMinimumCopyLength(
- *queue, nodes, *model, num_bytes, pos);
+ min_len = ComputeMinimumCopyLength(queue, nodes, model, num_bytes, pos);
/* Go over the command starting positions in order of increasing cost
difference. */
- for (size_t k = 0; k < 5 && k < queue->size(); ++k) {
- const StartPosQueue::PosData& posdata = queue->GetStartPosData(k);
- const size_t start = posdata.pos;
- const float start_costdiff = posdata.costdiff;
+ for (k = 0; k < max_iters && k < StartPosQueueSize(queue); ++k) {
+ const PosData* posdata = StartPosQueueAt(queue, k);
+ const size_t start = posdata->pos;
+ const uint16_t inscode = GetInsertLengthCode(pos - start);
+ const float start_costdiff = posdata->costdiff;
+ const float base_cost = start_costdiff + (float)GetInsertExtra(inscode) +
+ ZopfliCostModelGetLiteralCosts(model, 0, pos);
/* Look for last distance matches using the distance cache from this
starting position. */
size_t best_len = min_len - 1;
- for (size_t j = 0; j < kNumDistanceShortCodes; ++j) {
+ size_t j = 0;
+ for (; j < BROTLI_NUM_DISTANCE_SHORT_CODES && best_len < max_len; ++j) {
const size_t idx = kDistanceCacheIndex[j];
- const size_t backward = static_cast<size_t>(posdata.distance_cache[idx] +
- kDistanceCacheOffset[j]);
+ const size_t backward =
+ (size_t)(posdata->distance_cache[idx] + kDistanceCacheOffset[j]);
size_t prev_ix = cur_ix - backward;
if (prev_ix >= cur_ix) {
continue;
@@ -357,21 +429,29 @@ static void UpdateNodes(const size_t num_bytes,
if (cur_ix_masked + best_len > ringbuffer_mask ||
prev_ix + best_len > ringbuffer_mask ||
ringbuffer[cur_ix_masked + best_len] !=
- ringbuffer[prev_ix + best_len]) {
+ ringbuffer[prev_ix + best_len]) {
continue;
}
- const size_t len =
- FindMatchLengthWithLimit(&ringbuffer[prev_ix],
- &ringbuffer[cur_ix_masked],
- num_bytes - pos);
- for (size_t l = best_len + 1; l <= len; ++l) {
- const size_t inslen = pos - start;
- float cmd_cost = model->GetCommandCost(j, l, inslen);
- float cost = start_costdiff + cmd_cost + model->GetLiteralCosts(0, pos);
- if (cost < nodes[pos + l].cost) {
- UpdateZopfliNode(&nodes[0], pos, start, l, l, backward, j + 1, cost);
+ {
+ const size_t len =
+ FindMatchLengthWithLimit(&ringbuffer[prev_ix],
+ &ringbuffer[cur_ix_masked],
+ max_len);
+ const float dist_cost = base_cost +
+ ZopfliCostModelGetDistanceCost(model, j);
+ size_t l;
+ for (l = best_len + 1; l <= len; ++l) {
+ const uint16_t copycode = GetCopyLengthCode(l);
+ const uint16_t cmdcode =
+ CombineLengthCodes(inscode, copycode, j == 0);
+ const float cost = (cmdcode < 128 ? base_cost : dist_cost) +
+ (float)GetCopyExtra(copycode) +
+ ZopfliCostModelGetCommandCost(model, cmdcode);
+ if (cost < nodes[pos + l].u.cost) {
+ UpdateZopfliNode(nodes, pos, start, l, l, backward, j + 1, cost);
+ }
+ best_len = l;
}
- best_len = l;
}
}
@@ -380,86 +460,103 @@ static void UpdateNodes(const size_t num_bytes,
does not help much. */
if (k >= 2) continue;
+ {
/* Loop through all possible copy lengths at this position. */
- size_t len = min_len;
- for (size_t j = 0; j < num_matches; ++j) {
- BackwardMatch match = matches[j];
- size_t dist = match.distance;
- bool is_dictionary_match = dist > max_distance;
+ size_t len = min_len;
+ for (j = 0; j < num_matches; ++j) {
+ BackwardMatch match = matches[j];
+ size_t dist = match.distance;
+ int is_dictionary_match = (dist > max_distance) ? 1 : 0;
/* We already tried all possible last distance matches, so we can use
normal distance code here. */
- size_t dist_code = dist + 15;
+ size_t dist_code = dist + 15;
+ uint16_t dist_symbol;
+ uint32_t distextra;
+ uint32_t distnumextra;
+ float dist_cost;
+ size_t max_match_len;
+ PrefixEncodeCopyDistance(dist_code, 0, 0, &dist_symbol, &distextra);
+ distnumextra = distextra >> 24;
+ dist_cost = base_cost + (float)distnumextra +
+ ZopfliCostModelGetDistanceCost(model, dist_symbol);
+
/* Try all copy lengths up until the maximum copy length corresponding
to this distance. If the distance refers to the static dictionary, or
the maximum length is long enough, try only one maximum length. */
- size_t max_len = match.length();
- if (len < max_len && (is_dictionary_match || max_len > kMaxZopfliLen)) {
- len = max_len;
- }
- for (; len <= max_len; ++len) {
- size_t len_code = is_dictionary_match ? match.length_code() : len;
- const size_t inslen = pos - start;
- float cmd_cost = model->GetCommandCost(dist_code, len_code, inslen);
- float cost = start_costdiff + cmd_cost + model->GetLiteralCosts(0, pos);
- if (cost < nodes[pos + len].cost) {
- UpdateZopfliNode(&nodes[0], pos, start, len, len_code, dist, 0, cost);
+ max_match_len = BackwardMatchLength(&match);
+ if (len < max_match_len &&
+ (is_dictionary_match || max_match_len > max_zopfli_len)) {
+ len = max_match_len;
+ }
+ for (; len <= max_match_len; ++len) {
+ const size_t len_code =
+ is_dictionary_match ? BackwardMatchLengthCode(&match) : len;
+ const uint16_t copycode = GetCopyLengthCode(len_code);
+ const uint16_t cmdcode = CombineLengthCodes(inscode, copycode, 0);
+ const float cost = dist_cost + (float)GetCopyExtra(copycode) +
+ ZopfliCostModelGetCommandCost(model, cmdcode);
+ if (cost < nodes[pos + len].u.cost) {
+ UpdateZopfliNode(nodes, pos, start, len, len_code, dist, 0, cost);
+ }
}
}
}
}
}
-static void ComputeShortestPathFromNodes(size_t num_bytes,
- const ZopfliNode* nodes,
- std::vector<uint32_t>* path) {
- std::vector<uint32_t> backwards(num_bytes / 2 + 1);
+static size_t ComputeShortestPathFromNodes(size_t num_bytes,
+ ZopfliNode* nodes) {
size_t index = num_bytes;
- while (nodes[index].cost == kInfinity) --index;
size_t num_commands = 0;
+ while (nodes[index].u.cost == kInfinity) --index;
+ nodes[index].u.next = BROTLI_UINT32_MAX;
while (index != 0) {
- size_t len = nodes[index].command_length();
- backwards[num_commands++] = static_cast<uint32_t>(len);
+ size_t len = ZopfliNodeCommandLength(&nodes[index]);
index -= len;
+ nodes[index].u.next = (uint32_t)len;
+ num_commands++;
}
- path->resize(num_commands);
- for (size_t i = num_commands, j = 0; i > 0; --i, ++j) {
- (*path)[j] = backwards[i - 1];
- }
+ return num_commands;
}
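
ComputeShortestPathFromNodes now reconstructs the path in place: the backward walk overwrites the no-longer-needed cost field (via the u union) with the length of the command leaving each position, terminated by BROTLI_UINT32_MAX, which is why the old backwards/path vectors could be dropped. A minimal sketch of consuming such a chain; MiniNode is hypothetical:

    #include <stdint.h>
    #include <stdio.h>

    typedef struct { uint32_t next; } MiniNode;

    int main(void) {
      /* Path 0 -> 5 -> 12, i.e. commands of length 5 and 7. */
      MiniNode nodes[13] = { { 0 } };
      size_t pos = 0;
      nodes[0].next = 5;
      nodes[5].next = 7;
      nodes[12].next = UINT32_MAX;  /* end marker, like BROTLI_UINT32_MAX */
      while (nodes[pos].next != UINT32_MAX) {
        uint32_t len = nodes[pos].next;
        printf("command covering [%u, %u)\n",
               (unsigned)pos, (unsigned)(pos + len));
        pos += len;
      }
      return 0;
    }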
-void ZopfliCreateCommands(const size_t num_bytes,
- const size_t block_start,
- const size_t max_backward_limit,
- const std::vector<uint32_t>& path,
- const ZopfliNode* nodes,
- int* dist_cache,
- size_t* last_insert_len,
- Command* commands,
- size_t* num_literals) {
+void BrotliZopfliCreateCommands(const size_t num_bytes,
+ const size_t block_start,
+ const size_t max_backward_limit,
+ const ZopfliNode* nodes,
+ int* dist_cache,
+ size_t* last_insert_len,
+ Command* commands,
+ size_t* num_literals) {
size_t pos = 0;
- for (size_t i = 0; i < path.size(); i++) {
- const ZopfliNode& next = nodes[pos + path[i]];
- size_t copy_length = next.copy_length();
- size_t insert_length = next.insert_length;
+ uint32_t offset = nodes[0].u.next;
+ size_t i;
+ for (i = 0; offset != BROTLI_UINT32_MAX; i++) {
+ const ZopfliNode* next = &nodes[pos + offset];
+ size_t copy_length = ZopfliNodeCopyLength(next);
+ size_t insert_length = next->insert_length;
pos += insert_length;
+ offset = next->u.next;
if (i == 0) {
insert_length += *last_insert_len;
*last_insert_len = 0;
}
- size_t distance = next.copy_distance();
- size_t len_code = next.length_code();
- size_t max_distance = std::min(block_start + pos, max_backward_limit);
- bool is_dictionary = (distance > max_distance);
- size_t dist_code = next.distance_code();
-
- Command cmd(insert_length, copy_length, len_code, dist_code);
- commands[i] = cmd;
-
- if (!is_dictionary && dist_code > 0) {
- dist_cache[3] = dist_cache[2];
- dist_cache[2] = dist_cache[1];
- dist_cache[1] = dist_cache[0];
- dist_cache[0] = static_cast<int>(distance);
+ {
+ size_t distance = ZopfliNodeCopyDistance(next);
+ size_t len_code = ZopfliNodeLengthCode(next);
+ size_t max_distance =
+ BROTLI_MIN(size_t, block_start + pos, max_backward_limit);
+ int is_dictionary = (distance > max_distance) ? 1 : 0;
+ size_t dist_code = ZopfliNodeDistanceCode(next);
+
+ InitCommand(
+ &commands[i], insert_length, copy_length, len_code, dist_code);
+
+ if (!is_dictionary && dist_code > 0) {
+ dist_cache[3] = dist_cache[2];
+ dist_cache[2] = dist_cache[1];
+ dist_cache[1] = dist_cache[0];
+ dist_cache[0] = (int)distance;
+ }
}
*num_literals += insert_length;
@@ -468,392 +565,319 @@ void ZopfliCreateCommands(const size_t num_bytes,
*last_insert_len += num_bytes - pos;
}
-static void ZopfliIterate(size_t num_bytes,
- size_t position,
- const uint8_t* ringbuffer,
- size_t ringbuffer_mask,
- const size_t max_backward_limit,
- const int* dist_cache,
- const ZopfliCostModel& model,
- const std::vector<uint32_t>& num_matches,
- const std::vector<BackwardMatch>& matches,
- ZopfliNode* nodes,
- std::vector<uint32_t>* path) {
- nodes[0].length = 0;
- nodes[0].cost = 0;
- StartPosQueue queue(3);
+static size_t ZopfliIterate(size_t num_bytes,
+ size_t position,
+ const uint8_t* ringbuffer,
+ size_t ringbuffer_mask,
+ const int quality,
+ const size_t max_backward_limit,
+ const int* dist_cache,
+ const ZopfliCostModel* model,
+ const uint32_t* num_matches,
+ const BackwardMatch* matches,
+ ZopfliNode* nodes) {
+ const size_t max_zopfli_len = MaxZopfliLenForQuality(quality);
+ StartPosQueue queue;
size_t cur_match_pos = 0;
- for (size_t i = 0; i + 3 < num_bytes; i++) {
+ size_t i;
+ nodes[0].length = 0;
+ nodes[0].u.cost = 0;
+ InitStartPosQueue(&queue);
+ for (i = 0; i + 3 < num_bytes; i++) {
UpdateNodes(num_bytes, position, i, ringbuffer, ringbuffer_mask,
- max_backward_limit, dist_cache, num_matches[i],
- &matches[cur_match_pos], &model, &queue, &nodes[0]);
+ quality, max_backward_limit, dist_cache, num_matches[i],
+ &matches[cur_match_pos], model, &queue, nodes);
cur_match_pos += num_matches[i];
/* The zopflification can be too slow for very long copy lengths; in that
case skip it entirely, since this costs very little compression ratio. */
if (num_matches[i] == 1 &&
- matches[cur_match_pos - 1].length() > kMaxZopfliLen) {
- i += matches[cur_match_pos - 1].length() - 1;
- queue.Clear();
+ BackwardMatchLength(&matches[cur_match_pos - 1]) > max_zopfli_len) {
+ i += BackwardMatchLength(&matches[cur_match_pos - 1]) - 1;
+ InitStartPosQueue(&queue);
}
}
- ComputeShortestPathFromNodes(num_bytes, &nodes[0], path);
+ return ComputeShortestPathFromNodes(num_bytes, nodes);
}
-void ZopfliComputeShortestPath(size_t num_bytes,
- size_t position,
- const uint8_t* ringbuffer,
- size_t ringbuffer_mask,
- const size_t max_backward_limit,
- const int* dist_cache,
- Hashers::H10* hasher,
- ZopfliNode* nodes,
- std::vector<uint32_t>* path) {
+size_t BrotliZopfliComputeShortestPath(MemoryManager* m,
+ size_t num_bytes,
+ size_t position,
+ const uint8_t* ringbuffer,
+ size_t ringbuffer_mask,
+ const int quality,
+ const size_t max_backward_limit,
+ const int* dist_cache,
+ H10* hasher,
+ ZopfliNode* nodes) {
+ const size_t max_zopfli_len = MaxZopfliLenForQuality(quality);
+ ZopfliCostModel model;
+ StartPosQueue queue;
+ BackwardMatch matches[MAX_NUM_MATCHES_H10];
+ const size_t store_end = num_bytes >= StoreLookaheadH10() ?
+ position + num_bytes - StoreLookaheadH10() + 1 : position;
+ size_t i;
nodes[0].length = 0;
- nodes[0].cost = 0;
- ZopfliCostModel* model = new ZopfliCostModel;
- model->SetFromLiteralCosts(num_bytes, position,
- ringbuffer, ringbuffer_mask);
- StartPosQueue queue(3);
- BackwardMatch matches[Hashers::H10::kMaxNumMatches];
- for (size_t i = 0; i + 3 < num_bytes; i++) {
- const size_t max_distance = std::min(position + i, max_backward_limit);
- size_t num_matches = hasher->FindAllMatches(
- ringbuffer, ringbuffer_mask, position + i, num_bytes - i, max_distance,
- matches);
+ nodes[0].u.cost = 0;
+ InitZopfliCostModel(m, &model, num_bytes);
+ if (BROTLI_IS_OOM(m)) return 0;
+ ZopfliCostModelSetFromLiteralCosts(
+ &model, position, ringbuffer, ringbuffer_mask);
+ InitStartPosQueue(&queue);
+ for (i = 0; i + HashTypeLengthH10() - 1 < num_bytes; i++) {
+ const size_t pos = position + i;
+ const size_t max_distance = BROTLI_MIN(size_t, pos, max_backward_limit);
+ size_t num_matches = FindAllMatchesH10(hasher,
+ ringbuffer, ringbuffer_mask, pos, num_bytes - i, max_distance,
+ quality, matches);
if (num_matches > 0 &&
- matches[num_matches - 1].length() > kMaxZopfliLen) {
+ BackwardMatchLength(&matches[num_matches - 1]) > max_zopfli_len) {
matches[0] = matches[num_matches - 1];
num_matches = 1;
}
UpdateNodes(num_bytes, position, i, ringbuffer, ringbuffer_mask,
- max_backward_limit, dist_cache, num_matches, matches,
- model, &queue, nodes);
- if (num_matches == 1 && matches[0].length() > kMaxZopfliLen) {
- for (size_t j = 1; j < matches[0].length() && i + 4 < num_bytes; ++j) {
- ++i;
- if (matches[0].length() - j < 64 &&
- num_bytes - i >= kMaxTreeCompLength) {
- hasher->Store(ringbuffer, ringbuffer_mask, position + i);
- }
- }
- queue.Clear();
+ quality, max_backward_limit, dist_cache, num_matches, matches,
+ &model, &queue, nodes);
+ if (num_matches == 1 && BackwardMatchLength(&matches[0]) > max_zopfli_len) {
+ /* Add the tail of the copy to the hasher. */
+ StoreRangeH10(hasher, ringbuffer, ringbuffer_mask, pos + 1, BROTLI_MIN(
+ size_t, pos + BackwardMatchLength(&matches[0]), store_end));
+ i += BackwardMatchLength(&matches[0]) - 1;
+ InitStartPosQueue(&queue);
}
}
- delete model;
- ComputeShortestPathFromNodes(num_bytes, nodes, path);
+ CleanupZopfliCostModel(m, &model);
+ return ComputeShortestPathFromNodes(num_bytes, nodes);
}
-template<typename Hasher>
-void CreateBackwardReferences(size_t num_bytes,
- size_t position,
- bool is_last,
- const uint8_t* ringbuffer,
- size_t ringbuffer_mask,
- const int quality,
- const int lgwin,
- Hasher* hasher,
- int* dist_cache,
- size_t* last_insert_len,
- Command* commands,
- size_t* num_commands,
- size_t* num_literals) {
- // Set maximum distance, see section 9.1. of the spec.
- const size_t max_backward_limit = (1 << lgwin) - 16;
-
- // Choose which init method is faster.
- // memset is about 100 times faster than hasher->InitForData().
- const size_t kMaxBytesForPartialHashInit = Hasher::kHashMapSize >> 7;
- if (position == 0 && is_last && num_bytes <= kMaxBytesForPartialHashInit) {
- hasher->InitForData(ringbuffer, num_bytes);
- } else {
- hasher->Init();
- }
- if (num_bytes >= 3 && position >= 3) {
- // Prepare the hashes for three last bytes of the last write.
- // These could not be calculated before, since they require knowledge
- // of both the previous and the current block.
- hasher->Store(&ringbuffer[(position - 3) & ringbuffer_mask],
- static_cast<uint32_t>(position - 3));
- hasher->Store(&ringbuffer[(position - 2) & ringbuffer_mask],
- static_cast<uint32_t>(position - 2));
- hasher->Store(&ringbuffer[(position - 1) & ringbuffer_mask],
- static_cast<uint32_t>(position - 1));
- }
- const Command * const orig_commands = commands;
- size_t insert_length = *last_insert_len;
- size_t i = position & ringbuffer_mask;
- const size_t i_diff = position - i;
- const size_t i_end = i + num_bytes;
-
- // For speed up heuristics for random data.
- const size_t random_heuristics_window_size = quality < 9 ? 64 : 512;
- size_t apply_random_heuristics = i + random_heuristics_window_size;
-
- // Minimum score to accept a backward reference.
- const double kMinScore = 4.0;
-
- while (i + Hasher::kHashTypeLength - 1 < i_end) {
- size_t max_length = i_end - i;
- size_t max_distance = std::min(i + i_diff, max_backward_limit);
- size_t best_len = 0;
- size_t best_len_code = 0;
- size_t best_dist = 0;
- double best_score = kMinScore;
- bool match_found = hasher->FindLongestMatch(
- ringbuffer, ringbuffer_mask,
- dist_cache, static_cast<uint32_t>(i + i_diff), max_length, max_distance,
- &best_len, &best_len_code, &best_dist, &best_score);
- if (match_found) {
- // Found a match. Let's look for something even better ahead.
- int delayed_backward_references_in_row = 0;
- for (;;) {
- --max_length;
- size_t best_len_2 =
- quality < 5 ? std::min(best_len - 1, max_length) : 0;
- size_t best_len_code_2 = 0;
- size_t best_dist_2 = 0;
- double best_score_2 = kMinScore;
- max_distance = std::min(i + i_diff + 1, max_backward_limit);
- match_found = hasher->FindLongestMatch(
- ringbuffer, ringbuffer_mask,
- dist_cache, static_cast<uint32_t>(i + i_diff + 1),
- max_length, max_distance,
- &best_len_2, &best_len_code_2, &best_dist_2, &best_score_2);
- double cost_diff_lazy = 7.0;
- if (match_found && best_score_2 >= best_score + cost_diff_lazy) {
- // Ok, let's just write one byte for now and start a match from the
- // next byte.
- ++i;
- ++insert_length;
- best_len = best_len_2;
- best_len_code = best_len_code_2;
- best_dist = best_dist_2;
- best_score = best_score_2;
- if (++delayed_backward_references_in_row < 4) {
- continue;
- }
- }
- break;
- }
- apply_random_heuristics =
- i + 2 * best_len + random_heuristics_window_size;
- max_distance = std::min(i + i_diff, max_backward_limit);
- // The first 16 codes are special shortcodes, and the minimum offset is 1.
- size_t distance_code =
- ComputeDistanceCode(best_dist, max_distance, quality, dist_cache);
- if (best_dist <= max_distance && distance_code > 0) {
- dist_cache[3] = dist_cache[2];
- dist_cache[2] = dist_cache[1];
- dist_cache[1] = dist_cache[0];
- dist_cache[0] = static_cast<int>(best_dist);
- }
- Command cmd(insert_length, best_len, best_len_code, distance_code);
- *commands++ = cmd;
- *num_literals += insert_length;
- insert_length = 0;
- // Put the hash keys into the table, if there are enough
- // bytes left.
- for (size_t j = 2; j < best_len; ++j) {
- hasher->Store(&ringbuffer[i + j],
- static_cast<uint32_t>(i + i_diff + j));
- }
- i += best_len;
+#define EXPAND_CAT(a, b) CAT(a, b)
+#define CAT(a, b) a ## b
+#define FN(X) EXPAND_CAT(X, HASHER())
+
+#define HASHER() H2
+/* NOLINTNEXTLINE(build/include) */
+#include "./backward_references_inc.h"
+#undef HASHER
+
+#define HASHER() H3
+/* NOLINTNEXTLINE(build/include) */
+#include "./backward_references_inc.h"
+#undef HASHER
+
+#define HASHER() H4
+/* NOLINTNEXTLINE(build/include) */
+#include "./backward_references_inc.h"
+#undef HASHER
+
+#define HASHER() H5
+/* NOLINTNEXTLINE(build/include) */
+#include "./backward_references_inc.h"
+#undef HASHER
+
+#define HASHER() H6
+/* NOLINTNEXTLINE(build/include) */
+#include "./backward_references_inc.h"
+#undef HASHER
+
+#define HASHER() H7
+/* NOLINTNEXTLINE(build/include) */
+#include "./backward_references_inc.h"
+#undef HASHER
+
+#define HASHER() H8
+/* NOLINTNEXTLINE(build/include) */
+#include "./backward_references_inc.h"
+#undef HASHER
+
+#define HASHER() H9
+/* NOLINTNEXTLINE(build/include) */
+#include "./backward_references_inc.h"
+#undef HASHER
+
+#undef FN
+#undef CAT
+#undef EXPAND_CAT
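
This block replaces the C++ function template with the classic preprocessor "template": the same _inc.h body is included once per HASHER() value, and FN() pastes the hasher name onto every function it defines. A self-contained sketch of the expansion, with the bodies inlined instead of living in an include file and Sum as a made-up name:

    #include <stdio.h>

    #define EXPAND_CAT(a, b) CAT(a, b)
    #define CAT(a, b) a ## b
    #define FN(X) EXPAND_CAT(X, HASHER())

    #define HASHER() H2
    static int FN(Sum)(int a, int b) { return a + b; }      /* defines SumH2 */
    #undef HASHER

    #define HASHER() H3
    static int FN(Sum)(int a, int b) { return a + b + 1; }  /* defines SumH3 */
    #undef HASHER

    int main(void) {
      printf("%d %d\n", SumH2(2, 3), SumH3(2, 3));  /* prints: 5 6 */
      return 0;
    }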
+
+void BrotliCreateBackwardReferences(MemoryManager* m,
+ size_t num_bytes,
+ size_t position,
+ int is_last,
+ const uint8_t* ringbuffer,
+ size_t ringbuffer_mask,
+ const int quality,
+ const int lgwin,
+ Hashers* hashers,
+ int hash_type,
+ int* dist_cache,
+ size_t* last_insert_len,
+ Command* commands,
+ size_t* num_commands,
+ size_t* num_literals) {
+ if (quality > 9) { /* Zopflify. */
+ H10* hasher = hashers->hash_h10;
+ const size_t max_backward_limit = MaxBackwardLimit(lgwin);
+ InitH10(m, hasher, ringbuffer, lgwin, position, num_bytes, is_last);
+ if (BROTLI_IS_OOM(m)) return;
+ StitchToPreviousBlockH10(hasher, num_bytes, position,
+ ringbuffer, ringbuffer_mask);
+ /* Set maximum distance, see section 9.1. of the spec. */
+ if (quality == 10) {
+ ZopfliNode* nodes = BROTLI_ALLOC(m, ZopfliNode, num_bytes + 1);
+ if (BROTLI_IS_OOM(m)) return;
+ BrotliInitZopfliNodes(nodes, num_bytes + 1);
+ *num_commands += BrotliZopfliComputeShortestPath(m, num_bytes, position,
+ ringbuffer, ringbuffer_mask, quality, max_backward_limit, dist_cache,
+ hasher, nodes);
+ if (BROTLI_IS_OOM(m)) return;
+ BrotliZopfliCreateCommands(num_bytes, position, max_backward_limit, nodes,
+ dist_cache, last_insert_len, commands, num_literals);
+ BROTLI_FREE(m, nodes);
+ return;
} else {
- ++insert_length;
- ++i;
- // If we have not seen matches for a long time, we can skip some
- // match lookups. Unsuccessful match lookups are very very expensive
- // and this kind of a heuristic speeds up compression quite
- // a lot.
- if (i > apply_random_heuristics) {
- // Going through uncompressible data, jump.
- if (i > apply_random_heuristics + 4 * random_heuristics_window_size) {
- // It is quite a long time since we saw a copy, so we assume
- // that this data is not compressible, and store hashes less
- // often. Hashes of non compressible data are less likely to
- // turn out to be useful in the future, too, so we store less of
- // them to not to flood out the hash table of good compressible
- // data.
- size_t i_jump = std::min(i + 16, i_end - 4);
- for (; i < i_jump; i += 4) {
- hasher->Store(ringbuffer + i, static_cast<uint32_t>(i + i_diff));
- insert_length += 4;
- }
- } else {
- size_t i_jump = std::min(i + 8, i_end - 3);
- for (; i < i_jump; i += 2) {
- hasher->Store(ringbuffer + i, static_cast<uint32_t>(i + i_diff));
- insert_length += 2;
+ uint32_t* num_matches = BROTLI_ALLOC(m, uint32_t, num_bytes);
+ size_t matches_size = 4 * num_bytes;
+ BackwardMatch* matches = BROTLI_ALLOC(m, BackwardMatch, matches_size);
+ const size_t store_end = num_bytes >= StoreLookaheadH10() ?
+ position + num_bytes - StoreLookaheadH10() + 1 : position;
+ size_t cur_match_pos = 0;
+ size_t i;
+ size_t orig_num_literals;
+ size_t orig_last_insert_len;
+ int orig_dist_cache[4];
+ size_t orig_num_commands;
+ ZopfliCostModel model;
+ ZopfliNode* nodes;
+ if (BROTLI_IS_OOM(m)) return;
+ for (i = 0; i + HashTypeLengthH10() - 1 < num_bytes; ++i) {
+ const size_t pos = position + i;
+ size_t max_distance = BROTLI_MIN(size_t, pos, max_backward_limit);
+ size_t max_length = num_bytes - i;
+ size_t num_found_matches;
+ size_t cur_match_end;
+ size_t j;
+ /* Ensure that we have enough free slots. */
+ BROTLI_ENSURE_CAPACITY(m, BackwardMatch, matches, matches_size,
+ cur_match_pos + MAX_NUM_MATCHES_H10);
+ if (BROTLI_IS_OOM(m)) return;
+ num_found_matches = FindAllMatchesH10(hasher, ringbuffer,
+ ringbuffer_mask, pos, max_length, max_distance, quality,
+ &matches[cur_match_pos]);
+ cur_match_end = cur_match_pos + num_found_matches;
+ for (j = cur_match_pos; j + 1 < cur_match_end; ++j) {
+ assert(BackwardMatchLength(&matches[j]) <
+ BackwardMatchLength(&matches[j + 1]));
+ assert(matches[j].distance > max_distance ||
+ matches[j].distance <= matches[j + 1].distance);
+ }
+ num_matches[i] = (uint32_t)num_found_matches;
+ if (num_found_matches > 0) {
+ const size_t match_len =
+ BackwardMatchLength(&matches[cur_match_end - 1]);
+ if (match_len > kMaxZopfliLenQuality11) {
+ const size_t skip = match_len - 1;
+ matches[cur_match_pos++] = matches[cur_match_end - 1];
+ num_matches[i] = 1;
+ /* Add the tail of the copy to the hasher. */
+ StoreRangeH10(hasher, ringbuffer, ringbuffer_mask, pos + 1,
+ BROTLI_MIN(size_t, pos + match_len, store_end));
+ memset(&num_matches[i + 1], 0, skip * sizeof(num_matches[0]));
+ i += skip;
+ } else {
+ cur_match_pos = cur_match_end;
}
}
}
- }
- }
- insert_length += i_end - i;
- *last_insert_len = insert_length;
- *num_commands += static_cast<size_t>(commands - orig_commands);
-}
-
-void CreateBackwardReferences(size_t num_bytes,
- size_t position,
- bool is_last,
- const uint8_t* ringbuffer,
- size_t ringbuffer_mask,
- const int quality,
- const int lgwin,
- Hashers* hashers,
- int hash_type,
- int* dist_cache,
- size_t* last_insert_len,
- Command* commands,
- size_t* num_commands,
- size_t* num_literals) {
- bool zopflify = quality > 9;
- if (zopflify) {
- Hashers::H10* hasher = hashers->hash_h10;
- hasher->Init(lgwin, position, num_bytes, is_last);
- hasher->StitchToPreviousBlock(num_bytes, position,
- ringbuffer, ringbuffer_mask);
- // Set maximum distance, see section 9.1. of the spec.
- const size_t max_backward_limit = (1 << lgwin) - 16;
- if (quality == 10) {
- std::vector<ZopfliNode> nodes(num_bytes + 1);
- std::vector<uint32_t> path;
- ZopfliComputeShortestPath(num_bytes, position,
- ringbuffer, ringbuffer_mask,
- max_backward_limit, dist_cache, hasher,
- &nodes[0], &path);
- ZopfliCreateCommands(num_bytes, position, max_backward_limit, path,
- &nodes[0], dist_cache, last_insert_len, commands,
- num_literals);
- *num_commands += path.size();
- return;
- }
- std::vector<uint32_t> num_matches(num_bytes);
- std::vector<BackwardMatch> matches(4 * num_bytes);
- size_t cur_match_pos = 0;
- for (size_t i = 0; i + 3 < num_bytes; ++i) {
- size_t max_distance = std::min(position + i, max_backward_limit);
- size_t max_length = num_bytes - i;
- // Ensure that we have enough free slots.
- if (matches.size() < cur_match_pos + Hashers::H10::kMaxNumMatches) {
- matches.resize(cur_match_pos + Hashers::H10::kMaxNumMatches);
- }
- size_t num_found_matches = hasher->FindAllMatches(
- ringbuffer, ringbuffer_mask, position + i, max_length, max_distance,
- &matches[cur_match_pos]);
- const size_t cur_match_end = cur_match_pos + num_found_matches;
- for (size_t j = cur_match_pos; j + 1 < cur_match_end; ++j) {
- assert(matches[j].length() < matches[j + 1].length());
- assert(matches[j].distance > max_distance ||
- matches[j].distance <= matches[j + 1].distance);
- }
- num_matches[i] = static_cast<uint32_t>(num_found_matches);
- if (num_found_matches > 0) {
- const size_t match_len = matches[cur_match_end - 1].length();
- if (match_len > kMaxZopfliLen) {
- matches[cur_match_pos++] = matches[cur_match_end - 1];
- num_matches[i] = 1;
- for (size_t j = 1; j < match_len; ++j) {
- ++i;
- if (match_len - j < 64 && num_bytes - i >= kMaxTreeCompLength) {
- hasher->Store(ringbuffer, ringbuffer_mask, position + i);
- }
- num_matches[i] = 0;
- }
+ orig_num_literals = *num_literals;
+ orig_last_insert_len = *last_insert_len;
+ memcpy(orig_dist_cache, dist_cache, 4 * sizeof(dist_cache[0]));
+ orig_num_commands = *num_commands;
+ nodes = BROTLI_ALLOC(m, ZopfliNode, num_bytes + 1);
+ if (BROTLI_IS_OOM(m)) return;
+ InitZopfliCostModel(m, &model, num_bytes);
+ if (BROTLI_IS_OOM(m)) return;
+ for (i = 0; i < 2; i++) {
+ BrotliInitZopfliNodes(nodes, num_bytes + 1);
+ if (i == 0) {
+ ZopfliCostModelSetFromLiteralCosts(
+ &model, position, ringbuffer, ringbuffer_mask);
} else {
- cur_match_pos = cur_match_end;
+ ZopfliCostModelSetFromCommands(&model, position, ringbuffer,
+ ringbuffer_mask, commands, *num_commands - orig_num_commands,
+ orig_last_insert_len);
}
+ *num_commands = orig_num_commands;
+ *num_literals = orig_num_literals;
+ *last_insert_len = orig_last_insert_len;
+ memcpy(dist_cache, orig_dist_cache, 4 * sizeof(dist_cache[0]));
+ *num_commands += ZopfliIterate(num_bytes, position, ringbuffer,
+ ringbuffer_mask, quality, max_backward_limit, dist_cache, &model,
+ num_matches, matches, nodes);
+ BrotliZopfliCreateCommands(num_bytes, position, max_backward_limit,
+ nodes, dist_cache, last_insert_len, commands, num_literals);
}
- }
- size_t orig_num_literals = *num_literals;
- size_t orig_last_insert_len = *last_insert_len;
- int orig_dist_cache[4] = {
- dist_cache[0], dist_cache[1], dist_cache[2], dist_cache[3]
- };
- size_t orig_num_commands = *num_commands;
- static const size_t kIterations = 2;
- for (size_t i = 0; i < kIterations; i++) {
- ZopfliCostModel model;
- if (i == 0) {
- model.SetFromLiteralCosts(num_bytes, position,
- ringbuffer, ringbuffer_mask);
- } else {
- model.SetFromCommands(num_bytes, position,
- ringbuffer, ringbuffer_mask,
- commands, *num_commands - orig_num_commands,
- orig_last_insert_len);
- }
- *num_commands = orig_num_commands;
- *num_literals = orig_num_literals;
- *last_insert_len = orig_last_insert_len;
- memcpy(dist_cache, orig_dist_cache, 4 * sizeof(dist_cache[0]));
- std::vector<ZopfliNode> nodes(num_bytes + 1);
- std::vector<uint32_t> path;
- ZopfliIterate(num_bytes, position, ringbuffer, ringbuffer_mask,
- max_backward_limit, dist_cache, model, num_matches, matches,
- &nodes[0], &path);
- ZopfliCreateCommands(num_bytes, position, max_backward_limit, path,
- &nodes[0], dist_cache, last_insert_len, commands,
- num_literals);
- *num_commands += path.size();
+ CleanupZopfliCostModel(m, &model);
+ BROTLI_FREE(m, nodes);
+ BROTLI_FREE(m, matches);
+ BROTLI_FREE(m, num_matches);
}
return;
}
switch (hash_type) {
case 2:
- CreateBackwardReferences<Hashers::H2>(
- num_bytes, position, is_last, ringbuffer, ringbuffer_mask,
+ CreateBackwardReferencesH2(
+ m, num_bytes, position, is_last, ringbuffer, ringbuffer_mask,
quality, lgwin, hashers->hash_h2, dist_cache,
last_insert_len, commands, num_commands, num_literals);
break;
case 3:
- CreateBackwardReferences<Hashers::H3>(
- num_bytes, position, is_last, ringbuffer, ringbuffer_mask,
+ CreateBackwardReferencesH3(
+ m, num_bytes, position, is_last, ringbuffer, ringbuffer_mask,
quality, lgwin, hashers->hash_h3, dist_cache,
last_insert_len, commands, num_commands, num_literals);
break;
case 4:
- CreateBackwardReferences<Hashers::H4>(
- num_bytes, position, is_last, ringbuffer, ringbuffer_mask,
+ CreateBackwardReferencesH4(
+ m, num_bytes, position, is_last, ringbuffer, ringbuffer_mask,
quality, lgwin, hashers->hash_h4, dist_cache,
last_insert_len, commands, num_commands, num_literals);
break;
case 5:
- CreateBackwardReferences<Hashers::H5>(
- num_bytes, position, is_last, ringbuffer, ringbuffer_mask,
+ CreateBackwardReferencesH5(
+ m, num_bytes, position, is_last, ringbuffer, ringbuffer_mask,
quality, lgwin, hashers->hash_h5, dist_cache,
last_insert_len, commands, num_commands, num_literals);
break;
case 6:
- CreateBackwardReferences<Hashers::H6>(
- num_bytes, position, is_last, ringbuffer, ringbuffer_mask,
+ CreateBackwardReferencesH6(
+ m, num_bytes, position, is_last, ringbuffer, ringbuffer_mask,
quality, lgwin, hashers->hash_h6, dist_cache,
last_insert_len, commands, num_commands, num_literals);
break;
case 7:
- CreateBackwardReferences<Hashers::H7>(
- num_bytes, position, is_last, ringbuffer, ringbuffer_mask,
+ CreateBackwardReferencesH7(
+ m, num_bytes, position, is_last, ringbuffer, ringbuffer_mask,
quality, lgwin, hashers->hash_h7, dist_cache,
last_insert_len, commands, num_commands, num_literals);
break;
case 8:
- CreateBackwardReferences<Hashers::H8>(
- num_bytes, position, is_last, ringbuffer, ringbuffer_mask,
+ CreateBackwardReferencesH8(
+ m, num_bytes, position, is_last, ringbuffer, ringbuffer_mask,
quality, lgwin, hashers->hash_h8, dist_cache,
last_insert_len, commands, num_commands, num_literals);
break;
case 9:
- CreateBackwardReferences<Hashers::H9>(
- num_bytes, position, is_last, ringbuffer, ringbuffer_mask,
+ CreateBackwardReferencesH9(
+ m, num_bytes, position, is_last, ringbuffer, ringbuffer_mask,
quality, lgwin, hashers->hash_h9, dist_cache,
last_insert_len, commands, num_commands, num_literals);
break;
default:
break;
}
+ if (BROTLI_IS_OOM(m)) return;
}
-} // namespace brotli
+#if defined(__cplusplus) || defined(c_plusplus)
+} /* extern "C" */
+#endif
diff --git a/enc/backward_references.h b/enc/backward_references.h
index 9619f5f..84ad827 100644
--- a/enc/backward_references.h
+++ b/enc/backward_references.h
@@ -9,63 +9,37 @@
#ifndef BROTLI_ENC_BACKWARD_REFERENCES_H_
#define BROTLI_ENC_BACKWARD_REFERENCES_H_
-#include <vector>
-
#include "../common/types.h"
#include "./command.h"
#include "./hash.h"
+#include "./memory.h"
+#include "./port.h"
-namespace brotli {
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
/* "commands" points to the next output command to write to, "*num_commands" is
initially the total amount of commands output by previous
CreateBackwardReferences calls, and must be incremented by the amount written
by this call. */
-void CreateBackwardReferences(size_t num_bytes,
- size_t position,
- bool is_last,
- const uint8_t* ringbuffer,
- size_t ringbuffer_mask,
- const int quality,
- const int lgwin,
- Hashers* hashers,
- int hash_type,
- int* dist_cache,
- size_t* last_insert_len,
- Command* commands,
- size_t* num_commands,
- size_t* num_literals);
-
-static const float kInfinity = std::numeric_limits<float>::infinity();
-
-struct ZopfliNode {
- ZopfliNode(void) : length(1),
- distance(0),
- insert_length(0),
- cost(kInfinity) {}
-
- inline uint32_t copy_length() const {
- return length & 0xffffff;
- }
-
- inline uint32_t length_code() const {
- const uint32_t modifier = length >> 24;
- return copy_length() + 9u - modifier;
- }
-
- inline uint32_t copy_distance() const {
- return distance & 0x1ffffff;
- }
-
- inline uint32_t distance_code() const {
- const uint32_t short_code = distance >> 25;
- return short_code == 0 ? copy_distance() + 15 : short_code - 1;
- }
-
- inline uint32_t command_length() const {
- return copy_length() + insert_length;
- }
-
+BROTLI_INTERNAL void BrotliCreateBackwardReferences(MemoryManager* m,
+ size_t num_bytes,
+ size_t position,
+ int is_last,
+ const uint8_t* ringbuffer,
+ size_t ringbuffer_mask,
+ const int quality,
+ const int lgwin,
+ Hashers* hashers,
+ int hash_type,
+ int* dist_cache,
+ size_t* last_insert_len,
+ Command* commands,
+ size_t* num_commands,
+ size_t* num_literals);
+
+typedef struct ZopfliNode {
/* best length to get up to this byte (not including this byte itself);
the highest 8 bits are used to reconstruct the length code */
uint32_t length;
@@ -75,9 +49,21 @@ struct ZopfliNode {
uint32_t distance;
/* number of literal inserts before this copy */
uint32_t insert_length;
+
+ /* This union holds information used by dynamic programming. During the
+ forward pass |cost| is used to store the goal function. During the path
+ backtracing pass |next| is assigned the offset to the next node on the
+ path. As |cost| is not used after the forward pass, it shares memory
+ with |next|. */
+ union {
/* Smallest cost to get to this byte from the beginning, as found so far. */
- float cost;
-};
+ float cost;
+ /* Offset to the next node on the path. Equals command_length() of the
+ next node on the path; for the last node it equals BROTLI_UINT32_MAX. */
+ uint32_t next;
+ } u;
+} ZopfliNode;
+
+BROTLI_INTERNAL void BrotliInitZopfliNodes(ZopfliNode* array, size_t length);
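
The cost/next union is the key memory trick here: cost is dead once the forward pass finishes, so next reuses its four bytes and the per-byte node table needs no companion path vector. A sketch verifying that the layout stays at four 32-bit words on common platforms; MiniNode mirrors the struct above but is not brotli API:

    #include <stdint.h>
    #include <stdio.h>

    typedef struct {
      uint32_t length;
      uint32_t distance;
      uint32_t insert_length;
      union { float cost; uint32_t next; } u;  /* cost and next overlap */
    } MiniNode;

    int main(void) {
      printf("sizeof(MiniNode) = %u\n", (unsigned)sizeof(MiniNode));
      return 0;  /* typically prints 16 */
    }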
/* Computes the shortest path of commands from position to at most
position + num_bytes.
@@ -92,26 +78,28 @@ struct ZopfliNode {
(1) nodes[i].copy_length() >= 2
(2) nodes[i].command_length() <= i and
(3) nodes[i - nodes[i].command_length()].cost < kInfinity */
-void ZopfliComputeShortestPath(size_t num_bytes,
- size_t position,
- const uint8_t* ringbuffer,
- size_t ringbuffer_mask,
- const size_t max_backward_limit,
- const int* dist_cache,
- Hashers::H10* hasher,
- ZopfliNode* nodes,
- std::vector<uint32_t>* path);
-
-void ZopfliCreateCommands(const size_t num_bytes,
- const size_t block_start,
- const size_t max_backward_limit,
- const std::vector<uint32_t>& path,
- const ZopfliNode* nodes,
- int* dist_cache,
- size_t* last_insert_len,
- Command* commands,
- size_t* num_literals);
-
-} // namespace brotli
+BROTLI_INTERNAL size_t BrotliZopfliComputeShortestPath(
+ MemoryManager* m, size_t num_bytes, size_t position,
+ const uint8_t* ringbuffer, size_t ringbuffer_mask, const int quality,
+ const size_t max_backward_limit, const int* dist_cache, H10* hasher,
+ ZopfliNode* nodes);
+
+BROTLI_INTERNAL void BrotliZopfliCreateCommands(const size_t num_bytes,
+ const size_t block_start,
+ const size_t max_backward_limit,
+ const ZopfliNode* nodes,
+ int* dist_cache,
+ size_t* last_insert_len,
+ Command* commands,
+ size_t* num_literals);
+
+/* Maximum distance, see section 9.1. of the spec. */
+static BROTLI_INLINE size_t MaxBackwardLimit(int lgwin) {
+ return (1u << lgwin) - 16;
+}
+
+#if defined(__cplusplus) || defined(c_plusplus)
+} /* extern "C" */
+#endif
#endif /* BROTLI_ENC_BACKWARD_REFERENCES_H_ */
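
The MaxBackwardLimit() helper above pins down how far back a match may reach. A minimal standalone sketch of the arithmetic, assuming 10 and 22 as sample window-bit inputs (22 is brotli's customary default, though that is incidental to the math):

#include <stddef.h>
#include <stdio.h>

/* Standalone check of MaxBackwardLimit() above: a window of lgwin bits
   spans (1 << lgwin) - 16 bytes, per section 9.1. of the spec. */
static size_t MaxBackwardLimitSketch(int lgwin) {
  return (1u << lgwin) - 16;
}

int main(void) {
  printf("%zu\n", MaxBackwardLimitSketch(10));  /* 1008 */
  printf("%zu\n", MaxBackwardLimitSketch(22));  /* 4194288 */
  return 0;
}
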
diff --git a/enc/backward_references_inc.h b/enc/backward_references_inc.h
new file mode 100644
index 0000000..cc6ed3c
--- /dev/null
+++ b/enc/backward_references_inc.h
@@ -0,0 +1,156 @@
+/* NOLINT(build/header_guard) */
+/* Copyright 2013 Google Inc. All Rights Reserved.
+
+ Distributed under MIT license.
+ See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* template parameters: FN */
+
+#define Hasher HASHER()
+
+static void FN(CreateBackwardReferences)(MemoryManager* m,
+ size_t num_bytes,
+ size_t position,
+ int is_last,
+ const uint8_t* ringbuffer,
+ size_t ringbuffer_mask,
+ const int quality,
+ const int lgwin,
+ Hasher* hasher,
+ int* dist_cache,
+ size_t* last_insert_len,
+ Command* commands,
+ size_t* num_commands,
+ size_t* num_literals) {
+ /* Set maximum distance, see section 9.1. of the spec. */
+ const size_t max_backward_limit = MaxBackwardLimit(lgwin);
+
+ const Command * const orig_commands = commands;
+ size_t insert_length = *last_insert_len;
+ const size_t pos_end = position + num_bytes;
+ const size_t store_end = num_bytes >= FN(StoreLookahead)() ?
+ position + num_bytes - FN(StoreLookahead)() + 1 : position;
+
+  /* Window size for the speed-up heuristics used on random data. */
+ const size_t random_heuristics_window_size = quality < 9 ? 64 : 512;
+ size_t apply_random_heuristics = position + random_heuristics_window_size;
+
+ /* Minimum score to accept a backward reference. */
+ const double kMinScore = 4.0;
+
+ FN(Init)(m, hasher, ringbuffer, lgwin, position, num_bytes, is_last);
+ if (BROTLI_IS_OOM(m)) return;
+ FN(StitchToPreviousBlock)(hasher, num_bytes, position,
+ ringbuffer, ringbuffer_mask);
+
+ while (position + FN(HashTypeLength)() < pos_end) {
+ size_t max_length = pos_end - position;
+ size_t max_distance = BROTLI_MIN(size_t, position, max_backward_limit);
+ size_t best_len = 0;
+ size_t best_len_code = 0;
+ size_t best_dist = 0;
+ double best_score = kMinScore;
+ int is_match_found = FN(FindLongestMatch)(hasher, ringbuffer,
+ ringbuffer_mask, dist_cache, position, max_length, max_distance,
+ &best_len, &best_len_code, &best_dist, &best_score);
+ if (is_match_found) {
+ /* Found a match. Let's look for something even better ahead. */
+ int delayed_backward_references_in_row = 0;
+ --max_length;
+ for (;; --max_length) {
+ size_t best_len_2 =
+ quality < 5 ? BROTLI_MIN(size_t, best_len - 1, max_length) : 0;
+ size_t best_len_code_2 = 0;
+ size_t best_dist_2 = 0;
+ double best_score_2 = kMinScore;
+ const double cost_diff_lazy = 7.0;
+ max_distance = BROTLI_MIN(size_t, position + 1, max_backward_limit);
+ is_match_found = FN(FindLongestMatch)(hasher, ringbuffer,
+ ringbuffer_mask, dist_cache, position + 1, max_length, max_distance,
+ &best_len_2, &best_len_code_2, &best_dist_2, &best_score_2);
+ if (is_match_found && best_score_2 >= best_score + cost_diff_lazy) {
+ /* Ok, let's just write one byte for now and start a match from the
+ next byte. */
+ ++position;
+ ++insert_length;
+ best_len = best_len_2;
+ best_len_code = best_len_code_2;
+ best_dist = best_dist_2;
+ best_score = best_score_2;
+ if (++delayed_backward_references_in_row < 4 &&
+ position + FN(HashTypeLength)() < pos_end) {
+ continue;
+ }
+ }
+ break;
+ }
+ apply_random_heuristics =
+ position + 2 * best_len + random_heuristics_window_size;
+ max_distance = BROTLI_MIN(size_t, position, max_backward_limit);
+ {
+ /* The first 16 codes are special shortcodes,
+ and the minimum offset is 1. */
+ size_t distance_code =
+ ComputeDistanceCode(best_dist, max_distance, quality, dist_cache);
+ if (best_dist <= max_distance && distance_code > 0) {
+ dist_cache[3] = dist_cache[2];
+ dist_cache[2] = dist_cache[1];
+ dist_cache[1] = dist_cache[0];
+ dist_cache[0] = (int)best_dist;
+ }
+ InitCommand(
+ commands++, insert_length, best_len, best_len_code, distance_code);
+ }
+ *num_literals += insert_length;
+ insert_length = 0;
+ /* Put the hash keys into the table, if there are enough bytes left.
+ Depending on the hasher implementation, it can push all positions
+ in the given range or only a subset of them. */
+ FN(StoreRange)(hasher, ringbuffer, ringbuffer_mask, position + 2,
+ BROTLI_MIN(size_t, position + best_len, store_end));
+ position += best_len;
+ } else {
+ ++insert_length;
+ ++position;
+      /* If we have not seen matches for a long time we can skip some
+         match lookups. Unsuccessful match lookups are very expensive,
+         and this kind of heuristic speeds up compression quite a lot. */
+ if (position > apply_random_heuristics) {
+        /* Going through incompressible data, jump. */
+ if (position >
+ apply_random_heuristics + 4 * random_heuristics_window_size) {
+          /* It has been quite a long time since we saw a copy, so we
+             assume that this data is not compressible and store hashes
+             less often. Hashes of incompressible data are less likely to
+             turn out to be useful in the future, too, so we store fewer of
+             them so as not to flood out the hash table of good
+             compressible data. */
+ const size_t kMargin =
+ BROTLI_MAX(size_t, FN(StoreLookahead)() - 1, 4);
+ size_t pos_jump =
+ BROTLI_MIN(size_t, position + 16, pos_end - kMargin);
+ for (; position < pos_jump; position += 4) {
+ FN(Store)(hasher, ringbuffer, ringbuffer_mask, position);
+ insert_length += 4;
+ }
+ } else {
+ const size_t kMargin =
+ BROTLI_MAX(size_t, FN(StoreLookahead)() - 1, 2);
+ size_t pos_jump =
+ BROTLI_MIN(size_t, position + 8, pos_end - kMargin);
+ for (; position < pos_jump; position += 2) {
+ FN(Store)(hasher, ringbuffer, ringbuffer_mask, position);
+ insert_length += 2;
+ }
+ }
+ }
+ }
+ }
+ insert_length += pos_end - position;
+ *last_insert_len = insert_length;
+ *num_commands += (size_t)(commands - orig_commands);
+}
+
+#undef Hasher
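
The lazy-matching loop above (the cost_diff_lazy branch) defers a copy by one byte whenever the match starting at the next position scores at least 7 bits better, at most four times in a row. A toy rendering of just that deferral rule, with made-up scores standing in for what FN(FindLongestMatch) would report:

#include <stddef.h>
#include <stdio.h>

int main(void) {
  /* Invented best_score values at five consecutive positions. */
  const double scores[5] = {12.0, 20.0, 27.5, 30.0, 41.5};
  const double kCostDiffLazy = 7.0;  /* same threshold as cost_diff_lazy */
  size_t position = 0;
  int deferred = 0;
  double best_score = scores[0];
  /* Defer the copy one byte at a time while the match starting at the
     next byte scores at least 7 bits better, at most 4 times in a row. */
  while (deferred < 4 && position + 1 < 5 &&
         scores[position + 1] >= best_score + kCostDiffLazy) {
    ++position;  /* one more literal gets emitted instead */
    best_score = scores[position];
    ++deferred;
  }
  printf("copy command starts at position %zu\n", position);  /* 2 */
  return 0;
}
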
diff --git a/enc/bit_cost.c b/enc/bit_cost.c
new file mode 100644
index 0000000..4d80ead
--- /dev/null
+++ b/enc/bit_cost.c
@@ -0,0 +1,35 @@
+/* Copyright 2013 Google Inc. All Rights Reserved.
+
+ Distributed under MIT license.
+ See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* Functions to estimate the bit cost of Huffman trees. */
+
+#include "./bit_cost.h"
+
+#include "../common/constants.h"
+#include "../common/types.h"
+#include "./fast_log.h"
+#include "./histogram.h"
+#include "./port.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+#define FN(X) X ## Literal
+#include "./bit_cost_inc.h" /* NOLINT(build/include) */
+#undef FN
+
+#define FN(X) X ## Command
+#include "./bit_cost_inc.h" /* NOLINT(build/include) */
+#undef FN
+
+#define FN(X) X ## Distance
+#include "./bit_cost_inc.h" /* NOLINT(build/include) */
+#undef FN
+
+#if defined(__cplusplus) || defined(c_plusplus)
+} /* extern "C" */
+#endif
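
The three FN define/include/undef stanzas above are the C replacement for the old C++ template: each inclusion of bit_cost_inc.h stamps out one monomorphic function per histogram type via token pasting. A self-contained reduction of the idiom, with the shared body repeated inline instead of living in a separate _inc.h file:

#include <stdio.h>

#define FN(X) X ## Literal
static double FN(TotalCount)(const unsigned* data, int size) {
  double total = 0;
  int i;
  for (i = 0; i < size; ++i) total += data[i];
  return total;
}
#undef FN

#define FN(X) X ## Distance
static double FN(TotalCount)(const unsigned* data, int size) {
  double total = 0;
  int i;
  for (i = 0; i < size; ++i) total += data[i];
  return total;
}
#undef FN

int main(void) {
  unsigned histo[4] = {1, 2, 3, 4};
  /* The preprocessor produced TotalCountLiteral and TotalCountDistance. */
  printf("%.0f %.0f\n",
         TotalCountLiteral(histo, 4), TotalCountDistance(histo, 4));
  return 0;
}
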
diff --git a/enc/bit_cost.h b/enc/bit_cost.h
index 086224f..eeeedde 100644
--- a/enc/bit_cost.h
+++ b/enc/bit_cost.h
@@ -10,13 +10,16 @@
#define BROTLI_ENC_BIT_COST_H_
#include "../common/types.h"
-#include "./entropy_encode.h"
#include "./fast_log.h"
+#include "./histogram.h"
+#include "./port.h"
-namespace brotli {
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
-static inline double ShannonEntropy(const uint32_t *population, size_t size,
- size_t *total) {
+static BROTLI_INLINE double ShannonEntropy(const uint32_t *population,
+ size_t size, size_t *total) {
size_t sum = 0;
double retval = 0;
const uint32_t *population_end = population + size;
@@ -27,135 +30,34 @@ static inline double ShannonEntropy(const uint32_t *population, size_t size,
while (population < population_end) {
p = *population++;
sum += p;
- retval -= static_cast<double>(p) * FastLog2(p);
+ retval -= (double)p * FastLog2(p);
odd_number_of_elements_left:
p = *population++;
sum += p;
- retval -= static_cast<double>(p) * FastLog2(p);
+ retval -= (double)p * FastLog2(p);
}
- if (sum) retval += static_cast<double>(sum) * FastLog2(sum);
+ if (sum) retval += (double)sum * FastLog2(sum);
*total = sum;
return retval;
}
-static inline double BitsEntropy(const uint32_t *population, size_t size) {
+static BROTLI_INLINE double BitsEntropy(
+ const uint32_t *population, size_t size) {
size_t sum;
double retval = ShannonEntropy(population, size, &sum);
if (retval < sum) {
/* At least one bit per literal is needed. */
- retval = static_cast<double>(sum);
+ retval = (double)sum;
}
return retval;
}
-template<int kSize>
-double PopulationCost(const Histogram<kSize>& histogram) {
- static const double kOneSymbolHistogramCost = 12;
- static const double kTwoSymbolHistogramCost = 20;
- static const double kThreeSymbolHistogramCost = 28;
- static const double kFourSymbolHistogramCost = 37;
- if (histogram.total_count_ == 0) {
- return kOneSymbolHistogramCost;
- }
- int count = 0;
- int s[5];
- for (int i = 0; i < kSize; ++i) {
- if (histogram.data_[i] > 0) {
- s[count] = i;
- ++count;
- if (count > 4) break;
- }
- }
- if (count == 1) {
- return kOneSymbolHistogramCost;
- }
- if (count == 2) {
- return (kTwoSymbolHistogramCost +
- static_cast<double>(histogram.total_count_));
- }
- if (count == 3) {
- const uint32_t histo0 = histogram.data_[s[0]];
- const uint32_t histo1 = histogram.data_[s[1]];
- const uint32_t histo2 = histogram.data_[s[2]];
- const uint32_t histomax = std::max(histo0, std::max(histo1, histo2));
- return (kThreeSymbolHistogramCost +
- 2 * (histo0 + histo1 + histo2) - histomax);
- }
- if (count == 4) {
- uint32_t histo[4];
- for (int i = 0; i < 4; ++i) {
- histo[i] = histogram.data_[s[i]];
- }
- // Sort
- for (int i = 0; i < 4; ++i) {
- for (int j = i + 1; j < 4; ++j) {
- if (histo[j] > histo[i]) {
- std::swap(histo[j], histo[i]);
- }
- }
- }
- const uint32_t h23 = histo[2] + histo[3];
- const uint32_t histomax = std::max(h23, histo[0]);
- return (kFourSymbolHistogramCost +
- 3 * h23 + 2 * (histo[0] + histo[1]) - histomax);
- }
-
- // In this loop we compute the entropy of the histogram and simultaneously
- // build a simplified histogram of the code length codes where we use the
- // zero repeat code 17, but we don't use the non-zero repeat code 16.
- double bits = 0;
- size_t max_depth = 1;
- uint32_t depth_histo[kCodeLengthCodes] = { 0 };
- const double log2total = FastLog2(histogram.total_count_);
- for (size_t i = 0; i < kSize;) {
- if (histogram.data_[i] > 0) {
- // Compute -log2(P(symbol)) = -log2(count(symbol)/total_count) =
- // = log2(total_count) - log2(count(symbol))
- double log2p = log2total - FastLog2(histogram.data_[i]);
- // Approximate the bit depth by round(-log2(P(symbol)))
- size_t depth = static_cast<size_t>(log2p + 0.5);
- bits += histogram.data_[i] * log2p;
- if (depth > 15) {
- depth = 15;
- }
- if (depth > max_depth) {
- max_depth = depth;
- }
- ++depth_histo[depth];
- ++i;
- } else {
- // Compute the run length of zeros and add the appropriate number of 0 and
- // 17 code length codes to the code length code histogram.
- uint32_t reps = 1;
- for (size_t k = i + 1; k < kSize && histogram.data_[k] == 0; ++k) {
- ++reps;
- }
- i += reps;
- if (i == kSize) {
- // Don't add any cost for the last zero run, since these are encoded
- // only implicitly.
- break;
- }
- if (reps < 3) {
- depth_histo[0] += reps;
- } else {
- reps -= 2;
- while (reps > 0) {
- ++depth_histo[17];
- // Add the 3 extra bits for the 17 code length code.
- bits += 3;
- reps >>= 3;
- }
- }
- }
- }
- // Add the estimated encoding cost of the code length code histogram.
- bits += static_cast<double>(18 + 2 * max_depth);
- // Add the entropy of the code length code histogram.
- bits += BitsEntropy(depth_histo, kCodeLengthCodes);
- return bits;
-}
+BROTLI_INTERNAL double BrotliPopulationCostLiteral(const HistogramLiteral*);
+BROTLI_INTERNAL double BrotliPopulationCostCommand(const HistogramCommand*);
+BROTLI_INTERNAL double BrotliPopulationCostDistance(const HistogramDistance*);
-} // namespace brotli
+#if defined(__cplusplus) || defined(c_plusplus)
+} /* extern "C" */
+#endif
#endif /* BROTLI_ENC_BIT_COST_H_ */
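
To make the ShannonEntropy/BitsEntropy pair above concrete: for the two-symbol population {3, 1}, ShannonEntropy returns about 3.245 bits over 4 symbols, and BitsEntropy rounds that up to 4.0 since at least one bit per coded symbol is needed. A standalone check of the same math, using libm's log2 in place of the FastLog2 approximation:

#include <math.h>
#include <stdio.h>

static double Entropy(const unsigned* population, int size,
                      unsigned* total) {
  double bits = 0;
  unsigned sum = 0;
  int i;
  for (i = 0; i < size; ++i) {
    sum += population[i];
    if (population[i]) bits -= (double)population[i] * log2((double)population[i]);
  }
  if (sum) bits += (double)sum * log2((double)sum);
  *total = sum;
  return bits;
}

int main(void) {
  const unsigned skewed[2] = {3, 1};
  unsigned total;
  double bits = Entropy(skewed, 2, &total);
  double clamped = bits < total ? (double)total : bits;  /* BitsEntropy */
  printf("%.3f -> %.1f\n", bits, clamped);  /* 3.245 -> 4.0 */
  return 0;
}
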
diff --git a/enc/bit_cost_inc.h b/enc/bit_cost_inc.h
new file mode 100644
index 0000000..453c226
--- /dev/null
+++ b/enc/bit_cost_inc.h
@@ -0,0 +1,127 @@
+/* NOLINT(build/header_guard) */
+/* Copyright 2013 Google Inc. All Rights Reserved.
+
+ Distributed under MIT license.
+ See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* template parameters: FN */
+
+#define HistogramType FN(Histogram)
+
+double FN(BrotliPopulationCost)(const HistogramType* histogram) {
+ static const double kOneSymbolHistogramCost = 12;
+ static const double kTwoSymbolHistogramCost = 20;
+ static const double kThreeSymbolHistogramCost = 28;
+ static const double kFourSymbolHistogramCost = 37;
+ const size_t data_size = FN(HistogramDataSize)();
+ int count = 0;
+ size_t s[5];
+ double bits = 0.0;
+ size_t i;
+ if (histogram->total_count_ == 0) {
+ return kOneSymbolHistogramCost;
+ }
+ for (i = 0; i < data_size; ++i) {
+ if (histogram->data_[i] > 0) {
+ s[count] = i;
+ ++count;
+ if (count > 4) break;
+ }
+ }
+ if (count == 1) {
+ return kOneSymbolHistogramCost;
+ }
+ if (count == 2) {
+ return (kTwoSymbolHistogramCost + (double)histogram->total_count_);
+ }
+ if (count == 3) {
+ const uint32_t histo0 = histogram->data_[s[0]];
+ const uint32_t histo1 = histogram->data_[s[1]];
+ const uint32_t histo2 = histogram->data_[s[2]];
+ const uint32_t histomax =
+ BROTLI_MAX(uint32_t, histo0, BROTLI_MAX(uint32_t, histo1, histo2));
+ return (kThreeSymbolHistogramCost +
+ 2 * (histo0 + histo1 + histo2) - histomax);
+ }
+ if (count == 4) {
+ uint32_t histo[4];
+ uint32_t h23;
+ uint32_t histomax;
+ for (i = 0; i < 4; ++i) {
+ histo[i] = histogram->data_[s[i]];
+ }
+ /* Sort */
+ for (i = 0; i < 4; ++i) {
+ size_t j;
+ for (j = i + 1; j < 4; ++j) {
+ if (histo[j] > histo[i]) {
+ BROTLI_SWAP(uint32_t, histo, j, i);
+ }
+ }
+ }
+ h23 = histo[2] + histo[3];
+ histomax = BROTLI_MAX(uint32_t, h23, histo[0]);
+ return (kFourSymbolHistogramCost +
+ 3 * h23 + 2 * (histo[0] + histo[1]) - histomax);
+ }
+
+ {
+ /* In this loop we compute the entropy of the histogram and simultaneously
+ build a simplified histogram of the code length codes where we use the
+ zero repeat code 17, but we don't use the non-zero repeat code 16. */
+ size_t max_depth = 1;
+ uint32_t depth_histo[BROTLI_CODE_LENGTH_CODES] = { 0 };
+ const double log2total = FastLog2(histogram->total_count_);
+ for (i = 0; i < data_size;) {
+ if (histogram->data_[i] > 0) {
+ /* Compute -log2(P(symbol)) = -log2(count(symbol)/total_count) =
+ = log2(total_count) - log2(count(symbol)) */
+ double log2p = log2total - FastLog2(histogram->data_[i]);
+ /* Approximate the bit depth by round(-log2(P(symbol))) */
+ size_t depth = (size_t)(log2p + 0.5);
+ bits += histogram->data_[i] * log2p;
+ if (depth > 15) {
+ depth = 15;
+ }
+ if (depth > max_depth) {
+ max_depth = depth;
+ }
+ ++depth_histo[depth];
+ ++i;
+ } else {
+ /* Compute the run length of zeros and add the appropriate number of 0
+ and 17 code length codes to the code length code histogram. */
+ uint32_t reps = 1;
+ size_t k;
+ for (k = i + 1; k < data_size && histogram->data_[k] == 0; ++k) {
+ ++reps;
+ }
+ i += reps;
+ if (i == data_size) {
+ /* Don't add any cost for the last zero run, since these are encoded
+ only implicitly. */
+ break;
+ }
+ if (reps < 3) {
+ depth_histo[0] += reps;
+ } else {
+ reps -= 2;
+ while (reps > 0) {
+ ++depth_histo[BROTLI_REPEAT_ZERO_CODE_LENGTH];
+ /* Add the 3 extra bits for the 17 code length code. */
+ bits += 3;
+ reps >>= 3;
+ }
+ }
+ }
+ }
+ /* Add the estimated encoding cost of the code length code histogram. */
+ bits += (double)(18 + 2 * max_depth);
+ /* Add the entropy of the code length code histogram. */
+ bits += BitsEntropy(depth_histo, BROTLI_CODE_LENGTH_CODES);
+ }
+ return bits;
+}
+
+#undef HistogramType
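
A quick spot-check of the count == 2 shortcut above: a histogram with exactly two used symbols is priced at roughly 20 header bits plus one bit per coded symbol, so counts of 7 and 5 come to 20 + 12 = 32 bits. The same arithmetic, inlined:

#include <stdio.h>

int main(void) {
  const double kTwoSymbolHistogramCost = 20;  /* as in the code above */
  const unsigned total_count = 7 + 5;  /* two used symbols, counts 7 and 5 */
  printf("%.0f bits\n", kTwoSymbolHistogramCost + (double)total_count);
  return 0;  /* prints "32 bits" */
}
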
diff --git a/enc/block_encoder_inc.h b/enc/block_encoder_inc.h
new file mode 100644
index 0000000..2a08f90
--- /dev/null
+++ b/enc/block_encoder_inc.h
@@ -0,0 +1,33 @@
+/* NOLINT(build/header_guard) */
+/* Copyright 2014 Google Inc. All Rights Reserved.
+
+ Distributed under MIT license.
+ See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* template parameters: FN */
+
+#define HistogramType FN(Histogram)
+
+/* Creates entropy codes for all block types and stores them to the bit
+ stream. */
+static void FN(BuildAndStoreEntropyCodes)(MemoryManager* m, BlockEncoder* self,
+ const HistogramType* histograms, const size_t histograms_size,
+ HuffmanTree* tree, size_t* storage_ix, uint8_t* storage) {
+ const size_t alphabet_size = self->alphabet_size_;
+ const size_t table_size = histograms_size * alphabet_size;
+ self->depths_ = BROTLI_ALLOC(m, uint8_t, table_size);
+ self->bits_ = BROTLI_ALLOC(m, uint16_t, table_size);
+ if (BROTLI_IS_OOM(m)) return;
+
+ {
+ size_t i;
+ for (i = 0; i < histograms_size; ++i) {
+ size_t ix = i * alphabet_size;
+ BuildAndStoreHuffmanTree(&histograms[i].data_[0], alphabet_size, tree,
+ &self->depths_[ix], &self->bits_[ix], storage_ix, storage);
+ }
+ }
+}
+
+#undef HistogramType
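
BuildAndStoreEntropyCodes above lays the per-block-type codes out in one flat allocation of histograms_size * alphabet_size entries, so the depth of symbol s under block type t lives at t * alphabet_size + s. A minimal sketch of that indexing, with arbitrary sample sizes:

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

int main(void) {
  const size_t alphabet_size = 256;  /* e.g. the literal alphabet */
  const size_t histograms_size = 3;  /* say, three block types */
  uint8_t* depths = (uint8_t*)calloc(histograms_size * alphabet_size, 1);
  if (depths == NULL) return 1;
  /* Depth of symbol 'A' (65) under block type 2 lives at 2 * 256 + 65. */
  depths[2 * alphabet_size + 65] = 8;
  printf("%u\n", (unsigned)depths[2 * 256 + 65]);  /* 8 */
  free(depths);
  return 0;
}
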
diff --git a/enc/block_splitter.c b/enc/block_splitter.c
index f98765c..57f379e 100644
--- a/enc/block_splitter.c
+++ b/enc/block_splitter.c
@@ -9,18 +9,19 @@
#include "./block_splitter.h"
#include <assert.h>
-#include <math.h>
-
-#include <algorithm>
-#include <cstring>
-#include <vector>
+#include <string.h> /* memcpy, memset */
+#include "./bit_cost.h"
#include "./cluster.h"
#include "./command.h"
#include "./fast_log.h"
#include "./histogram.h"
+#include "./memory.h"
+#include "./port.h"
-namespace brotli {
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
static const size_t kMaxLiteralHistograms = 100;
static const size_t kMaxCommandHistograms = 50;
@@ -36,45 +37,43 @@ static const size_t kMinLengthForBlockSplitting = 128;
static const size_t kIterMulForRefining = 2;
static const size_t kMinItersForRefining = 100;
-void CopyLiteralsToByteArray(const Command* cmds,
- const size_t num_commands,
- const uint8_t* data,
- const size_t offset,
- const size_t mask,
- std::vector<uint8_t>* literals) {
+static size_t CountLiterals(const Command* cmds, const size_t num_commands) {
/* Count how many we have. */
size_t total_length = 0;
- for (size_t i = 0; i < num_commands; ++i) {
+ size_t i;
+ for (i = 0; i < num_commands; ++i) {
total_length += cmds[i].insert_len_;
}
- if (total_length == 0) {
- return;
- }
-
- // Allocate.
- literals->resize(total_length);
+ return total_length;
+}
- // Loop again, and copy this time.
+static void CopyLiteralsToByteArray(const Command* cmds,
+ const size_t num_commands,
+ const uint8_t* data,
+ const size_t offset,
+ const size_t mask,
+ uint8_t* literals) {
size_t pos = 0;
size_t from_pos = offset & mask;
- for (size_t i = 0; i < num_commands && pos < total_length; ++i) {
+ size_t i;
+ for (i = 0; i < num_commands; ++i) {
size_t insert_len = cmds[i].insert_len_;
if (from_pos + insert_len > mask) {
size_t head_size = mask + 1 - from_pos;
- memcpy(&(*literals)[pos], data + from_pos, head_size);
+ memcpy(literals + pos, data + from_pos, head_size);
from_pos = 0;
pos += head_size;
insert_len -= head_size;
}
if (insert_len > 0) {
- memcpy(&(*literals)[pos], data + from_pos, insert_len);
+ memcpy(literals + pos, data + from_pos, insert_len);
pos += insert_len;
}
- from_pos = (from_pos + insert_len + cmds[i].copy_len()) & mask;
+ from_pos = (from_pos + insert_len + CommandCopyLen(&cmds[i])) & mask;
}
}
-inline static unsigned int MyRand(unsigned int* seed) {
+static BROTLI_INLINE unsigned int MyRand(unsigned int* seed) {
*seed *= 16807U;
if (*seed == 0) {
*seed = 1;
@@ -82,424 +81,116 @@ inline static unsigned int MyRand(unsigned int* seed) {
return *seed;
}
-template<typename HistogramType, typename DataType>
-void InitialEntropyCodes(const DataType* data, size_t length,
- size_t stride,
- size_t num_histograms,
- HistogramType* histograms) {
- for (size_t i = 0; i < num_histograms; ++i) {
- histograms[i].Clear();
- }
- unsigned int seed = 7;
- size_t block_length = length / num_histograms;
- for (size_t i = 0; i < num_histograms; ++i) {
- size_t pos = length * i / num_histograms;
- if (i != 0) {
- pos += MyRand(&seed) % block_length;
- }
- if (pos + stride >= length) {
- pos = length - stride - 1;
- }
- histograms[i].Add(data + pos, stride);
- }
-}
-
-template<typename HistogramType, typename DataType>
-void RandomSample(unsigned int* seed,
- const DataType* data,
- size_t length,
- size_t stride,
- HistogramType* sample) {
- size_t pos = 0;
- if (stride >= length) {
- pos = 0;
- stride = length;
- } else {
- pos = MyRand(seed) % (length - stride + 1);
- }
- sample->Add(data + pos, stride);
-}
-
-template<typename HistogramType, typename DataType>
-void RefineEntropyCodes(const DataType* data, size_t length,
- size_t stride,
- size_t num_histograms,
- HistogramType* histograms) {
- size_t iters =
- kIterMulForRefining * length / stride + kMinItersForRefining;
- unsigned int seed = 7;
- iters = ((iters + num_histograms - 1) / num_histograms) * num_histograms;
- for (size_t iter = 0; iter < iters; ++iter) {
- HistogramType sample;
- RandomSample(&seed, data, length, stride, &sample);
- size_t ix = iter % num_histograms;
- histograms[ix].AddHistogram(sample);
- }
-}
-
-inline static double BitCost(size_t count) {
+static BROTLI_INLINE double BitCost(size_t count) {
return count == 0 ? -2.0 : FastLog2(count);
}
-// Assigns a block id from the range [0, vec.size()) to each data element
-// in data[0..length) and fills in block_id[0..length) with the assigned values.
-// Returns the number of blocks, i.e. one plus the number of block switches.
-template<typename DataType, int kSize>
-size_t FindBlocks(const DataType* data, const size_t length,
- const double block_switch_bitcost,
- const size_t num_histograms,
- const Histogram<kSize>* histograms,
- double* insert_cost,
- double* cost,
- uint8_t* switch_signal,
- uint8_t *block_id) {
- if (num_histograms <= 1) {
- for (size_t i = 0; i < length; ++i) {
- block_id[i] = 0;
- }
- return 1;
- }
- const size_t bitmaplen = (num_histograms + 7) >> 3;
- assert(num_histograms <= 256);
- memset(insert_cost, 0, sizeof(insert_cost[0]) * kSize * num_histograms);
- for (size_t j = 0; j < num_histograms; ++j) {
- insert_cost[j] = FastLog2(static_cast<uint32_t>(
- histograms[j].total_count_));
- }
- for (size_t i = kSize; i != 0;) {
- --i;
- for (size_t j = 0; j < num_histograms; ++j) {
- insert_cost[i * num_histograms + j] =
- insert_cost[j] - BitCost(histograms[j].data_[i]);
- }
- }
- memset(cost, 0, sizeof(cost[0]) * num_histograms);
- memset(switch_signal, 0, sizeof(switch_signal[0]) * length * bitmaplen);
- // After each iteration of this loop, cost[k] will contain the difference
- // between the minimum cost of arriving at the current byte position using
- // entropy code k, and the minimum cost of arriving at the current byte
- // position. This difference is capped at the block switch cost, and if it
- // reaches block switch cost, it means that when we trace back from the last
- // position, we need to switch here.
- for (size_t byte_ix = 0; byte_ix < length; ++byte_ix) {
- size_t ix = byte_ix * bitmaplen;
- size_t insert_cost_ix = data[byte_ix] * num_histograms;
- double min_cost = 1e99;
- for (size_t k = 0; k < num_histograms; ++k) {
- // We are coding the symbol in data[byte_ix] with entropy code k.
- cost[k] += insert_cost[insert_cost_ix + k];
- if (cost[k] < min_cost) {
- min_cost = cost[k];
- block_id[byte_ix] = static_cast<uint8_t>(k);
- }
- }
- double block_switch_cost = block_switch_bitcost;
- // More blocks for the beginning.
- if (byte_ix < 2000) {
- block_switch_cost *= 0.77 + 0.07 * static_cast<double>(byte_ix) / 2000;
- }
- for (size_t k = 0; k < num_histograms; ++k) {
- cost[k] -= min_cost;
- if (cost[k] >= block_switch_cost) {
- cost[k] = block_switch_cost;
- const uint8_t mask = static_cast<uint8_t>(1u << (k & 7));
- assert((k >> 3) < bitmaplen);
- switch_signal[ix + (k >> 3)] |= mask;
- }
- }
- }
- // Now trace back from the last position and switch at the marked places.
- size_t byte_ix = length - 1;
- size_t ix = byte_ix * bitmaplen;
- uint8_t cur_id = block_id[byte_ix];
- size_t num_blocks = 1;
- while (byte_ix > 0) {
- --byte_ix;
- ix -= bitmaplen;
- const uint8_t mask = static_cast<uint8_t>(1u << (cur_id & 7));
- assert((static_cast<size_t>(cur_id) >> 3) < bitmaplen);
- if (switch_signal[ix + (cur_id >> 3)] & mask) {
- if (cur_id != block_id[byte_ix]) {
- cur_id = block_id[byte_ix];
- ++num_blocks;
- }
- }
- block_id[byte_ix] = cur_id;
- }
- return num_blocks;
-}
-
-static size_t RemapBlockIds(uint8_t* block_ids, const size_t length,
- uint16_t* new_id, const size_t num_histograms) {
- static const uint16_t kInvalidId = 256;
- for (size_t i = 0; i < num_histograms; ++i) {
- new_id[i] = kInvalidId;
- }
- uint16_t next_id = 0;
- for (size_t i = 0; i < length; ++i) {
- assert(block_ids[i] < num_histograms);
- if (new_id[block_ids[i]] == kInvalidId) {
- new_id[block_ids[i]] = next_id++;
- }
- }
- for (size_t i = 0; i < length; ++i) {
- block_ids[i] = static_cast<uint8_t>(new_id[block_ids[i]]);
- assert(block_ids[i] < num_histograms);
- }
- assert(next_id <= num_histograms);
- return next_id;
-}
-
-template<typename HistogramType, typename DataType>
-void BuildBlockHistograms(const DataType* data, const size_t length,
- const uint8_t* block_ids,
- const size_t num_histograms,
- HistogramType* histograms) {
- for (size_t i = 0; i < num_histograms; ++i) {
- histograms[i].Clear();
- }
- for (size_t i = 0; i < length; ++i) {
- histograms[block_ids[i]].Add(data[i]);
- }
-}
-
-template<typename HistogramType, typename DataType>
-void ClusterBlocks(const DataType* data, const size_t length,
- const size_t num_blocks,
- uint8_t* block_ids,
- BlockSplit* split) {
- static const size_t kMaxNumberOfBlockTypes = 256;
- static const size_t kHistogramsPerBatch = 64;
- static const size_t kClustersPerBatch = 16;
- std::vector<uint32_t> histogram_symbols(num_blocks);
- std::vector<uint32_t> block_lengths(num_blocks);
-
- size_t block_idx = 0;
- for (size_t i = 0; i < length; ++i) {
- assert(block_idx < num_blocks);
- ++block_lengths[block_idx];
- if (i + 1 == length || block_ids[i] != block_ids[i + 1]) {
- ++block_idx;
- }
- }
- assert(block_idx == num_blocks);
-
- const size_t expected_num_clusters =
- kClustersPerBatch *
- (num_blocks + kHistogramsPerBatch - 1) / kHistogramsPerBatch;
- std::vector<HistogramType> all_histograms;
- std::vector<uint32_t> cluster_size;
- all_histograms.reserve(expected_num_clusters);
- cluster_size.reserve(expected_num_clusters);
- size_t num_clusters = 0;
- std::vector<HistogramType> histograms(
- std::min(num_blocks, kHistogramsPerBatch));
- size_t max_num_pairs = kHistogramsPerBatch * kHistogramsPerBatch / 2;
- std::vector<HistogramPair> pairs(max_num_pairs + 1);
- size_t pos = 0;
- for (size_t i = 0; i < num_blocks; i += kHistogramsPerBatch) {
- const size_t num_to_combine = std::min(num_blocks - i, kHistogramsPerBatch);
- uint32_t sizes[kHistogramsPerBatch];
- uint32_t clusters[kHistogramsPerBatch];
- uint32_t symbols[kHistogramsPerBatch];
- uint32_t remap[kHistogramsPerBatch];
- for (size_t j = 0; j < num_to_combine; ++j) {
- histograms[j].Clear();
- for (size_t k = 0; k < block_lengths[i + j]; ++k) {
- histograms[j].Add(data[pos++]);
- }
- histograms[j].bit_cost_ = PopulationCost(histograms[j]);
- symbols[j] = clusters[j] = static_cast<uint32_t>(j);
- sizes[j] = 1;
- }
- size_t num_new_clusters = HistogramCombine(
- &histograms[0], sizes, symbols, clusters, &pairs[0], num_to_combine,
- num_to_combine, kHistogramsPerBatch, max_num_pairs);
- for (size_t j = 0; j < num_new_clusters; ++j) {
- all_histograms.push_back(histograms[clusters[j]]);
- cluster_size.push_back(sizes[clusters[j]]);
- remap[clusters[j]] = static_cast<uint32_t>(j);
- }
- for (size_t j = 0; j < num_to_combine; ++j) {
- histogram_symbols[i + j] =
- static_cast<uint32_t>(num_clusters) + remap[symbols[j]];
- }
- num_clusters += num_new_clusters;
- assert(num_clusters == cluster_size.size());
- assert(num_clusters == all_histograms.size());
- }
-
- max_num_pairs =
- std::min(64 * num_clusters, (num_clusters / 2) * num_clusters);
- pairs.resize(max_num_pairs + 1);
-
- std::vector<uint32_t> clusters(num_clusters);
- for (size_t i = 0; i < num_clusters; ++i) {
- clusters[i] = static_cast<uint32_t>(i);
- }
- size_t num_final_clusters =
- HistogramCombine(&all_histograms[0], &cluster_size[0],
- &histogram_symbols[0],
- &clusters[0], &pairs[0], num_clusters,
- num_blocks, kMaxNumberOfBlockTypes, max_num_pairs);
-
- static const uint32_t kInvalidIndex = std::numeric_limits<uint32_t>::max();
- std::vector<uint32_t> new_index(num_clusters, kInvalidIndex);
- uint32_t next_index = 0;
- pos = 0;
- for (size_t i = 0; i < num_blocks; ++i) {
- HistogramType histo;
- for (size_t j = 0; j < block_lengths[i]; ++j) {
- histo.Add(data[pos++]);
- }
- uint32_t best_out =
- i == 0 ? histogram_symbols[0] : histogram_symbols[i - 1];
- double best_bits = HistogramBitCostDistance(
- histo, all_histograms[best_out]);
- for (size_t j = 0; j < num_final_clusters; ++j) {
- const double cur_bits = HistogramBitCostDistance(
- histo, all_histograms[clusters[j]]);
- if (cur_bits < best_bits) {
- best_bits = cur_bits;
- best_out = clusters[j];
- }
- }
- histogram_symbols[i] = best_out;
- if (new_index[best_out] == kInvalidIndex) {
- new_index[best_out] = next_index++;
- }
- }
- uint8_t max_type = 0;
- uint32_t cur_length = 0;
- block_idx = 0;
- split->types.resize(num_blocks);
- split->lengths.resize(num_blocks);
- for (size_t i = 0; i < num_blocks; ++i) {
- cur_length += block_lengths[i];
- if (i + 1 == num_blocks ||
- histogram_symbols[i] != histogram_symbols[i + 1]) {
- const uint8_t id = static_cast<uint8_t>(new_index[histogram_symbols[i]]);
- split->types[block_idx] = id;
- split->lengths[block_idx] = cur_length;
- max_type = std::max(max_type, id);
- cur_length = 0;
- ++block_idx;
- }
- }
- split->types.resize(block_idx);
- split->lengths.resize(block_idx);
- split->num_types = static_cast<size_t>(max_type) + 1;
+#define HISTOGRAMS_PER_BATCH 64
+#define CLUSTERS_PER_BATCH 16
+
+#define FN(X) X ## Literal
+#define DataType uint8_t
+/* NOLINTNEXTLINE(build/include) */
+#include "./block_splitter_inc.h"
+#undef DataType
+#undef FN
+
+#define FN(X) X ## Command
+#define DataType uint16_t
+/* NOLINTNEXTLINE(build/include) */
+#include "./block_splitter_inc.h"
+#undef FN
+
+#define FN(X) X ## Distance
+/* NOLINTNEXTLINE(build/include) */
+#include "./block_splitter_inc.h"
+#undef DataType
+#undef FN
+
+void BrotliInitBlockSplit(BlockSplit* self) {
+ self->num_types = 0;
+ self->num_blocks = 0;
+ self->types = 0;
+ self->lengths = 0;
+ self->types_alloc_size = 0;
+ self->lengths_alloc_size = 0;
}
-template<int kSize, typename DataType>
-void SplitByteVector(const std::vector<DataType>& data,
- const size_t literals_per_histogram,
- const size_t max_histograms,
- const size_t sampling_stride_length,
- const double block_switch_cost,
- BlockSplit* split) {
- if (data.empty()) {
- split->num_types = 1;
- return;
- } else if (data.size() < kMinLengthForBlockSplitting) {
- split->num_types = 1;
- split->types.push_back(0);
- split->lengths.push_back(static_cast<uint32_t>(data.size()));
- return;
- }
- size_t num_histograms = data.size() / literals_per_histogram + 1;
- if (num_histograms > max_histograms) {
- num_histograms = max_histograms;
- }
- Histogram<kSize>* histograms = new Histogram<kSize>[num_histograms];
- // Find good entropy codes.
- InitialEntropyCodes(&data[0], data.size(),
- sampling_stride_length,
- num_histograms, histograms);
- RefineEntropyCodes(&data[0], data.size(),
- sampling_stride_length,
- num_histograms, histograms);
- // Find a good path through literals with the good entropy codes.
- std::vector<uint8_t> block_ids(data.size());
- size_t num_blocks;
- const size_t bitmaplen = (num_histograms + 7) >> 3;
- double* insert_cost = new double[kSize * num_histograms];
- double *cost = new double[num_histograms];
- uint8_t* switch_signal = new uint8_t[data.size() * bitmaplen];
- uint16_t* new_id = new uint16_t[num_histograms];
- for (size_t i = 0; i < 10; ++i) {
- num_blocks = FindBlocks(&data[0], data.size(),
- block_switch_cost,
- num_histograms, histograms,
- insert_cost, cost, switch_signal,
- &block_ids[0]);
- num_histograms = RemapBlockIds(&block_ids[0], data.size(),
- new_id, num_histograms);
- BuildBlockHistograms(&data[0], data.size(), &block_ids[0],
- num_histograms, histograms);
- }
- delete[] insert_cost;
- delete[] cost;
- delete[] switch_signal;
- delete[] new_id;
- delete[] histograms;
- ClusterBlocks<Histogram<kSize> >(&data[0], data.size(), num_blocks,
- &block_ids[0], split);
+void BrotliDestroyBlockSplit(MemoryManager* m, BlockSplit* self) {
+ BROTLI_FREE(m, self->types);
+ BROTLI_FREE(m, self->lengths);
}
-void SplitBlock(const Command* cmds,
- const size_t num_commands,
- const uint8_t* data,
- const size_t pos,
- const size_t mask,
- BlockSplit* literal_split,
- BlockSplit* insert_and_copy_split,
- BlockSplit* dist_split) {
+void BrotliSplitBlock(MemoryManager* m,
+ const Command* cmds,
+ const size_t num_commands,
+ const uint8_t* data,
+ const size_t pos,
+ const size_t mask,
+ const int quality,
+ BlockSplit* literal_split,
+ BlockSplit* insert_and_copy_split,
+ BlockSplit* dist_split) {
{
+ size_t literals_count = CountLiterals(cmds, num_commands);
+ uint8_t* literals = BROTLI_ALLOC(m, uint8_t, literals_count);
+ if (BROTLI_IS_OOM(m)) return;
/* Create a contiguous array of literals. */
- std::vector<uint8_t> literals;
- CopyLiteralsToByteArray(cmds, num_commands, data, pos, mask, &literals);
+ CopyLiteralsToByteArray(cmds, num_commands, data, pos, mask, literals);
/* Create the block split on the array of literals.
Literal histograms have alphabet size 256. */
- SplitByteVector<256>(
- literals,
+ SplitByteVectorLiteral(
+ m, literals, literals_count,
kSymbolsPerLiteralHistogram, kMaxLiteralHistograms,
- kLiteralStrideLength, kLiteralBlockSwitchCost,
+ kLiteralStrideLength, kLiteralBlockSwitchCost, quality,
literal_split);
+ if (BROTLI_IS_OOM(m)) return;
+ BROTLI_FREE(m, literals);
}
{
/* Compute prefix codes for commands. */
- std::vector<uint16_t> insert_and_copy_codes(num_commands);
- for (size_t i = 0; i < num_commands; ++i) {
+ uint16_t* insert_and_copy_codes = BROTLI_ALLOC(m, uint16_t, num_commands);
+ size_t i;
+ if (BROTLI_IS_OOM(m)) return;
+ for (i = 0; i < num_commands; ++i) {
insert_and_copy_codes[i] = cmds[i].cmd_prefix_;
}
/* Create the block split on the array of command prefixes. */
- SplitByteVector<kNumCommandPrefixes>(
- insert_and_copy_codes,
+ SplitByteVectorCommand(
+ m, insert_and_copy_codes, num_commands,
kSymbolsPerCommandHistogram, kMaxCommandHistograms,
- kCommandStrideLength, kCommandBlockSwitchCost,
+ kCommandStrideLength, kCommandBlockSwitchCost, quality,
insert_and_copy_split);
+ if (BROTLI_IS_OOM(m)) return;
+ /* TODO: reuse for distances? */
+ BROTLI_FREE(m, insert_and_copy_codes);
}
{
+    /* Create a contiguous array of distance prefixes. */
- std::vector<uint16_t> distance_prefixes(num_commands);
- size_t pos = 0;
- for (size_t i = 0; i < num_commands; ++i) {
- const Command& cmd = cmds[i];
- if (cmd.copy_len() && cmd.cmd_prefix_ >= 128) {
- distance_prefixes[pos++] = cmd.dist_prefix_;
+ uint16_t* distance_prefixes = BROTLI_ALLOC(m, uint16_t, num_commands);
+ size_t j = 0;
+ size_t i;
+ if (BROTLI_IS_OOM(m)) return;
+ for (i = 0; i < num_commands; ++i) {
+ const Command* cmd = &cmds[i];
+ if (CommandCopyLen(cmd) && cmd->cmd_prefix_ >= 128) {
+ distance_prefixes[j++] = cmd->dist_prefix_;
}
}
- distance_prefixes.resize(pos);
/* Create the block split on the array of distance prefixes. */
- SplitByteVector<kNumDistancePrefixes>(
- distance_prefixes,
+ SplitByteVectorDistance(
+ m, distance_prefixes, j,
kSymbolsPerDistanceHistogram, kMaxCommandHistograms,
- kCommandStrideLength, kDistanceBlockSwitchCost,
+ kCommandStrideLength, kDistanceBlockSwitchCost, quality,
dist_split);
+ if (BROTLI_IS_OOM(m)) return;
+ BROTLI_FREE(m, distance_prefixes);
}
}
-} // namespace brotli
+
+#if defined(__cplusplus) || defined(c_plusplus)
+} /* extern "C" */
+#endif
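
The CountLiterals/CopyLiteralsToByteArray pair above shows the pattern this conversion applies wherever std::vector used to be: one pass to count, one exact allocation, one pass to copy. A stripped-down sketch of the same shape — Command here is a one-field stand-in, and copy lengths plus ring-buffer wrapping are omitted:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

typedef struct { size_t insert_len_; } Command;  /* stand-in */

static unsigned char* GatherLiterals(const Command* cmds, size_t n,
                                     const unsigned char* data,
                                     size_t* out_len) {
  size_t total = 0, pos = 0, i;
  unsigned char* out;
  for (i = 0; i < n; ++i) total += cmds[i].insert_len_;  /* pass 1: count */
  out = (unsigned char*)malloc(total ? total : 1);       /* exact size */
  if (out == NULL) return NULL;
  for (i = 0; i < n; ++i) {                              /* pass 2: copy */
    memcpy(out + pos, data + pos, cmds[i].insert_len_);  /* no wrap here */
    pos += cmds[i].insert_len_;
  }
  *out_len = total;
  return out;
}

int main(void) {
  const Command cmds[2] = {{3}, {2}};
  const unsigned char data[5] = {'a', 'b', 'c', 'd', 'e'};
  size_t len;
  unsigned char* lits = GatherLiterals(cmds, 2, data, &len);
  if (lits != NULL) {
    printf("%zu literals\n", len);  /* 5 literals */
    free(lits);
  }
  return 0;
}
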
diff --git a/enc/block_splitter.h b/enc/block_splitter.h
index b9a34b6..1afd7bd 100644
--- a/enc/block_splitter.h
+++ b/enc/block_splitter.h
@@ -9,53 +9,42 @@
#ifndef BROTLI_ENC_BLOCK_SPLITTER_H_
#define BROTLI_ENC_BLOCK_SPLITTER_H_
-#include <vector>
-
#include "../common/types.h"
#include "./command.h"
-#include "./metablock.h"
-
-namespace brotli {
-
-struct BlockSplitIterator {
- explicit BlockSplitIterator(const BlockSplit& split)
- : split_(split), idx_(0), type_(0), length_(0) {
- if (!split.lengths.empty()) {
- length_ = split.lengths[0];
- }
- }
-
- void Next(void) {
- if (length_ == 0) {
- ++idx_;
- type_ = split_.types[idx_];
- length_ = split_.lengths[idx_];
- }
- --length_;
- }
-
- const BlockSplit& split_;
- size_t idx_;
- size_t type_;
- size_t length_;
-};
-
-void CopyLiteralsToByteArray(const Command* cmds,
- const size_t num_commands,
- const uint8_t* data,
- const size_t offset,
- const size_t mask,
- std::vector<uint8_t>* literals);
-
-void SplitBlock(const Command* cmds,
- const size_t num_commands,
- const uint8_t* data,
- const size_t offset,
- const size_t mask,
- BlockSplit* literal_split,
- BlockSplit* insert_and_copy_split,
- BlockSplit* dist_split);
-
-} // namespace brotli
+#include "./memory.h"
+#include "./port.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+typedef struct BlockSplit {
+  size_t num_types;  /* Number of distinct block types */
+  size_t num_blocks;  /* Number of values in types and lengths */
+ uint8_t* types;
+ uint32_t* lengths;
+
+ size_t types_alloc_size;
+ size_t lengths_alloc_size;
+} BlockSplit;
+
+BROTLI_INTERNAL void BrotliInitBlockSplit(BlockSplit* self);
+BROTLI_INTERNAL void BrotliDestroyBlockSplit(MemoryManager* m,
+ BlockSplit* self);
+
+BROTLI_INTERNAL void BrotliSplitBlock(MemoryManager* m,
+ const Command* cmds,
+ const size_t num_commands,
+ const uint8_t* data,
+ const size_t offset,
+ const size_t mask,
+ const int quality,
+ BlockSplit* literal_split,
+ BlockSplit* insert_and_copy_split,
+ BlockSplit* dist_split);
+
+#if defined(__cplusplus) || defined(c_plusplus)
+} /* extern "C" */
+#endif
#endif /* BROTLI_ENC_BLOCK_SPLITTER_H_ */
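
Since BlockSplit is now a plain struct whose arrays are managed manually, every instance has to go through the init/destroy pair. A usage sketch built from the declarations above; it assumes an initialized MemoryManager m and the cmds, num_commands, data, pos, mask and quality values produced by earlier encoder stages:

/* Usage sketch only; not a complete program. */
BlockSplit literal_split, insert_and_copy_split, dist_split;
BrotliInitBlockSplit(&literal_split);
BrotliInitBlockSplit(&insert_and_copy_split);
BrotliInitBlockSplit(&dist_split);
BrotliSplitBlock(m, cmds, num_commands, data, pos, mask, quality,
                 &literal_split, &insert_and_copy_split, &dist_split);
if (!BROTLI_IS_OOM(m)) {
  /* literal_split.types[i] / literal_split.lengths[i] now describe the
     i-th literal block, for i in [0, literal_split.num_blocks). */
}
BrotliDestroyBlockSplit(m, &literal_split);
BrotliDestroyBlockSplit(m, &insert_and_copy_split);
BrotliDestroyBlockSplit(m, &dist_split);
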
diff --git a/enc/block_splitter_inc.h b/enc/block_splitter_inc.h
new file mode 100644
index 0000000..c2ef236
--- /dev/null
+++ b/enc/block_splitter_inc.h
@@ -0,0 +1,431 @@
+/* NOLINT(build/header_guard) */
+/* Copyright 2013 Google Inc. All Rights Reserved.
+
+ Distributed under MIT license.
+ See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* template parameters: FN, DataType */
+
+#define HistogramType FN(Histogram)
+
+static void FN(InitialEntropyCodes)(const DataType* data, size_t length,
+ size_t stride,
+ size_t num_histograms,
+ HistogramType* histograms) {
+ unsigned int seed = 7;
+ size_t block_length = length / num_histograms;
+ size_t i;
+ FN(ClearHistograms)(histograms, num_histograms);
+ for (i = 0; i < num_histograms; ++i) {
+ size_t pos = length * i / num_histograms;
+ if (i != 0) {
+ pos += MyRand(&seed) % block_length;
+ }
+ if (pos + stride >= length) {
+ pos = length - stride - 1;
+ }
+ FN(HistogramAddVector)(&histograms[i], data + pos, stride);
+ }
+}
+
+static void FN(RandomSample)(unsigned int* seed,
+ const DataType* data,
+ size_t length,
+ size_t stride,
+ HistogramType* sample) {
+ size_t pos = 0;
+ if (stride >= length) {
+ pos = 0;
+ stride = length;
+ } else {
+ pos = MyRand(seed) % (length - stride + 1);
+ }
+ FN(HistogramAddVector)(sample, data + pos, stride);
+}
+
+static void FN(RefineEntropyCodes)(const DataType* data, size_t length,
+ size_t stride,
+ size_t num_histograms,
+ HistogramType* histograms) {
+ size_t iters =
+ kIterMulForRefining * length / stride + kMinItersForRefining;
+ unsigned int seed = 7;
+ size_t iter;
+ iters = ((iters + num_histograms - 1) / num_histograms) * num_histograms;
+ for (iter = 0; iter < iters; ++iter) {
+ HistogramType sample;
+ FN(HistogramClear)(&sample);
+ FN(RandomSample)(&seed, data, length, stride, &sample);
+ FN(HistogramAddHistogram)(&histograms[iter % num_histograms], &sample);
+ }
+}
+
+/* Assigns a block id from the range [0, num_histograms) to each data element
+   in data[0..length) and fills in block_id[0..length) with the assigned
+   values. Returns the number of blocks, i.e. one plus the number of block
+   switches. */
+static size_t FN(FindBlocks)(const DataType* data, const size_t length,
+ const double block_switch_bitcost,
+ const size_t num_histograms,
+ const HistogramType* histograms,
+ double* insert_cost,
+ double* cost,
+ uint8_t* switch_signal,
+ uint8_t *block_id) {
+ const size_t data_size = FN(HistogramDataSize)();
+ const size_t bitmaplen = (num_histograms + 7) >> 3;
+ size_t num_blocks = 1;
+ size_t i;
+ size_t j;
+ assert(num_histograms <= 256);
+ if (num_histograms <= 1) {
+ for (i = 0; i < length; ++i) {
+ block_id[i] = 0;
+ }
+ return 1;
+ }
+ memset(insert_cost, 0, sizeof(insert_cost[0]) * data_size * num_histograms);
+ for (i = 0; i < num_histograms; ++i) {
+ insert_cost[i] = FastLog2((uint32_t)histograms[i].total_count_);
+ }
+ for (i = data_size; i != 0;) {
+ --i;
+ for (j = 0; j < num_histograms; ++j) {
+ insert_cost[i * num_histograms + j] =
+ insert_cost[j] - BitCost(histograms[j].data_[i]);
+ }
+ }
+ memset(cost, 0, sizeof(cost[0]) * num_histograms);
+ memset(switch_signal, 0, sizeof(switch_signal[0]) * length * bitmaplen);
+  /* After each iteration of this loop, cost[k] will contain the difference
+     between the minimum cost of arriving at the current byte position using
+     entropy code k, and the minimum cost of arriving at the current byte
+     position. This difference is capped at the block switch cost, and if it
+     reaches the block switch cost, it means that when we trace back from the
+     last position, we need to switch here. */
+ for (i = 0; i < length; ++i) {
+ const size_t byte_ix = i;
+ size_t ix = byte_ix * bitmaplen;
+ size_t insert_cost_ix = data[byte_ix] * num_histograms;
+ double min_cost = 1e99;
+ double block_switch_cost = block_switch_bitcost;
+ size_t k;
+ for (k = 0; k < num_histograms; ++k) {
+ /* We are coding the symbol in data[byte_ix] with entropy code k. */
+ cost[k] += insert_cost[insert_cost_ix + k];
+ if (cost[k] < min_cost) {
+ min_cost = cost[k];
+ block_id[byte_ix] = (uint8_t)k;
+ }
+ }
+ /* More blocks for the beginning. */
+ if (byte_ix < 2000) {
+ block_switch_cost *= 0.77 + 0.07 * (double)byte_ix / 2000;
+ }
+ for (k = 0; k < num_histograms; ++k) {
+ cost[k] -= min_cost;
+ if (cost[k] >= block_switch_cost) {
+ const uint8_t mask = (uint8_t)(1u << (k & 7));
+ cost[k] = block_switch_cost;
+ assert((k >> 3) < bitmaplen);
+ switch_signal[ix + (k >> 3)] |= mask;
+ }
+ }
+ }
+ { /* Trace back from the last position and switch at the marked places. */
+ size_t byte_ix = length - 1;
+ size_t ix = byte_ix * bitmaplen;
+ uint8_t cur_id = block_id[byte_ix];
+ while (byte_ix > 0) {
+ const uint8_t mask = (uint8_t)(1u << (cur_id & 7));
+ assert(((size_t)cur_id >> 3) < bitmaplen);
+ --byte_ix;
+ ix -= bitmaplen;
+ if (switch_signal[ix + (cur_id >> 3)] & mask) {
+ if (cur_id != block_id[byte_ix]) {
+ cur_id = block_id[byte_ix];
+ ++num_blocks;
+ }
+ }
+ block_id[byte_ix] = cur_id;
+ }
+ }
+ return num_blocks;
+}
+
+static size_t FN(RemapBlockIds)(uint8_t* block_ids, const size_t length,
+ uint16_t* new_id, const size_t num_histograms) {
+ static const uint16_t kInvalidId = 256;
+ uint16_t next_id = 0;
+ size_t i;
+ for (i = 0; i < num_histograms; ++i) {
+ new_id[i] = kInvalidId;
+ }
+ for (i = 0; i < length; ++i) {
+ assert(block_ids[i] < num_histograms);
+ if (new_id[block_ids[i]] == kInvalidId) {
+ new_id[block_ids[i]] = next_id++;
+ }
+ }
+ for (i = 0; i < length; ++i) {
+ block_ids[i] = (uint8_t)new_id[block_ids[i]];
+ assert(block_ids[i] < num_histograms);
+ }
+ assert(next_id <= num_histograms);
+ return next_id;
+}
+
+static void FN(BuildBlockHistograms)(const DataType* data, const size_t length,
+ const uint8_t* block_ids,
+ const size_t num_histograms,
+ HistogramType* histograms) {
+ size_t i;
+ FN(ClearHistograms)(histograms, num_histograms);
+ for (i = 0; i < length; ++i) {
+ FN(HistogramAdd)(&histograms[block_ids[i]], data[i]);
+ }
+}
+
+static void FN(ClusterBlocks)(MemoryManager* m,
+ const DataType* data, const size_t length,
+ const size_t num_blocks,
+ uint8_t* block_ids,
+ BlockSplit* split) {
+ uint32_t* histogram_symbols = BROTLI_ALLOC(m, uint32_t, num_blocks);
+ uint32_t* block_lengths = BROTLI_ALLOC(m, uint32_t, num_blocks);
+ const size_t expected_num_clusters = CLUSTERS_PER_BATCH *
+ (num_blocks + HISTOGRAMS_PER_BATCH - 1) / HISTOGRAMS_PER_BATCH;
+ size_t all_histograms_size = 0;
+ size_t all_histograms_capacity = expected_num_clusters;
+ HistogramType* all_histograms =
+ BROTLI_ALLOC(m, HistogramType, all_histograms_capacity);
+ size_t cluster_size_size = 0;
+ size_t cluster_size_capacity = expected_num_clusters;
+ uint32_t* cluster_size = BROTLI_ALLOC(m, uint32_t, cluster_size_capacity);
+ size_t num_clusters = 0;
+ HistogramType* histograms = BROTLI_ALLOC(m, HistogramType,
+ BROTLI_MIN(size_t, num_blocks, HISTOGRAMS_PER_BATCH));
+ size_t max_num_pairs =
+ HISTOGRAMS_PER_BATCH * HISTOGRAMS_PER_BATCH / 2;
+ size_t pairs_capacity = max_num_pairs + 1;
+ HistogramPair* pairs = BROTLI_ALLOC(m, HistogramPair, pairs_capacity);
+ size_t pos = 0;
+ uint32_t* clusters;
+ size_t num_final_clusters;
+ static const uint32_t kInvalidIndex = BROTLI_UINT32_MAX;
+ uint32_t* new_index;
+ uint8_t max_type = 0;
+ size_t i;
+
+ if (BROTLI_IS_OOM(m)) return;
+
+ memset(block_lengths, 0, num_blocks * sizeof(uint32_t));
+
+ {
+ size_t block_idx = 0;
+ for (i = 0; i < length; ++i) {
+ assert(block_idx < num_blocks);
+ ++block_lengths[block_idx];
+ if (i + 1 == length || block_ids[i] != block_ids[i + 1]) {
+ ++block_idx;
+ }
+ }
+ assert(block_idx == num_blocks);
+ }
+
+ for (i = 0; i < num_blocks; i += HISTOGRAMS_PER_BATCH) {
+ const size_t num_to_combine =
+ BROTLI_MIN(size_t, num_blocks - i, HISTOGRAMS_PER_BATCH);
+ uint32_t sizes[HISTOGRAMS_PER_BATCH];
+ uint32_t new_clusters[HISTOGRAMS_PER_BATCH];
+ uint32_t symbols[HISTOGRAMS_PER_BATCH];
+ uint32_t remap[HISTOGRAMS_PER_BATCH];
+ size_t num_new_clusters;
+ size_t j;
+ for (j = 0; j < num_to_combine; ++j) {
+ size_t k;
+ FN(HistogramClear)(&histograms[j]);
+ for (k = 0; k < block_lengths[i + j]; ++k) {
+ FN(HistogramAdd)(&histograms[j], data[pos++]);
+ }
+ histograms[j].bit_cost_ = FN(BrotliPopulationCost)(&histograms[j]);
+ symbols[j] = new_clusters[j] = (uint32_t)j;
+ sizes[j] = 1;
+ }
+ num_new_clusters = FN(BrotliHistogramCombine)(
+ histograms, sizes, symbols, new_clusters, pairs, num_to_combine,
+ num_to_combine, HISTOGRAMS_PER_BATCH, max_num_pairs);
+ BROTLI_ENSURE_CAPACITY(m, HistogramType, all_histograms,
+ all_histograms_capacity, all_histograms_size + num_new_clusters);
+ BROTLI_ENSURE_CAPACITY(m, uint32_t, cluster_size,
+ cluster_size_capacity, cluster_size_size + num_new_clusters);
+ if (BROTLI_IS_OOM(m)) return;
+ for (j = 0; j < num_new_clusters; ++j) {
+ all_histograms[all_histograms_size++] = histograms[new_clusters[j]];
+ cluster_size[cluster_size_size++] = sizes[new_clusters[j]];
+ remap[new_clusters[j]] = (uint32_t)j;
+ }
+ for (j = 0; j < num_to_combine; ++j) {
+ histogram_symbols[i + j] = (uint32_t)num_clusters + remap[symbols[j]];
+ }
+ num_clusters += num_new_clusters;
+ assert(num_clusters == cluster_size_size);
+ assert(num_clusters == all_histograms_size);
+ }
+ BROTLI_FREE(m, histograms);
+
+ max_num_pairs =
+ BROTLI_MIN(size_t, 64 * num_clusters, (num_clusters / 2) * num_clusters);
+ if (pairs_capacity < max_num_pairs + 1) {
+ BROTLI_FREE(m, pairs);
+ pairs = BROTLI_ALLOC(m, HistogramPair, max_num_pairs + 1);
+ if (BROTLI_IS_OOM(m)) return;
+ }
+
+ clusters = BROTLI_ALLOC(m, uint32_t, num_clusters);
+ if (BROTLI_IS_OOM(m)) return;
+ for (i = 0; i < num_clusters; ++i) {
+ clusters[i] = (uint32_t)i;
+ }
+ num_final_clusters = FN(BrotliHistogramCombine)(
+ all_histograms, cluster_size, histogram_symbols, clusters, pairs,
+ num_clusters, num_blocks, BROTLI_MAX_NUMBER_OF_BLOCK_TYPES,
+ max_num_pairs);
+ BROTLI_FREE(m, pairs);
+ BROTLI_FREE(m, cluster_size);
+
+ new_index = BROTLI_ALLOC(m, uint32_t, num_clusters);
+ if (BROTLI_IS_OOM(m)) return;
+ for (i = 0; i < num_clusters; ++i) new_index[i] = kInvalidIndex;
+ pos = 0;
+ {
+ uint32_t next_index = 0;
+ for (i = 0; i < num_blocks; ++i) {
+ HistogramType histo;
+ size_t j;
+ uint32_t best_out;
+ double best_bits;
+ FN(HistogramClear)(&histo);
+ for (j = 0; j < block_lengths[i]; ++j) {
+ FN(HistogramAdd)(&histo, data[pos++]);
+ }
+ best_out = (i == 0) ? histogram_symbols[0] : histogram_symbols[i - 1];
+ best_bits =
+ FN(BrotliHistogramBitCostDistance)(&histo, &all_histograms[best_out]);
+ for (j = 0; j < num_final_clusters; ++j) {
+ const double cur_bits = FN(BrotliHistogramBitCostDistance)(
+ &histo, &all_histograms[clusters[j]]);
+ if (cur_bits < best_bits) {
+ best_bits = cur_bits;
+ best_out = clusters[j];
+ }
+ }
+ histogram_symbols[i] = best_out;
+ if (new_index[best_out] == kInvalidIndex) {
+ new_index[best_out] = next_index++;
+ }
+ }
+ }
+ BROTLI_FREE(m, clusters);
+ BROTLI_FREE(m, all_histograms);
+ BROTLI_ENSURE_CAPACITY(
+ m, uint8_t, split->types, split->types_alloc_size, num_blocks);
+ BROTLI_ENSURE_CAPACITY(
+ m, uint32_t, split->lengths, split->lengths_alloc_size, num_blocks);
+ if (BROTLI_IS_OOM(m)) return;
+ {
+ uint32_t cur_length = 0;
+ size_t block_idx = 0;
+ for (i = 0; i < num_blocks; ++i) {
+ cur_length += block_lengths[i];
+ if (i + 1 == num_blocks ||
+ histogram_symbols[i] != histogram_symbols[i + 1]) {
+ const uint8_t id = (uint8_t)new_index[histogram_symbols[i]];
+ split->types[block_idx] = id;
+ split->lengths[block_idx] = cur_length;
+ max_type = BROTLI_MAX(uint8_t, max_type, id);
+ cur_length = 0;
+ ++block_idx;
+ }
+ }
+ split->num_blocks = block_idx;
+ split->num_types = (size_t)max_type + 1;
+ }
+ BROTLI_FREE(m, new_index);
+ BROTLI_FREE(m, block_lengths);
+ BROTLI_FREE(m, histogram_symbols);
+}
+
+static void FN(SplitByteVector)(MemoryManager* m,
+ const DataType* data, const size_t length,
+ const size_t literals_per_histogram,
+ const size_t max_histograms,
+ const size_t sampling_stride_length,
+ const double block_switch_cost,
+ const int quality,
+ BlockSplit* split) {
+ const size_t data_size = FN(HistogramDataSize)();
+ size_t num_histograms = length / literals_per_histogram + 1;
+ HistogramType* histograms;
+ if (num_histograms > max_histograms) {
+ num_histograms = max_histograms;
+ }
+ if (length == 0) {
+ split->num_types = 1;
+ return;
+ } else if (length < kMinLengthForBlockSplitting) {
+ BROTLI_ENSURE_CAPACITY(m, uint8_t,
+ split->types, split->types_alloc_size, split->num_blocks + 1);
+ BROTLI_ENSURE_CAPACITY(m, uint32_t,
+ split->lengths, split->lengths_alloc_size, split->num_blocks + 1);
+ if (BROTLI_IS_OOM(m)) return;
+ split->num_types = 1;
+ split->types[split->num_blocks] = 0;
+ split->lengths[split->num_blocks] = (uint32_t)length;
+ split->num_blocks++;
+ return;
+ }
+ histograms = BROTLI_ALLOC(m, HistogramType, num_histograms);
+ if (BROTLI_IS_OOM(m)) return;
+ /* Find good entropy codes. */
+ FN(InitialEntropyCodes)(data, length,
+ sampling_stride_length,
+ num_histograms, histograms);
+ FN(RefineEntropyCodes)(data, length,
+ sampling_stride_length,
+ num_histograms, histograms);
+ {
+ /* Find a good path through literals with the good entropy codes. */
+ uint8_t* block_ids = BROTLI_ALLOC(m, uint8_t, length);
+ size_t num_blocks;
+ const size_t bitmaplen = (num_histograms + 7) >> 3;
+ double* insert_cost = BROTLI_ALLOC(m, double, data_size * num_histograms);
+ double* cost = BROTLI_ALLOC(m, double, num_histograms);
+ uint8_t* switch_signal = BROTLI_ALLOC(m, uint8_t, length * bitmaplen);
+ uint16_t* new_id = BROTLI_ALLOC(m, uint16_t, num_histograms);
+ const size_t iters = quality <= 10 ? 3 : 10;
+ size_t i;
+ if (BROTLI_IS_OOM(m)) return;
+ for (i = 0; i < iters; ++i) {
+ num_blocks = FN(FindBlocks)(data, length,
+ block_switch_cost,
+ num_histograms, histograms,
+ insert_cost, cost, switch_signal,
+ block_ids);
+ num_histograms = FN(RemapBlockIds)(block_ids, length,
+ new_id, num_histograms);
+ FN(BuildBlockHistograms)(data, length, block_ids,
+ num_histograms, histograms);
+ }
+ BROTLI_FREE(m, insert_cost);
+ BROTLI_FREE(m, cost);
+ BROTLI_FREE(m, switch_signal);
+ BROTLI_FREE(m, new_id);
+ BROTLI_FREE(m, histograms);
+ FN(ClusterBlocks)(m, data, length, num_blocks, block_ids, split);
+ if (BROTLI_IS_OOM(m)) return;
+ BROTLI_FREE(m, block_ids);
+ }
+}
+
+#undef HistogramType
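
The capped-difference DP in FN(FindBlocks) above can be seen in miniature with two codes and a six-symbol input. This toy keeps only the core recurrence — cost[k] tracks how far code k trails the running minimum, capped at the switch cost — while the per-symbol bit costs are invented and the switch_signal bookkeeping is dropped:

#include <stdio.h>

int main(void) {
  /* Invented per-symbol bit costs: code 0 is cheap for symbol 0, code 1
     is cheap for symbol 1. */
  const double bit_cost[2][2] = {{1.0, 4.0}, {4.0, 1.0}};
  const int data[6] = {0, 0, 0, 1, 1, 1};
  const double kSwitchCost = 3.0;  /* stand-in for block_switch_cost */
  double cost[2] = {0, 0};
  int i, k;
  for (i = 0; i < 6; ++i) {
    double min_cost = 1e99;
    int best = 0;
    for (k = 0; k < 2; ++k) {
      cost[k] += bit_cost[k][data[i]];
      if (cost[k] < min_cost) { min_cost = cost[k]; best = k; }
    }
    /* Re-base on the minimum and cap: a capped entry marks where the
       backward pass of FN(FindBlocks) would place a block switch. */
    for (k = 0; k < 2; ++k) {
      cost[k] -= min_cost;
      if (cost[k] > kSwitchCost) cost[k] = kSwitchCost;
    }
    printf("pos %d: best code %d\n", i, best);  /* switches from 0 to 1 */
  }
  return 0;
}
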
diff --git a/enc/brotli_bit_stream.c b/enc/brotli_bit_stream.c
index df5a887..9b755f2 100644
--- a/enc/brotli_bit_stream.c
+++ b/enc/brotli_bit_stream.c
@@ -10,125 +10,174 @@
#include "./brotli_bit_stream.h"
-#include <algorithm>
-#include <cstdlib> /* free, malloc */
-#include <cstring>
-#include <limits>
-#include <vector>
+#include <string.h> /* memcpy, memset */
+#include "../common/constants.h"
#include "../common/types.h"
-#include "./bit_cost.h"
#include "./context.h"
#include "./entropy_encode.h"
#include "./entropy_encode_static.h"
#include "./fast_log.h"
-#include "./prefix.h"
+#include "./memory.h"
+#include "./port.h"
#include "./write_bits.h"
-namespace brotli {
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+#define MAX_HUFFMAN_TREE_SIZE (2 * BROTLI_NUM_COMMAND_SYMBOLS + 1)
+
+/* Represents the range of values belonging to a prefix code:
+ [offset, offset + 2^nbits) */
+typedef struct PrefixCodeRange {
+ uint32_t offset;
+ uint32_t nbits;
+} PrefixCodeRange;
+
+static const PrefixCodeRange
+ kBlockLengthPrefixCode[BROTLI_NUM_BLOCK_LEN_SYMBOLS] = {
+ { 1, 2}, { 5, 2}, { 9, 2}, {13, 2}, {17, 3}, { 25, 3}, { 33, 3},
+ {41, 3}, {49, 4}, {65, 4}, {81, 4}, {97, 4}, {113, 5}, {145, 5},
+ {177, 5}, { 209, 5}, { 241, 6}, { 305, 6}, { 369, 7}, { 497, 8},
+ {753, 9}, {1265, 10}, {2289, 11}, {4337, 12}, {8433, 13}, {16625, 24}
+};
+
+static BROTLI_INLINE uint32_t BlockLengthPrefixCode(uint32_t len) {
+ uint32_t code = (len >= 177) ? (len >= 753 ? 20 : 14) : (len >= 41 ? 7 : 0);
+ while (code < (BROTLI_NUM_BLOCK_LEN_SYMBOLS - 1) &&
+ len >= kBlockLengthPrefixCode[code + 1].offset) ++code;
+ return code;
+}
+
+static BROTLI_INLINE void GetBlockLengthPrefixCode(uint32_t len, size_t* code,
+ uint32_t* n_extra, uint32_t* extra) {
+ *code = BlockLengthPrefixCode(len);
+ *n_extra = kBlockLengthPrefixCode[*code].nbits;
+ *extra = len - kBlockLengthPrefixCode[*code].offset;
+}
+
+typedef struct BlockTypeCodeCalculator {
+ size_t last_type;
+ size_t second_last_type;
+} BlockTypeCodeCalculator;
-namespace {
+static void InitBlockTypeCodeCalculator(BlockTypeCodeCalculator* self) {
+ self->last_type = 1;
+ self->second_last_type = 0;
+}
-static const size_t kMaxHuffmanTreeSize = 2 * kNumCommandPrefixes + 1;
-// Context map alphabet has 256 context id symbols plus max 16 rle symbols.
-static const size_t kContextMapAlphabetSize = 256 + 16;
-// Block type alphabet has 256 block id symbols plus 2 special symbols.
-static const size_t kBlockTypeAlphabetSize = 256 + 2;
+static BROTLI_INLINE size_t NextBlockTypeCode(
+ BlockTypeCodeCalculator* calculator, uint8_t type) {
+ size_t type_code = (type == calculator->last_type + 1) ? 1u :
+ (type == calculator->second_last_type) ? 0u : type + 2u;
+ calculator->second_last_type = calculator->last_type;
+ calculator->last_type = type;
+ return type_code;
+}
/* nibblesbits represents the 2 bits to encode MNIBBLES (0-3)
REQUIRES: length > 0
REQUIRES: length <= (1 << 24) */
-void EncodeMlen(size_t length, uint64_t* bits,
- size_t* numbits, uint64_t* nibblesbits) {
+static void BrotliEncodeMlen(size_t length, uint64_t* bits,
+ size_t* numbits, uint64_t* nibblesbits) {
+ size_t lg = (length == 1) ? 1 : Log2FloorNonZero((uint32_t)(length - 1)) + 1;
+ size_t mnibbles = (lg < 16 ? 16 : (lg + 3)) / 4;
assert(length > 0);
assert(length <= (1 << 24));
- length--; // MLEN - 1 is encoded
- size_t lg = length == 0 ? 1 : Log2FloorNonZero(
- static_cast<uint32_t>(length)) + 1;
assert(lg <= 24);
- size_t mnibbles = (lg < 16 ? 16 : (lg + 3)) / 4;
*nibblesbits = mnibbles - 4;
*numbits = mnibbles * 4;
- *bits = length;
+ *bits = length - 1;
}
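/* Worked example (editorial sketch): for length == 70000,
   Log2FloorNonZero(69999) == 16, so lg == 17 and
   mnibbles == (17 + 3) / 4 == 5.  The function returns
   *nibblesbits == 1 (MNIBBLES - 4), *numbits == 20 and
   *bits == 69999 (MLEN - 1, stored in five nibbles). */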
-static inline void StoreCommandExtra(
- const Command& cmd, size_t* storage_ix, uint8_t* storage) {
- uint32_t copylen_code = cmd.copy_len_code();
- uint16_t inscode = GetInsertLengthCode(cmd.insert_len_);
+static BROTLI_INLINE void StoreCommandExtra(
+ const Command* cmd, size_t* storage_ix, uint8_t* storage) {
+ uint32_t copylen_code = CommandCopyLenCode(cmd);
+ uint16_t inscode = GetInsertLengthCode(cmd->insert_len_);
uint16_t copycode = GetCopyLengthCode(copylen_code);
uint32_t insnumextra = GetInsertExtra(inscode);
- uint64_t insextraval = cmd.insert_len_ - GetInsertBase(inscode);
+ uint64_t insextraval = cmd->insert_len_ - GetInsertBase(inscode);
uint64_t copyextraval = copylen_code - GetCopyBase(copycode);
uint64_t bits = (copyextraval << insnumextra) | insextraval;
- WriteBits(insnumextra + GetCopyExtra(copycode), bits, storage_ix, storage);
+ BrotliWriteBits(
+ insnumextra + GetCopyExtra(copycode), bits, storage_ix, storage);
}
-} // namespace
-
-void StoreVarLenUint8(size_t n, size_t* storage_ix, uint8_t* storage) {
+/* Data structure that stores almost everything that is needed to encode each
+ block switch command. */
+typedef struct BlockSplitCode {
+ BlockTypeCodeCalculator type_code_calculator;
+ uint8_t type_depths[BROTLI_MAX_BLOCK_TYPE_SYMBOLS];
+ uint16_t type_bits[BROTLI_MAX_BLOCK_TYPE_SYMBOLS];
+ uint8_t length_depths[BROTLI_NUM_BLOCK_LEN_SYMBOLS];
+ uint16_t length_bits[BROTLI_NUM_BLOCK_LEN_SYMBOLS];
+} BlockSplitCode;
+
+/* Stores a number between 0 and 255. */
+static void StoreVarLenUint8(size_t n, size_t* storage_ix, uint8_t* storage) {
if (n == 0) {
- WriteBits(1, 0, storage_ix, storage);
+ BrotliWriteBits(1, 0, storage_ix, storage);
} else {
- WriteBits(1, 1, storage_ix, storage);
size_t nbits = Log2FloorNonZero(n);
- WriteBits(3, nbits, storage_ix, storage);
- WriteBits(nbits, n - (1 << nbits), storage_ix, storage);
+ BrotliWriteBits(1, 1, storage_ix, storage);
+ BrotliWriteBits(3, nbits, storage_ix, storage);
+ BrotliWriteBits(nbits, n - (1u << nbits), storage_ix, storage);
}
}
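/* Worked example (editorial sketch): n == 0 is stored as the single
   bit 0.  For n == 5, nbits == Log2FloorNonZero(5) == 2, so the stream
   gets a 1 bit, then nbits == 2 in 3 bits, then 5 - (1u << 2) == 1 in
   2 bits: six bits in total, decodable back to 4 + 1 == 5. */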
/* Stores the compressed meta-block header.
REQUIRES: length > 0
REQUIRES: length <= (1 << 24) */
-void StoreCompressedMetaBlockHeader(bool final_block,
- size_t length,
- size_t* storage_ix,
- uint8_t* storage) {
+static void StoreCompressedMetaBlockHeader(int is_final_block,
+ size_t length,
+ size_t* storage_ix,
+ uint8_t* storage) {
+ uint64_t lenbits;
+ size_t nlenbits;
+ uint64_t nibblesbits;
+
/* Write ISLAST bit. */
- WriteBits(1, final_block, storage_ix, storage);
+ BrotliWriteBits(1, (uint64_t)is_final_block, storage_ix, storage);
/* Write ISEMPTY bit. */
- if (final_block) {
- WriteBits(1, 0, storage_ix, storage);
+ if (is_final_block) {
+ BrotliWriteBits(1, 0, storage_ix, storage);
}
- uint64_t lenbits;
- size_t nlenbits;
- uint64_t nibblesbits;
- EncodeMlen(length, &lenbits, &nlenbits, &nibblesbits);
- WriteBits(2, nibblesbits, storage_ix, storage);
- WriteBits(nlenbits, lenbits, storage_ix, storage);
+ BrotliEncodeMlen(length, &lenbits, &nlenbits, &nibblesbits);
+ BrotliWriteBits(2, nibblesbits, storage_ix, storage);
+ BrotliWriteBits(nlenbits, lenbits, storage_ix, storage);
- if (!final_block) {
+ if (!is_final_block) {
/* Write ISUNCOMPRESSED bit. */
- WriteBits(1, 0, storage_ix, storage);
+ BrotliWriteBits(1, 0, storage_ix, storage);
}
}
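/* Worked example (editorial sketch): a non-final compressed meta-block
   of length 65536 costs 20 header bits:
     1 bit   ISLAST         == 0
     2 bits  MNIBBLES - 4   == 0      (four nibbles)
     16 bits MLEN - 1       == 65535
     1 bit   ISUNCOMPRESSED == 0
   (ISEMPTY is only written after a set ISLAST bit.) */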
/* Stores the uncompressed meta-block header.
REQUIRES: length > 0
REQUIRES: length <= (1 << 24) */
-void StoreUncompressedMetaBlockHeader(size_t length,
- size_t* storage_ix,
- uint8_t* storage) {
- /* Write ISLAST bit.
- Uncompressed block cannot be the last one, so set to 0. */
- WriteBits(1, 0, storage_ix, storage);
+static void BrotliStoreUncompressedMetaBlockHeader(size_t length,
+ size_t* storage_ix,
+ uint8_t* storage) {
uint64_t lenbits;
size_t nlenbits;
uint64_t nibblesbits;
- EncodeMlen(length, &lenbits, &nlenbits, &nibblesbits);
- WriteBits(2, nibblesbits, storage_ix, storage);
- WriteBits(nlenbits, lenbits, storage_ix, storage);
+
+ /* Write ISLAST bit.
+ Uncompressed block cannot be the last one, so set to 0. */
+ BrotliWriteBits(1, 0, storage_ix, storage);
+ BrotliEncodeMlen(length, &lenbits, &nlenbits, &nibblesbits);
+ BrotliWriteBits(2, nibblesbits, storage_ix, storage);
+ BrotliWriteBits(nlenbits, lenbits, storage_ix, storage);
/* Write ISUNCOMPRESSED bit. */
- WriteBits(1, 1, storage_ix, storage);
+ BrotliWriteBits(1, 1, storage_ix, storage);
}
-void StoreHuffmanTreeOfHuffmanTreeToBitMask(
- const int num_codes,
- const uint8_t *code_length_bitdepth,
- size_t *storage_ix,
- uint8_t *storage) {
- static const uint8_t kStorageOrder[kCodeLengthCodes] = {
+static void BrotliStoreHuffmanTreeOfHuffmanTreeToBitMask(
+ const int num_codes, const uint8_t* code_length_bitdepth,
+ size_t* storage_ix, uint8_t* storage) {
+ static const uint8_t kStorageOrder[BROTLI_CODE_LENGTH_CODES] = {
1, 2, 3, 4, 0, 5, 17, 6, 16, 7, 8, 9, 10, 11, 12, 13, 14, 15
};
/* The bit lengths of the Huffman code over the code length alphabet
@@ -148,8 +197,10 @@ void StoreHuffmanTreeOfHuffmanTreeToBitMask(
2, 4, 3, 2, 2, 4
};
+ size_t skip_some = 0; /* skips none. */
+
/* Throw away trailing zeros: */
- size_t codes_to_store = kCodeLengthCodes;
+ size_t codes_to_store = BROTLI_CODE_LENGTH_CODES;
if (num_codes > 1) {
for (; codes_to_store > 0; --codes_to_store) {
if (code_length_bitdepth[kStorageOrder[codes_to_store - 1]] != 0) {
@@ -157,7 +208,6 @@ void StoreHuffmanTreeOfHuffmanTreeToBitMask(
}
}
}
- size_t skip_some = 0; /* skips none. */
if (code_length_bitdepth[kStorageOrder[0]] == 0 &&
code_length_bitdepth[kStorageOrder[1]] == 0) {
skip_some = 2; /* skips two. */
@@ -165,33 +215,34 @@ void StoreHuffmanTreeOfHuffmanTreeToBitMask(
skip_some = 3; /* skips three. */
}
}
- WriteBits(2, skip_some, storage_ix, storage);
- for (size_t i = skip_some; i < codes_to_store; ++i) {
- size_t l = code_length_bitdepth[kStorageOrder[i]];
- WriteBits(kHuffmanBitLengthHuffmanCodeBitLengths[l],
- kHuffmanBitLengthHuffmanCodeSymbols[l], storage_ix, storage);
+ BrotliWriteBits(2, skip_some, storage_ix, storage);
+ {
+ size_t i;
+ for (i = skip_some; i < codes_to_store; ++i) {
+ size_t l = code_length_bitdepth[kStorageOrder[i]];
+ BrotliWriteBits(kHuffmanBitLengthHuffmanCodeBitLengths[l],
+ kHuffmanBitLengthHuffmanCodeSymbols[l], storage_ix, storage);
+ }
}
}
-static void StoreHuffmanTreeToBitMask(
- const size_t huffman_tree_size,
- const uint8_t* huffman_tree,
- const uint8_t* huffman_tree_extra_bits,
- const uint8_t* code_length_bitdepth,
+static void BrotliStoreHuffmanTreeToBitMask(
+ const size_t huffman_tree_size, const uint8_t* huffman_tree,
+ const uint8_t* huffman_tree_extra_bits, const uint8_t* code_length_bitdepth,
const uint16_t* code_length_bitdepth_symbols,
- size_t * __restrict storage_ix,
- uint8_t * __restrict storage) {
- for (size_t i = 0; i < huffman_tree_size; ++i) {
+ size_t* BROTLI_RESTRICT storage_ix, uint8_t* BROTLI_RESTRICT storage) {
+ size_t i;
+ for (i = 0; i < huffman_tree_size; ++i) {
size_t ix = huffman_tree[i];
- WriteBits(code_length_bitdepth[ix], code_length_bitdepth_symbols[ix],
- storage_ix, storage);
+ BrotliWriteBits(code_length_bitdepth[ix], code_length_bitdepth_symbols[ix],
+ storage_ix, storage);
/* Extra bits */
switch (ix) {
- case 16:
- WriteBits(2, huffman_tree_extra_bits[i], storage_ix, storage);
+ case BROTLI_REPEAT_PREVIOUS_CODE_LENGTH:
+ BrotliWriteBits(2, huffman_tree_extra_bits[i], storage_ix, storage);
break;
- case 17:
- WriteBits(3, huffman_tree_extra_bits[i], storage_ix, storage);
+ case BROTLI_REPEAT_ZERO_CODE_LENGTH:
+ BrotliWriteBits(3, huffman_tree_extra_bits[i], storage_ix, storage);
break;
}
}
@@ -203,59 +254,68 @@ static void StoreSimpleHuffmanTree(const uint8_t* depths,
size_t max_bits,
size_t *storage_ix, uint8_t *storage) {
/* value of 1 indicates a simple Huffman code */
- WriteBits(2, 1, storage_ix, storage);
- WriteBits(2, num_symbols - 1, storage_ix, storage); /* NSYM - 1 */
+ BrotliWriteBits(2, 1, storage_ix, storage);
+ BrotliWriteBits(2, num_symbols - 1, storage_ix, storage); /* NSYM - 1 */
+ {
/* Sort */
- for (size_t i = 0; i < num_symbols; i++) {
- for (size_t j = i + 1; j < num_symbols; j++) {
- if (depths[symbols[j]] < depths[symbols[i]]) {
- std::swap(symbols[j], symbols[i]);
+ size_t i;
+ for (i = 0; i < num_symbols; i++) {
+ size_t j;
+ for (j = i + 1; j < num_symbols; j++) {
+ if (depths[symbols[j]] < depths[symbols[i]]) {
+ BROTLI_SWAP(size_t, symbols, j, i);
+ }
}
}
}
if (num_symbols == 2) {
- WriteBits(max_bits, symbols[0], storage_ix, storage);
- WriteBits(max_bits, symbols[1], storage_ix, storage);
+ BrotliWriteBits(max_bits, symbols[0], storage_ix, storage);
+ BrotliWriteBits(max_bits, symbols[1], storage_ix, storage);
} else if (num_symbols == 3) {
- WriteBits(max_bits, symbols[0], storage_ix, storage);
- WriteBits(max_bits, symbols[1], storage_ix, storage);
- WriteBits(max_bits, symbols[2], storage_ix, storage);
+ BrotliWriteBits(max_bits, symbols[0], storage_ix, storage);
+ BrotliWriteBits(max_bits, symbols[1], storage_ix, storage);
+ BrotliWriteBits(max_bits, symbols[2], storage_ix, storage);
} else {
- WriteBits(max_bits, symbols[0], storage_ix, storage);
- WriteBits(max_bits, symbols[1], storage_ix, storage);
- WriteBits(max_bits, symbols[2], storage_ix, storage);
- WriteBits(max_bits, symbols[3], storage_ix, storage);
+ BrotliWriteBits(max_bits, symbols[0], storage_ix, storage);
+ BrotliWriteBits(max_bits, symbols[1], storage_ix, storage);
+ BrotliWriteBits(max_bits, symbols[2], storage_ix, storage);
+ BrotliWriteBits(max_bits, symbols[3], storage_ix, storage);
/* tree-select */
- WriteBits(1, depths[symbols[0]] == 1 ? 1 : 0, storage_ix, storage);
+ BrotliWriteBits(1, depths[symbols[0]] == 1 ? 1 : 0, storage_ix, storage);
}
}
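/* Worked example (editorial sketch): for a 256-symbol alphabet
   (max_bits == 8) in which only the literals 'a' (97) and 'b' (98)
   occur, StoreSimpleHuffmanTree writes 2 bits with value 1 (simple
   code), 2 bits NSYM - 1 == 1, then the two symbol indices in 8 bits
   each -- 20 bits for the whole code.  No tree-select bit is needed,
   since that only applies to the four-symbol case. */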
/* num = alphabet size
depths = symbol depths */
-void StoreHuffmanTree(const uint8_t* depths, size_t num,
- HuffmanTree* tree,
- size_t *storage_ix, uint8_t *storage) {
+void BrotliStoreHuffmanTree(const uint8_t* depths, size_t num,
+ HuffmanTree* tree,
+ size_t *storage_ix, uint8_t *storage) {
/* Write the Huffman tree into the brotli-representation.
The command alphabet is the largest, so this allocation will fit all
alphabets. */
- assert(num <= kNumCommandPrefixes);
- uint8_t huffman_tree[kNumCommandPrefixes];
- uint8_t huffman_tree_extra_bits[kNumCommandPrefixes];
+ uint8_t huffman_tree[BROTLI_NUM_COMMAND_SYMBOLS];
+ uint8_t huffman_tree_extra_bits[BROTLI_NUM_COMMAND_SYMBOLS];
size_t huffman_tree_size = 0;
- WriteHuffmanTree(depths, num, &huffman_tree_size, huffman_tree,
- huffman_tree_extra_bits);
+ uint8_t code_length_bitdepth[BROTLI_CODE_LENGTH_CODES] = { 0 };
+ uint16_t code_length_bitdepth_symbols[BROTLI_CODE_LENGTH_CODES];
+ uint32_t huffman_tree_histogram[BROTLI_CODE_LENGTH_CODES] = { 0 };
+ size_t i;
+ int num_codes = 0;
+ size_t code = 0;
+
+ assert(num <= BROTLI_NUM_COMMAND_SYMBOLS);
+
+ BrotliWriteHuffmanTree(depths, num, &huffman_tree_size, huffman_tree,
+ huffman_tree_extra_bits);
/* Calculate the statistics of the Huffman tree in brotli-representation. */
- uint32_t huffman_tree_histogram[kCodeLengthCodes] = { 0 };
- for (size_t i = 0; i < huffman_tree_size; ++i) {
+ for (i = 0; i < huffman_tree_size; ++i) {
++huffman_tree_histogram[huffman_tree[i]];
}
- int num_codes = 0;
- int code = 0;
- for (int i = 0; i < kCodeLengthCodes; ++i) {
+ for (i = 0; i < BROTLI_CODE_LENGTH_CODES; ++i) {
if (huffman_tree_histogram[i]) {
if (num_codes == 0) {
code = i;
@@ -269,42 +329,43 @@ void StoreHuffmanTree(const uint8_t* depths, size_t num,
/* Calculate a second Huffman tree, used to compress the code lengths
of the first one. */
- uint8_t code_length_bitdepth[kCodeLengthCodes] = { 0 };
- uint16_t code_length_bitdepth_symbols[kCodeLengthCodes] = { 0 };
- CreateHuffmanTree(&huffman_tree_histogram[0], kCodeLengthCodes,
- 5, tree, &code_length_bitdepth[0]);
- ConvertBitDepthsToSymbols(code_length_bitdepth, kCodeLengthCodes,
- &code_length_bitdepth_symbols[0]);
+ BrotliCreateHuffmanTree(huffman_tree_histogram, BROTLI_CODE_LENGTH_CODES,
+ 5, tree, code_length_bitdepth);
+ BrotliConvertBitDepthsToSymbols(code_length_bitdepth,
+ BROTLI_CODE_LENGTH_CODES,
+ code_length_bitdepth_symbols);
/* Now, we have all the data, let's start storing it */
- StoreHuffmanTreeOfHuffmanTreeToBitMask(num_codes, code_length_bitdepth,
- storage_ix, storage);
+ BrotliStoreHuffmanTreeOfHuffmanTreeToBitMask(num_codes, code_length_bitdepth,
+ storage_ix, storage);
if (num_codes == 1) {
code_length_bitdepth[code] = 0;
}
/* Store the real huffman tree now. */
- StoreHuffmanTreeToBitMask(huffman_tree_size,
- huffman_tree,
- huffman_tree_extra_bits,
- &code_length_bitdepth[0],
- code_length_bitdepth_symbols,
- storage_ix, storage);
+ BrotliStoreHuffmanTreeToBitMask(huffman_tree_size,
+ huffman_tree,
+ huffman_tree_extra_bits,
+ code_length_bitdepth,
+ code_length_bitdepth_symbols,
+ storage_ix, storage);
}
/* Builds a Huffman tree from histogram[0:length] into depth[0:length] and
bits[0:length] and stores the encoded tree to the bit stream. */
-void BuildAndStoreHuffmanTree(const uint32_t *histogram,
- const size_t length,
- HuffmanTree* tree,
- uint8_t* depth,
- uint16_t* bits,
- size_t* storage_ix,
- uint8_t* storage) {
+static void BuildAndStoreHuffmanTree(const uint32_t *histogram,
+ const size_t length,
+ HuffmanTree* tree,
+ uint8_t* depth,
+ uint16_t* bits,
+ size_t* storage_ix,
+ uint8_t* storage) {
size_t count = 0;
size_t s4[4] = { 0 };
- for (size_t i = 0; i < length; i++) {
+ size_t i;
+ size_t max_bits = 0;
+ for (i = 0; i < length; i++) {
if (histogram[i]) {
if (count < 4) {
s4[count] = i;
@@ -315,41 +376,45 @@ void BuildAndStoreHuffmanTree(const uint32_t *histogram,
}
}
- size_t max_bits_counter = length - 1;
- size_t max_bits = 0;
- while (max_bits_counter) {
- max_bits_counter >>= 1;
- ++max_bits;
+ {
+ size_t max_bits_counter = length - 1;
+ while (max_bits_counter) {
+ max_bits_counter >>= 1;
+ ++max_bits;
+ }
}
if (count <= 1) {
- WriteBits(4, 1, storage_ix, storage);
- WriteBits(max_bits, s4[0], storage_ix, storage);
+ BrotliWriteBits(4, 1, storage_ix, storage);
+ BrotliWriteBits(max_bits, s4[0], storage_ix, storage);
+ depth[s4[0]] = 0;
+ bits[s4[0]] = 0;
return;
}
- CreateHuffmanTree(histogram, length, 15, tree, depth);
- ConvertBitDepthsToSymbols(depth, length, bits);
+ memset(depth, 0, length * sizeof(depth[0]));
+ BrotliCreateHuffmanTree(histogram, length, 15, tree, depth);
+ BrotliConvertBitDepthsToSymbols(depth, length, bits);
if (count <= 4) {
StoreSimpleHuffmanTree(depth, s4, count, max_bits, storage_ix, storage);
} else {
- StoreHuffmanTree(depth, length, tree, storage_ix, storage);
+ BrotliStoreHuffmanTree(depth, length, tree, storage_ix, storage);
}
}
-static inline bool SortHuffmanTree(const HuffmanTree& v0,
- const HuffmanTree& v1) {
- return v0.total_count_ < v1.total_count_;
+static BROTLI_INLINE int SortHuffmanTree(const HuffmanTree* v0,
+ const HuffmanTree* v1) {
+ return (v0->total_count_ < v1->total_count_) ? 1 : 0;
}
-void BuildAndStoreHuffmanTreeFast(const uint32_t *histogram,
- const size_t histogram_total,
- const size_t max_bits,
- uint8_t* depth,
- uint16_t* bits,
- size_t* storage_ix,
- uint8_t* storage) {
+void BrotliBuildAndStoreHuffmanTreeFast(MemoryManager* m,
+ const uint32_t* histogram,
+ const size_t histogram_total,
+ const size_t max_bits,
+ uint8_t* depth, uint16_t* bits,
+ size_t* storage_ix,
+ uint8_t* storage) {
size_t count = 0;
size_t symbols[4] = { 0 };
size_t length = 0;
@@ -366,29 +431,41 @@ void BuildAndStoreHuffmanTreeFast(const uint32_t *histogram,
}
if (count <= 1) {
- WriteBits(4, 1, storage_ix, storage);
- WriteBits(max_bits, symbols[0], storage_ix, storage);
+ BrotliWriteBits(4, 1, storage_ix, storage);
+ BrotliWriteBits(max_bits, symbols[0], storage_ix, storage);
+ depth[symbols[0]] = 0;
+ bits[symbols[0]] = 0;
return;
}
- const size_t max_tree_size = 2 * length + 1;
- HuffmanTree* const tree =
- static_cast<HuffmanTree*>(malloc(max_tree_size * sizeof(HuffmanTree)));
- for (uint32_t count_limit = 1; ; count_limit *= 2) {
- HuffmanTree* node = tree;
- for (size_t i = length; i != 0;) {
- --i;
- if (histogram[i]) {
- if (PREDICT_TRUE(histogram[i] >= count_limit)) {
- *node = HuffmanTree(histogram[i], -1, static_cast<int16_t>(i));
- } else {
- *node = HuffmanTree(count_limit, -1, static_cast<int16_t>(i));
+ memset(depth, 0, length * sizeof(depth[0]));
+ {
+ const size_t max_tree_size = 2 * length + 1;
+ HuffmanTree* tree = BROTLI_ALLOC(m, HuffmanTree, max_tree_size);
+ uint32_t count_limit;
+ if (BROTLI_IS_OOM(m)) return;
+ for (count_limit = 1; ; count_limit *= 2) {
+ HuffmanTree* node = tree;
+ size_t l;
+ for (l = length; l != 0;) {
+ --l;
+ if (histogram[l]) {
+ if (PREDICT_TRUE(histogram[l] >= count_limit)) {
+ InitHuffmanTree(node, histogram[l], -1, (int16_t)l);
+ } else {
+ InitHuffmanTree(node, count_limit, -1, (int16_t)l);
+ }
+ ++node;
}
- ++node;
}
- }
- const int n = static_cast<int>(node - tree);
- std::sort(tree, node, SortHuffmanTree);
+ {
+ const int n = (int)(node - tree);
+ HuffmanTree sentinel;
+ int i = 0; /* Points to the next leaf node. */
+ int j = n + 1; /* Points to the next non-leaf node. */
+ int k;
+
+ SortHuffmanTreeItems(tree, (size_t)n, SortHuffmanTree);
/* The nodes are:
[0, n): the sorted leaf nodes that we start with.
[n]: we add a sentinel here.
@@ -396,106 +473,109 @@ void BuildAndStoreHuffmanTreeFast(const uint32_t *histogram,
(n+1). These are naturally in ascending order.
[2n]: we add a sentinel at the end as well.
There will be (2n+1) elements at the end. */
- const HuffmanTree sentinel(std::numeric_limits<int>::max(), -1, -1);
- *node++ = sentinel;
- *node++ = sentinel;
-
- int i = 0; // Points to the next leaf node.
- int j = n + 1; // Points to the next non-leaf node.
- for (int k = n - 1; k > 0; --k) {
- int left, right;
- if (tree[i].total_count_ <= tree[j].total_count_) {
- left = i;
- ++i;
- } else {
- left = j;
- ++j;
- }
- if (tree[i].total_count_ <= tree[j].total_count_) {
- right = i;
- ++i;
- } else {
- right = j;
- ++j;
- }
+ InitHuffmanTree(&sentinel, BROTLI_UINT32_MAX, -1, -1);
+ *node++ = sentinel;
+ *node++ = sentinel;
+
+ for (k = n - 1; k > 0; --k) {
+ int left, right;
+ if (tree[i].total_count_ <= tree[j].total_count_) {
+ left = i;
+ ++i;
+ } else {
+ left = j;
+ ++j;
+ }
+ if (tree[i].total_count_ <= tree[j].total_count_) {
+ right = i;
+ ++i;
+ } else {
+ right = j;
+ ++j;
+ }
/* The sentinel node becomes the parent node. */
- node[-1].total_count_ =
- tree[left].total_count_ + tree[right].total_count_;
- node[-1].index_left_ = static_cast<int16_t>(left);
- node[-1].index_right_or_value_ = static_cast<int16_t>(right);
+ node[-1].total_count_ =
+ tree[left].total_count_ + tree[right].total_count_;
+ node[-1].index_left_ = (int16_t)left;
+ node[-1].index_right_or_value_ = (int16_t)right;
/* Add back the last sentinel node. */
- *node++ = sentinel;
- }
- SetDepth(tree[2 * n - 1], &tree[0], depth, 0);
+ *node++ = sentinel;
+ }
+ if (BrotliSetDepth(2 * n - 1, tree, depth, 14)) {
/* We need to pack the Huffman tree in 14 bits. If this was not
successful, add fake entities to the lowest values and retry. */
- if (PREDICT_TRUE(*std::max_element(&depth[0], &depth[length]) <= 14)) {
- break;
+ break;
+ }
+ }
}
+ BROTLI_FREE(m, tree);
}
- free(tree);
- ConvertBitDepthsToSymbols(depth, length, bits);
+ BrotliConvertBitDepthsToSymbols(depth, length, bits);
if (count <= 4) {
+ size_t i;
/* value of 1 indicates a simple Huffman code */
- WriteBits(2, 1, storage_ix, storage);
- WriteBits(2, count - 1, storage_ix, storage); /* NSYM - 1 */
+ BrotliWriteBits(2, 1, storage_ix, storage);
+ BrotliWriteBits(2, count - 1, storage_ix, storage); /* NSYM - 1 */
/* Sort */
- for (size_t i = 0; i < count; i++) {
- for (size_t j = i + 1; j < count; j++) {
+ for (i = 0; i < count; i++) {
+ size_t j;
+ for (j = i + 1; j < count; j++) {
if (depth[symbols[j]] < depth[symbols[i]]) {
- std::swap(symbols[j], symbols[i]);
+ BROTLI_SWAP(size_t, symbols, j, i);
}
}
}
if (count == 2) {
- WriteBits(max_bits, symbols[0], storage_ix, storage);
- WriteBits(max_bits, symbols[1], storage_ix, storage);
+ BrotliWriteBits(max_bits, symbols[0], storage_ix, storage);
+ BrotliWriteBits(max_bits, symbols[1], storage_ix, storage);
} else if (count == 3) {
- WriteBits(max_bits, symbols[0], storage_ix, storage);
- WriteBits(max_bits, symbols[1], storage_ix, storage);
- WriteBits(max_bits, symbols[2], storage_ix, storage);
+ BrotliWriteBits(max_bits, symbols[0], storage_ix, storage);
+ BrotliWriteBits(max_bits, symbols[1], storage_ix, storage);
+ BrotliWriteBits(max_bits, symbols[2], storage_ix, storage);
} else {
- WriteBits(max_bits, symbols[0], storage_ix, storage);
- WriteBits(max_bits, symbols[1], storage_ix, storage);
- WriteBits(max_bits, symbols[2], storage_ix, storage);
- WriteBits(max_bits, symbols[3], storage_ix, storage);
+ BrotliWriteBits(max_bits, symbols[0], storage_ix, storage);
+ BrotliWriteBits(max_bits, symbols[1], storage_ix, storage);
+ BrotliWriteBits(max_bits, symbols[2], storage_ix, storage);
+ BrotliWriteBits(max_bits, symbols[3], storage_ix, storage);
/* tree-select */
- WriteBits(1, depth[symbols[0]] == 1 ? 1 : 0, storage_ix, storage);
+ BrotliWriteBits(1, depth[symbols[0]] == 1 ? 1 : 0, storage_ix, storage);
}
} else {
+ uint8_t previous_value = 8;
+ size_t i;
/* Complex Huffman Tree */
StoreStaticCodeLengthCode(storage_ix, storage);
/* Actual rle coding. */
- uint8_t previous_value = 8;
- for (size_t i = 0; i < length;) {
+ for (i = 0; i < length;) {
const uint8_t value = depth[i];
size_t reps = 1;
- for (size_t k = i + 1; k < length && depth[k] == value; ++k) {
+ size_t k;
+ for (k = i + 1; k < length && depth[k] == value; ++k) {
++reps;
}
i += reps;
if (value == 0) {
- WriteBits(kZeroRepsDepth[reps], kZeroRepsBits[reps],
- storage_ix, storage);
+ BrotliWriteBits(kZeroRepsDepth[reps], kZeroRepsBits[reps],
+ storage_ix, storage);
} else {
if (previous_value != value) {
- WriteBits(kCodeLengthDepth[value], kCodeLengthBits[value],
- storage_ix, storage);
+ BrotliWriteBits(kCodeLengthDepth[value], kCodeLengthBits[value],
+ storage_ix, storage);
--reps;
}
if (reps < 3) {
while (reps != 0) {
reps--;
- WriteBits(kCodeLengthDepth[value], kCodeLengthBits[value],
- storage_ix, storage);
+ BrotliWriteBits(kCodeLengthDepth[value], kCodeLengthBits[value],
+ storage_ix, storage);
}
} else {
reps -= 3;
- WriteBits(kNonZeroRepsDepth[reps], kNonZeroRepsBits[reps],
- storage_ix, storage);
+ BrotliWriteBits(kNonZeroRepsDepth[reps], kNonZeroRepsBits[reps],
+ storage_ix, storage);
}
previous_value = value;
}
@@ -513,30 +593,38 @@ static size_t IndexOf(const uint8_t* v, size_t v_size, uint8_t value) {
static void MoveToFront(uint8_t* v, size_t index) {
uint8_t value = v[index];
- for (size_t i = index; i != 0; --i) {
+ size_t i;
+ for (i = index; i != 0; --i) {
v[i] = v[i - 1];
}
v[0] = value;
}
-static void MoveToFrontTransform(const uint32_t* __restrict v_in,
+static void MoveToFrontTransform(const uint32_t* BROTLI_RESTRICT v_in,
const size_t v_size,
uint32_t* v_out) {
+ size_t i;
+ uint8_t mtf[256];
+ uint32_t max_value;
if (v_size == 0) {
return;
}
- uint32_t max_value = *std::max_element(v_in, v_in + v_size);
+ max_value = v_in[0];
+ for (i = 1; i < v_size; ++i) {
+ if (v_in[i] > max_value) max_value = v_in[i];
+ }
assert(max_value < 256u);
- uint8_t mtf[256];
- size_t mtf_size = max_value + 1;
- for (uint32_t i = 0; i <= max_value; ++i) {
- mtf[i] = static_cast<uint8_t>(i);
+ for (i = 0; i <= max_value; ++i) {
+ mtf[i] = (uint8_t)i;
}
- for (size_t i = 0; i < v_size; ++i) {
- size_t index = IndexOf(mtf, mtf_size, static_cast<uint8_t>(v_in[i]));
- assert(index < mtf_size);
- v_out[i] = static_cast<uint32_t>(index);
- MoveToFront(mtf, index);
+ {
+ size_t mtf_size = max_value + 1;
+ for (i = 0; i < v_size; ++i) {
+ size_t index = IndexOf(mtf, mtf_size, (uint8_t)v_in[i]);
+ assert(index < mtf_size);
+ v_out[i] = (uint32_t)index;
+ MoveToFront(mtf, index);
+ }
}
}
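/* Worked example (editorial sketch): for v_in == {1, 1, 0, 2} the
   table starts as [0, 1, 2] and the transform emits {1, 0, 1, 2}:
   the first 1 is at index 1 and moves to the front ([1, 0, 2]), the
   second 1 is now at index 0, the 0 is at index 1 ([0, 1, 2]), and
   the 2 is at index 2.  Repeated values thus collapse toward zero,
   which the zero run-length pass below exploits. */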
@@ -547,23 +635,24 @@ static void MoveToFrontTransform(const uint32_t* __restrict v_in,
value of *max_run_length_prefix. The prefix code of run length L is simply
Log2Floor(L) and the number of extra bits is the same as the prefix code. */
static void RunLengthCodeZeros(const size_t in_size,
- uint32_t* __restrict v,
- size_t* __restrict out_size,
- uint32_t* __restrict max_run_length_prefix) {
+ uint32_t* BROTLI_RESTRICT v, size_t* BROTLI_RESTRICT out_size,
+ uint32_t* BROTLI_RESTRICT max_run_length_prefix) {
uint32_t max_reps = 0;
- for (size_t i = 0; i < in_size;) {
- for (; i < in_size && v[i] != 0; ++i) ;
+ size_t i;
+ uint32_t max_prefix;
+ for (i = 0; i < in_size;) {
uint32_t reps = 0;
+ for (; i < in_size && v[i] != 0; ++i) ;
for (; i < in_size && v[i] == 0; ++i) {
++reps;
}
- max_reps = std::max(reps, max_reps);
+ max_reps = BROTLI_MAX(uint32_t, reps, max_reps);
}
- uint32_t max_prefix = max_reps > 0 ? Log2FloorNonZero(max_reps) : 0;
- max_prefix = std::min(max_prefix, *max_run_length_prefix);
+ max_prefix = max_reps > 0 ? Log2FloorNonZero(max_reps) : 0;
+ max_prefix = BROTLI_MIN(uint32_t, max_prefix, *max_run_length_prefix);
*max_run_length_prefix = max_prefix;
*out_size = 0;
- for (size_t i = 0; i < in_size;) {
+ for (i = 0; i < in_size;) {
assert(*out_size <= i);
if (v[i] != 0) {
v[*out_size] = v[i] + *max_run_length_prefix;
@@ -571,7 +660,8 @@ static void RunLengthCodeZeros(const size_t in_size,
++(*out_size);
} else {
uint32_t reps = 1;
- for (size_t k = i + 1; k < in_size && v[k] == 0; ++k) {
+ size_t k;
+ for (k = i + 1; k < in_size && v[k] == 0; ++k) {
++reps;
}
i += reps;
@@ -593,368 +683,391 @@ static void RunLengthCodeZeros(const size_t in_size,
}
}
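/* Worked example (editorial sketch, assuming the resulting
   *max_run_length_prefix >= 3): a run of 12 zeros gets prefix code
   Log2FloorNonZero(12) == 3 with 3 extra bits holding 12 - 8 == 4;
   the extra-bit value is packed above SYMBOL_BITS in the same uint32
   (see EncodeContextMap below).  Non-zero map values are shifted up
   by *max_run_length_prefix, so codes 0..max_prefix stay reserved
   for the zero-run lengths. */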
-void EncodeContextMap(const std::vector<uint32_t>& context_map,
- size_t num_clusters,
- HuffmanTree* tree,
- size_t* storage_ix, uint8_t* storage) {
+#define SYMBOL_BITS 9
+
+static void EncodeContextMap(MemoryManager* m,
+ const uint32_t* context_map,
+ size_t context_map_size,
+ size_t num_clusters,
+ HuffmanTree* tree,
+ size_t* storage_ix, uint8_t* storage) {
+ size_t i;
+ uint32_t* rle_symbols;
+ uint32_t max_run_length_prefix = 6;
+ size_t num_rle_symbols = 0;
+ uint32_t histogram[BROTLI_MAX_CONTEXT_MAP_SYMBOLS];
+ static const uint32_t kSymbolMask = (1u << SYMBOL_BITS) - 1u;
+ uint8_t depths[BROTLI_MAX_CONTEXT_MAP_SYMBOLS];
+ uint16_t bits[BROTLI_MAX_CONTEXT_MAP_SYMBOLS];
+
StoreVarLenUint8(num_clusters - 1, storage_ix, storage);
if (num_clusters == 1) {
return;
}
- uint32_t* rle_symbols = new uint32_t[context_map.size()];
- MoveToFrontTransform(&context_map[0], context_map.size(), rle_symbols);
- uint32_t max_run_length_prefix = 6;
- size_t num_rle_symbols = 0;
- RunLengthCodeZeros(context_map.size(), rle_symbols,
+ rle_symbols = BROTLI_ALLOC(m, uint32_t, context_map_size);
+ if (BROTLI_IS_OOM(m)) return;
+ MoveToFrontTransform(context_map, context_map_size, rle_symbols);
+ RunLengthCodeZeros(context_map_size, rle_symbols,
&num_rle_symbols, &max_run_length_prefix);
- uint32_t histogram[kContextMapAlphabetSize];
memset(histogram, 0, sizeof(histogram));
- static const int kSymbolBits = 9;
- static const uint32_t kSymbolMask = (1u << kSymbolBits) - 1u;
- for (size_t i = 0; i < num_rle_symbols; ++i) {
+ for (i = 0; i < num_rle_symbols; ++i) {
++histogram[rle_symbols[i] & kSymbolMask];
}
- bool use_rle = max_run_length_prefix > 0;
- WriteBits(1, use_rle, storage_ix, storage);
- if (use_rle) {
- WriteBits(4, max_run_length_prefix - 1, storage_ix, storage);
+ {
+ int use_rle = (max_run_length_prefix > 0) ? 1 : 0;
+ BrotliWriteBits(1, (uint64_t)use_rle, storage_ix, storage);
+ if (use_rle) {
+ BrotliWriteBits(4, max_run_length_prefix - 1, storage_ix, storage);
+ }
}
- uint8_t depths[kContextMapAlphabetSize];
- uint16_t bits[kContextMapAlphabetSize];
- memset(depths, 0, sizeof(depths));
- memset(bits, 0, sizeof(bits));
BuildAndStoreHuffmanTree(histogram, num_clusters + max_run_length_prefix,
tree, depths, bits, storage_ix, storage);
- for (size_t i = 0; i < num_rle_symbols; ++i) {
+ for (i = 0; i < num_rle_symbols; ++i) {
const uint32_t rle_symbol = rle_symbols[i] & kSymbolMask;
- const uint32_t extra_bits_val = rle_symbols[i] >> kSymbolBits;
- WriteBits(depths[rle_symbol], bits[rle_symbol], storage_ix, storage);
+ const uint32_t extra_bits_val = rle_symbols[i] >> SYMBOL_BITS;
+ BrotliWriteBits(depths[rle_symbol], bits[rle_symbol], storage_ix, storage);
if (rle_symbol > 0 && rle_symbol <= max_run_length_prefix) {
- WriteBits(rle_symbol, extra_bits_val, storage_ix, storage);
+ BrotliWriteBits(rle_symbol, extra_bits_val, storage_ix, storage);
}
}
- WriteBits(1, 1, storage_ix, storage); // use move-to-front
- delete[] rle_symbols;
+ BrotliWriteBits(1, 1, storage_ix, storage); /* use move-to-front */
+ BROTLI_FREE(m, rle_symbols);
}
/* Stores the block switch command with index block_ix to the bit stream. */
-void StoreBlockSwitch(const BlockSplitCode& code,
- const size_t block_ix,
- size_t* storage_ix,
- uint8_t* storage) {
- if (block_ix > 0) {
- size_t typecode = code.type_code[block_ix];
- WriteBits(code.type_depths[typecode], code.type_bits[typecode],
- storage_ix, storage);
+static BROTLI_INLINE void StoreBlockSwitch(BlockSplitCode* code,
+ const uint32_t block_len,
+ const uint8_t block_type,
+ int is_first_block,
+ size_t* storage_ix,
+ uint8_t* storage) {
+ size_t typecode = NextBlockTypeCode(&code->type_code_calculator, block_type);
+ size_t lencode;
+ uint32_t len_nextra;
+ uint32_t len_extra;
+ if (!is_first_block) {
+ BrotliWriteBits(code->type_depths[typecode], code->type_bits[typecode],
+ storage_ix, storage);
}
- size_t lencode = code.length_prefix[block_ix];
- WriteBits(code.length_depths[lencode], code.length_bits[lencode],
- storage_ix, storage);
- WriteBits(code.length_nextra[block_ix], code.length_extra[block_ix],
- storage_ix, storage);
+ GetBlockLengthPrefixCode(block_len, &lencode, &len_nextra, &len_extra);
+
+ BrotliWriteBits(code->length_depths[lencode], code->length_bits[lencode],
+ storage_ix, storage);
+ BrotliWriteBits(len_nextra, len_extra, storage_ix, storage);
}
/* Builds a BlockSplitCode data structure from the block split given by the
vector of block types and block lengths and stores it to the bit stream. */
-static void BuildAndStoreBlockSplitCode(const std::vector<uint8_t>& types,
- const std::vector<uint32_t>& lengths,
+static void BuildAndStoreBlockSplitCode(const uint8_t* types,
+ const uint32_t* lengths,
+ const size_t num_blocks,
const size_t num_types,
HuffmanTree* tree,
BlockSplitCode* code,
size_t* storage_ix,
uint8_t* storage) {
- const size_t num_blocks = types.size();
- uint32_t type_histo[kBlockTypeAlphabetSize];
- uint32_t length_histo[kNumBlockLenPrefixes];
+ uint32_t type_histo[BROTLI_MAX_BLOCK_TYPE_SYMBOLS];
+ uint32_t length_histo[BROTLI_NUM_BLOCK_LEN_SYMBOLS];
+ size_t i;
+ BlockTypeCodeCalculator type_code_calculator;
memset(type_histo, 0, (num_types + 2) * sizeof(type_histo[0]));
memset(length_histo, 0, sizeof(length_histo));
- size_t last_type = 1;
- size_t second_last_type = 0;
- code->type_code.resize(num_blocks);
- code->length_prefix.resize(num_blocks);
- code->length_nextra.resize(num_blocks);
- code->length_extra.resize(num_blocks);
- code->type_depths.resize(num_types + 2);
- code->type_bits.resize(num_types + 2);
- memset(code->length_depths, 0, sizeof(code->length_depths));
- memset(code->length_bits, 0, sizeof(code->length_bits));
- for (size_t i = 0; i < num_blocks; ++i) {
- size_t type = types[i];
- size_t type_code = (type == last_type + 1 ? 1 :
- type == second_last_type ? 0 :
- type + 2);
- second_last_type = last_type;
- last_type = type;
- code->type_code[i] = static_cast<uint32_t>(type_code);
+ InitBlockTypeCodeCalculator(&type_code_calculator);
+ for (i = 0; i < num_blocks; ++i) {
+ size_t type_code = NextBlockTypeCode(&type_code_calculator, types[i]);
if (i != 0) ++type_histo[type_code];
- GetBlockLengthPrefixCode(lengths[i],
- &code->length_prefix[i],
- &code->length_nextra[i],
- &code->length_extra[i]);
- ++length_histo[code->length_prefix[i]];
+ ++length_histo[BlockLengthPrefixCode(lengths[i])];
}
StoreVarLenUint8(num_types - 1, storage_ix, storage);
- if (num_types > 1) {
+ if (num_types > 1) { /* TODO: else? could StoreBlockSwitch occur? */
BuildAndStoreHuffmanTree(&type_histo[0], num_types + 2, tree,
&code->type_depths[0], &code->type_bits[0],
storage_ix, storage);
- BuildAndStoreHuffmanTree(&length_histo[0], kNumBlockLenPrefixes, tree,
- &code->length_depths[0], &code->length_bits[0],
- storage_ix, storage);
- StoreBlockSwitch(*code, 0, storage_ix, storage);
+ BuildAndStoreHuffmanTree(&length_histo[0], BROTLI_NUM_BLOCK_LEN_SYMBOLS,
+ tree, &code->length_depths[0],
+ &code->length_bits[0], storage_ix, storage);
+ StoreBlockSwitch(code, lengths[0], types[0], 1, storage_ix, storage);
}
}
/* Stores a context map where the histogram type is always the block type. */
-void StoreTrivialContextMap(size_t num_types,
- size_t context_bits,
- HuffmanTree* tree,
- size_t* storage_ix,
- uint8_t* storage) {
+static void StoreTrivialContextMap(size_t num_types,
+ size_t context_bits,
+ HuffmanTree* tree,
+ size_t* storage_ix,
+ uint8_t* storage) {
StoreVarLenUint8(num_types - 1, storage_ix, storage);
if (num_types > 1) {
size_t repeat_code = context_bits - 1u;
size_t repeat_bits = (1u << repeat_code) - 1u;
size_t alphabet_size = num_types + repeat_code;
- uint32_t histogram[kContextMapAlphabetSize];
- uint8_t depths[kContextMapAlphabetSize];
- uint16_t bits[kContextMapAlphabetSize];
+ uint32_t histogram[BROTLI_MAX_CONTEXT_MAP_SYMBOLS];
+ uint8_t depths[BROTLI_MAX_CONTEXT_MAP_SYMBOLS];
+ uint16_t bits[BROTLI_MAX_CONTEXT_MAP_SYMBOLS];
+ size_t i;
memset(histogram, 0, alphabet_size * sizeof(histogram[0]));
- memset(depths, 0, alphabet_size * sizeof(depths[0]));
- memset(bits, 0, alphabet_size * sizeof(bits[0]));
/* Write RLEMAX. */
- WriteBits(1, 1, storage_ix, storage);
- WriteBits(4, repeat_code - 1, storage_ix, storage);
- histogram[repeat_code] = static_cast<uint32_t>(num_types);
+ BrotliWriteBits(1, 1, storage_ix, storage);
+ BrotliWriteBits(4, repeat_code - 1, storage_ix, storage);
+ histogram[repeat_code] = (uint32_t)num_types;
histogram[0] = 1;
- for (size_t i = context_bits; i < alphabet_size; ++i) {
+ for (i = context_bits; i < alphabet_size; ++i) {
histogram[i] = 1;
}
- BuildAndStoreHuffmanTree(&histogram[0], alphabet_size, tree,
- &depths[0], &bits[0],
- storage_ix, storage);
- for (size_t i = 0; i < num_types; ++i) {
+ BuildAndStoreHuffmanTree(histogram, alphabet_size, tree,
+ depths, bits, storage_ix, storage);
+ for (i = 0; i < num_types; ++i) {
size_t code = (i == 0 ? 0 : i + context_bits - 1);
- WriteBits(depths[code], bits[code], storage_ix, storage);
- WriteBits(depths[repeat_code], bits[repeat_code], storage_ix, storage);
- WriteBits(repeat_code, repeat_bits, storage_ix, storage);
+ BrotliWriteBits(depths[code], bits[code], storage_ix, storage);
+ BrotliWriteBits(
+ depths[repeat_code], bits[repeat_code], storage_ix, storage);
+ BrotliWriteBits(repeat_code, repeat_bits, storage_ix, storage);
}
/* Write IMTF (inverse-move-to-front) bit. */
- WriteBits(1, 1, storage_ix, storage);
+ BrotliWriteBits(1, 1, storage_ix, storage);
}
}
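/* Worked example (editorial sketch): for two literal block types
   (num_types == 2, context_bits == 6) the map has 128 entries --
   64 copies of 0 followed by 64 copies of 1.  With repeat_code == 5,
   each row is stored as one value symbol plus one run symbol whose
   5 extra bits (value repeat_bits == 31) encode a run of
   (1u << 5) + 31 == 63 zeros; the final IMTF bit lets the decoder
   turn those zero runs back into repeats of the row's value. */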
/* Manages the encoding of one block category (literal, command or distance). */
-class BlockEncoder {
- public:
- BlockEncoder(size_t alphabet_size,
- size_t num_block_types,
- const std::vector<uint8_t>& block_types,
- const std::vector<uint32_t>& block_lengths)
- : alphabet_size_(alphabet_size),
- num_block_types_(num_block_types),
- block_types_(block_types),
- block_lengths_(block_lengths),
- block_ix_(0),
- block_len_(block_lengths.empty() ? 0 : block_lengths[0]),
- entropy_ix_(0) {}
+typedef struct BlockEncoder {
+ size_t alphabet_size_;
+ size_t num_block_types_;
+ const uint8_t* block_types_; /* Not owned. */
+ const uint32_t* block_lengths_; /* Not owned. */
+ size_t num_blocks_;
+ BlockSplitCode block_split_code_;
+ size_t block_ix_;
+ size_t block_len_;
+ size_t entropy_ix_;
+ uint8_t* depths_;
+ uint16_t* bits_;
+} BlockEncoder;
+
+static void InitBlockEncoder(BlockEncoder* self, size_t alphabet_size,
+ size_t num_block_types, const uint8_t* block_types,
+ const uint32_t* block_lengths, const size_t num_blocks) {
+ self->alphabet_size_ = alphabet_size;
+ self->num_block_types_ = num_block_types;
+ self->block_types_ = block_types;
+ self->block_lengths_ = block_lengths;
+ self->num_blocks_ = num_blocks;
+ InitBlockTypeCodeCalculator(&self->block_split_code_.type_code_calculator);
+ self->block_ix_ = 0;
+ self->block_len_ = num_blocks == 0 ? 0 : block_lengths[0];
+ self->entropy_ix_ = 0;
+ self->depths_ = 0;
+ self->bits_ = 0;
+}
+
+static void CleanupBlockEncoder(MemoryManager* m, BlockEncoder* self) {
+ BROTLI_FREE(m, self->depths_);
+ BROTLI_FREE(m, self->bits_);
+}
/* Creates entropy codes of block lengths and block types and stores them
to the bit stream. */
- void BuildAndStoreBlockSwitchEntropyCodes(HuffmanTree* tree,
- size_t* storage_ix,
- uint8_t* storage) {
- BuildAndStoreBlockSplitCode(
- block_types_, block_lengths_, num_block_types_,
- tree, &block_split_code_, storage_ix, storage);
- }
-
- // Creates entropy codes for all block types and stores them to the bit
- // stream.
- template<int kSize>
- void BuildAndStoreEntropyCodes(
- const std::vector<Histogram<kSize> >& histograms,
- HuffmanTree* tree,
- size_t* storage_ix, uint8_t* storage) {
- depths_.resize(histograms.size() * alphabet_size_);
- bits_.resize(histograms.size() * alphabet_size_);
- for (size_t i = 0; i < histograms.size(); ++i) {
- size_t ix = i * alphabet_size_;
- BuildAndStoreHuffmanTree(&histograms[i].data_[0], alphabet_size_,
- tree,
- &depths_[ix], &bits_[ix],
- storage_ix, storage);
- }
- }
+static void BuildAndStoreBlockSwitchEntropyCodes(BlockEncoder* self,
+ HuffmanTree* tree, size_t* storage_ix, uint8_t* storage) {
+ BuildAndStoreBlockSplitCode(self->block_types_, self->block_lengths_,
+ self->num_blocks_, self->num_block_types_, tree, &self->block_split_code_,
+ storage_ix, storage);
+}
/* Stores the next symbol with the entropy code of the current block type.
Updates the block type and block length at block boundaries. */
- void StoreSymbol(size_t symbol, size_t* storage_ix, uint8_t* storage) {
- if (block_len_ == 0) {
- ++block_ix_;
- block_len_ = block_lengths_[block_ix_];
- entropy_ix_ = block_types_[block_ix_] * alphabet_size_;
- StoreBlockSwitch(block_split_code_, block_ix_, storage_ix, storage);
- }
- --block_len_;
- size_t ix = entropy_ix_ + symbol;
- WriteBits(depths_[ix], bits_[ix], storage_ix, storage);
+static void StoreSymbol(BlockEncoder* self, size_t symbol, size_t* storage_ix,
+ uint8_t* storage) {
+ if (self->block_len_ == 0) {
+ size_t block_ix = ++self->block_ix_;
+ uint32_t block_len = self->block_lengths_[block_ix];
+ uint8_t block_type = self->block_types_[block_ix];
+ self->block_len_ = block_len;
+ self->entropy_ix_ = block_type * self->alphabet_size_;
+ StoreBlockSwitch(&self->block_split_code_, block_len, block_type, 0,
+ storage_ix, storage);
}
+ --self->block_len_;
+ {
+ size_t ix = self->entropy_ix_ + symbol;
+ BrotliWriteBits(self->depths_[ix], self->bits_[ix], storage_ix, storage);
+ }
+}
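/* Usage sketch (editorial; not part of the commit).  The lifecycle of
   one category encoder, using only functions defined in this file:

     BlockEncoder enc;
     InitBlockEncoder(&enc, 256, num_types, types, lengths, num_blocks);
     BuildAndStoreBlockSwitchEntropyCodes(&enc, tree, storage_ix, storage);
     (fill enc.depths_ / enc.bits_ via the BuildAndStoreEntropyCodes*
      instances generated from block_encoder_inc.h below)
     StoreSymbol(&enc, input[pos & mask], storage_ix, storage);
     CleanupBlockEncoder(m, &enc);

   StoreSymbol transparently emits a block-switch command whenever the
   current block's length runs out. */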
/* Stores the next symbol with the entropy code of the current block type and
context value.
Updates the block type and block length at block boundaries. */
- template<int kContextBits>
- void StoreSymbolWithContext(size_t symbol, size_t context,
- const std::vector<uint32_t>& context_map,
- size_t* storage_ix, uint8_t* storage) {
- if (block_len_ == 0) {
- ++block_ix_;
- block_len_ = block_lengths_[block_ix_];
- size_t block_type = block_types_[block_ix_];
- entropy_ix_ = block_type << kContextBits;
- StoreBlockSwitch(block_split_code_, block_ix_, storage_ix, storage);
- }
- --block_len_;
- size_t histo_ix = context_map[entropy_ix_ + context];
- size_t ix = histo_ix * alphabet_size_ + symbol;
- WriteBits(depths_[ix], bits_[ix], storage_ix, storage);
+static void StoreSymbolWithContext(BlockEncoder* self, size_t symbol,
+ size_t context, const uint32_t* context_map, size_t* storage_ix,
+ uint8_t* storage, const size_t context_bits) {
+ if (self->block_len_ == 0) {
+ size_t block_ix = ++self->block_ix_;
+ uint32_t block_len = self->block_lengths_[block_ix];
+ uint8_t block_type = self->block_types_[block_ix];
+ self->block_len_ = block_len;
+ self->entropy_ix_ = (size_t)block_type << context_bits;
+ StoreBlockSwitch(&self->block_split_code_, block_len, block_type, 0,
+ storage_ix, storage);
+ }
+ --self->block_len_;
+ {
+ size_t histo_ix = context_map[self->entropy_ix_ + context];
+ size_t ix = histo_ix * self->alphabet_size_ + symbol;
+ BrotliWriteBits(self->depths_[ix], self->bits_[ix], storage_ix, storage);
}
+}
- private:
- const size_t alphabet_size_;
- const size_t num_block_types_;
- const std::vector<uint8_t>& block_types_;
- const std::vector<uint32_t>& block_lengths_;
- BlockSplitCode block_split_code_;
- size_t block_ix_;
- size_t block_len_;
- size_t entropy_ix_;
- std::vector<uint8_t> depths_;
- std::vector<uint16_t> bits_;
-};
+#define FN(X) X ## Literal
+/* NOLINTNEXTLINE(build/include) */
+#include "./block_encoder_inc.h"
+#undef FN
+
+#define FN(X) X ## Command
+/* NOLINTNEXTLINE(build/include) */
+#include "./block_encoder_inc.h"
+#undef FN
+
+#define FN(X) X ## Distance
+/* NOLINTNEXTLINE(build/include) */
+#include "./block_encoder_inc.h"
+#undef FN
static void JumpToByteBoundary(size_t* storage_ix, uint8_t* storage) {
*storage_ix = (*storage_ix + 7u) & ~7u;
storage[*storage_ix >> 3] = 0;
}
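/* Worked example (editorial sketch): with *storage_ix == 13 the
   rounding (13 + 7) & ~7 advances the bit position to 16 and clears
   storage[2], so the next meta-block starts on a fresh zeroed byte. */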
-void StoreMetaBlock(const uint8_t* input,
- size_t start_pos,
- size_t length,
- size_t mask,
- uint8_t prev_byte,
- uint8_t prev_byte2,
- bool is_last,
- uint32_t num_direct_distance_codes,
- uint32_t distance_postfix_bits,
- ContextType literal_context_mode,
- const brotli::Command *commands,
- size_t n_commands,
- const MetaBlockSplit& mb,
- size_t *storage_ix,
- uint8_t *storage) {
- StoreCompressedMetaBlockHeader(is_last, length, storage_ix, storage);
-
+void BrotliStoreMetaBlock(MemoryManager* m,
+ const uint8_t* input,
+ size_t start_pos,
+ size_t length,
+ size_t mask,
+ uint8_t prev_byte,
+ uint8_t prev_byte2,
+ int is_last,
+ uint32_t num_direct_distance_codes,
+ uint32_t distance_postfix_bits,
+ ContextType literal_context_mode,
+ const Command *commands,
+ size_t n_commands,
+ const MetaBlockSplit* mb,
+ size_t *storage_ix,
+ uint8_t *storage) {
+ size_t pos = start_pos;
+ size_t i;
size_t num_distance_codes =
- kNumDistanceShortCodes + num_direct_distance_codes +
+ BROTLI_NUM_DISTANCE_SHORT_CODES + num_direct_distance_codes +
(48u << distance_postfix_bits);
+ HuffmanTree* tree;
+ BlockEncoder literal_enc;
+ BlockEncoder command_enc;
+ BlockEncoder distance_enc;
+
+ StoreCompressedMetaBlockHeader(is_last, length, storage_ix, storage);
- HuffmanTree* tree = static_cast<HuffmanTree*>(
- malloc(kMaxHuffmanTreeSize * sizeof(HuffmanTree)));
- BlockEncoder literal_enc(256,
- mb.literal_split.num_types,
- mb.literal_split.types,
- mb.literal_split.lengths);
- BlockEncoder command_enc(kNumCommandPrefixes,
- mb.command_split.num_types,
- mb.command_split.types,
- mb.command_split.lengths);
- BlockEncoder distance_enc(num_distance_codes,
- mb.distance_split.num_types,
- mb.distance_split.types,
- mb.distance_split.lengths);
-
- literal_enc.BuildAndStoreBlockSwitchEntropyCodes(tree, storage_ix, storage);
- command_enc.BuildAndStoreBlockSwitchEntropyCodes(tree, storage_ix, storage);
- distance_enc.BuildAndStoreBlockSwitchEntropyCodes(tree, storage_ix, storage);
-
- WriteBits(2, distance_postfix_bits, storage_ix, storage);
- WriteBits(4, num_direct_distance_codes >> distance_postfix_bits,
- storage_ix, storage);
- for (size_t i = 0; i < mb.literal_split.num_types; ++i) {
- WriteBits(2, literal_context_mode, storage_ix, storage);
+ tree = BROTLI_ALLOC(m, HuffmanTree, MAX_HUFFMAN_TREE_SIZE);
+ if (BROTLI_IS_OOM(m)) return;
+ InitBlockEncoder(&literal_enc, 256, mb->literal_split.num_types,
+ mb->literal_split.types, mb->literal_split.lengths,
+ mb->literal_split.num_blocks);
+ InitBlockEncoder(&command_enc, BROTLI_NUM_COMMAND_SYMBOLS,
+ mb->command_split.num_types, mb->command_split.types,
+ mb->command_split.lengths, mb->command_split.num_blocks);
+ InitBlockEncoder(&distance_enc, num_distance_codes,
+ mb->distance_split.num_types, mb->distance_split.types,
+ mb->distance_split.lengths, mb->distance_split.num_blocks);
+
+ BuildAndStoreBlockSwitchEntropyCodes(&literal_enc, tree, storage_ix, storage);
+ BuildAndStoreBlockSwitchEntropyCodes(&command_enc, tree, storage_ix, storage);
+ BuildAndStoreBlockSwitchEntropyCodes(
+ &distance_enc, tree, storage_ix, storage);
+
+ BrotliWriteBits(2, distance_postfix_bits, storage_ix, storage);
+ BrotliWriteBits(4, num_direct_distance_codes >> distance_postfix_bits,
+ storage_ix, storage);
+ for (i = 0; i < mb->literal_split.num_types; ++i) {
+ BrotliWriteBits(2, literal_context_mode, storage_ix, storage);
}
- size_t num_literal_histograms = mb.literal_histograms.size();
- if (mb.literal_context_map.empty()) {
- StoreTrivialContextMap(num_literal_histograms, kLiteralContextBits, tree,
- storage_ix, storage);
+ if (mb->literal_context_map_size == 0) {
+ StoreTrivialContextMap(mb->literal_histograms_size,
+ BROTLI_LITERAL_CONTEXT_BITS, tree, storage_ix, storage);
} else {
- EncodeContextMap(mb.literal_context_map, num_literal_histograms, tree,
- storage_ix, storage);
+ EncodeContextMap(m,
+ mb->literal_context_map, mb->literal_context_map_size,
+ mb->literal_histograms_size, tree, storage_ix, storage);
+ if (BROTLI_IS_OOM(m)) return;
}
- size_t num_dist_histograms = mb.distance_histograms.size();
- if (mb.distance_context_map.empty()) {
- StoreTrivialContextMap(num_dist_histograms, kDistanceContextBits, tree,
- storage_ix, storage);
+ if (mb->distance_context_map_size == 0) {
+ StoreTrivialContextMap(mb->distance_histograms_size,
+ BROTLI_DISTANCE_CONTEXT_BITS, tree, storage_ix, storage);
} else {
- EncodeContextMap(mb.distance_context_map, num_dist_histograms, tree,
- storage_ix, storage);
+ EncodeContextMap(m,
+ mb->distance_context_map, mb->distance_context_map_size,
+ mb->distance_histograms_size, tree, storage_ix, storage);
+ if (BROTLI_IS_OOM(m)) return;
}
- literal_enc.BuildAndStoreEntropyCodes(mb.literal_histograms, tree,
- storage_ix, storage);
- command_enc.BuildAndStoreEntropyCodes(mb.command_histograms, tree,
- storage_ix, storage);
- distance_enc.BuildAndStoreEntropyCodes(mb.distance_histograms, tree,
- storage_ix, storage);
- free(tree);
-
- size_t pos = start_pos;
- for (size_t i = 0; i < n_commands; ++i) {
+ BuildAndStoreEntropyCodesLiteral(m, &literal_enc, mb->literal_histograms,
+ mb->literal_histograms_size, tree, storage_ix, storage);
+ if (BROTLI_IS_OOM(m)) return;
+ BuildAndStoreEntropyCodesCommand(m, &command_enc, mb->command_histograms,
+ mb->command_histograms_size, tree, storage_ix, storage);
+ if (BROTLI_IS_OOM(m)) return;
+ BuildAndStoreEntropyCodesDistance(m, &distance_enc, mb->distance_histograms,
+ mb->distance_histograms_size, tree, storage_ix, storage);
+ if (BROTLI_IS_OOM(m)) return;
+ BROTLI_FREE(m, tree);
+
+ for (i = 0; i < n_commands; ++i) {
const Command cmd = commands[i];
size_t cmd_code = cmd.cmd_prefix_;
- command_enc.StoreSymbol(cmd_code, storage_ix, storage);
- StoreCommandExtra(cmd, storage_ix, storage);
- if (mb.literal_context_map.empty()) {
- for (size_t j = cmd.insert_len_; j != 0; --j) {
- literal_enc.StoreSymbol(input[pos & mask], storage_ix, storage);
+ StoreSymbol(&command_enc, cmd_code, storage_ix, storage);
+ StoreCommandExtra(&cmd, storage_ix, storage);
+ if (mb->literal_context_map_size == 0) {
+ size_t j;
+ for (j = cmd.insert_len_; j != 0; --j) {
+ StoreSymbol(&literal_enc, input[pos & mask], storage_ix, storage);
++pos;
}
} else {
- for (size_t j = cmd.insert_len_; j != 0; --j) {
+ size_t j;
+ for (j = cmd.insert_len_; j != 0; --j) {
size_t context = Context(prev_byte, prev_byte2, literal_context_mode);
uint8_t literal = input[pos & mask];
- literal_enc.StoreSymbolWithContext<kLiteralContextBits>(
- literal, context, mb.literal_context_map, storage_ix, storage);
+ StoreSymbolWithContext(&literal_enc, literal, context,
+ mb->literal_context_map, storage_ix, storage,
+ BROTLI_LITERAL_CONTEXT_BITS);
prev_byte2 = prev_byte;
prev_byte = literal;
++pos;
}
}
- pos += cmd.copy_len();
- if (cmd.copy_len()) {
+ pos += CommandCopyLen(&cmd);
+ if (CommandCopyLen(&cmd)) {
prev_byte2 = input[(pos - 2) & mask];
prev_byte = input[(pos - 1) & mask];
if (cmd.cmd_prefix_ >= 128) {
size_t dist_code = cmd.dist_prefix_;
uint32_t distnumextra = cmd.dist_extra_ >> 24;
uint64_t distextra = cmd.dist_extra_ & 0xffffff;
- if (mb.distance_context_map.empty()) {
- distance_enc.StoreSymbol(dist_code, storage_ix, storage);
+ if (mb->distance_context_map_size == 0) {
+ StoreSymbol(&distance_enc, dist_code, storage_ix, storage);
} else {
- size_t context = cmd.DistanceContext();
- distance_enc.StoreSymbolWithContext<kDistanceContextBits>(
- dist_code, context, mb.distance_context_map, storage_ix, storage);
+ size_t context = CommandDistanceContext(&cmd);
+ StoreSymbolWithContext(&distance_enc, dist_code, context,
+ mb->distance_context_map, storage_ix, storage,
+ BROTLI_DISTANCE_CONTEXT_BITS);
}
- brotli::WriteBits(distnumextra, distextra, storage_ix, storage);
+ BrotliWriteBits(distnumextra, distextra, storage_ix, storage);
}
}
}
+ CleanupBlockEncoder(m, &distance_enc);
+ CleanupBlockEncoder(m, &command_enc);
+ CleanupBlockEncoder(m, &literal_enc);
if (is_last) {
JumpToByteBoundary(storage_ix, storage);
}
@@ -963,22 +1076,24 @@ void StoreMetaBlock(const uint8_t* input,
static void BuildHistograms(const uint8_t* input,
size_t start_pos,
size_t mask,
- const brotli::Command *commands,
+ const Command *commands,
size_t n_commands,
HistogramLiteral* lit_histo,
HistogramCommand* cmd_histo,
HistogramDistance* dist_histo) {
size_t pos = start_pos;
- for (size_t i = 0; i < n_commands; ++i) {
+ size_t i;
+ for (i = 0; i < n_commands; ++i) {
const Command cmd = commands[i];
- cmd_histo->Add(cmd.cmd_prefix_);
- for (size_t j = cmd.insert_len_; j != 0; --j) {
- lit_histo->Add(input[pos & mask]);
+ size_t j;
+ HistogramAddCommand(cmd_histo, cmd.cmd_prefix_);
+ for (j = cmd.insert_len_; j != 0; --j) {
+ HistogramAddLiteral(lit_histo, input[pos & mask]);
++pos;
}
- pos += cmd.copy_len();
- if (cmd.copy_len() && cmd.cmd_prefix_ >= 128) {
- dist_histo->Add(cmd.dist_prefix_);
+ pos += CommandCopyLen(&cmd);
+ if (CommandCopyLen(&cmd) && cmd.cmd_prefix_ >= 128) {
+ HistogramAddDistance(dist_histo, cmd.dist_prefix_);
}
}
}
@@ -986,7 +1101,7 @@ static void BuildHistograms(const uint8_t* input,
static void StoreDataWithHuffmanCodes(const uint8_t* input,
size_t start_pos,
size_t mask,
- const brotli::Command *commands,
+ const Command *commands,
size_t n_commands,
const uint8_t* lit_depth,
const uint16_t* lit_bits,
@@ -997,113 +1112,126 @@ static void StoreDataWithHuffmanCodes(const uint8_t* input,
size_t* storage_ix,
uint8_t* storage) {
size_t pos = start_pos;
- for (size_t i = 0; i < n_commands; ++i) {
+ size_t i;
+ for (i = 0; i < n_commands; ++i) {
const Command cmd = commands[i];
const size_t cmd_code = cmd.cmd_prefix_;
- WriteBits(cmd_depth[cmd_code], cmd_bits[cmd_code], storage_ix, storage);
- StoreCommandExtra(cmd, storage_ix, storage);
- for (size_t j = cmd.insert_len_; j != 0; --j) {
+ size_t j;
+ BrotliWriteBits(
+ cmd_depth[cmd_code], cmd_bits[cmd_code], storage_ix, storage);
+ StoreCommandExtra(&cmd, storage_ix, storage);
+ for (j = cmd.insert_len_; j != 0; --j) {
const uint8_t literal = input[pos & mask];
- WriteBits(lit_depth[literal], lit_bits[literal], storage_ix, storage);
+ BrotliWriteBits(
+ lit_depth[literal], lit_bits[literal], storage_ix, storage);
++pos;
}
- pos += cmd.copy_len();
- if (cmd.copy_len() && cmd.cmd_prefix_ >= 128) {
+ pos += CommandCopyLen(&cmd);
+ if (CommandCopyLen(&cmd) && cmd.cmd_prefix_ >= 128) {
const size_t dist_code = cmd.dist_prefix_;
const uint32_t distnumextra = cmd.dist_extra_ >> 24;
const uint32_t distextra = cmd.dist_extra_ & 0xffffff;
- WriteBits(dist_depth[dist_code], dist_bits[dist_code],
- storage_ix, storage);
- WriteBits(distnumextra, distextra, storage_ix, storage);
+ BrotliWriteBits(dist_depth[dist_code], dist_bits[dist_code],
+ storage_ix, storage);
+ BrotliWriteBits(distnumextra, distextra, storage_ix, storage);
}
}
}
-void StoreMetaBlockTrivial(const uint8_t* input,
- size_t start_pos,
- size_t length,
- size_t mask,
- bool is_last,
- const brotli::Command *commands,
- size_t n_commands,
- size_t *storage_ix,
- uint8_t *storage) {
- StoreCompressedMetaBlockHeader(is_last, length, storage_ix, storage);
-
+void BrotliStoreMetaBlockTrivial(MemoryManager* m,
+ const uint8_t* input,
+ size_t start_pos,
+ size_t length,
+ size_t mask,
+ int is_last,
+ const Command *commands,
+ size_t n_commands,
+ size_t *storage_ix,
+ uint8_t *storage) {
HistogramLiteral lit_histo;
HistogramCommand cmd_histo;
HistogramDistance dist_histo;
+ uint8_t lit_depth[256];
+ uint16_t lit_bits[256];
+ uint8_t cmd_depth[BROTLI_NUM_COMMAND_SYMBOLS];
+ uint16_t cmd_bits[BROTLI_NUM_COMMAND_SYMBOLS];
+ uint8_t dist_depth[64];
+ uint16_t dist_bits[64];
+ HuffmanTree* tree;
+
+ StoreCompressedMetaBlockHeader(is_last, length, storage_ix, storage);
+
+ HistogramClearLiteral(&lit_histo);
+ HistogramClearCommand(&cmd_histo);
+ HistogramClearDistance(&dist_histo);
BuildHistograms(input, start_pos, mask, commands, n_commands,
&lit_histo, &cmd_histo, &dist_histo);
- WriteBits(13, 0, storage_ix, storage);
+ BrotliWriteBits(13, 0, storage_ix, storage);
- std::vector<uint8_t> lit_depth(256);
- std::vector<uint16_t> lit_bits(256);
- std::vector<uint8_t> cmd_depth(kNumCommandPrefixes);
- std::vector<uint16_t> cmd_bits(kNumCommandPrefixes);
- std::vector<uint8_t> dist_depth(64);
- std::vector<uint16_t> dist_bits(64);
-
- HuffmanTree* tree = static_cast<HuffmanTree*>(
- malloc(kMaxHuffmanTreeSize * sizeof(HuffmanTree)));
- BuildAndStoreHuffmanTree(&lit_histo.data_[0], 256, tree,
- &lit_depth[0], &lit_bits[0],
+ tree = BROTLI_ALLOC(m, HuffmanTree, MAX_HUFFMAN_TREE_SIZE);
+ if (BROTLI_IS_OOM(m)) return;
+ BuildAndStoreHuffmanTree(lit_histo.data_, 256, tree,
+ lit_depth, lit_bits,
storage_ix, storage);
- BuildAndStoreHuffmanTree(&cmd_histo.data_[0], kNumCommandPrefixes, tree,
- &cmd_depth[0], &cmd_bits[0],
+ BuildAndStoreHuffmanTree(cmd_histo.data_, BROTLI_NUM_COMMAND_SYMBOLS, tree,
+ cmd_depth, cmd_bits,
storage_ix, storage);
- BuildAndStoreHuffmanTree(&dist_histo.data_[0], 64, tree,
- &dist_depth[0], &dist_bits[0],
+ BuildAndStoreHuffmanTree(dist_histo.data_, 64, tree,
+ dist_depth, dist_bits,
storage_ix, storage);
- free(tree);
+ BROTLI_FREE(m, tree);
StoreDataWithHuffmanCodes(input, start_pos, mask, commands,
- n_commands, &lit_depth[0], &lit_bits[0],
- &cmd_depth[0], &cmd_bits[0],
- &dist_depth[0], &dist_bits[0],
+ n_commands, lit_depth, lit_bits,
+ cmd_depth, cmd_bits,
+ dist_depth, dist_bits,
storage_ix, storage);
if (is_last) {
JumpToByteBoundary(storage_ix, storage);
}
}
-void StoreMetaBlockFast(const uint8_t* input,
- size_t start_pos,
- size_t length,
- size_t mask,
- bool is_last,
- const brotli::Command *commands,
- size_t n_commands,
- size_t *storage_ix,
- uint8_t *storage) {
+void BrotliStoreMetaBlockFast(MemoryManager* m,
+ const uint8_t* input,
+ size_t start_pos,
+ size_t length,
+ size_t mask,
+ int is_last,
+ const Command *commands,
+ size_t n_commands,
+ size_t *storage_ix,
+ uint8_t *storage) {
StoreCompressedMetaBlockHeader(is_last, length, storage_ix, storage);
- WriteBits(13, 0, storage_ix, storage);
+ BrotliWriteBits(13, 0, storage_ix, storage);
if (n_commands <= 128) {
- uint32_t histogram[256] = { 0 };
+ uint32_t histogram[BROTLI_NUM_LITERAL_SYMBOLS] = { 0 };
size_t pos = start_pos;
size_t num_literals = 0;
- for (size_t i = 0; i < n_commands; ++i) {
+ size_t i;
+ uint8_t lit_depth[BROTLI_NUM_LITERAL_SYMBOLS];
+ uint16_t lit_bits[BROTLI_NUM_LITERAL_SYMBOLS];
+ for (i = 0; i < n_commands; ++i) {
const Command cmd = commands[i];
- for (size_t j = cmd.insert_len_; j != 0; --j) {
+ size_t j;
+ for (j = cmd.insert_len_; j != 0; --j) {
++histogram[input[pos & mask]];
++pos;
}
num_literals += cmd.insert_len_;
- pos += cmd.copy_len();
+ pos += CommandCopyLen(&cmd);
}
- uint8_t lit_depth[256] = { 0 };
- uint16_t lit_bits[256] = { 0 };
- BuildAndStoreHuffmanTreeFast(histogram, num_literals,
- /* max_bits = */ 8,
- lit_depth, lit_bits,
- storage_ix, storage);
+ BrotliBuildAndStoreHuffmanTreeFast(m, histogram, num_literals,
+ /* max_bits = */ 8,
+ lit_depth, lit_bits,
+ storage_ix, storage);
+ if (BROTLI_IS_OOM(m)) return;
StoreStaticCommandHuffmanTree(storage_ix, storage);
StoreStaticDistanceHuffmanTree(storage_ix, storage);
StoreDataWithHuffmanCodes(input, start_pos, mask, commands,
- n_commands, &lit_depth[0], &lit_bits[0],
+ n_commands, lit_depth, lit_bits,
kStaticCommandCodeDepth,
kStaticCommandCodeBits,
kStaticDistanceCodeDepth,
@@ -1113,30 +1241,39 @@ void StoreMetaBlockFast(const uint8_t* input,
HistogramLiteral lit_histo;
HistogramCommand cmd_histo;
HistogramDistance dist_histo;
+ uint8_t lit_depth[BROTLI_NUM_LITERAL_SYMBOLS];
+ uint16_t lit_bits[BROTLI_NUM_LITERAL_SYMBOLS];
+ uint8_t cmd_depth[BROTLI_NUM_COMMAND_SYMBOLS];
+ uint16_t cmd_bits[BROTLI_NUM_COMMAND_SYMBOLS];
+ uint8_t dist_depth[64];
+ uint16_t dist_bits[64];
+ HistogramClearLiteral(&lit_histo);
+ HistogramClearCommand(&cmd_histo);
+ HistogramClearDistance(&dist_histo);
BuildHistograms(input, start_pos, mask, commands, n_commands,
&lit_histo, &cmd_histo, &dist_histo);
- std::vector<uint8_t> lit_depth(256);
- std::vector<uint16_t> lit_bits(256);
- std::vector<uint8_t> cmd_depth(kNumCommandPrefixes);
- std::vector<uint16_t> cmd_bits(kNumCommandPrefixes);
- std::vector<uint8_t> dist_depth(64);
- std::vector<uint16_t> dist_bits(64);
- BuildAndStoreHuffmanTreeFast(&lit_histo.data_[0], lit_histo.total_count_,
- /* max_bits = */ 8,
- &lit_depth[0], &lit_bits[0],
- storage_ix, storage);
- BuildAndStoreHuffmanTreeFast(&cmd_histo.data_[0], cmd_histo.total_count_,
- /* max_bits = */ 10,
- &cmd_depth[0], &cmd_bits[0],
- storage_ix, storage);
- BuildAndStoreHuffmanTreeFast(&dist_histo.data_[0], dist_histo.total_count_,
- /* max_bits = */ 6,
- &dist_depth[0], &dist_bits[0],
- storage_ix, storage);
+ BrotliBuildAndStoreHuffmanTreeFast(m, lit_histo.data_,
+ lit_histo.total_count_,
+ /* max_bits = */ 8,
+ lit_depth, lit_bits,
+ storage_ix, storage);
+ if (BROTLI_IS_OOM(m)) return;
+ BrotliBuildAndStoreHuffmanTreeFast(m, cmd_histo.data_,
+ cmd_histo.total_count_,
+ /* max_bits = */ 10,
+ cmd_depth, cmd_bits,
+ storage_ix, storage);
+ if (BROTLI_IS_OOM(m)) return;
+ BrotliBuildAndStoreHuffmanTreeFast(m, dist_histo.data_,
+ dist_histo.total_count_,
+ /* max_bits = */ 6,
+ dist_depth, dist_bits,
+ storage_ix, storage);
+ if (BROTLI_IS_OOM(m)) return;
StoreDataWithHuffmanCodes(input, start_pos, mask, commands,
- n_commands, &lit_depth[0], &lit_bits[0],
- &cmd_depth[0], &cmd_bits[0],
- &dist_depth[0], &dist_bits[0],
+ n_commands, lit_depth, lit_bits,
+ cmd_depth, cmd_bits,
+ dist_depth, dist_bits,
storage_ix, storage);
}
@@ -1147,16 +1284,16 @@ void StoreMetaBlockFast(const uint8_t* input,
/* This is for storing uncompressed blocks (simple raw storage of
bytes-as-bytes). */
-void StoreUncompressedMetaBlock(bool final_block,
- const uint8_t * __restrict input,
- size_t position, size_t mask,
- size_t len,
- size_t * __restrict storage_ix,
- uint8_t * __restrict storage) {
- StoreUncompressedMetaBlockHeader(len, storage_ix, storage);
+void BrotliStoreUncompressedMetaBlock(int is_final_block,
+ const uint8_t * BROTLI_RESTRICT input,
+ size_t position, size_t mask,
+ size_t len,
+ size_t * BROTLI_RESTRICT storage_ix,
+ uint8_t * BROTLI_RESTRICT storage) {
+ size_t masked_pos = position & mask;
+ BrotliStoreUncompressedMetaBlockHeader(len, storage_ix, storage);
JumpToByteBoundary(storage_ix, storage);
- size_t masked_pos = position & mask;
if (masked_pos + len > mask + 1) {
size_t len1 = mask + 1 - masked_pos;
memcpy(&storage[*storage_ix >> 3], &input[masked_pos], len1);
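The split copy that starts here is the usual ring-buffer wrap-around idiom: when the masked start position plus the length runs past the end of the buffer, one memcpy takes the tail and a second (in the part of the hunk not shown above) takes the rest from the head. A self-contained sketch of the idiom with a made-up 8-byte ring:

#include <stddef.h>
#include <stdio.h>
#include <string.h>

int main(void) {
  const unsigned char ring[8] =
      { 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H' };
  const size_t mask = 7;           /* ring size is mask + 1 = 8 */
  const size_t position = 13;      /* unwrapped stream position */
  const size_t len = 5;
  unsigned char out[5];
  const size_t masked_pos = position & mask;   /* 5 */
  if (masked_pos + len > mask + 1) {
    const size_t len1 = mask + 1 - masked_pos; /* 3 bytes: F G H */
    memcpy(out, &ring[masked_pos], len1);
    memcpy(out + len1, &ring[0], len - len1);  /* 2 bytes: A B */
  } else {
    memcpy(out, &ring[masked_pos], len);
  }
  printf("%.5s\n", (const char*)out);          /* FGHAB */
  return 0;
}

Position 13 masks to 5, so three bytes come from the tail and two from the head, and the program prints FGHAB.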
@@ -1169,26 +1306,29 @@ void StoreUncompressedMetaBlock(bool final_block,
/* We need to clear the next 4 bytes to continue to be
compatible with BrotliWriteBits. */
- brotli::WriteBitsPrepareStorage(*storage_ix, storage);
+ BrotliWriteBitsPrepareStorage(*storage_ix, storage);
/* Since the uncompressed block itself may not be the final block, add an
empty one after this. */
- if (final_block) {
- brotli::WriteBits(1, 1, storage_ix, storage); /* islast */
- brotli::WriteBits(1, 1, storage_ix, storage); /* isempty */
+ if (is_final_block) {
+ BrotliWriteBits(1, 1, storage_ix, storage); /* islast */
+ BrotliWriteBits(1, 1, storage_ix, storage); /* isempty */
JumpToByteBoundary(storage_ix, storage);
}
}
-void StoreSyncMetaBlock(size_t * __restrict storage_ix,
- uint8_t * __restrict storage) {
- // Empty metadata meta-block bit pattern:
- // 1 bit: is_last (0)
- // 2 bits: num nibbles (3)
- // 1 bit: reserved (0)
- // 2 bits: metadata length bytes (0)
- WriteBits(6, 6, storage_ix, storage);
+void BrotliStoreSyncMetaBlock(size_t* BROTLI_RESTRICT storage_ix,
+ uint8_t* BROTLI_RESTRICT storage) {
+ /* Empty metadata meta-block bit pattern:
+ 1 bit: is_last (0)
+ 2 bits: num nibbles (3)
+ 1 bit: reserved (0)
+ 2 bits: metadata length bytes (0) */
+ BrotliWriteBits(6, 6, storage_ix, storage);
JumpToByteBoundary(storage_ix, storage);
}
-} // namespace brotli
+
+#if defined(__cplusplus) || defined(c_plusplus)
+} /* extern "C" */
+#endif
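Throughout the converted file, unchecked malloc/free and std::vector give way to the BROTLI_ALLOC / BROTLI_IS_OOM / BROTLI_FREE triple with an early return after every allocation. A minimal sketch of one way such a macro family can be modeled; the MemoryManager layout and the sticky-flag behavior are assumptions for illustration, not the definitions from enc/memory.h:

#include <stdlib.h>

/* Hypothetical stand-in for the encoder's MemoryManager. */
typedef struct MemoryManager {
  int is_oom;  /* sticky: set once any allocation fails */
} MemoryManager;

static void* MmAlloc(MemoryManager* m, size_t n) {
  void* p;
  if (m->is_oom) return NULL;    /* already failed; allocate nothing more */
  p = malloc(n);
  if (p == NULL) m->is_oom = 1;  /* record the failure for all callers */
  return p;
}

#define BROTLI_ALLOC(M, T, N) ((T*)MmAlloc((M), sizeof(T) * (N)))
#define BROTLI_IS_OOM(M) (!!(M)->is_oom)
#define BROTLI_FREE(M, P) do { (void)(M); free(P); (P) = NULL; } while (0)

On this model, because the flag is sticky, each "if (BROTLI_IS_OOM(m)) return;" lets an allocation failure unwind through plain C call chains, the role exceptions or container allocations would otherwise play in the C++ encoder.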
diff --git a/enc/brotli_bit_stream.h b/enc/brotli_bit_stream.h
index 27a6d07..e85f65d 100644
--- a/enc/brotli_bit_stream.h
+++ b/enc/brotli_bit_stream.h
@@ -16,164 +16,92 @@
#ifndef BROTLI_ENC_BROTLI_BIT_STREAM_H_
#define BROTLI_ENC_BROTLI_BIT_STREAM_H_
-#include <vector>
-
#include "../common/types.h"
+#include "./command.h"
+#include "./context.h"
#include "./entropy_encode.h"
+#include "./memory.h"
#include "./metablock.h"
+#include "./port.h"
-namespace brotli {
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
/* All Store functions here will use a storage_ix, which is always the bit
position for the current storage. */
-// Stores a number between 0 and 255.
-void StoreVarLenUint8(size_t n, size_t* storage_ix, uint8_t* storage);
-
-// Stores the compressed meta-block header.
-// REQUIRES: length > 0
-// REQUIRES: length <= (1 << 24)
-void StoreCompressedMetaBlockHeader(bool final_block,
- size_t length,
- size_t* storage_ix,
- uint8_t* storage);
-
-// Stores the uncompressed meta-block header.
-// REQUIRES: length > 0
-// REQUIRES: length <= (1 << 24)
-void StoreUncompressedMetaBlockHeader(size_t length,
- size_t* storage_ix,
- uint8_t* storage);
-
-// Stores a context map where the histogram type is always the block type.
-void StoreTrivialContextMap(size_t num_types,
- size_t context_bits,
- HuffmanTree* tree,
- size_t* storage_ix,
- uint8_t* storage);
-
-void StoreHuffmanTreeOfHuffmanTreeToBitMask(
- const int num_codes,
- const uint8_t *code_length_bitdepth,
- size_t *storage_ix,
- uint8_t *storage);
-
-void StoreHuffmanTree(const uint8_t* depths, size_t num, HuffmanTree* tree,
- size_t *storage_ix, uint8_t *storage);
-
-// Builds a Huffman tree from histogram[0:length] into depth[0:length] and
-// bits[0:length] and stores the encoded tree to the bit stream.
-void BuildAndStoreHuffmanTree(const uint32_t *histogram,
- const size_t length,
- HuffmanTree* tree,
- uint8_t* depth,
- uint16_t* bits,
- size_t* storage_ix,
- uint8_t* storage);
-
-void BuildAndStoreHuffmanTreeFast(const uint32_t *histogram,
- const size_t histogram_total,
- const size_t max_bits,
- uint8_t* depth,
- uint16_t* bits,
- size_t* storage_ix,
- uint8_t* storage);
-
-// Encodes the given context map to the bit stream. The number of different
-// histogram ids is given by num_clusters.
-void EncodeContextMap(const std::vector<uint32_t>& context_map,
- size_t num_clusters,
- HuffmanTree* tree,
- size_t* storage_ix, uint8_t* storage);
-
-// Data structure that stores everything that is needed to encode each block
-// switch command.
-struct BlockSplitCode {
- std::vector<uint32_t> type_code;
- std::vector<uint32_t> length_prefix;
- std::vector<uint32_t> length_nextra;
- std::vector<uint32_t> length_extra;
- std::vector<uint8_t> type_depths;
- std::vector<uint16_t> type_bits;
- uint8_t length_depths[kNumBlockLenPrefixes];
- uint16_t length_bits[kNumBlockLenPrefixes];
-};
-
-// Builds a BlockSplitCode data structure from the block split given by the
-// vector of block types and block lengths and stores it to the bit stream.
-void BuildAndStoreBlockSplitCode(const std::vector<uint8_t>& types,
- const std::vector<uint32_t>& lengths,
- const size_t num_types,
- BlockSplitCode* code,
- size_t* storage_ix,
- uint8_t* storage);
-
-// Stores the block switch command with index block_ix to the bit stream.
-void StoreBlockSwitch(const BlockSplitCode& code,
- const size_t block_ix,
- size_t* storage_ix,
- uint8_t* storage);
+BROTLI_INTERNAL void BrotliStoreHuffmanTree(const uint8_t* depths, size_t num,
+ HuffmanTree* tree, size_t *storage_ix, uint8_t *storage);
+
+BROTLI_INTERNAL void BrotliBuildAndStoreHuffmanTreeFast(
+ MemoryManager* m, const uint32_t* histogram, const size_t histogram_total,
+ const size_t max_bits, uint8_t* depth, uint16_t* bits, size_t* storage_ix,
+ uint8_t* storage);
/* REQUIRES: length > 0 */
/* REQUIRES: length <= (1 << 24) */
-void StoreMetaBlock(const uint8_t* input,
- size_t start_pos,
- size_t length,
- size_t mask,
- uint8_t prev_byte,
- uint8_t prev_byte2,
- bool final_block,
- uint32_t num_direct_distance_codes,
- uint32_t distance_postfix_bits,
- ContextType literal_context_mode,
- const brotli::Command *commands,
- size_t n_commands,
- const MetaBlockSplit& mb,
- size_t *storage_ix,
- uint8_t *storage);
+BROTLI_INTERNAL void BrotliStoreMetaBlock(MemoryManager* m,
+ const uint8_t* input,
+ size_t start_pos,
+ size_t length,
+ size_t mask,
+ uint8_t prev_byte,
+ uint8_t prev_byte2,
+ int is_final_block,
+ uint32_t num_direct_distance_codes,
+ uint32_t distance_postfix_bits,
+ ContextType literal_context_mode,
+ const Command* commands,
+ size_t n_commands,
+ const MetaBlockSplit* mb,
+ size_t* storage_ix,
+ uint8_t* storage);
/* Stores the meta-block without doing any block splitting, just collects
one histogram per block category and uses that for entropy coding.
REQUIRES: length > 0
REQUIRES: length <= (1 << 24) */
-void StoreMetaBlockTrivial(const uint8_t* input,
- size_t start_pos,
- size_t length,
- size_t mask,
- bool is_last,
- const brotli::Command *commands,
- size_t n_commands,
- size_t *storage_ix,
- uint8_t *storage);
+BROTLI_INTERNAL void BrotliStoreMetaBlockTrivial(MemoryManager* m,
+ const uint8_t* input,
+ size_t start_pos,
+ size_t length,
+ size_t mask,
+ int is_last,
+ const Command *commands,
+ size_t n_commands,
+ size_t* storage_ix,
+ uint8_t* storage);
/* Same as above, but uses static prefix codes for histograms with only a few
symbols, and uses static code length prefix codes for all other histograms.
REQUIRES: length > 0
REQUIRES: length <= (1 << 24) */
-void StoreMetaBlockFast(const uint8_t* input,
- size_t start_pos,
- size_t length,
- size_t mask,
- bool is_last,
- const brotli::Command *commands,
- size_t n_commands,
- size_t *storage_ix,
- uint8_t *storage);
+BROTLI_INTERNAL void BrotliStoreMetaBlockFast(MemoryManager* m,
+ const uint8_t* input,
+ size_t start_pos,
+ size_t length,
+ size_t mask,
+ int is_last,
+ const Command *commands,
+ size_t n_commands,
+ size_t* storage_ix,
+ uint8_t* storage);
/* This is for storing uncompressed blocks (simple raw storage of
bytes-as-bytes).
REQUIRES: length > 0
REQUIRES: length <= (1 << 24) */
-void StoreUncompressedMetaBlock(bool final_block,
- const uint8_t* input,
- size_t position, size_t mask,
- size_t len,
- size_t* storage_ix,
- uint8_t* storage);
+BROTLI_INTERNAL void BrotliStoreUncompressedMetaBlock(
+ int is_final_block, const uint8_t* input, size_t position, size_t mask,
+ size_t len, size_t* storage_ix, uint8_t* storage);
/* Stores an empty metadata meta-block and syncs to a byte boundary. */
-void StoreSyncMetaBlock(size_t* storage_ix, uint8_t* storage);
+BROTLI_INTERNAL void BrotliStoreSyncMetaBlock(size_t* storage_ix,
+ uint8_t* storage);
-} // namespace brotli
+#if defined(__cplusplus) || defined(c_plusplus)
+} /* extern "C" */
+#endif
#endif /* BROTLI_ENC_BROTLI_BIT_STREAM_H_ */
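As the header comment states, every Store function treats storage_ix as a bit position into storage. A hedged usage sketch; the flush arithmetic and the caller's sizing responsibility are my reading of that convention, not code from this patch:

#include <stddef.h>
#include <stdint.h>
#include "./brotli_bit_stream.h"

/* Append a sync meta-block and return the number of whole bytes now
   valid in storage. Assumes storage has room for the 6 emitted bits. */
static size_t EmitSync(uint8_t* storage, size_t* storage_ix) {
  BrotliStoreSyncMetaBlock(storage_ix, storage);
  /* BrotliStoreSyncMetaBlock ends by jumping to a byte boundary, so the
     bit index is a multiple of 8 here and the shift loses nothing. */
  return *storage_ix >> 3;
}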
diff --git a/enc/cluster.c b/enc/cluster.c
new file mode 100644
index 0000000..d90873c
--- /dev/null
+++ b/enc/cluster.c
@@ -0,0 +1,56 @@
+/* Copyright 2013 Google Inc. All Rights Reserved.
+
+ Distributed under MIT license.
+ See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* Functions for clustering similar histograms together. */
+
+#include "./cluster.h"
+
+#include "../common/types.h"
+#include "./bit_cost.h" /* BrotliPopulationCost */
+#include "./fast_log.h"
+#include "./histogram.h"
+#include "./memory.h"
+#include "./port.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+static BROTLI_INLINE int HistogramPairIsLess(
+ const HistogramPair* p1, const HistogramPair* p2) {
+ if (p1->cost_diff != p2->cost_diff) {
+ return (p1->cost_diff > p2->cost_diff) ? 1 : 0;
+ }
+ return ((p1->idx2 - p1->idx1) > (p2->idx2 - p2->idx1)) ? 1 : 0;
+}
+
+/* Returns entropy reduction of the context map when we combine two clusters. */
+static BROTLI_INLINE double ClusterCostDiff(size_t size_a, size_t size_b) {
+ size_t size_c = size_a + size_b;
+ return (double)size_a * FastLog2(size_a) +
+ (double)size_b * FastLog2(size_b) -
+ (double)size_c * FastLog2(size_c);
+}
+
+#define CODE(X) X
+
+#define FN(X) X ## Literal
+#include "./cluster_inc.h" /* NOLINT(build/include) */
+#undef FN
+
+#define FN(X) X ## Command
+#include "./cluster_inc.h" /* NOLINT(build/include) */
+#undef FN
+
+#define FN(X) X ## Distance
+#include "./cluster_inc.h" /* NOLINT(build/include) */
+#undef FN
+
+#undef CODE
+
+#if defined(__cplusplus) || defined(c_plusplus)
+} /* extern "C" */
+#endif
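ClusterCostDiff is the change in the context map's entropy term when two clusters of the given sizes merge. A quick numeric check of the formula as written (reading the negative result as bits saved is an interpretation, not something the patch states):

#include <math.h>
#include <stdio.h>

static double CostTerm(double size) { return size * (log(size) / log(2.0)); }

int main(void) {
  /* Merging two clusters of 2 entries into one of 4:
     2*log2(2) + 2*log2(2) - 4*log2(4) = 2 + 2 - 8 = -4. */
  printf("%f\n", CostTerm(2) + CostTerm(2) - CostTerm(4));
  return 0;
}

The call sites scale this value by 0.5 before weighing it against the histograms' own bit costs.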
diff --git a/enc/cluster.h b/enc/cluster.h
index 166fd36..241e309 100644
--- a/enc/cluster.h
+++ b/enc/cluster.h
@@ -9,323 +9,40 @@
#ifndef BROTLI_ENC_CLUSTER_H_
#define BROTLI_ENC_CLUSTER_H_
-#include <math.h>
-
-#include <algorithm>
-#include <utility>
-#include <vector>
-
#include "../common/types.h"
-#include "./bit_cost.h"
-#include "./entropy_encode.h"
-#include "./fast_log.h"
#include "./histogram.h"
+#include "./memory.h"
#include "./port.h"
-namespace brotli {
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
-struct HistogramPair {
+typedef struct HistogramPair {
uint32_t idx1;
uint32_t idx2;
double cost_combo;
double cost_diff;
-};
-
-inline bool operator<(const HistogramPair& p1, const HistogramPair& p2) {
- if (p1.cost_diff != p2.cost_diff) {
- return p1.cost_diff > p2.cost_diff;
- }
- return (p1.idx2 - p1.idx1) > (p2.idx2 - p2.idx1);
-}
-
-// Returns entropy reduction of the context map when we combine two clusters.
-inline double ClusterCostDiff(size_t size_a, size_t size_b) {
- size_t size_c = size_a + size_b;
- return static_cast<double>(size_a) * FastLog2(size_a) +
- static_cast<double>(size_b) * FastLog2(size_b) -
- static_cast<double>(size_c) * FastLog2(size_c);
-}
-
-// Computes the bit cost reduction by combining out[idx1] and out[idx2] and if
-// it is below a threshold, stores the pair (idx1, idx2) in the *pairs queue.
-template<typename HistogramType>
-void CompareAndPushToQueue(const HistogramType* out,
- const uint32_t* cluster_size,
- uint32_t idx1, uint32_t idx2,
- size_t max_num_pairs,
- HistogramPair* pairs,
- size_t* num_pairs) {
- if (idx1 == idx2) {
- return;
- }
- if (idx2 < idx1) {
- uint32_t t = idx2;
- idx2 = idx1;
- idx1 = t;
- }
- bool store_pair = false;
- HistogramPair p;
- p.idx1 = idx1;
- p.idx2 = idx2;
- p.cost_diff = 0.5 * ClusterCostDiff(cluster_size[idx1], cluster_size[idx2]);
- p.cost_diff -= out[idx1].bit_cost_;
- p.cost_diff -= out[idx2].bit_cost_;
-
- if (out[idx1].total_count_ == 0) {
- p.cost_combo = out[idx2].bit_cost_;
- store_pair = true;
- } else if (out[idx2].total_count_ == 0) {
- p.cost_combo = out[idx1].bit_cost_;
- store_pair = true;
- } else {
- double threshold = *num_pairs == 0 ? 1e99 :
- std::max(0.0, pairs[0].cost_diff);
- HistogramType combo = out[idx1];
- combo.AddHistogram(out[idx2]);
- double cost_combo = PopulationCost(combo);
- if (cost_combo < threshold - p.cost_diff) {
- p.cost_combo = cost_combo;
- store_pair = true;
- }
- }
- if (store_pair) {
- p.cost_diff += p.cost_combo;
- if (*num_pairs > 0 && pairs[0] < p) {
- // Replace the top of the queue if needed.
- if (*num_pairs < max_num_pairs) {
- pairs[*num_pairs] = pairs[0];
- ++(*num_pairs);
- }
- pairs[0] = p;
- } else if (*num_pairs < max_num_pairs) {
- pairs[*num_pairs] = p;
- ++(*num_pairs);
- }
- }
-}
-
-template<typename HistogramType>
-size_t HistogramCombine(HistogramType* out,
- uint32_t* cluster_size,
- uint32_t* symbols,
- uint32_t* clusters,
- HistogramPair* pairs,
- size_t num_clusters,
- size_t symbols_size,
- size_t max_clusters,
- size_t max_num_pairs) {
- double cost_diff_threshold = 0.0;
- size_t min_cluster_size = 1;
-
- // We maintain a vector of histogram pairs, with the property that the pair
- // with the maximum bit cost reduction is the first.
- size_t num_pairs = 0;
- for (size_t idx1 = 0; idx1 < num_clusters; ++idx1) {
- for (size_t idx2 = idx1 + 1; idx2 < num_clusters; ++idx2) {
- CompareAndPushToQueue(out, cluster_size, clusters[idx1], clusters[idx2],
- max_num_pairs, &pairs[0], &num_pairs);
- }
- }
-
- while (num_clusters > min_cluster_size) {
- if (pairs[0].cost_diff >= cost_diff_threshold) {
- cost_diff_threshold = 1e99;
- min_cluster_size = max_clusters;
- continue;
- }
- // Take the best pair from the top of heap.
- uint32_t best_idx1 = pairs[0].idx1;
- uint32_t best_idx2 = pairs[0].idx2;
- out[best_idx1].AddHistogram(out[best_idx2]);
- out[best_idx1].bit_cost_ = pairs[0].cost_combo;
- cluster_size[best_idx1] += cluster_size[best_idx2];
- for (size_t i = 0; i < symbols_size; ++i) {
- if (symbols[i] == best_idx2) {
- symbols[i] = best_idx1;
- }
- }
- for (size_t i = 0; i < num_clusters; ++i) {
- if (clusters[i] == best_idx2) {
- memmove(&clusters[i], &clusters[i + 1],
- (num_clusters - i - 1) * sizeof(clusters[0]));
- break;
- }
- }
- --num_clusters;
- // Remove pairs intersecting the just combined best pair.
- size_t copy_to_idx = 0;
- for (size_t i = 0; i < num_pairs; ++i) {
- HistogramPair& p = pairs[i];
- if (p.idx1 == best_idx1 || p.idx2 == best_idx1 ||
- p.idx1 == best_idx2 || p.idx2 == best_idx2) {
- // Remove invalid pair from the queue.
- continue;
- }
- if (pairs[0] < p) {
- // Replace the top of the queue if needed.
- HistogramPair front = pairs[0];
- pairs[0] = p;
- pairs[copy_to_idx] = front;
- } else {
- pairs[copy_to_idx] = p;
- }
- ++copy_to_idx;
- }
- num_pairs = copy_to_idx;
-
- // Push new pairs formed with the combined histogram to the heap.
- for (size_t i = 0; i < num_clusters; ++i) {
- CompareAndPushToQueue(out, cluster_size, best_idx1, clusters[i],
- max_num_pairs, &pairs[0], &num_pairs);
- }
- }
- return num_clusters;
-}
-
-// -----------------------------------------------------------------------------
-// Histogram refinement
-
-// What is the bit cost of moving histogram from cur_symbol to candidate.
-template<typename HistogramType>
-double HistogramBitCostDistance(const HistogramType& histogram,
- const HistogramType& candidate) {
- if (histogram.total_count_ == 0) {
- return 0.0;
- }
- HistogramType tmp = histogram;
- tmp.AddHistogram(candidate);
- return PopulationCost(tmp) - candidate.bit_cost_;
-}
-
-// Find the best 'out' histogram for each of the 'in' histograms.
-// When called, clusters[0..num_clusters) contains the unique values from
-// symbols[0..in_size), but this property is not preserved in this function.
-// Note: we assume that out[]->bit_cost_ is already up-to-date.
-template<typename HistogramType>
-void HistogramRemap(const HistogramType* in, size_t in_size,
- const uint32_t* clusters, size_t num_clusters,
- HistogramType* out, uint32_t* symbols) {
- for (size_t i = 0; i < in_size; ++i) {
- uint32_t best_out = i == 0 ? symbols[0] : symbols[i - 1];
- double best_bits = HistogramBitCostDistance(in[i], out[best_out]);
- for (size_t j = 0; j < num_clusters; ++j) {
- const double cur_bits = HistogramBitCostDistance(in[i], out[clusters[j]]);
- if (cur_bits < best_bits) {
- best_bits = cur_bits;
- best_out = clusters[j];
- }
- }
- symbols[i] = best_out;
- }
-
- // Recompute each out based on raw and symbols.
- for (size_t j = 0; j < num_clusters; ++j) {
- out[clusters[j]].Clear();
- }
- for (size_t i = 0; i < in_size; ++i) {
- out[symbols[i]].AddHistogram(in[i]);
- }
-}
-
-// Reorders elements of the out[0..length) array and changes values in
-// symbols[0..length) array in the following way:
-// * when called, symbols[] contains indexes into out[], and has N unique
-// values (possibly N < length)
-// * on return, symbols'[i] = f(symbols[i]) and
-// out'[symbols'[i]] = out[symbols[i]], for each 0 <= i < length,
-// where f is a bijection between the range of symbols[] and [0..N), and
-// the first occurrences of values in symbols'[i] come in consecutive
-// increasing order.
-// Returns N, the number of unique values in symbols[].
-template<typename HistogramType>
-size_t HistogramReindex(HistogramType* out, uint32_t* symbols, size_t length) {
- static const uint32_t kInvalidIndex = std::numeric_limits<uint32_t>::max();
- std::vector<uint32_t> new_index(length, kInvalidIndex);
- uint32_t next_index = 0;
- for (size_t i = 0; i < length; ++i) {
- if (new_index[symbols[i]] == kInvalidIndex) {
- new_index[symbols[i]] = next_index;
- ++next_index;
- }
- }
- std::vector<HistogramType> tmp(next_index);
- next_index = 0;
- for (size_t i = 0; i < length; ++i) {
- if (new_index[symbols[i]] == next_index) {
- tmp[next_index] = out[symbols[i]];
- ++next_index;
- }
- symbols[i] = new_index[symbols[i]];
- }
- for (size_t i = 0; i < next_index; ++i) {
- out[i] = tmp[i];
- }
- return next_index;
-}
-
-// Clusters similar histograms in 'in' together, the selected histograms are
-// placed in 'out', and for each index in 'in', *histogram_symbols will
-// indicate which of the 'out' histograms is the best approximation.
-template<typename HistogramType>
-void ClusterHistograms(const std::vector<HistogramType>& in,
- size_t num_contexts, size_t num_blocks,
- size_t max_histograms,
- std::vector<HistogramType>* out,
- std::vector<uint32_t>* histogram_symbols) {
- const size_t in_size = num_contexts * num_blocks;
- assert(in_size == in.size());
- std::vector<uint32_t> cluster_size(in_size, 1);
- std::vector<uint32_t> clusters(in_size);
- size_t num_clusters = 0;
- out->resize(in_size);
- histogram_symbols->resize(in_size);
- for (size_t i = 0; i < in_size; ++i) {
- (*out)[i] = in[i];
- (*out)[i].bit_cost_ = PopulationCost(in[i]);
- (*histogram_symbols)[i] = static_cast<uint32_t>(i);
- }
-
- const size_t max_input_histograms = 64;
- // For the first pass of clustering, we allow all pairs.
- size_t max_num_pairs = max_input_histograms * max_input_histograms / 2;
- std::vector<HistogramPair> pairs(max_num_pairs + 1);
+} HistogramPair;
- for (size_t i = 0; i < in_size; i += max_input_histograms) {
- size_t num_to_combine = std::min(in_size - i, max_input_histograms);
- for (size_t j = 0; j < num_to_combine; ++j) {
- clusters[num_clusters + j] = static_cast<uint32_t>(i + j);
- }
- size_t num_new_clusters =
- HistogramCombine(&(*out)[0], &cluster_size[0],
- &(*histogram_symbols)[i],
- &clusters[num_clusters], &pairs[0],
- num_to_combine, num_to_combine,
- max_histograms, max_num_pairs);
- num_clusters += num_new_clusters;
- }
+#define CODE(X) /* Declaration */;
- // For the second pass, we limit the total number of histogram pairs.
- // After this limit is reached, we only keep searching for the best pair.
- max_num_pairs =
- std::min(64 * num_clusters, (num_clusters / 2) * num_clusters);
- pairs.resize(max_num_pairs + 1);
+#define FN(X) X ## Literal
+#include "./cluster_inc.h" /* NOLINT(build/include) */
+#undef FN
- // Collapse similar histograms.
- num_clusters = HistogramCombine(&(*out)[0], &cluster_size[0],
- &(*histogram_symbols)[0], &clusters[0],
- &pairs[0], num_clusters, in_size,
- max_histograms, max_num_pairs);
+#define FN(X) X ## Command
+#include "./cluster_inc.h" /* NOLINT(build/include) */
+#undef FN
- // Find the optimal map from original histograms to the final ones.
- HistogramRemap(&in[0], in_size, &clusters[0], num_clusters,
- &(*out)[0], &(*histogram_symbols)[0]);
+#define FN(X) X ## Distance
+#include "./cluster_inc.h" /* NOLINT(build/include) */
+#undef FN
- // Convert the context map to a canonical form.
- size_t num_histograms =
- HistogramReindex(&(*out)[0], &(*histogram_symbols)[0], in_size);
- out->resize(num_histograms);
-}
+#undef CODE
-} // namespace brotli
+#if defined(__cplusplus) || defined(c_plusplus)
+} /* extern "C" */
+#endif
#endif /* BROTLI_ENC_CLUSTER_H_ */
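The CODE(X) define above is half of the poor-man's-template machinery: this header expands CODE to swallow function bodies, so including cluster_inc.h yields prototypes, while cluster.c defines CODE as the identity so the very same include yields definitions, and FN pastes a per-type suffix onto every name. A compilable toy reduction of the pattern (all names invented; in brotli the template text lives in cluster_inc.h rather than a macro):

#include <stddef.h>

#define FN(X) X ## Toy

/* The shared "template" text; in brotli it lives in cluster_inc.h and is
   pulled in with #include rather than a macro. */
#define TEMPLATE_BODY \
  static size_t FN(CountNonZero)(const int* v, size_t n) CODE({ \
    size_t c = 0; \
    size_t i; \
    for (i = 0; i < n; ++i) { if (v[i] != 0) ++c; } \
    return c; \
  })

/* Header-style pass: CODE drops the body, leaving a prototype. */
#define CODE(X) ;
TEMPLATE_BODY
#undef CODE

/* Source-style pass: CODE keeps the body verbatim. */
#define CODE(X) X
TEMPLATE_BODY
#undef CODE

int main(void) {
  int v[3] = { 0, 4, 5 };
  return (int)CountNonZeroToy(v, 3) - 2;  /* exits 0: two non-zeros */
}

Note that the template text keeps every comma inside parentheses, since the whole body travels to CODE as a single macro argument; the real cluster_inc.h quietly honors the same constraint.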
diff --git a/enc/cluster_inc.h b/enc/cluster_inc.h
new file mode 100644
index 0000000..399a654
--- /dev/null
+++ b/enc/cluster_inc.h
@@ -0,0 +1,315 @@
+/* NOLINT(build/header_guard) */
+/* Copyright 2013 Google Inc. All Rights Reserved.
+
+ Distributed under MIT license.
+ See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* template parameters: FN, CODE */
+
+#define HistogramType FN(Histogram)
+
+/* Computes the bit cost reduction by combining out[idx1] and out[idx2] and if
+ it is below a threshold, stores the pair (idx1, idx2) in the *pairs queue. */
+BROTLI_INTERNAL void FN(BrotliCompareAndPushToQueue)(
+ const HistogramType* out, const uint32_t* cluster_size, uint32_t idx1,
+ uint32_t idx2, size_t max_num_pairs, HistogramPair* pairs,
+ size_t* num_pairs) CODE({
+ int is_good_pair = 0;
+ HistogramPair p;
+ if (idx1 == idx2) {
+ return;
+ }
+ if (idx2 < idx1) {
+ uint32_t t = idx2;
+ idx2 = idx1;
+ idx1 = t;
+ }
+ p.idx1 = idx1;
+ p.idx2 = idx2;
+ p.cost_diff = 0.5 * ClusterCostDiff(cluster_size[idx1], cluster_size[idx2]);
+ p.cost_diff -= out[idx1].bit_cost_;
+ p.cost_diff -= out[idx2].bit_cost_;
+
+ if (out[idx1].total_count_ == 0) {
+ p.cost_combo = out[idx2].bit_cost_;
+ is_good_pair = 1;
+ } else if (out[idx2].total_count_ == 0) {
+ p.cost_combo = out[idx1].bit_cost_;
+ is_good_pair = 1;
+ } else {
+ double threshold = *num_pairs == 0 ? 1e99 :
+ BROTLI_MAX(double, 0.0, pairs[0].cost_diff);
+ HistogramType combo = out[idx1];
+ double cost_combo;
+ FN(HistogramAddHistogram)(&combo, &out[idx2]);
+ cost_combo = FN(BrotliPopulationCost)(&combo);
+ if (cost_combo < threshold - p.cost_diff) {
+ p.cost_combo = cost_combo;
+ is_good_pair = 1;
+ }
+ }
+ if (is_good_pair) {
+ p.cost_diff += p.cost_combo;
+ if (*num_pairs > 0 && HistogramPairIsLess(&pairs[0], &p)) {
+ /* Replace the top of the queue if needed. */
+ if (*num_pairs < max_num_pairs) {
+ pairs[*num_pairs] = pairs[0];
+ ++(*num_pairs);
+ }
+ pairs[0] = p;
+ } else if (*num_pairs < max_num_pairs) {
+ pairs[*num_pairs] = p;
+ ++(*num_pairs);
+ }
+ }
+})
+
+BROTLI_INTERNAL size_t FN(BrotliHistogramCombine)(HistogramType* out,
+ uint32_t* cluster_size,
+ uint32_t* symbols,
+ uint32_t* clusters,
+ HistogramPair* pairs,
+ size_t num_clusters,
+ size_t symbols_size,
+ size_t max_clusters,
+ size_t max_num_pairs) CODE({
+ double cost_diff_threshold = 0.0;
+ size_t min_cluster_size = 1;
+ size_t num_pairs = 0;
+
+ {
+ /* We maintain a vector of histogram pairs, with the property that the pair
+ with the maximum bit cost reduction is the first. */
+ size_t idx1;
+ for (idx1 = 0; idx1 < num_clusters; ++idx1) {
+ size_t idx2;
+ for (idx2 = idx1 + 1; idx2 < num_clusters; ++idx2) {
+ FN(BrotliCompareAndPushToQueue)(out, cluster_size, clusters[idx1],
+ clusters[idx2], max_num_pairs, &pairs[0], &num_pairs);
+ }
+ }
+ }
+
+ while (num_clusters > min_cluster_size) {
+ uint32_t best_idx1;
+ uint32_t best_idx2;
+ size_t i;
+ if (pairs[0].cost_diff >= cost_diff_threshold) {
+ cost_diff_threshold = 1e99;
+ min_cluster_size = max_clusters;
+ continue;
+ }
+ /* Take the best pair from the top of heap. */
+ best_idx1 = pairs[0].idx1;
+ best_idx2 = pairs[0].idx2;
+ FN(HistogramAddHistogram)(&out[best_idx1], &out[best_idx2]);
+ out[best_idx1].bit_cost_ = pairs[0].cost_combo;
+ cluster_size[best_idx1] += cluster_size[best_idx2];
+ for (i = 0; i < symbols_size; ++i) {
+ if (symbols[i] == best_idx2) {
+ symbols[i] = best_idx1;
+ }
+ }
+ for (i = 0; i < num_clusters; ++i) {
+ if (clusters[i] == best_idx2) {
+ memmove(&clusters[i], &clusters[i + 1],
+ (num_clusters - i - 1) * sizeof(clusters[0]));
+ break;
+ }
+ }
+ --num_clusters;
+ {
+ /* Remove pairs intersecting the just combined best pair. */
+ size_t copy_to_idx = 0;
+ for (i = 0; i < num_pairs; ++i) {
+ HistogramPair* p = &pairs[i];
+ if (p->idx1 == best_idx1 || p->idx2 == best_idx1 ||
+ p->idx1 == best_idx2 || p->idx2 == best_idx2) {
+ /* Remove invalid pair from the queue. */
+ continue;
+ }
+ if (HistogramPairIsLess(&pairs[0], p)) {
+ /* Replace the top of the queue if needed. */
+ HistogramPair front = pairs[0];
+ pairs[0] = *p;
+ pairs[copy_to_idx] = front;
+ } else {
+ pairs[copy_to_idx] = *p;
+ }
+ ++copy_to_idx;
+ }
+ num_pairs = copy_to_idx;
+ }
+
+ /* Push new pairs formed with the combined histogram to the heap. */
+ for (i = 0; i < num_clusters; ++i) {
+ FN(BrotliCompareAndPushToQueue)(out, cluster_size, best_idx1, clusters[i],
+ max_num_pairs, &pairs[0], &num_pairs);
+ }
+ }
+ return num_clusters;
+})
+
+/* Returns the bit cost of moving a histogram from cur_symbol to candidate. */
+BROTLI_INTERNAL double FN(BrotliHistogramBitCostDistance)(
+ const HistogramType* histogram, const HistogramType* candidate) CODE({
+ if (histogram->total_count_ == 0) {
+ return 0.0;
+ } else {
+ HistogramType tmp = *histogram;
+ FN(HistogramAddHistogram)(&tmp, candidate);
+ return FN(BrotliPopulationCost)(&tmp) - candidate->bit_cost_;
+ }
+})
+
+/* Find the best 'out' histogram for each of the 'in' histograms.
+ When called, clusters[0..num_clusters) contains the unique values from
+ symbols[0..in_size), but this property is not preserved in this function.
+ Note: we assume that out[]->bit_cost_ is already up-to-date. */
+BROTLI_INTERNAL void FN(BrotliHistogramRemap)(const HistogramType* in,
+ size_t in_size, const uint32_t* clusters, size_t num_clusters,
+ HistogramType* out, uint32_t* symbols) CODE({
+ size_t i;
+ for (i = 0; i < in_size; ++i) {
+ uint32_t best_out = i == 0 ? symbols[0] : symbols[i - 1];
+ double best_bits =
+ FN(BrotliHistogramBitCostDistance)(&in[i], &out[best_out]);
+ size_t j;
+ for (j = 0; j < num_clusters; ++j) {
+ const double cur_bits =
+ FN(BrotliHistogramBitCostDistance)(&in[i], &out[clusters[j]]);
+ if (cur_bits < best_bits) {
+ best_bits = cur_bits;
+ best_out = clusters[j];
+ }
+ }
+ symbols[i] = best_out;
+ }
+
+  /* Recompute each out histogram based on in[] and symbols[]. */
+ for (i = 0; i < num_clusters; ++i) {
+ FN(HistogramClear)(&out[clusters[i]]);
+ }
+ for (i = 0; i < in_size; ++i) {
+ FN(HistogramAddHistogram)(&out[symbols[i]], &in[i]);
+ }
+})
+
+/* Reorders elements of the out[0..length) array and changes values in
+ symbols[0..length) array in the following way:
+ * when called, symbols[] contains indexes into out[], and has N unique
+ values (possibly N < length)
+ * on return, symbols'[i] = f(symbols[i]) and
+ out'[symbols'[i]] = out[symbols[i]], for each 0 <= i < length,
+ where f is a bijection between the range of symbols[] and [0..N), and
+ the first occurrences of values in symbols'[i] come in consecutive
+ increasing order.
+ Returns N, the number of unique values in symbols[]. */
+BROTLI_INTERNAL size_t FN(BrotliHistogramReindex)(MemoryManager* m,
+ HistogramType* out, uint32_t* symbols, size_t length) CODE({
+ static const uint32_t kInvalidIndex = BROTLI_UINT32_MAX;
+ uint32_t* new_index = BROTLI_ALLOC(m, uint32_t, length);
+ uint32_t next_index;
+ HistogramType* tmp;
+ size_t i;
+ if (BROTLI_IS_OOM(m)) return 0;
+ for (i = 0; i < length; ++i) {
+ new_index[i] = kInvalidIndex;
+ }
+ next_index = 0;
+ for (i = 0; i < length; ++i) {
+ if (new_index[symbols[i]] == kInvalidIndex) {
+ new_index[symbols[i]] = next_index;
+ ++next_index;
+ }
+ }
+  /* TODO: by using the idea of "cycle-sort" we can avoid the allocation of
+     tmp and reduce the amount of copying by a factor of 2. */
+ tmp = BROTLI_ALLOC(m, HistogramType, next_index);
+ if (BROTLI_IS_OOM(m)) return 0;
+ next_index = 0;
+ for (i = 0; i < length; ++i) {
+ if (new_index[symbols[i]] == next_index) {
+ tmp[next_index] = out[symbols[i]];
+ ++next_index;
+ }
+ symbols[i] = new_index[symbols[i]];
+ }
+ BROTLI_FREE(m, new_index);
+ for (i = 0; i < next_index; ++i) {
+ out[i] = tmp[i];
+ }
+ BROTLI_FREE(m, tmp);
+ return next_index;
+})
+
+BROTLI_INTERNAL void FN(BrotliClusterHistograms)(
+ MemoryManager* m, const HistogramType* in, const size_t in_size,
+ size_t max_histograms, HistogramType* out, size_t* out_size,
+ uint32_t* histogram_symbols) CODE({
+ uint32_t* cluster_size = BROTLI_ALLOC(m, uint32_t, in_size);
+ uint32_t* clusters = BROTLI_ALLOC(m, uint32_t, in_size);
+ size_t num_clusters = 0;
+ const size_t max_input_histograms = 64;
+ size_t pairs_capacity = max_input_histograms * max_input_histograms / 2;
+ /* For the first pass of clustering, we allow all pairs. */
+ HistogramPair* pairs = BROTLI_ALLOC(m, HistogramPair, pairs_capacity + 1);
+ size_t i;
+
+ if (BROTLI_IS_OOM(m)) return;
+
+ for (i = 0; i < in_size; ++i) {
+ cluster_size[i] = 1;
+ }
+
+ for (i = 0; i < in_size; ++i) {
+ out[i] = in[i];
+ out[i].bit_cost_ = FN(BrotliPopulationCost)(&in[i]);
+ histogram_symbols[i] = (uint32_t)i;
+ }
+
+ for (i = 0; i < in_size; i += max_input_histograms) {
+ size_t num_to_combine =
+ BROTLI_MIN(size_t, in_size - i, max_input_histograms);
+ size_t num_new_clusters;
+ size_t j;
+ for (j = 0; j < num_to_combine; ++j) {
+ clusters[num_clusters + j] = (uint32_t)(i + j);
+ }
+ num_new_clusters =
+ FN(BrotliHistogramCombine)(out, cluster_size,
+ &histogram_symbols[i],
+ &clusters[num_clusters], pairs,
+ num_to_combine, num_to_combine,
+ max_histograms, pairs_capacity);
+ num_clusters += num_new_clusters;
+ }
+
+ {
+ /* For the second pass, we limit the total number of histogram pairs.
+ After this limit is reached, we only keep searching for the best pair. */
+ size_t max_num_pairs = BROTLI_MIN(size_t,
+ 64 * num_clusters, (num_clusters / 2) * num_clusters);
+ BROTLI_ENSURE_CAPACITY(
+ m, HistogramPair, pairs, pairs_capacity, max_num_pairs + 1);
+ if (BROTLI_IS_OOM(m)) return;
+
+ /* Collapse similar histograms. */
+ num_clusters = FN(BrotliHistogramCombine)(out, cluster_size,
+ histogram_symbols, clusters,
+ pairs, num_clusters, in_size,
+ max_histograms, max_num_pairs);
+ }
+ BROTLI_FREE(m, pairs);
+ BROTLI_FREE(m, cluster_size);
+ /* Find the optimal map from original histograms to the final ones. */
+ FN(BrotliHistogramRemap)(in, in_size, clusters, num_clusters,
+ out, histogram_symbols);
+ BROTLI_FREE(m, clusters);
+ /* Convert the context map to a canonical form. */
+ *out_size = FN(BrotliHistogramReindex)(m, out, histogram_symbols, in_size);
+ if (BROTLI_IS_OOM(m)) return;
+})
+
+#undef HistogramType
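BrotliHistogramReindex renumbers cluster ids so that first occurrences appear in increasing order. A standalone trace of the symbols[] side of that contract, folded into a single loop (the real function uses two passes, reorders the histograms too, and allocates through the MemoryManager):

#include <stdint.h>
#include <stdio.h>

int main(void) {
  uint32_t symbols[4] = { 5, 2, 5, 7 };
  uint32_t new_index[8];
  uint32_t next_index = 0;
  size_t i;
  for (i = 0; i < 8; ++i) new_index[i] = 0xFFFFFFFFu;  /* kInvalidIndex */
  for (i = 0; i < 4; ++i) {
    if (new_index[symbols[i]] == 0xFFFFFFFFu) {
      new_index[symbols[i]] = next_index++;  /* first occurrence */
    }
    symbols[i] = new_index[symbols[i]];
  }
  for (i = 0; i < 4; ++i) printf("%u ", (unsigned)symbols[i]);
  printf("\n");  /* 0 1 0 2; next_index == 3 is the returned N */
  return 0;
}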
diff --git a/enc/command.h b/enc/command.h
index c601dac..4427371 100644
--- a/enc/command.h
+++ b/enc/command.h
@@ -10,10 +10,13 @@
#define BROTLI_ENC_COMMAND_H_
#include "../common/types.h"
+#include "../common/port.h"
#include "./fast_log.h"
#include "./prefix.h"
-namespace brotli {
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
static uint32_t kInsBase[] = { 0, 1, 2, 3, 4, 5, 6, 8, 10, 14, 18, 26, 34, 50,
66, 98, 130, 194, 322, 578, 1090, 2114, 6210, 22594 };
@@ -24,15 +27,14 @@ static uint32_t kCopyBase[] = { 2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 14, 18, 22, 30,
static uint32_t kCopyExtra[] = { 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 2, 2, 3, 3,
4, 4, 5, 5, 6, 7, 8, 9, 10, 24 };
-static inline uint16_t GetInsertLengthCode(size_t insertlen) {
+static BROTLI_INLINE uint16_t GetInsertLengthCode(size_t insertlen) {
if (insertlen < 6) {
- return static_cast<uint16_t>(insertlen);
+ return (uint16_t)insertlen;
} else if (insertlen < 130) {
- insertlen -= 2;
- uint32_t nbits = Log2FloorNonZero(insertlen) - 1u;
- return static_cast<uint16_t>((nbits << 1) + (insertlen >> nbits) + 2);
+ uint32_t nbits = Log2FloorNonZero(insertlen - 2) - 1u;
+ return (uint16_t)((nbits << 1) + ((insertlen - 2) >> nbits) + 2);
} else if (insertlen < 2114) {
- return static_cast<uint16_t>(Log2FloorNonZero(insertlen - 66) + 10);
+ return (uint16_t)(Log2FloorNonZero(insertlen - 66) + 10);
} else if (insertlen < 6210) {
return 21u;
} else if (insertlen < 22594) {
@@ -42,24 +44,23 @@ static inline uint16_t GetInsertLengthCode(size_t insertlen) {
}
}
-static inline uint16_t GetCopyLengthCode(size_t copylen) {
+static BROTLI_INLINE uint16_t GetCopyLengthCode(size_t copylen) {
if (copylen < 10) {
- return static_cast<uint16_t>(copylen - 2);
+ return (uint16_t)(copylen - 2);
} else if (copylen < 134) {
- copylen -= 6;
- uint32_t nbits = Log2FloorNonZero(copylen) - 1u;
- return static_cast<uint16_t>((nbits << 1) + (copylen >> nbits) + 4);
+ uint32_t nbits = Log2FloorNonZero(copylen - 6) - 1u;
+ return (uint16_t)((nbits << 1) + ((copylen - 6) >> nbits) + 4);
} else if (copylen < 2118) {
- return static_cast<uint16_t>(Log2FloorNonZero(copylen - 70) + 12);
+ return (uint16_t)(Log2FloorNonZero(copylen - 70) + 12);
} else {
return 23u;
}
}
-static inline uint16_t CombineLengthCodes(
- uint16_t inscode, uint16_t copycode, bool use_last_distance) {
+static BROTLI_INLINE uint16_t CombineLengthCodes(
+ uint16_t inscode, uint16_t copycode, int use_last_distance) {
uint16_t bits64 =
- static_cast<uint16_t>((copycode & 0x7u) | ((inscode & 0x7u) << 3));
+ (uint16_t)((copycode & 0x7u) | ((inscode & 0x7u) << 3));
if (use_last_distance && inscode < 8 && copycode < 16) {
return (copycode < 8) ? bits64 : (bits64 | 64);
} else {
@@ -71,86 +72,91 @@ static inline uint16_t CombineLengthCodes(
}
}
-static inline void GetLengthCode(size_t insertlen, size_t copylen,
- bool use_last_distance,
- uint16_t* code) {
+static BROTLI_INLINE void GetLengthCode(size_t insertlen, size_t copylen,
+ int use_last_distance,
+ uint16_t* code) {
uint16_t inscode = GetInsertLengthCode(insertlen);
uint16_t copycode = GetCopyLengthCode(copylen);
*code = CombineLengthCodes(inscode, copycode, use_last_distance);
}
-static inline uint32_t GetInsertBase(uint16_t inscode) {
+static BROTLI_INLINE uint32_t GetInsertBase(uint16_t inscode) {
return kInsBase[inscode];
}
-static inline uint32_t GetInsertExtra(uint16_t inscode) {
+static BROTLI_INLINE uint32_t GetInsertExtra(uint16_t inscode) {
return kInsExtra[inscode];
}
-static inline uint32_t GetCopyBase(uint16_t copycode) {
+static BROTLI_INLINE uint32_t GetCopyBase(uint16_t copycode) {
return kCopyBase[copycode];
}
-static inline uint32_t GetCopyExtra(uint16_t copycode) {
+static BROTLI_INLINE uint32_t GetCopyExtra(uint16_t copycode) {
return kCopyExtra[copycode];
}
-struct Command {
- // distance_code is e.g. 0 for same-as-last short code, or 16 for offset 1.
- Command(size_t insertlen, size_t copylen, size_t copylen_code,
- size_t distance_code)
- : insert_len_(static_cast<uint32_t>(insertlen)) {
- copy_len_ = static_cast<uint32_t>(
- copylen | ((copylen_code ^ copylen) << 24));
- // The distance prefix and extra bits are stored in this Command as if
- // npostfix and ndirect were 0, they are only recomputed later after the
- // clustering if needed.
- PrefixEncodeCopyDistance(distance_code, 0, 0, &dist_prefix_, &dist_extra_);
- GetLengthCode(insertlen, copylen_code, dist_prefix_ == 0,
- &cmd_prefix_);
- }
+typedef struct Command {
+ uint32_t insert_len_;
+  /* Stores copy_len in low 24 bits and copy_len XOR copy_code in high 8 bits. */
+ uint32_t copy_len_;
+ uint32_t dist_extra_;
+ uint16_t cmd_prefix_;
+ uint16_t dist_prefix_;
+} Command;
+
+/* distance_code is e.g. 0 for same-as-last short code, or 16 for offset 1. */
+static BROTLI_INLINE void InitCommand(Command* self, size_t insertlen,
+ size_t copylen, size_t copylen_code, size_t distance_code) {
+ self->insert_len_ = (uint32_t)insertlen;
+ self->copy_len_ = (uint32_t)(copylen | ((copylen_code ^ copylen) << 24));
+ /* The distance prefix and extra bits are stored in this Command as if
+ npostfix and ndirect were 0, they are only recomputed later after the
+ clustering if needed. */
+ PrefixEncodeCopyDistance(
+ distance_code, 0, 0, &self->dist_prefix_, &self->dist_extra_);
+ GetLengthCode(
+ insertlen, copylen_code, self->dist_prefix_ == 0, &self->cmd_prefix_);
+}
- explicit Command(size_t insertlen)
- : insert_len_(static_cast<uint32_t>(insertlen))
- , copy_len_(4 << 24), dist_extra_(0), dist_prefix_(16) {
- GetLengthCode(insertlen, 4, dist_prefix_ == 0, &cmd_prefix_);
- }
+static BROTLI_INLINE void InitInsertCommand(Command* self, size_t insertlen) {
+ self->insert_len_ = (uint32_t)insertlen;
+ self->copy_len_ = 4 << 24;
+ self->dist_extra_ = 0;
+ self->dist_prefix_ = 16;
+ GetLengthCode(insertlen, 4, 0, &self->cmd_prefix_);
+}
- uint32_t DistanceCode(void) const {
- if (dist_prefix_ < 16) {
- return dist_prefix_;
- }
- uint32_t nbits = dist_extra_ >> 24;
- uint32_t extra = dist_extra_ & 0xffffff;
- uint32_t prefix = dist_prefix_ - 12 - 2 * nbits;
+static BROTLI_INLINE uint32_t CommandDistanceCode(const Command* self) {
+ if (self->dist_prefix_ < 16) {
+ return self->dist_prefix_;
+ } else {
+ uint32_t nbits = self->dist_extra_ >> 24;
+ uint32_t extra = self->dist_extra_ & 0xffffff;
+ uint32_t prefix = self->dist_prefix_ - 12u - 2u * nbits;
return (prefix << nbits) + extra + 12;
}
+}
- uint32_t DistanceContext(void) const {
- uint32_t r = cmd_prefix_ >> 6;
- uint32_t c = cmd_prefix_ & 7;
- if ((r == 0 || r == 2 || r == 4 || r == 7) && (c <= 2)) {
- return c;
- }
- return 3;
- }
-
- inline uint32_t copy_len(void) const {
- return copy_len_ & 0xFFFFFF;
+static BROTLI_INLINE uint32_t CommandDistanceContext(const Command* self) {
+ uint32_t r = self->cmd_prefix_ >> 6;
+ uint32_t c = self->cmd_prefix_ & 7;
+ if ((r == 0 || r == 2 || r == 4 || r == 7) && (c <= 2)) {
+ return c;
}
+ return 3;
+}
- inline uint32_t copy_len_code(void) const {
- return (copy_len_ & 0xFFFFFF) ^ (copy_len_ >> 24);
- }
+static BROTLI_INLINE uint32_t CommandCopyLen(const Command* self) {
+ return self->copy_len_ & 0xFFFFFF;
+}
- uint32_t insert_len_;
- /* Stores copy_len in low 24 bits and copy_len XOR copy_code in high 8 bit. */
- uint32_t copy_len_;
- uint32_t dist_extra_;
- uint16_t cmd_prefix_;
- uint16_t dist_prefix_;
-};
+static BROTLI_INLINE uint32_t CommandCopyLenCode(const Command* self) {
+ return (self->copy_len_ & 0xFFFFFF) ^ (self->copy_len_ >> 24);
+}
-} // namespace brotli
+#if defined(__cplusplus) || defined(c_plusplus)
+} /* extern "C" */
+#endif
#endif /* BROTLI_ENC_COMMAND_H_ */
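InitCommand packs two related lengths into one field: copy_len_ keeps copylen in its low 24 bits and copylen XOR copylen_code in its high 8 bits, which CommandCopyLen and CommandCopyLenCode unpack. A small round-trip check with arbitrary values; that the XOR always fits in 8 bits is implied by the packing and assumed here:

#include <assert.h>
#include <stdint.h>

int main(void) {
  const uint32_t copylen = 100;       /* actual copy length */
  const uint32_t copylen_code = 104;  /* code used for the length prefix */
  /* Pack, as InitCommand does (assumes the XOR fits in 8 bits). */
  const uint32_t packed = copylen | ((copylen_code ^ copylen) << 24);
  /* Unpack, as CommandCopyLen / CommandCopyLenCode do. */
  assert((packed & 0xFFFFFF) == copylen);
  assert(((packed & 0xFFFFFF) ^ (packed >> 24)) == copylen_code);
  return 0;
}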
diff --git a/enc/compress_fragment.c b/enc/compress_fragment.c
index 92951c2..d69cc66 100644
--- a/enc/compress_fragment.c
+++ b/enc/compress_fragment.c
@@ -14,18 +14,21 @@
#include "./compress_fragment.h"
-#include <algorithm>
-#include <cstring>
+#include <string.h> /* memcmp, memcpy, memset */
#include "../common/types.h"
#include "./brotli_bit_stream.h"
#include "./entropy_encode.h"
#include "./fast_log.h"
#include "./find_match_length.h"
+#include "./memory.h"
#include "./port.h"
#include "./write_bits.h"
-namespace brotli {
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
/* kHashMul32 multiplier has these properties:
* The multiplier must be odd. Otherwise we may lose the highest bit.
@@ -35,19 +38,22 @@ namespace brotli {
* The number has been tuned heuristically against compression benchmarks. */
static const uint32_t kHashMul32 = 0x1e35a7bd;
-static inline uint32_t Hash(const uint8_t* p, size_t shift) {
+static BROTLI_INLINE uint32_t Hash(const uint8_t* p, size_t shift) {
const uint64_t h = (BROTLI_UNALIGNED_LOAD64(p) << 24) * kHashMul32;
- return static_cast<uint32_t>(h >> shift);
+ return (uint32_t)(h >> shift);
}
-static inline uint32_t HashBytesAtOffset(uint64_t v, int offset, size_t shift) {
+static BROTLI_INLINE uint32_t HashBytesAtOffset(
+ uint64_t v, int offset, size_t shift) {
assert(offset >= 0);
assert(offset <= 3);
- const uint64_t h = ((v >> (8 * offset)) << 24) * kHashMul32;
- return static_cast<uint32_t>(h >> shift);
+ {
+ const uint64_t h = ((v >> (8 * offset)) << 24) * kHashMul32;
+ return (uint32_t)(h >> shift);
+ }
}
-static inline int IsMatch(const uint8_t* p1, const uint8_t* p2) {
+static BROTLI_INLINE int IsMatch(const uint8_t* p1, const uint8_t* p2) {
return (BROTLI_UNALIGNED_LOAD32(p1) == BROTLI_UNALIGNED_LOAD32(p2) &&
p1[4] == p2[4]);
}
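Hash covers exactly the five bytes that IsMatch later compares: shifting the 64-bit load left by 24 pushes bytes 5..7 out of the word, and the multiply-then-shift keeps the well-mixed high bits of the product. A portable sketch of the same computation; the memcpy load is a stand-in for BROTLI_UNALIGNED_LOAD64 and assumes a little-endian host:

#include <stddef.h>
#include <stdint.h>
#include <string.h>

static const uint32_t kHashMul32 = 0x1e35a7bd;  /* same constant as above */

/* Portable little-endian 64-bit load: p[0] lands in the low byte, as the
   fast path expects. */
static uint64_t Load64LE(const uint8_t* p) {
  uint64_t v;
  memcpy(&v, p, sizeof(v));
  return v;
}

/* Hashes exactly p[0..4]: << 24 discards bytes 5..7 of the load, and
   >> shift keeps the top (64 - shift) bits of the product. */
static uint32_t Hash5(const uint8_t* p, size_t shift) {
  const uint64_t h = (Load64LE(p) << 24) * kHashMul32;
  return (uint32_t)(h >> shift);
}

int main(void) {
  const uint8_t a[8] = { 1, 2, 3, 4, 5, 6, 7, 8 };
  const uint8_t b[8] = { 1, 2, 3, 4, 5, 9, 9, 9 };  /* differs past byte 4 */
  return Hash5(a, 49) == Hash5(b, 49) ? 0 : 1;      /* same bucket: 0 */
}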
@@ -57,281 +63,295 @@ static inline int IsMatch(const uint8_t* p1, const uint8_t* p2) {
Note that the prefix code here is built from the pre-LZ77 input, therefore
we can only approximate the statistics of the actual literal stream.
Moreover, for long inputs we build a histogram from a sample of the input
- and thus have to assign a non-zero depth for each literal. */
-static void BuildAndStoreLiteralPrefixCode(const uint8_t* input,
- const size_t input_size,
- uint8_t depths[256],
- uint16_t bits[256],
- size_t* storage_ix,
- uint8_t* storage) {
+ and thus have to assign a non-zero depth for each literal.
+   Returns the estimated compression ratio, in millibytes per char, for
+   encoding the given input with the generated code. */
+static size_t BuildAndStoreLiteralPrefixCode(MemoryManager* m,
+ const uint8_t* input,
+ const size_t input_size,
+ uint8_t depths[256],
+ uint16_t bits[256],
+ size_t* storage_ix,
+ uint8_t* storage) {
uint32_t histogram[256] = { 0 };
size_t histogram_total;
+ size_t i;
if (input_size < (1 << 15)) {
- for (size_t i = 0; i < input_size; ++i) {
+ for (i = 0; i < input_size; ++i) {
++histogram[input[i]];
}
histogram_total = input_size;
- for (size_t i = 0; i < 256; ++i) {
+ for (i = 0; i < 256; ++i) {
/* We weigh the first 11 samples with weight 3 to account for the
balancing effect of the LZ77 phase on the histogram. */
- const uint32_t adjust = 2 * std::min(histogram[i], 11u);
+ const uint32_t adjust = 2 * BROTLI_MIN(uint32_t, histogram[i], 11u);
histogram[i] += adjust;
histogram_total += adjust;
}
} else {
static const size_t kSampleRate = 29;
- for (size_t i = 0; i < input_size; i += kSampleRate) {
+ for (i = 0; i < input_size; i += kSampleRate) {
++histogram[input[i]];
}
histogram_total = (input_size + kSampleRate - 1) / kSampleRate;
- for (size_t i = 0; i < 256; ++i) {
+ for (i = 0; i < 256; ++i) {
/* We add 1 to each population count to avoid 0 bit depths (since this is
only a sample and we don't know if the symbol appears or not), and we
weigh the first 11 samples with weight 3 to account for the balancing
effect of the LZ77 phase on the histogram (more frequent symbols are
more likely to be in backward references instead as literals). */
- const uint32_t adjust = 1 + 2 * std::min(histogram[i], 11u);
+ const uint32_t adjust = 1 + 2 * BROTLI_MIN(uint32_t, histogram[i], 11u);
histogram[i] += adjust;
histogram_total += adjust;
}
}
- BuildAndStoreHuffmanTreeFast(histogram, histogram_total,
- /* max_bits = */ 8,
- depths, bits, storage_ix, storage);
+ BrotliBuildAndStoreHuffmanTreeFast(m, histogram, histogram_total,
+ /* max_bits = */ 8,
+ depths, bits, storage_ix, storage);
+ if (BROTLI_IS_OOM(m)) return 0;
+ {
+ size_t literal_ratio = 0;
+ for (i = 0; i < 256; ++i) {
+ if (histogram[i]) literal_ratio += histogram[i] * depths[i];
+ }
+ /* Estimated encoding ratio, millibytes per symbol. */
+ return (literal_ratio * 125) / histogram_total;
+ }
}
/* Builds a command and distance prefix code (each 64 symbols) into "depth" and
"bits" based on "histogram" and stores it into the bit stream. */
static void BuildAndStoreCommandPrefixCode(const uint32_t histogram[128],
- uint8_t depth[128],
- uint16_t bits[128],
- size_t* storage_ix,
- uint8_t* storage) {
+ uint8_t depth[128], uint16_t bits[128], size_t* storage_ix,
+ uint8_t* storage) {
/* Tree size for building a tree over 64 symbols is 2 * 64 + 1. */
- static const size_t kTreeSize = 129;
- HuffmanTree tree[kTreeSize];
- CreateHuffmanTree(histogram, 64, 15, tree, depth);
- CreateHuffmanTree(&histogram[64], 64, 14, tree, &depth[64]);
+ HuffmanTree tree[129];
+ uint8_t cmd_depth[BROTLI_NUM_COMMAND_SYMBOLS] = { 0 };
+ uint16_t cmd_bits[64];
+
+ BrotliCreateHuffmanTree(histogram, 64, 15, tree, depth);
+ BrotliCreateHuffmanTree(&histogram[64], 64, 14, tree, &depth[64]);
  /* We have to jump through a few hoops here in order to compute
the command bits because the symbols are in a different order than in
the full alphabet. This looks complicated, but having the symbols
in this order in the command bits saves a few branches in the Emit*
functions. */
- uint8_t cmd_depth[64];
- uint16_t cmd_bits[64];
memcpy(cmd_depth, depth, 24);
memcpy(cmd_depth + 24, depth + 40, 8);
memcpy(cmd_depth + 32, depth + 24, 8);
memcpy(cmd_depth + 40, depth + 48, 8);
memcpy(cmd_depth + 48, depth + 32, 8);
memcpy(cmd_depth + 56, depth + 56, 8);
- ConvertBitDepthsToSymbols(cmd_depth, 64, cmd_bits);
+ BrotliConvertBitDepthsToSymbols(cmd_depth, 64, cmd_bits);
memcpy(bits, cmd_bits, 48);
memcpy(bits + 24, cmd_bits + 32, 16);
memcpy(bits + 32, cmd_bits + 48, 16);
memcpy(bits + 40, cmd_bits + 24, 16);
memcpy(bits + 48, cmd_bits + 40, 16);
memcpy(bits + 56, cmd_bits + 56, 16);
- ConvertBitDepthsToSymbols(&depth[64], 64, &bits[64]);
+ BrotliConvertBitDepthsToSymbols(&depth[64], 64, &bits[64]);
{
/* Create the bit length array for the full command alphabet. */
- uint8_t cmd_depth[704] = { 0 };
+ size_t i;
+    memset(cmd_depth, 0, 64); /* only the first 64 values were used */
memcpy(cmd_depth, depth, 8);
memcpy(cmd_depth + 64, depth + 8, 8);
memcpy(cmd_depth + 128, depth + 16, 8);
memcpy(cmd_depth + 192, depth + 24, 8);
memcpy(cmd_depth + 384, depth + 32, 8);
- for (size_t i = 0; i < 8; ++i) {
+ for (i = 0; i < 8; ++i) {
cmd_depth[128 + 8 * i] = depth[40 + i];
cmd_depth[256 + 8 * i] = depth[48 + i];
cmd_depth[448 + 8 * i] = depth[56 + i];
}
- StoreHuffmanTree(cmd_depth, 704, tree, storage_ix, storage);
+ BrotliStoreHuffmanTree(
+ cmd_depth, BROTLI_NUM_COMMAND_SYMBOLS, tree, storage_ix, storage);
}
- StoreHuffmanTree(&depth[64], 64, tree, storage_ix, storage);
+ BrotliStoreHuffmanTree(&depth[64], 64, tree, storage_ix, storage);
}
/* REQUIRES: insertlen < 6210 */
-inline void EmitInsertLen(size_t insertlen,
- const uint8_t depth[128],
- const uint16_t bits[128],
- uint32_t histo[128],
- size_t* storage_ix,
- uint8_t* storage) {
+static BROTLI_INLINE void EmitInsertLen(size_t insertlen,
+ const uint8_t depth[128],
+ const uint16_t bits[128],
+ uint32_t histo[128],
+ size_t* storage_ix,
+ uint8_t* storage) {
if (insertlen < 6) {
const size_t code = insertlen + 40;
- WriteBits(depth[code], bits[code], storage_ix, storage);
+ BrotliWriteBits(depth[code], bits[code], storage_ix, storage);
++histo[code];
} else if (insertlen < 130) {
- insertlen -= 2;
- const uint32_t nbits = Log2FloorNonZero(insertlen) - 1u;
- const size_t prefix = insertlen >> nbits;
+ const size_t tail = insertlen - 2;
+ const uint32_t nbits = Log2FloorNonZero(tail) - 1u;
+ const size_t prefix = tail >> nbits;
const size_t inscode = (nbits << 1) + prefix + 42;
- WriteBits(depth[inscode], bits[inscode], storage_ix, storage);
- WriteBits(nbits, insertlen - (prefix << nbits), storage_ix, storage);
+ BrotliWriteBits(depth[inscode], bits[inscode], storage_ix, storage);
+ BrotliWriteBits(nbits, tail - (prefix << nbits), storage_ix, storage);
++histo[inscode];
} else if (insertlen < 2114) {
- insertlen -= 66;
- const uint32_t nbits = Log2FloorNonZero(insertlen);
+ const size_t tail = insertlen - 66;
+ const uint32_t nbits = Log2FloorNonZero(tail);
const size_t code = nbits + 50;
- WriteBits(depth[code], bits[code], storage_ix, storage);
- WriteBits(nbits, insertlen - (1 << nbits), storage_ix, storage);
+ BrotliWriteBits(depth[code], bits[code], storage_ix, storage);
+ BrotliWriteBits(nbits, tail - (1u << nbits), storage_ix, storage);
++histo[code];
} else {
- WriteBits(depth[61], bits[61], storage_ix, storage);
- WriteBits(12, insertlen - 2114, storage_ix, storage);
+ BrotliWriteBits(depth[61], bits[61], storage_ix, storage);
+ BrotliWriteBits(12, insertlen - 2114, storage_ix, storage);
++histo[21];
}
}
-inline void EmitLongInsertLen(size_t insertlen,
- const uint8_t depth[128],
- const uint16_t bits[128],
- uint32_t histo[128],
- size_t* storage_ix,
- uint8_t* storage) {
+static BROTLI_INLINE void EmitLongInsertLen(size_t insertlen,
+ const uint8_t depth[128],
+ const uint16_t bits[128],
+ uint32_t histo[128],
+ size_t* storage_ix,
+ uint8_t* storage) {
if (insertlen < 22594) {
- WriteBits(depth[62], bits[62], storage_ix, storage);
- WriteBits(14, insertlen - 6210, storage_ix, storage);
+ BrotliWriteBits(depth[62], bits[62], storage_ix, storage);
+ BrotliWriteBits(14, insertlen - 6210, storage_ix, storage);
++histo[22];
} else {
- WriteBits(depth[63], bits[63], storage_ix, storage);
- WriteBits(24, insertlen - 22594, storage_ix, storage);
+ BrotliWriteBits(depth[63], bits[63], storage_ix, storage);
+ BrotliWriteBits(24, insertlen - 22594, storage_ix, storage);
++histo[23];
}
}
-inline void EmitCopyLen(size_t copylen,
- const uint8_t depth[128],
- const uint16_t bits[128],
- uint32_t histo[128],
- size_t* storage_ix,
- uint8_t* storage) {
+static BROTLI_INLINE void EmitCopyLen(size_t copylen,
+ const uint8_t depth[128],
+ const uint16_t bits[128],
+ uint32_t histo[128],
+ size_t* storage_ix,
+ uint8_t* storage) {
if (copylen < 10) {
- WriteBits(depth[copylen + 14], bits[copylen + 14], storage_ix, storage);
+ BrotliWriteBits(
+ depth[copylen + 14], bits[copylen + 14], storage_ix, storage);
++histo[copylen + 14];
} else if (copylen < 134) {
- copylen -= 6;
- const uint32_t nbits = Log2FloorNonZero(copylen) - 1u;
- const size_t prefix = copylen >> nbits;
+ const size_t tail = copylen - 6;
+ const uint32_t nbits = Log2FloorNonZero(tail) - 1u;
+ const size_t prefix = tail >> nbits;
const size_t code = (nbits << 1) + prefix + 20;
- WriteBits(depth[code], bits[code], storage_ix, storage);
- WriteBits(nbits, copylen - (prefix << nbits), storage_ix, storage);
+ BrotliWriteBits(depth[code], bits[code], storage_ix, storage);
+ BrotliWriteBits(nbits, tail - (prefix << nbits), storage_ix, storage);
++histo[code];
} else if (copylen < 2118) {
- copylen -= 70;
- const uint32_t nbits = Log2FloorNonZero(copylen);
+ const size_t tail = copylen - 70;
+ const uint32_t nbits = Log2FloorNonZero(tail);
const size_t code = nbits + 28;
- WriteBits(depth[code], bits[code], storage_ix, storage);
- WriteBits(nbits, copylen - (1 << nbits), storage_ix, storage);
+ BrotliWriteBits(depth[code], bits[code], storage_ix, storage);
+ BrotliWriteBits(nbits, tail - (1u << nbits), storage_ix, storage);
++histo[code];
} else {
- WriteBits(depth[39], bits[39], storage_ix, storage);
- WriteBits(24, copylen - 2118, storage_ix, storage);
+ BrotliWriteBits(depth[39], bits[39], storage_ix, storage);
+ BrotliWriteBits(24, copylen - 2118, storage_ix, storage);
++histo[47];
}
}
-inline void EmitCopyLenLastDistance(size_t copylen,
- const uint8_t depth[128],
- const uint16_t bits[128],
- uint32_t histo[128],
- size_t* storage_ix,
- uint8_t* storage) {
+static BROTLI_INLINE void EmitCopyLenLastDistance(size_t copylen,
+ const uint8_t depth[128],
+ const uint16_t bits[128],
+ uint32_t histo[128],
+ size_t* storage_ix,
+ uint8_t* storage) {
if (copylen < 12) {
- WriteBits(depth[copylen - 4], bits[copylen - 4], storage_ix, storage);
+ BrotliWriteBits(depth[copylen - 4], bits[copylen - 4], storage_ix, storage);
++histo[copylen - 4];
} else if (copylen < 72) {
- copylen -= 8;
- const uint32_t nbits = Log2FloorNonZero(copylen) - 1;
- const size_t prefix = copylen >> nbits;
+ const size_t tail = copylen - 8;
+ const uint32_t nbits = Log2FloorNonZero(tail) - 1;
+ const size_t prefix = tail >> nbits;
const size_t code = (nbits << 1) + prefix + 4;
- WriteBits(depth[code], bits[code], storage_ix, storage);
- WriteBits(nbits, copylen - (prefix << nbits), storage_ix, storage);
+ BrotliWriteBits(depth[code], bits[code], storage_ix, storage);
+ BrotliWriteBits(nbits, tail - (prefix << nbits), storage_ix, storage);
++histo[code];
} else if (copylen < 136) {
- copylen -= 8;
- const size_t code = (copylen >> 5) + 30;
- WriteBits(depth[code], bits[code], storage_ix, storage);
- WriteBits(5, copylen & 31, storage_ix, storage);
- WriteBits(depth[64], bits[64], storage_ix, storage);
+ const size_t tail = copylen - 8;
+ const size_t code = (tail >> 5) + 30;
+ BrotliWriteBits(depth[code], bits[code], storage_ix, storage);
+ BrotliWriteBits(5, tail & 31, storage_ix, storage);
+ BrotliWriteBits(depth[64], bits[64], storage_ix, storage);
++histo[code];
++histo[64];
} else if (copylen < 2120) {
- copylen -= 72;
- const uint32_t nbits = Log2FloorNonZero(copylen);
+ const size_t tail = copylen - 72;
+ const uint32_t nbits = Log2FloorNonZero(tail);
const size_t code = nbits + 28;
- WriteBits(depth[code], bits[code], storage_ix, storage);
- WriteBits(nbits, copylen - (1 << nbits), storage_ix, storage);
- WriteBits(depth[64], bits[64], storage_ix, storage);
+ BrotliWriteBits(depth[code], bits[code], storage_ix, storage);
+ BrotliWriteBits(nbits, tail - (1u << nbits), storage_ix, storage);
+ BrotliWriteBits(depth[64], bits[64], storage_ix, storage);
++histo[code];
++histo[64];
} else {
- WriteBits(depth[39], bits[39], storage_ix, storage);
- WriteBits(24, copylen - 2120, storage_ix, storage);
- WriteBits(depth[64], bits[64], storage_ix, storage);
+ BrotliWriteBits(depth[39], bits[39], storage_ix, storage);
+ BrotliWriteBits(24, copylen - 2120, storage_ix, storage);
+ BrotliWriteBits(depth[64], bits[64], storage_ix, storage);
++histo[47];
++histo[64];
}
}
-inline void EmitDistance(size_t distance,
- const uint8_t depth[128],
- const uint16_t bits[128],
- uint32_t histo[128],
- size_t* storage_ix, uint8_t* storage) {
- distance += 3;
- const uint32_t nbits = Log2FloorNonZero(distance) - 1u;
- const size_t prefix = (distance >> nbits) & 1;
+static BROTLI_INLINE void EmitDistance(size_t distance,
+ const uint8_t depth[128],
+ const uint16_t bits[128],
+ uint32_t histo[128],
+ size_t* storage_ix, uint8_t* storage) {
+ const size_t d = distance + 3;
+ const uint32_t nbits = Log2FloorNonZero(d) - 1u;
+ const size_t prefix = (d >> nbits) & 1;
const size_t offset = (2 + prefix) << nbits;
const size_t distcode = 2 * (nbits - 1) + prefix + 80;
- WriteBits(depth[distcode], bits[distcode], storage_ix, storage);
- WriteBits(nbits, distance - offset, storage_ix, storage);
+ BrotliWriteBits(depth[distcode], bits[distcode], storage_ix, storage);
+ BrotliWriteBits(nbits, d - offset, storage_ix, storage);
++histo[distcode];
}
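(Aside: the distance mapping above, restated as a standalone demo so the prefix/extra-bits split can be checked by hand. Log2FloorNonZero is approximated with a shift loop; this is an illustrative sketch, not code from this change.)

    #include <stdint.h>
    #include <stdio.h>

    static uint32_t log2_floor_nonzero(size_t n) {
      uint32_t result = 0;
      while (n >>= 1) result++;
      return result;
    }

    int main(void) {
      /* Same arithmetic as EmitDistance: bias by 3, take the bit just
         below the most significant one as a one-bit prefix, and emit the
         remaining "nbits" bits as the extra-bit value. */
      size_t distances[] = { 1, 5, 68, 1000 };
      size_t i;
      for (i = 0; i < sizeof(distances) / sizeof(distances[0]); ++i) {
        const size_t d = distances[i] + 3;
        const uint32_t nbits = log2_floor_nonzero(d) - 1u;
        const size_t prefix = (d >> nbits) & 1;
        const size_t offset = (2 + prefix) << nbits;
        const size_t distcode = 2 * (nbits - 1) + prefix + 80;
        printf("distance %zu -> code %zu, %u extra bits, value %zu\n",
               distances[i], distcode, (unsigned)nbits, d - offset);
      }
      return 0;
    }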
-inline void EmitLiterals(const uint8_t* input, const size_t len,
- const uint8_t depth[256], const uint16_t bits[256],
- size_t* storage_ix, uint8_t* storage) {
- for (size_t j = 0; j < len; j++) {
+static BROTLI_INLINE void EmitLiterals(const uint8_t* input, const size_t len,
+ const uint8_t depth[256],
+ const uint16_t bits[256],
+ size_t* storage_ix, uint8_t* storage) {
+ size_t j;
+ for (j = 0; j < len; j++) {
const uint8_t lit = input[j];
- WriteBits(depth[lit], bits[lit], storage_ix, storage);
+ BrotliWriteBits(depth[lit], bits[lit], storage_ix, storage);
}
}
/* REQUIRES: len <= 1 << 20. */
-static void StoreMetaBlockHeader(
- size_t len, bool is_uncompressed, size_t* storage_ix, uint8_t* storage) {
+static void BrotliStoreMetaBlockHeader(
+ size_t len, int is_uncompressed, size_t* storage_ix, uint8_t* storage) {
/* ISLAST */
- WriteBits(1, 0, storage_ix, storage);
+ BrotliWriteBits(1, 0, storage_ix, storage);
if (len <= (1U << 16)) {
/* MNIBBLES is 4 */
- WriteBits(2, 0, storage_ix, storage);
- WriteBits(16, len - 1, storage_ix, storage);
+ BrotliWriteBits(2, 0, storage_ix, storage);
+ BrotliWriteBits(16, len - 1, storage_ix, storage);
} else {
/* MNIBBLES is 5 */
- WriteBits(2, 1, storage_ix, storage);
- WriteBits(20, len - 1, storage_ix, storage);
+ BrotliWriteBits(2, 1, storage_ix, storage);
+ BrotliWriteBits(20, len - 1, storage_ix, storage);
}
/* ISUNCOMPRESSED */
- WriteBits(1, is_uncompressed, storage_ix, storage);
+ BrotliWriteBits(1, (uint64_t)is_uncompressed, storage_ix, storage);
}
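(Aside: the header layout written above is ISLAST(1), MNIBBLES(2), then a 16- or 20-bit MLEN, then ISUNCOMPRESSED(1). MLEN therefore always starts 3 bits past the start of the header, which is why the callers below save "*storage_ix + 3" as mlen_storage_ix before writing it. A hypothetical helper, just to make the bit budget concrete:)

    #include <stdio.h>

    /* Bits consumed by the meta-block header for a given length. */
    static unsigned header_bits(unsigned long len) {
      return 1 /* ISLAST */ + 2 /* MNIBBLES */ +
             (len <= (1UL << 16) ? 16 : 20) /* MLEN stores len - 1 */ +
             1 /* ISUNCOMPRESSED */;
    }

    int main(void) {
      printf("len 65536 -> %u header bits\n", header_bits(65536)); /* 20 */
      printf("len 65537 -> %u header bits\n", header_bits(65537)); /* 24 */
      return 0;
    }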
-static void UpdateBits(size_t n_bits,
- uint32_t bits,
- size_t pos,
- uint8_t *array) {
+static void UpdateBits(size_t n_bits, uint32_t bits, size_t pos,
+ uint8_t *array) {
while (n_bits > 0) {
size_t byte_pos = pos >> 3;
size_t n_unchanged_bits = pos & 7;
- size_t n_changed_bits = std::min(n_bits, 8 - n_unchanged_bits);
+ size_t n_changed_bits = BROTLI_MIN(size_t, n_bits, 8 - n_unchanged_bits);
size_t total_bits = n_unchanged_bits + n_changed_bits;
- uint32_t mask = (~((1 << total_bits) - 1)) | ((1 << n_unchanged_bits) - 1);
+ uint32_t mask =
+ (~((1u << total_bits) - 1u)) | ((1u << n_unchanged_bits) - 1u);
uint32_t unchanged_bits = array[byte_pos] & mask;
- uint32_t changed_bits = bits & ((1 << n_changed_bits) - 1);
+ uint32_t changed_bits = bits & ((1u << n_changed_bits) - 1u);
array[byte_pos] =
- static_cast<uint8_t>((changed_bits << n_unchanged_bits) |
- unchanged_bits);
+ (uint8_t)((changed_bits << n_unchanged_bits) | unchanged_bits);
n_bits -= n_changed_bits;
bits >>= n_changed_bits;
pos += n_changed_bits;
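(Aside: UpdateBits is the backpatching primitive behind the meta-block merging below: the encoder remembers the bit offset of the 20-bit MLEN field (mlen_storage_ix) and rewrites it in place when a block is extended. A self-contained demo, restating the routine verbatim so it compiles on its own; illustration only:)

    #include <stdint.h>
    #include <stdio.h>

    static void update_bits(size_t n_bits, uint32_t bits, size_t pos,
                            uint8_t* array) {
      while (n_bits > 0) {
        size_t byte_pos = pos >> 3;
        size_t n_unchanged_bits = pos & 7;
        size_t n_changed_bits =
            n_bits < 8 - n_unchanged_bits ? n_bits : 8 - n_unchanged_bits;
        size_t total_bits = n_unchanged_bits + n_changed_bits;
        uint32_t mask =
            (~((1u << total_bits) - 1u)) | ((1u << n_unchanged_bits) - 1u);
        uint32_t unchanged_bits = array[byte_pos] & mask;
        uint32_t changed_bits = bits & ((1u << n_changed_bits) - 1u);
        array[byte_pos] =
            (uint8_t)((changed_bits << n_unchanged_bits) | unchanged_bits);
        n_bits -= n_changed_bits;
        bits >>= n_changed_bits;
        pos += n_changed_bits;
      }
    }

    int main(void) {
      uint8_t storage[4] = { 0 };
      /* Rewrite a 20-bit MLEN (len - 1) at bit offset 3, exactly as the
         meta-block merging path does with mlen_storage_ix. */
      update_bits(20, 100000 - 1, 3, storage);
      printf("%02x %02x %02x\n", storage[0], storage[1], storage[2]);
      return 0;
    }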
@@ -342,69 +362,72 @@ static void RewindBitPosition(const size_t new_storage_ix,
size_t* storage_ix, uint8_t* storage) {
const size_t bitpos = new_storage_ix & 7;
const size_t mask = (1u << bitpos) - 1;
- storage[new_storage_ix >> 3] &= static_cast<uint8_t>(mask);
+ storage[new_storage_ix >> 3] &= (uint8_t)mask;
*storage_ix = new_storage_ix;
}
-static bool ShouldMergeBlock(const uint8_t* data, size_t len,
- const uint8_t* depths) {
+static int ShouldMergeBlock(const uint8_t* data, size_t len,
+ const uint8_t* depths) {
size_t histo[256] = { 0 };
static const size_t kSampleRate = 43;
- for (size_t i = 0; i < len; i += kSampleRate) {
+ size_t i;
+ for (i = 0; i < len; i += kSampleRate) {
++histo[data[i]];
}
- const size_t total = (len + kSampleRate - 1) / kSampleRate;
- double r = (FastLog2(total) + 0.5) * static_cast<double>(total) + 200;
- for (size_t i = 0; i < 256; ++i) {
- r -= static_cast<double>(histo[i]) * (depths[i] + FastLog2(histo[i]));
+ {
+ const size_t total = (len + kSampleRate - 1) / kSampleRate;
+ double r = (FastLog2(total) + 0.5) * (double)total + 200;
+ for (i = 0; i < 256; ++i) {
+ r -= (double)histo[i] * (depths[i] + FastLog2(histo[i]));
+ }
+ return (r >= 0.0) ? 1 : 0;
}
- return r >= 0.0;
}
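(Aside: in words, ShouldMergeBlock samples every 43rd byte and keeps merging while pricing the sample with the current depths costs no more than an idealized fresh entropy code plus half a bit per sample plus 200 bits of slack. A standalone rendering, substituting math.h log2 for FastLog2 (the real FastLog2(0) is 0, hence the guard); illustration only:)

    #include <math.h>
    #include <stdint.h>
    #include <stdio.h>

    static int should_merge_block(const uint8_t* data, size_t len,
                                  const uint8_t* depths) {
      size_t histo[256] = { 0 };
      static const size_t kSampleRate = 43;
      size_t i, total;
      double r;
      for (i = 0; i < len; i += kSampleRate) ++histo[data[i]];
      total = (len + kSampleRate - 1) / kSampleRate;
      r = (log2((double)total) + 0.5) * (double)total + 200;
      for (i = 0; i < 256; ++i) {
        if (histo[i]) { /* FastLog2(0) == 0, so empty bins contribute 0 */
          r -= (double)histo[i] * (depths[i] + log2((double)histo[i]));
        }
      }
      return r >= 0.0;
    }

    int main(void) {
      uint8_t data[4300];
      uint8_t depths[256];
      size_t i;
      for (i = 0; i < sizeof(data); ++i) data[i] = (uint8_t)(i & 3);
      for (i = 0; i < 256; ++i) depths[i] = 2; /* current code: 2 bits */
      printf("merge? %d\n", should_merge_block(data, sizeof(data), depths));
      return 0;
    }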
-inline bool ShouldUseUncompressedMode(const uint8_t* metablock_start,
- const uint8_t* next_emit,
- const size_t insertlen,
- const uint8_t literal_depths[256]) {
- const size_t compressed = static_cast<size_t>(next_emit - metablock_start);
+/* Acceptable loss for uncompressible speedup is 2% */
+#define MIN_RATIO 980
+
+static BROTLI_INLINE int ShouldUseUncompressedMode(
+ const uint8_t* metablock_start, const uint8_t* next_emit,
+ const size_t insertlen, const size_t literal_ratio) {
+ const size_t compressed = (size_t)(next_emit - metablock_start);
if (compressed * 50 > insertlen) {
- return false;
- }
- static const double kAcceptableLossForUncompressibleSpeedup = 0.02;
- static const double kMinEntropy =
- 8 * (1.0 - kAcceptableLossForUncompressibleSpeedup);
- uint32_t sum = 0;
- for (int i = 0; i < 256; ++i) {
- const uint32_t n = literal_depths[i];
- sum += n << (15 - n);
+ return 0;
+ } else {
+ return (literal_ratio > MIN_RATIO) ? 1 : 0;
}
- return sum > static_cast<uint32_t>((1 << 15) * kMinEntropy);
}
static void EmitUncompressedMetaBlock(const uint8_t* begin, const uint8_t* end,
const size_t storage_ix_start,
size_t* storage_ix, uint8_t* storage) {
- const size_t len = static_cast<size_t>(end - begin);
+ const size_t len = (size_t)(end - begin);
RewindBitPosition(storage_ix_start, storage_ix, storage);
- StoreMetaBlockHeader(len, 1, storage_ix, storage);
+ BrotliStoreMetaBlockHeader(len, 1, storage_ix, storage);
*storage_ix = (*storage_ix + 7u) & ~7u;
memcpy(&storage[*storage_ix >> 3], begin, len);
*storage_ix += len << 3;
storage[*storage_ix >> 3] = 0;
}
-void BrotliCompressFragmentFast(const uint8_t* input, size_t input_size,
- bool is_last,
+static uint32_t kCmdHistoSeed[128] = {
+ 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 0, 0, 0, 0,
+};
+
+void BrotliCompressFragmentFast(MemoryManager* m,
+ const uint8_t* input, size_t input_size,
+ int is_last,
int* table, size_t table_size,
uint8_t cmd_depth[128], uint16_t cmd_bits[128],
size_t* cmd_code_numbits, uint8_t* cmd_code,
size_t* storage_ix, uint8_t* storage) {
- if (input_size == 0) {
- assert(is_last);
- WriteBits(1, 1, storage_ix, storage); // islast
- WriteBits(1, 1, storage_ix, storage); // isempty
- *storage_ix = (*storage_ix + 7u) & ~7u;
- return;
- }
+ uint32_t cmd_histo[128];
+ const uint8_t* ip_end;
/* "next_emit" is a pointer to the first byte that is not covered by a
previous copy. Bytes between "next_emit" and the start of the next copy or
@@ -417,66 +440,81 @@ void BrotliCompressFragmentFast(const uint8_t* input, size_t input_size,
static const size_t kFirstBlockSize = 3 << 15;
static const size_t kMergeBlockSize = 1 << 16;
+ const size_t kInputMarginBytes = 16;
+ const size_t kMinMatchLen = 5;
+
const uint8_t* metablock_start = input;
- size_t block_size = std::min(input_size, kFirstBlockSize);
+ size_t block_size = BROTLI_MIN(size_t, input_size, kFirstBlockSize);
size_t total_block_size = block_size;
/* Save the bit position of the MLEN field of the meta-block header, so that
we can update it later if we decide to extend this meta-block. */
size_t mlen_storage_ix = *storage_ix + 3;
- StoreMetaBlockHeader(block_size, 0, storage_ix, storage);
- // No block splits, no contexts.
- WriteBits(13, 0, storage_ix, storage);
-
- uint8_t lit_depth[256] = { 0 };
- uint16_t lit_bits[256] = { 0 };
- BuildAndStoreLiteralPrefixCode(input, block_size, lit_depth, lit_bits,
- storage_ix, storage);
-
- // Store the pre-compressed command and distance prefix codes.
- for (size_t i = 0; i + 7 < *cmd_code_numbits; i += 8) {
- WriteBits(8, cmd_code[i >> 3], storage_ix, storage);
+
+ uint8_t lit_depth[256];
+ uint16_t lit_bits[256];
+
+ size_t literal_ratio;
+
+ const uint8_t* ip;
+ int last_distance;
+
+ const size_t shift = 64u - Log2FloorNonZero(table_size);
+ assert(table_size);
+ assert(table_size <= (1u << 31));
+ /* table must be power of two */
+ assert((table_size & (table_size - 1)) == 0);
+ assert(table_size - 1 ==
+ (size_t)(MAKE_UINT64_T(0xFFFFFFFF, 0xFFFFFF) >> shift));
+
+ if (input_size == 0) {
+ assert(is_last);
+ BrotliWriteBits(1, 1, storage_ix, storage); /* islast */
+ BrotliWriteBits(1, 1, storage_ix, storage); /* isempty */
+ *storage_ix = (*storage_ix + 7u) & ~7u;
+ return;
}
- WriteBits(*cmd_code_numbits & 7, cmd_code[*cmd_code_numbits >> 3],
- storage_ix, storage);
+
+ BrotliStoreMetaBlockHeader(block_size, 0, storage_ix, storage);
+ /* No block splits, no contexts. */
+ BrotliWriteBits(13, 0, storage_ix, storage);
+
+ literal_ratio = BuildAndStoreLiteralPrefixCode(
+ m, input, block_size, lit_depth, lit_bits, storage_ix, storage);
+ if (BROTLI_IS_OOM(m)) return;
+
+ {
+ /* Store the pre-compressed command and distance prefix codes. */
+ size_t i;
+ for (i = 0; i + 7 < *cmd_code_numbits; i += 8) {
+ BrotliWriteBits(8, cmd_code[i >> 3], storage_ix, storage);
+ }
+ }
+ BrotliWriteBits(*cmd_code_numbits & 7, cmd_code[*cmd_code_numbits >> 3],
+ storage_ix, storage);
emit_commands:
/* Initialize the command and distance histograms. We will gather
statistics of command and distance codes during the processing
of this block and use it to update the command and distance
prefix codes for the next block. */
- uint32_t cmd_histo[128] = {
- 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 0, 0, 0, 0,
- };
-
- // "ip" is the input pointer.
- const uint8_t* ip = input;
- assert(table_size);
- assert(table_size <= (1u << 31));
- assert((table_size & (table_size - 1)) == 0); // table must be power of two
- const size_t shift = 64u - Log2FloorNonZero(table_size);
- assert(table_size - 1 == static_cast<size_t>(
- MAKE_UINT64_T(0xFFFFFFFF, 0xFFFFFF) >> shift));
- const uint8_t* ip_end = input + block_size;
+ memcpy(cmd_histo, kCmdHistoSeed, sizeof(kCmdHistoSeed));
+
+ /* "ip" is the input pointer. */
+ ip = input;
+ last_distance = -1;
+ ip_end = input + block_size;
- int last_distance = -1;
- const size_t kInputMarginBytes = 16;
- const size_t kMinMatchLen = 5;
if (PREDICT_TRUE(block_size >= kInputMarginBytes)) {
/* For the last block, we need to keep a 16 bytes margin so that we can be
sure that all distances are at most window size - 16.
For all other blocks, we only need to keep a margin of 5 bytes so that
we don't go over the block size with a copy. */
- const size_t len_limit = std::min(block_size - kMinMatchLen,
- input_size - kInputMarginBytes);
+ const size_t len_limit = BROTLI_MIN(size_t, block_size - kMinMatchLen,
+ input_size - kInputMarginBytes);
const uint8_t* ip_limit = input + len_limit;
- for (uint32_t next_hash = Hash(++ip, shift); ; ) {
- assert(next_emit < ip);
+ uint32_t next_hash;
+ for (next_hash = Hash(++ip, shift); ; ) {
/* Step 1: Scan forward in the input looking for a 5-byte-long match.
If we get close to exhausting the input then goto emit_remainder.
@@ -496,11 +534,13 @@ void BrotliCompressFragmentFast(const uint8_t* input, size_t input_size,
const uint8_t* next_ip = ip;
const uint8_t* candidate;
+ assert(next_emit < ip);
+
do {
- ip = next_ip;
uint32_t hash = next_hash;
- assert(hash == Hash(ip, shift));
uint32_t bytes_between_hash_lookups = skip++ >> 5;
+ assert(hash == Hash(next_ip, shift));
+ ip = next_ip;
next_ip = ip + bytes_between_hash_lookups;
if (PREDICT_FALSE(next_ip > ip_limit)) {
goto emit_remainder;
@@ -509,7 +549,7 @@ void BrotliCompressFragmentFast(const uint8_t* input, size_t input_size,
candidate = ip - last_distance;
if (IsMatch(ip, candidate)) {
if (PREDICT_TRUE(candidate < ip)) {
- table[hash] = static_cast<int>(ip - base_ip);
+ table[hash] = (int)(ip - base_ip);
break;
}
}
@@ -517,33 +557,32 @@ void BrotliCompressFragmentFast(const uint8_t* input, size_t input_size,
assert(candidate >= base_ip);
assert(candidate < ip);
- table[hash] = static_cast<int>(ip - base_ip);
+ table[hash] = (int)(ip - base_ip);
} while (PREDICT_TRUE(!IsMatch(ip, candidate)));
/* Step 2: Emit the found match together with the literal bytes from
"next_emit" to the bit stream, and then see if we can find a next macth
immediately afterwards. Repeat until we find no match for the input
without emitting some literal bytes. */
- uint64_t input_bytes;
{
/* We have a 5-byte match at ip, and we need to emit bytes in
[next_emit, ip). */
const uint8_t* base = ip;
size_t matched = 5 + FindMatchLengthWithLimit(
- candidate + 5, ip + 5, static_cast<size_t>(ip_end - ip) - 5);
+ candidate + 5, ip + 5, (size_t)(ip_end - ip) - 5);
+ int distance = (int)(base - candidate); /* > 0 */
+ size_t insert = (size_t)(base - next_emit);
ip += matched;
- int distance = static_cast<int>(base - candidate); /* > 0 */
- size_t insert = static_cast<size_t>(base - next_emit);
assert(0 == memcmp(base, candidate, matched));
if (PREDICT_TRUE(insert < 6210)) {
EmitInsertLen(insert, cmd_depth, cmd_bits, cmd_histo,
storage_ix, storage);
} else if (ShouldUseUncompressedMode(metablock_start, next_emit, insert,
- lit_depth)) {
+ literal_ratio)) {
EmitUncompressedMetaBlock(metablock_start, base, mlen_storage_ix - 3,
storage_ix, storage);
- input_size -= static_cast<size_t>(base - input);
+ input_size -= (size_t)(base - input);
input = base;
next_emit = input;
goto next_block;
@@ -554,10 +593,10 @@ void BrotliCompressFragmentFast(const uint8_t* input, size_t input_size,
EmitLiterals(next_emit, insert, lit_depth, lit_bits,
storage_ix, storage);
if (distance == last_distance) {
- WriteBits(cmd_depth[64], cmd_bits[64], storage_ix, storage);
+ BrotliWriteBits(cmd_depth[64], cmd_bits[64], storage_ix, storage);
++cmd_histo[64];
} else {
- EmitDistance(static_cast<size_t>(distance), cmd_depth, cmd_bits,
+ EmitDistance((size_t)distance, cmd_depth, cmd_bits,
cmd_histo, storage_ix, storage);
last_distance = distance;
}
@@ -571,17 +610,19 @@ void BrotliCompressFragmentFast(const uint8_t* input, size_t input_size,
/* We could immediately start working at ip now, but to improve
compression we first update "table" with the hashes of some positions
within the last copy. */
- input_bytes = BROTLI_UNALIGNED_LOAD64(ip - 3);
- uint32_t prev_hash = HashBytesAtOffset(input_bytes, 0, shift);
- table[prev_hash] = static_cast<int>(ip - base_ip - 3);
- prev_hash = HashBytesAtOffset(input_bytes, 1, shift);
- table[prev_hash] = static_cast<int>(ip - base_ip - 2);
- prev_hash = HashBytesAtOffset(input_bytes, 2, shift);
- table[prev_hash] = static_cast<int>(ip - base_ip - 1);
-
- uint32_t cur_hash = HashBytesAtOffset(input_bytes, 3, shift);
- candidate = base_ip + table[cur_hash];
- table[cur_hash] = static_cast<int>(ip - base_ip);
+ {
+ uint64_t input_bytes = BROTLI_UNALIGNED_LOAD64(ip - 3);
+ uint32_t prev_hash = HashBytesAtOffset(input_bytes, 0, shift);
+ uint32_t cur_hash = HashBytesAtOffset(input_bytes, 3, shift);
+ table[prev_hash] = (int)(ip - base_ip - 3);
+ prev_hash = HashBytesAtOffset(input_bytes, 1, shift);
+ table[prev_hash] = (int)(ip - base_ip - 2);
+ prev_hash = HashBytesAtOffset(input_bytes, 2, shift);
+ table[prev_hash] = (int)(ip - base_ip - 1);
+
+ candidate = base_ip + table[cur_hash];
+ table[cur_hash] = (int)(ip - base_ip);
+ }
}
while (IsMatch(ip, candidate)) {
@@ -589,13 +630,13 @@ void BrotliCompressFragmentFast(const uint8_t* input, size_t input_size,
prior to ip. */
const uint8_t* base = ip;
size_t matched = 5 + FindMatchLengthWithLimit(
- candidate + 5, ip + 5, static_cast<size_t>(ip_end - ip) - 5);
+ candidate + 5, ip + 5, (size_t)(ip_end - ip) - 5);
ip += matched;
- last_distance = static_cast<int>(base - candidate); /* > 0 */
+ last_distance = (int)(base - candidate); /* > 0 */
assert(0 == memcmp(base, candidate, matched));
EmitCopyLen(matched, cmd_depth, cmd_bits, cmd_histo,
storage_ix, storage);
- EmitDistance(static_cast<size_t>(last_distance), cmd_depth, cmd_bits,
+ EmitDistance((size_t)last_distance, cmd_depth, cmd_bits,
cmd_histo, storage_ix, storage);
next_emit = ip;
@@ -605,17 +646,19 @@ void BrotliCompressFragmentFast(const uint8_t* input, size_t input_size,
/* We could immediately start working at ip now, but to improve
compression we first update "table" with the hashes of some positions
within the last copy. */
- input_bytes = BROTLI_UNALIGNED_LOAD64(ip - 3);
- uint32_t prev_hash = HashBytesAtOffset(input_bytes, 0, shift);
- table[prev_hash] = static_cast<int>(ip - base_ip - 3);
- prev_hash = HashBytesAtOffset(input_bytes, 1, shift);
- table[prev_hash] = static_cast<int>(ip - base_ip - 2);
- prev_hash = HashBytesAtOffset(input_bytes, 2, shift);
- table[prev_hash] = static_cast<int>(ip - base_ip - 1);
-
- uint32_t cur_hash = HashBytesAtOffset(input_bytes, 3, shift);
- candidate = base_ip + table[cur_hash];
- table[cur_hash] = static_cast<int>(ip - base_ip);
+ {
+ uint64_t input_bytes = BROTLI_UNALIGNED_LOAD64(ip - 3);
+ uint32_t prev_hash = HashBytesAtOffset(input_bytes, 0, shift);
+ uint32_t cur_hash = HashBytesAtOffset(input_bytes, 3, shift);
+ table[prev_hash] = (int)(ip - base_ip - 3);
+ prev_hash = HashBytesAtOffset(input_bytes, 1, shift);
+ table[prev_hash] = (int)(ip - base_ip - 2);
+ prev_hash = HashBytesAtOffset(input_bytes, 2, shift);
+ table[prev_hash] = (int)(ip - base_ip - 1);
+
+ candidate = base_ip + table[cur_hash];
+ table[cur_hash] = (int)(ip - base_ip);
+ }
}
next_hash = Hash(++ip, shift);
@@ -626,7 +669,7 @@ void BrotliCompressFragmentFast(const uint8_t* input, size_t input_size,
assert(next_emit <= ip_end);
input += block_size;
input_size -= block_size;
- block_size = std::min(input_size, kMergeBlockSize);
+ block_size = BROTLI_MIN(size_t, input_size, kMergeBlockSize);
/* Decide if we want to continue this meta-block instead of emitting the
last insert-only command. */
@@ -638,20 +681,19 @@ void BrotliCompressFragmentFast(const uint8_t* input, size_t input_size,
We can do this because the current size and the new size both have 5
nibbles. */
total_block_size += block_size;
- UpdateBits(20, static_cast<uint32_t>(total_block_size - 1),
- mlen_storage_ix, storage);
+ UpdateBits(20, (uint32_t)(total_block_size - 1), mlen_storage_ix, storage);
goto emit_commands;
}
/* Emit the remaining bytes as literals. */
if (next_emit < ip_end) {
- const size_t insert = static_cast<size_t>(ip_end - next_emit);
+ const size_t insert = (size_t)(ip_end - next_emit);
if (PREDICT_TRUE(insert < 6210)) {
EmitInsertLen(insert, cmd_depth, cmd_bits, cmd_histo,
storage_ix, storage);
EmitLiterals(next_emit, insert, lit_depth, lit_bits, storage_ix, storage);
} else if (ShouldUseUncompressedMode(metablock_start, next_emit, insert,
- lit_depth)) {
+ literal_ratio)) {
EmitUncompressedMetaBlock(metablock_start, ip_end, mlen_storage_ix - 3,
storage_ix, storage);
} else {
@@ -668,26 +710,25 @@ next_block:
then continue emitting commands. */
if (input_size > 0) {
metablock_start = input;
- block_size = std::min(input_size, kFirstBlockSize);
+ block_size = BROTLI_MIN(size_t, input_size, kFirstBlockSize);
total_block_size = block_size;
/* Save the bit position of the MLEN field of the meta-block header, so that
we can update it later if we decide to extend this meta-block. */
mlen_storage_ix = *storage_ix + 3;
- StoreMetaBlockHeader(block_size, 0, storage_ix, storage);
+ BrotliStoreMetaBlockHeader(block_size, 0, storage_ix, storage);
/* No block splits, no contexts. */
- WriteBits(13, 0, storage_ix, storage);
- memset(lit_depth, 0, sizeof(lit_depth));
- memset(lit_bits, 0, sizeof(lit_bits));
- BuildAndStoreLiteralPrefixCode(input, block_size, lit_depth, lit_bits,
- storage_ix, storage);
+ BrotliWriteBits(13, 0, storage_ix, storage);
+ literal_ratio = BuildAndStoreLiteralPrefixCode(
+ m, input, block_size, lit_depth, lit_bits, storage_ix, storage);
+ if (BROTLI_IS_OOM(m)) return;
BuildAndStoreCommandPrefixCode(cmd_histo, cmd_depth, cmd_bits,
storage_ix, storage);
goto emit_commands;
}
if (is_last) {
- WriteBits(1, 1, storage_ix, storage); /* islast */
- WriteBits(1, 1, storage_ix, storage); /* isempty */
+ BrotliWriteBits(1, 1, storage_ix, storage); /* islast */
+ BrotliWriteBits(1, 1, storage_ix, storage); /* isempty */
*storage_ix = (*storage_ix + 7u) & ~7u;
} else {
/* If this is not the last block, update the command and distance prefix
@@ -699,4 +740,6 @@ next_block:
}
}
-} // namespace brotli
+#if defined(__cplusplus) || defined(c_plusplus)
+} /* extern "C" */
+#endif
diff --git a/enc/compress_fragment.h b/enc/compress_fragment.h
index a0aa13b..d3e8ba5 100644
--- a/enc/compress_fragment.h
+++ b/enc/compress_fragment.h
@@ -13,8 +13,12 @@
#define BROTLI_ENC_COMPRESS_FRAGMENT_H_
#include "../common/types.h"
+#include "./memory.h"
+#include "./port.h"
-namespace brotli {
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
/* Compresses "input" string to the "*storage" buffer as one or more complete
meta-blocks, and updates the "*storage_ix" bit position.
@@ -35,13 +39,20 @@ namespace brotli {
REQUIRES: "input_size" is greater than zero, or "is_last" is 1.
REQUIRES: All elements in "table[0..table_size-1]" are initialized to zero.
REQUIRES: "table_size" is a power of two */
-void BrotliCompressFragmentFast(const uint8_t* input, size_t input_size,
- bool is_last,
- int* table, size_t table_size,
- uint8_t cmd_depth[128], uint16_t cmd_bits[128],
- size_t* cmd_code_numbits, uint8_t* cmd_code,
- size_t* storage_ix, uint8_t* storage);
-
-} // namespace brotli
+BROTLI_INTERNAL void BrotliCompressFragmentFast(MemoryManager* m,
+ const uint8_t* input,
+ size_t input_size,
+ int is_last,
+ int* table, size_t table_size,
+ uint8_t cmd_depth[128],
+ uint16_t cmd_bits[128],
+ size_t* cmd_code_numbits,
+ uint8_t* cmd_code,
+ size_t* storage_ix,
+ uint8_t* storage);
+
+#if defined(__cplusplus) || defined(c_plusplus)
+} /* extern "C" */
+#endif
#endif /* BROTLI_ENC_COMPRESS_FRAGMENT_H_ */
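(Aside: a hypothetical caller shape for the declaration above, mainly to make the REQUIRES concrete: the table is a zero-initialized power-of-two array, and cmd_depth/cmd_bits plus the pre-compressed cmd_code and cmd_code_numbits persist between calls. Seeding them with the default command prefix code, which encode.c's quality-0 path does before the first call, is elided here. Sketch only, assuming the declarations above are in scope:)

    #include <stdlib.h>

    void CompressOneShotQ0(MemoryManager* m, const uint8_t* input,
                           size_t len, size_t* storage_ix,
                           uint8_t* storage) {
      const size_t table_size = 1 << 15;  /* power of two, <= 2^31 */
      int* table = (int*)calloc(table_size, sizeof(int)); /* all zero */
      uint8_t cmd_depth[128];
      uint16_t cmd_bits[128];
      uint8_t cmd_code[512]; /* hypothetical size, per this sketch */
      size_t cmd_code_numbits;
      /* ... seed cmd_depth/cmd_bits/cmd_code/cmd_code_numbits with the
         default command prefix code here ... */
      BrotliCompressFragmentFast(m, input, len, /* is_last */ 1,
                                 table, table_size,
                                 cmd_depth, cmd_bits,
                                 &cmd_code_numbits, cmd_code,
                                 storage_ix, storage);
      free(table);
    }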
diff --git a/enc/compress_fragment_two_pass.c b/enc/compress_fragment_two_pass.c
index cdba6e4..3eb1f94 100644
--- a/enc/compress_fragment_two_pass.c
+++ b/enc/compress_fragment_two_pass.c
@@ -12,7 +12,7 @@
#include "./compress_fragment_two_pass.h"
-#include <algorithm>
+#include <string.h> /* memcmp, memcpy, memset */
#include "../common/types.h"
#include "./bit_cost.h"
@@ -20,10 +20,14 @@
#include "./entropy_encode.h"
#include "./fast_log.h"
#include "./find_match_length.h"
+#include "./memory.h"
#include "./port.h"
#include "./write_bits.h"
-namespace brotli {
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
/* kHashMul32 multiplier has these properties:
* The multiplier must be odd. Otherwise we may lose the highest bit.
@@ -33,19 +37,22 @@ namespace brotli {
* The number has been tuned heuristically against compression benchmarks. */
static const uint32_t kHashMul32 = 0x1e35a7bd;
-static inline uint32_t Hash(const uint8_t* p, size_t shift) {
+static BROTLI_INLINE uint32_t Hash(const uint8_t* p, size_t shift) {
const uint64_t h = (BROTLI_UNALIGNED_LOAD64(p) << 16) * kHashMul32;
- return static_cast<uint32_t>(h >> shift);
+ return (uint32_t)(h >> shift);
}
-static inline uint32_t HashBytesAtOffset(uint64_t v, int offset, size_t shift) {
+static BROTLI_INLINE uint32_t HashBytesAtOffset(
+ uint64_t v, int offset, size_t shift) {
assert(offset >= 0);
assert(offset <= 2);
- const uint64_t h = ((v >> (8 * offset)) << 16) * kHashMul32;
- return static_cast<uint32_t>(h >> shift);
+ {
+ const uint64_t h = ((v >> (8 * offset)) << 16) * kHashMul32;
+ return (uint32_t)(h >> shift);
+ }
}
-static inline int IsMatch(const uint8_t* p1, const uint8_t* p2) {
+static BROTLI_INLINE int IsMatch(const uint8_t* p1, const uint8_t* p2) {
return (BROTLI_UNALIGNED_LOAD32(p1) == BROTLI_UNALIGNED_LOAD32(p2) &&
p1[4] == p2[4] &&
p1[5] == p2[5]);
@@ -58,64 +65,66 @@ static void BuildAndStoreCommandPrefixCode(
uint8_t depth[128], uint16_t bits[128],
size_t* storage_ix, uint8_t* storage) {
/* Tree size for building a tree over 64 symbols is 2 * 64 + 1. */
- static const size_t kTreeSize = 129;
- HuffmanTree tree[kTreeSize];
- CreateHuffmanTree(histogram, 64, 15, tree, depth);
- CreateHuffmanTree(&histogram[64], 64, 14, tree, &depth[64]);
+ HuffmanTree tree[129];
+ uint8_t cmd_depth[BROTLI_NUM_COMMAND_SYMBOLS] = { 0 };
+ uint16_t cmd_bits[64];
+ BrotliCreateHuffmanTree(histogram, 64, 15, tree, depth);
+ BrotliCreateHuffmanTree(&histogram[64], 64, 14, tree, &depth[64]);
  /* We have to jump through a few hoops here in order to compute
the command bits because the symbols are in a different order than in
the full alphabet. This looks complicated, but having the symbols
in this order in the command bits saves a few branches in the Emit*
functions. */
- uint8_t cmd_depth[64];
- uint16_t cmd_bits[64];
memcpy(cmd_depth, depth + 24, 24);
memcpy(cmd_depth + 24, depth, 8);
memcpy(cmd_depth + 32, depth + 48, 8);
memcpy(cmd_depth + 40, depth + 8, 8);
memcpy(cmd_depth + 48, depth + 56, 8);
memcpy(cmd_depth + 56, depth + 16, 8);
- ConvertBitDepthsToSymbols(cmd_depth, 64, cmd_bits);
+ BrotliConvertBitDepthsToSymbols(cmd_depth, 64, cmd_bits);
memcpy(bits, cmd_bits + 24, 16);
memcpy(bits + 8, cmd_bits + 40, 16);
memcpy(bits + 16, cmd_bits + 56, 16);
memcpy(bits + 24, cmd_bits, 48);
memcpy(bits + 48, cmd_bits + 32, 16);
memcpy(bits + 56, cmd_bits + 48, 16);
- ConvertBitDepthsToSymbols(&depth[64], 64, &bits[64]);
+ BrotliConvertBitDepthsToSymbols(&depth[64], 64, &bits[64]);
{
/* Create the bit length array for the full command alphabet. */
- uint8_t cmd_depth[704] = { 0 };
+ size_t i;
+ memset(cmd_depth, 0, 64); /* only 64 first values were used */
memcpy(cmd_depth, depth + 24, 8);
memcpy(cmd_depth + 64, depth + 32, 8);
memcpy(cmd_depth + 128, depth + 40, 8);
memcpy(cmd_depth + 192, depth + 48, 8);
memcpy(cmd_depth + 384, depth + 56, 8);
- for (size_t i = 0; i < 8; ++i) {
+ for (i = 0; i < 8; ++i) {
cmd_depth[128 + 8 * i] = depth[i];
cmd_depth[256 + 8 * i] = depth[8 + i];
cmd_depth[448 + 8 * i] = depth[16 + i];
}
- StoreHuffmanTree(cmd_depth, 704, tree, storage_ix, storage);
+ BrotliStoreHuffmanTree(
+ cmd_depth, BROTLI_NUM_COMMAND_SYMBOLS, tree, storage_ix, storage);
}
- StoreHuffmanTree(&depth[64], 64, tree, storage_ix, storage);
+ BrotliStoreHuffmanTree(&depth[64], 64, tree, storage_ix, storage);
}
-inline void EmitInsertLen(uint32_t insertlen, uint32_t** commands) {
+static BROTLI_INLINE void EmitInsertLen(
+ uint32_t insertlen, uint32_t** commands) {
if (insertlen < 6) {
**commands = insertlen;
} else if (insertlen < 130) {
- insertlen -= 2;
- const uint32_t nbits = Log2FloorNonZero(insertlen) - 1u;
- const uint32_t prefix = insertlen >> nbits;
+ const uint32_t tail = insertlen - 2;
+ const uint32_t nbits = Log2FloorNonZero(tail) - 1u;
+ const uint32_t prefix = tail >> nbits;
const uint32_t inscode = (nbits << 1) + prefix + 2;
- const uint32_t extra = insertlen - (prefix << nbits);
+ const uint32_t extra = tail - (prefix << nbits);
**commands = inscode | (extra << 8);
} else if (insertlen < 2114) {
- insertlen -= 66;
- const uint32_t nbits = Log2FloorNonZero(insertlen);
+ const uint32_t tail = insertlen - 66;
+ const uint32_t nbits = Log2FloorNonZero(tail);
const uint32_t code = nbits + 10;
- const uint32_t extra = insertlen - (1 << nbits);
+ const uint32_t extra = tail - (1u << nbits);
**commands = code | (extra << 8);
} else if (insertlen < 6210) {
const uint32_t extra = insertlen - 2114;
@@ -130,108 +139,103 @@ inline void EmitInsertLen(uint32_t insertlen, uint32_t** commands) {
++(*commands);
}
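(Aside: unlike the one-pass variant, nothing is written to the bit stream here; each command becomes one uint32_t with the code in the low byte and the extra-bit payload above it, and StoreCommands later splits it back apart using kNumExtraBits. A pack/unpack round trip for an insert length of 1000 (the 66..2113 branch); illustration only:)

    #include <stdint.h>
    #include <stdio.h>

    int main(void) {
      const uint32_t tail = 1000 - 66;  /* 934 */
      uint32_t nbits = 0, v = tail;
      uint32_t code, extra, cmd;
      while (v >>= 1) nbits++;          /* Log2FloorNonZero(934) == 9 */
      code = nbits + 10;                /* insert code 19 */
      extra = tail - (1u << nbits);     /* 934 - 512 == 422 */
      cmd = code | (extra << 8);
      /* StoreCommands recovers both halves and writes nbits extra bits,
         per kNumExtraBits[19] == 9: */
      printf("code=%u extra=%u\n",
             (unsigned)(cmd & 0xff), (unsigned)(cmd >> 8));
      return 0;
    }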
-inline void EmitCopyLen(size_t copylen, uint32_t** commands) {
+static BROTLI_INLINE void EmitCopyLen(size_t copylen, uint32_t** commands) {
if (copylen < 10) {
- **commands = static_cast<uint32_t>(copylen + 38);
+ **commands = (uint32_t)(copylen + 38);
} else if (copylen < 134) {
- copylen -= 6;
- const size_t nbits = Log2FloorNonZero(copylen) - 1;
- const size_t prefix = copylen >> nbits;
+ const size_t tail = copylen - 6;
+ const size_t nbits = Log2FloorNonZero(tail) - 1;
+ const size_t prefix = tail >> nbits;
const size_t code = (nbits << 1) + prefix + 44;
- const size_t extra = copylen - (prefix << nbits);
- **commands = static_cast<uint32_t>(code | (extra << 8));
+ const size_t extra = tail - (prefix << nbits);
+ **commands = (uint32_t)(code | (extra << 8));
} else if (copylen < 2118) {
- copylen -= 70;
- const size_t nbits = Log2FloorNonZero(copylen);
+ const size_t tail = copylen - 70;
+ const size_t nbits = Log2FloorNonZero(tail);
const size_t code = nbits + 52;
- const size_t extra = copylen - (1 << nbits);
- **commands = static_cast<uint32_t>(code | (extra << 8));
+ const size_t extra = tail - (1u << nbits);
+ **commands = (uint32_t)(code | (extra << 8));
} else {
const size_t extra = copylen - 2118;
- **commands = static_cast<uint32_t>(63 | (extra << 8));
+ **commands = (uint32_t)(63 | (extra << 8));
}
++(*commands);
}
-inline void EmitCopyLenLastDistance(size_t copylen, uint32_t** commands) {
+static BROTLI_INLINE void EmitCopyLenLastDistance(
+ size_t copylen, uint32_t** commands) {
if (copylen < 12) {
- **commands = static_cast<uint32_t>(copylen + 20);
+ **commands = (uint32_t)(copylen + 20);
++(*commands);
} else if (copylen < 72) {
- copylen -= 8;
- const size_t nbits = Log2FloorNonZero(copylen) - 1;
- const size_t prefix = copylen >> nbits;
+ const size_t tail = copylen - 8;
+ const size_t nbits = Log2FloorNonZero(tail) - 1;
+ const size_t prefix = tail >> nbits;
const size_t code = (nbits << 1) + prefix + 28;
- const size_t extra = copylen - (prefix << nbits);
- **commands = static_cast<uint32_t>(code | (extra << 8));
+ const size_t extra = tail - (prefix << nbits);
+ **commands = (uint32_t)(code | (extra << 8));
++(*commands);
} else if (copylen < 136) {
- copylen -= 8;
- const size_t code = (copylen >> 5) + 54;
- const size_t extra = copylen & 31;
- **commands = static_cast<uint32_t>(code | (extra << 8));
+ const size_t tail = copylen - 8;
+ const size_t code = (tail >> 5) + 54;
+ const size_t extra = tail & 31;
+ **commands = (uint32_t)(code | (extra << 8));
++(*commands);
**commands = 64;
++(*commands);
} else if (copylen < 2120) {
- copylen -= 72;
- const size_t nbits = Log2FloorNonZero(copylen);
+ const size_t tail = copylen - 72;
+ const size_t nbits = Log2FloorNonZero(tail);
const size_t code = nbits + 52;
- const size_t extra = copylen - (1 << nbits);
- **commands = static_cast<uint32_t>(code | (extra << 8));
+ const size_t extra = tail - (1u << nbits);
+ **commands = (uint32_t)(code | (extra << 8));
++(*commands);
**commands = 64;
++(*commands);
} else {
const size_t extra = copylen - 2120;
- **commands = static_cast<uint32_t>(63 | (extra << 8));
+ **commands = (uint32_t)(63 | (extra << 8));
++(*commands);
**commands = 64;
++(*commands);
}
}
-inline void EmitDistance(uint32_t distance, uint32_t** commands) {
- distance += 3;
- uint32_t nbits = Log2FloorNonZero(distance) - 1;
- const uint32_t prefix = (distance >> nbits) & 1;
+static BROTLI_INLINE void EmitDistance(uint32_t distance, uint32_t** commands) {
+ uint32_t d = distance + 3;
+ uint32_t nbits = Log2FloorNonZero(d) - 1;
+ const uint32_t prefix = (d >> nbits) & 1;
const uint32_t offset = (2 + prefix) << nbits;
const uint32_t distcode = 2 * (nbits - 1) + prefix + 80;
- uint32_t extra = distance - offset;
+ uint32_t extra = d - offset;
**commands = distcode | (extra << 8);
++(*commands);
}
/* REQUIRES: len <= 1 << 20. */
-static void StoreMetaBlockHeader(
- size_t len, bool is_uncompressed, size_t* storage_ix, uint8_t* storage) {
+static void BrotliStoreMetaBlockHeader(
+ size_t len, int is_uncompressed, size_t* storage_ix, uint8_t* storage) {
/* ISLAST */
- WriteBits(1, 0, storage_ix, storage);
+ BrotliWriteBits(1, 0, storage_ix, storage);
if (len <= (1U << 16)) {
/* MNIBBLES is 4 */
- WriteBits(2, 0, storage_ix, storage);
- WriteBits(16, len - 1, storage_ix, storage);
+ BrotliWriteBits(2, 0, storage_ix, storage);
+ BrotliWriteBits(16, len - 1, storage_ix, storage);
} else {
/* MNIBBLES is 5 */
- WriteBits(2, 1, storage_ix, storage);
- WriteBits(20, len - 1, storage_ix, storage);
+ BrotliWriteBits(2, 1, storage_ix, storage);
+ BrotliWriteBits(20, len - 1, storage_ix, storage);
}
/* ISUNCOMPRESSED */
- WriteBits(1, is_uncompressed, storage_ix, storage);
+ BrotliWriteBits(1, (uint64_t)is_uncompressed, storage_ix, storage);
}
static void CreateCommands(const uint8_t* input, size_t block_size,
- size_t input_size, const uint8_t* base_ip,
- int* table, size_t table_size,
- uint8_t** literals, uint32_t** commands) {
+ size_t input_size, const uint8_t* base_ip, int* table, size_t table_size,
+ uint8_t** literals, uint32_t** commands) {
/* "ip" is the input pointer. */
const uint8_t* ip = input;
- assert(table_size);
- assert(table_size <= (1u << 31));
- assert((table_size & (table_size - 1)) == 0); // table must be power of two
const size_t shift = 64u - Log2FloorNonZero(table_size);
- assert(table_size - 1 == static_cast<size_t>(
- MAKE_UINT64_T(0xFFFFFFFF, 0xFFFFFF) >> shift));
const uint8_t* ip_end = input + block_size;
/* "next_emit" is a pointer to the first byte that is not covered by a
previous copy. Bytes between "next_emit" and the start of the next copy or
@@ -241,17 +245,25 @@ static void CreateCommands(const uint8_t* input, size_t block_size,
int last_distance = -1;
const size_t kInputMarginBytes = 16;
const size_t kMinMatchLen = 6;
+
+ assert(table_size);
+ assert(table_size <= (1u << 31));
+ /* table must be power of two */
+ assert((table_size & (table_size - 1)) == 0);
+ assert(table_size - 1 ==
+ (size_t)(MAKE_UINT64_T(0xFFFFFFFF, 0xFFFFFF) >> shift));
+
if (PREDICT_TRUE(block_size >= kInputMarginBytes)) {
/* For the last block, we need to keep a 16 bytes margin so that we can be
sure that all distances are at most window size - 16.
For all other blocks, we only need to keep a margin of 5 bytes so that
we don't go over the block size with a copy. */
- const size_t len_limit = std::min(block_size - kMinMatchLen,
- input_size - kInputMarginBytes);
+ const size_t len_limit = BROTLI_MIN(size_t, block_size - kMinMatchLen,
+ input_size - kInputMarginBytes);
const uint8_t* ip_limit = input + len_limit;
- for (uint32_t next_hash = Hash(++ip, shift); ; ) {
- assert(next_emit < ip);
+ uint32_t next_hash;
+ for (next_hash = Hash(++ip, shift); ; ) {
/* Step 1: Scan forward in the input looking for a 6-byte-long match.
If we get close to exhausting the input then goto emit_remainder.
@@ -271,11 +283,14 @@ static void CreateCommands(const uint8_t* input, size_t block_size,
const uint8_t* next_ip = ip;
const uint8_t* candidate;
+
+ assert(next_emit < ip);
+
do {
- ip = next_ip;
uint32_t hash = next_hash;
- assert(hash == Hash(ip, shift));
uint32_t bytes_between_hash_lookups = skip++ >> 5;
+ ip = next_ip;
+ assert(hash == Hash(ip, shift));
next_ip = ip + bytes_between_hash_lookups;
if (PREDICT_FALSE(next_ip > ip_limit)) {
goto emit_remainder;
@@ -284,7 +299,7 @@ static void CreateCommands(const uint8_t* input, size_t block_size,
candidate = ip - last_distance;
if (IsMatch(ip, candidate)) {
if (PREDICT_TRUE(candidate < ip)) {
- table[hash] = static_cast<int>(ip - base_ip);
+ table[hash] = (int)(ip - base_ip);
break;
}
}
@@ -292,33 +307,32 @@ static void CreateCommands(const uint8_t* input, size_t block_size,
assert(candidate >= base_ip);
assert(candidate < ip);
- table[hash] = static_cast<int>(ip - base_ip);
+ table[hash] = (int)(ip - base_ip);
} while (PREDICT_TRUE(!IsMatch(ip, candidate)));
/* Step 2: Emit the found match together with the literal bytes from
"next_emit", and then see if we can find a next macth immediately
afterwards. Repeat until we find no match for the input
without emitting some literal bytes. */
- uint64_t input_bytes;
{
/* We have a 6-byte match at ip, and we need to emit bytes in
[next_emit, ip). */
const uint8_t* base = ip;
size_t matched = 6 + FindMatchLengthWithLimit(
- candidate + 6, ip + 6, static_cast<size_t>(ip_end - ip) - 6);
+ candidate + 6, ip + 6, (size_t)(ip_end - ip) - 6);
+ int distance = (int)(base - candidate); /* > 0 */
+ int insert = (int)(base - next_emit);
ip += matched;
- int distance = static_cast<int>(base - candidate); /* > 0 */
- int insert = static_cast<int>(base - next_emit);
assert(0 == memcmp(base, candidate, matched));
- EmitInsertLen(static_cast<uint32_t>(insert), commands);
- memcpy(*literals, next_emit, static_cast<size_t>(insert));
+ EmitInsertLen((uint32_t)insert, commands);
+ memcpy(*literals, next_emit, (size_t)insert);
*literals += insert;
if (distance == last_distance) {
**commands = 64;
++(*commands);
} else {
- EmitDistance(static_cast<uint32_t>(distance), commands);
+ EmitDistance((uint32_t)distance, commands);
last_distance = distance;
}
EmitCopyLenLastDistance(matched, commands);
@@ -327,25 +341,28 @@ static void CreateCommands(const uint8_t* input, size_t block_size,
if (PREDICT_FALSE(ip >= ip_limit)) {
goto emit_remainder;
}
+ {
/* We could immediately start working at ip now, but to improve
compression we first update "table" with the hashes of some
positions within the last copy. */
- input_bytes = BROTLI_UNALIGNED_LOAD64(ip - 5);
- uint32_t prev_hash = HashBytesAtOffset(input_bytes, 0, shift);
- table[prev_hash] = static_cast<int>(ip - base_ip - 5);
- prev_hash = HashBytesAtOffset(input_bytes, 1, shift);
- table[prev_hash] = static_cast<int>(ip - base_ip - 4);
- prev_hash = HashBytesAtOffset(input_bytes, 2, shift);
- table[prev_hash] = static_cast<int>(ip - base_ip - 3);
- input_bytes = BROTLI_UNALIGNED_LOAD64(ip - 2);
- prev_hash = HashBytesAtOffset(input_bytes, 0, shift);
- table[prev_hash] = static_cast<int>(ip - base_ip - 2);
- prev_hash = HashBytesAtOffset(input_bytes, 1, shift);
- table[prev_hash] = static_cast<int>(ip - base_ip - 1);
-
- uint32_t cur_hash = HashBytesAtOffset(input_bytes, 2, shift);
- candidate = base_ip + table[cur_hash];
- table[cur_hash] = static_cast<int>(ip - base_ip);
+ uint64_t input_bytes = BROTLI_UNALIGNED_LOAD64(ip - 5);
+ uint32_t prev_hash = HashBytesAtOffset(input_bytes, 0, shift);
+ uint32_t cur_hash;
+ table[prev_hash] = (int)(ip - base_ip - 5);
+ prev_hash = HashBytesAtOffset(input_bytes, 1, shift);
+ table[prev_hash] = (int)(ip - base_ip - 4);
+ prev_hash = HashBytesAtOffset(input_bytes, 2, shift);
+ table[prev_hash] = (int)(ip - base_ip - 3);
+ input_bytes = BROTLI_UNALIGNED_LOAD64(ip - 2);
+ cur_hash = HashBytesAtOffset(input_bytes, 2, shift);
+ prev_hash = HashBytesAtOffset(input_bytes, 0, shift);
+ table[prev_hash] = (int)(ip - base_ip - 2);
+ prev_hash = HashBytesAtOffset(input_bytes, 1, shift);
+ table[prev_hash] = (int)(ip - base_ip - 1);
+
+ candidate = base_ip + table[cur_hash];
+ table[cur_hash] = (int)(ip - base_ip);
+ }
}
while (IsMatch(ip, candidate)) {
@@ -353,36 +370,39 @@ static void CreateCommands(const uint8_t* input, size_t block_size,
literal bytes prior to ip. */
const uint8_t* base = ip;
size_t matched = 6 + FindMatchLengthWithLimit(
- candidate + 6, ip + 6, static_cast<size_t>(ip_end - ip) - 6);
+ candidate + 6, ip + 6, (size_t)(ip_end - ip) - 6);
ip += matched;
- last_distance = static_cast<int>(base - candidate); /* > 0 */
+ last_distance = (int)(base - candidate); /* > 0 */
assert(0 == memcmp(base, candidate, matched));
EmitCopyLen(matched, commands);
- EmitDistance(static_cast<uint32_t>(last_distance), commands);
+ EmitDistance((uint32_t)last_distance, commands);
next_emit = ip;
if (PREDICT_FALSE(ip >= ip_limit)) {
goto emit_remainder;
}
+ {
/* We could immediately start working at ip now, but to improve
compression we first update "table" with the hashes of some
positions within the last copy. */
- input_bytes = BROTLI_UNALIGNED_LOAD64(ip - 5);
- uint32_t prev_hash = HashBytesAtOffset(input_bytes, 0, shift);
- table[prev_hash] = static_cast<int>(ip - base_ip - 5);
- prev_hash = HashBytesAtOffset(input_bytes, 1, shift);
- table[prev_hash] = static_cast<int>(ip - base_ip - 4);
- prev_hash = HashBytesAtOffset(input_bytes, 2, shift);
- table[prev_hash] = static_cast<int>(ip - base_ip - 3);
- input_bytes = BROTLI_UNALIGNED_LOAD64(ip - 2);
- prev_hash = HashBytesAtOffset(input_bytes, 0, shift);
- table[prev_hash] = static_cast<int>(ip - base_ip - 2);
- prev_hash = HashBytesAtOffset(input_bytes, 1, shift);
- table[prev_hash] = static_cast<int>(ip - base_ip - 1);
-
- uint32_t cur_hash = HashBytesAtOffset(input_bytes, 2, shift);
- candidate = base_ip + table[cur_hash];
- table[cur_hash] = static_cast<int>(ip - base_ip);
+ uint64_t input_bytes = BROTLI_UNALIGNED_LOAD64(ip - 5);
+ uint32_t prev_hash = HashBytesAtOffset(input_bytes, 0, shift);
+ uint32_t cur_hash;
+ table[prev_hash] = (int)(ip - base_ip - 5);
+ prev_hash = HashBytesAtOffset(input_bytes, 1, shift);
+ table[prev_hash] = (int)(ip - base_ip - 4);
+ prev_hash = HashBytesAtOffset(input_bytes, 2, shift);
+ table[prev_hash] = (int)(ip - base_ip - 3);
+ input_bytes = BROTLI_UNALIGNED_LOAD64(ip - 2);
+ cur_hash = HashBytesAtOffset(input_bytes, 2, shift);
+ prev_hash = HashBytesAtOffset(input_bytes, 0, shift);
+ table[prev_hash] = (int)(ip - base_ip - 2);
+ prev_hash = HashBytesAtOffset(input_bytes, 1, shift);
+ table[prev_hash] = (int)(ip - base_ip - 1);
+
+ candidate = base_ip + table[cur_hash];
+ table[cur_hash] = (int)(ip - base_ip);
+ }
}
next_hash = Hash(++ip, shift);
@@ -393,40 +413,17 @@ emit_remainder:
assert(next_emit <= ip_end);
/* Emit the remaining bytes as literals. */
if (next_emit < ip_end) {
- const uint32_t insert = static_cast<uint32_t>(ip_end - next_emit);
+ const uint32_t insert = (uint32_t)(ip_end - next_emit);
EmitInsertLen(insert, commands);
memcpy(*literals, next_emit, insert);
*literals += insert;
}
}
-static void StoreCommands(const uint8_t* literals, const size_t num_literals,
+static void StoreCommands(MemoryManager* m,
+ const uint8_t* literals, const size_t num_literals,
const uint32_t* commands, const size_t num_commands,
size_t* storage_ix, uint8_t* storage) {
- uint8_t lit_depths[256] = { 0 };
- uint16_t lit_bits[256] = { 0 };
- uint32_t lit_histo[256] = { 0 };
- for (size_t i = 0; i < num_literals; ++i) {
- ++lit_histo[literals[i]];
- }
- BuildAndStoreHuffmanTreeFast(lit_histo, num_literals,
- /* max_bits = */ 8,
- lit_depths, lit_bits,
- storage_ix, storage);
-
- uint8_t cmd_depths[128] = { 0 };
- uint16_t cmd_bits[128] = { 0 };
- uint32_t cmd_histo[128] = { 0 };
- for (size_t i = 0; i < num_commands; ++i) {
- ++cmd_histo[commands[i] & 0xff];
- }
- cmd_histo[1] += 1;
- cmd_histo[2] += 1;
- cmd_histo[64] += 1;
- cmd_histo[84] += 1;
- BuildAndStoreCommandPrefixCode(cmd_histo, cmd_depths, cmd_bits,
- storage_ix, storage);
-
static const uint32_t kNumExtraBits[128] = {
0, 0, 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 7, 8, 9, 10, 12, 14, 24,
0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4,
@@ -441,45 +438,73 @@ static void StoreCommands(const uint8_t* literals, const size_t num_literals,
1090, 2114, 6210, 22594,
};
- for (size_t i = 0; i < num_commands; ++i) {
+ uint8_t lit_depths[256];
+ uint16_t lit_bits[256];
+ uint32_t lit_histo[256] = { 0 };
+ uint8_t cmd_depths[128] = { 0 };
+ uint16_t cmd_bits[128] = { 0 };
+ uint32_t cmd_histo[128] = { 0 };
+ size_t i;
+ for (i = 0; i < num_literals; ++i) {
+ ++lit_histo[literals[i]];
+ }
+ BrotliBuildAndStoreHuffmanTreeFast(m, lit_histo, num_literals,
+ /* max_bits = */ 8,
+ lit_depths, lit_bits,
+ storage_ix, storage);
+ if (BROTLI_IS_OOM(m)) return;
+
+ for (i = 0; i < num_commands; ++i) {
+ ++cmd_histo[commands[i] & 0xff];
+ }
+ cmd_histo[1] += 1;
+ cmd_histo[2] += 1;
+ cmd_histo[64] += 1;
+ cmd_histo[84] += 1;
+ BuildAndStoreCommandPrefixCode(cmd_histo, cmd_depths, cmd_bits,
+ storage_ix, storage);
+
+ for (i = 0; i < num_commands; ++i) {
const uint32_t cmd = commands[i];
const uint32_t code = cmd & 0xff;
const uint32_t extra = cmd >> 8;
- WriteBits(cmd_depths[code], cmd_bits[code], storage_ix, storage);
- WriteBits(kNumExtraBits[code], extra, storage_ix, storage);
+ BrotliWriteBits(cmd_depths[code], cmd_bits[code], storage_ix, storage);
+ BrotliWriteBits(kNumExtraBits[code], extra, storage_ix, storage);
if (code < 24) {
const uint32_t insert = kInsertOffset[code] + extra;
- for (uint32_t j = 0; j < insert; ++j) {
+ uint32_t j;
+ for (j = 0; j < insert; ++j) {
const uint8_t lit = *literals;
- WriteBits(lit_depths[lit], lit_bits[lit], storage_ix, storage);
+ BrotliWriteBits(lit_depths[lit], lit_bits[lit], storage_ix, storage);
++literals;
}
}
}
}
-static bool ShouldCompress(const uint8_t* input, size_t input_size,
- size_t num_literals) {
- static const double kAcceptableLossForUncompressibleSpeedup = 0.02;
- static const double kMaxRatioOfLiterals =
- 1.0 - kAcceptableLossForUncompressibleSpeedup;
- if (num_literals < kMaxRatioOfLiterals * static_cast<double>(input_size)) {
- return true;
- }
- uint32_t literal_histo[256] = { 0 };
- static const uint32_t kSampleRate = 43;
- static const double kMaxEntropy =
- 8 * (1.0 - kAcceptableLossForUncompressibleSpeedup);
- const double max_total_bit_cost =
- static_cast<double>(input_size) * kMaxEntropy / kSampleRate;
- for (size_t i = 0; i < input_size; i += kSampleRate) {
- ++literal_histo[input[i]];
+/* Acceptable loss for uncompressible speedup is 2% */
+#define MIN_RATIO 0.98
+#define SAMPLE_RATE 43
+
+static int ShouldCompress(const uint8_t* input, size_t input_size,
+ size_t num_literals) {
+ double corpus_size = (double)input_size;
+ if (num_literals < MIN_RATIO * corpus_size) {
+ return 1;
+ } else {
+ uint32_t literal_histo[256] = { 0 };
+ const double max_total_bit_cost = corpus_size * 8 * MIN_RATIO / SAMPLE_RATE;
+ size_t i;
+ for (i = 0; i < input_size; i += SAMPLE_RATE) {
+ ++literal_histo[input[i]];
+ }
+ return BitsEntropy(literal_histo, 256) < max_total_bit_cost;
}
- return BitsEntropy(literal_histo, 256) < max_total_bit_cost;
}
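(Aside: the gate above fires in two stages: if backward references already absorbed more than 2% of the input, compress; otherwise sample every 43rd byte and compress only if the sampled entropy beats 98% of the raw size. A standalone rendering, approximating BitsEntropy with a plain Shannon sum; illustration only. Note the byte ramp below is highly compressible, yet the order-0 sample cannot see that; this is exactly the trade the fast path makes:)

    #include <math.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    static int should_compress(const uint8_t* input, size_t input_size,
                               size_t num_literals) {
      if ((double)num_literals < 0.98 * (double)input_size) return 1;
      {
        uint32_t histo[256] = { 0 };
        double total = 0.0, bits = 0.0;
        size_t i;
        for (i = 0; i < input_size; i += 43) ++histo[input[i]];
        for (i = 0; i < 256; ++i) total += histo[i];
        for (i = 0; i < 256; ++i) {
          if (histo[i]) bits -= histo[i] * log2(histo[i] / total);
        }
        return bits < (double)input_size * 8 * 0.98 / 43;
      }
    }

    int main(void) {
      static uint8_t ramp[65536], text[65536];
      size_t i;
      for (i = 0; i < sizeof(ramp); ++i) ramp[i] = (uint8_t)i;
      memset(text, 'a', sizeof(text));
      /* ramp: sampled histogram looks uniform -> 0 (store raw);
         text: sampled entropy is ~0 -> 1 (compress). */
      printf("ramp: %d  text: %d\n",
             should_compress(ramp, sizeof(ramp), sizeof(ramp)),
             should_compress(text, sizeof(text), sizeof(text)));
      return 0;
    }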
-void BrotliCompressFragmentTwoPass(const uint8_t* input, size_t input_size,
- bool is_last,
+void BrotliCompressFragmentTwoPass(MemoryManager* m,
+ const uint8_t* input, size_t input_size,
+ int is_last,
uint32_t* command_buf, uint8_t* literal_buf,
int* table, size_t table_size,
size_t* storage_ix, uint8_t* storage) {
@@ -488,24 +513,27 @@ void BrotliCompressFragmentTwoPass(const uint8_t* input, size_t input_size,
const uint8_t* base_ip = input;
while (input_size > 0) {
- size_t block_size = std::min(input_size, kCompressFragmentTwoPassBlockSize);
+ size_t block_size =
+ BROTLI_MIN(size_t, input_size, kCompressFragmentTwoPassBlockSize);
uint32_t* commands = command_buf;
uint8_t* literals = literal_buf;
+ size_t num_literals;
CreateCommands(input, block_size, input_size, base_ip, table, table_size,
&literals, &commands);
- const size_t num_literals = static_cast<size_t>(literals - literal_buf);
- const size_t num_commands = static_cast<size_t>(commands - command_buf);
+ num_literals = (size_t)(literals - literal_buf);
if (ShouldCompress(input, block_size, num_literals)) {
- StoreMetaBlockHeader(block_size, 0, storage_ix, storage);
+ const size_t num_commands = (size_t)(commands - command_buf);
+ BrotliStoreMetaBlockHeader(block_size, 0, storage_ix, storage);
/* No block splits, no contexts. */
- WriteBits(13, 0, storage_ix, storage);
- StoreCommands(literal_buf, num_literals, command_buf, num_commands,
+ BrotliWriteBits(13, 0, storage_ix, storage);
+ StoreCommands(m, literal_buf, num_literals, command_buf, num_commands,
storage_ix, storage);
+ if (BROTLI_IS_OOM(m)) return;
} else {
/* Since we did not find many backward references and the entropy of
the data is close to 8 bits, we can simply emit an uncompressed block.
This makes compression speed of uncompressible data about 3x faster. */
- StoreMetaBlockHeader(block_size, 1, storage_ix, storage);
+ BrotliStoreMetaBlockHeader(block_size, 1, storage_ix, storage);
*storage_ix = (*storage_ix + 7u) & ~7u;
memcpy(&storage[*storage_ix >> 3], input, block_size);
*storage_ix += block_size << 3;
@@ -516,10 +544,12 @@ void BrotliCompressFragmentTwoPass(const uint8_t* input, size_t input_size,
}
if (is_last) {
- WriteBits(1, 1, storage_ix, storage); /* islast */
- WriteBits(1, 1, storage_ix, storage); /* isempty */
+ BrotliWriteBits(1, 1, storage_ix, storage); /* islast */
+ BrotliWriteBits(1, 1, storage_ix, storage); /* isempty */
*storage_ix = (*storage_ix + 7u) & ~7u;
}
}
-} // namespace brotli
+#if defined(__cplusplus) || defined(c_plusplus)
+} /* extern "C" */
+#endif
diff --git a/enc/compress_fragment_two_pass.h b/enc/compress_fragment_two_pass.h
index 8efe48a..75869cc 100644
--- a/enc/compress_fragment_two_pass.h
+++ b/enc/compress_fragment_two_pass.h
@@ -14,8 +14,12 @@
#define BROTLI_ENC_COMPRESS_FRAGMENT_TWO_PASS_H_
#include "../common/types.h"
+#include "./memory.h"
+#include "./port.h"
-namespace brotli {
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
static const size_t kCompressFragmentTwoPassBlockSize = 1 << 17;
@@ -29,12 +33,19 @@ static const size_t kCompressFragmentTwoPassBlockSize = 1 << 17;
kCompressFragmentTwoPassBlockSize long arrays.
REQUIRES: All elements in "table[0..table_size-1]" are initialized to zero.
REQUIRES: "table_size" is a power of two */
-void BrotliCompressFragmentTwoPass(const uint8_t* input, size_t input_size,
- bool is_last,
- uint32_t* command_buf, uint8_t* literal_buf,
- int* table, size_t table_size,
- size_t* storage_ix, uint8_t* storage);
-
-} // namespace brotli
+BROTLI_INTERNAL void BrotliCompressFragmentTwoPass(MemoryManager* m,
+ const uint8_t* input,
+ size_t input_size,
+ int is_last,
+ uint32_t* command_buf,
+ uint8_t* literal_buf,
+ int* table,
+ size_t table_size,
+ size_t* storage_ix,
+ uint8_t* storage);
+
+#if defined(__cplusplus) || defined(c_plusplus)
+} /* extern "C" */
+#endif
#endif /* BROTLI_ENC_COMPRESS_FRAGMENT_TWO_PASS_H_ */
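(Aside: caller-side setup implied by the REQUIRES above. Both scratch buffers must hold at least kCompressFragmentTwoPassBlockSize entries, and the hash table must start out all-zero with a power-of-two size. Sketch only, assuming the declarations above are in scope; in practice encode.c owns these buffers:)

    #include <stdlib.h>

    static int AllocTwoPassScratch(uint32_t** command_buf,
                                   uint8_t** literal_buf,
                                   int** table, size_t table_size) {
      /* table_size must be a power of two, per the REQUIRES above. */
      *command_buf = (uint32_t*)malloc(
          kCompressFragmentTwoPassBlockSize * sizeof(uint32_t));
      *literal_buf = (uint8_t*)malloc(kCompressFragmentTwoPassBlockSize);
      *table = (int*)calloc(table_size, sizeof(int)); /* all-zero */
      return *command_buf != NULL && *literal_buf != NULL && *table != NULL;
    }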
diff --git a/enc/compressor.cc b/enc/compressor.cc
new file mode 100644
index 0000000..c53e4b3
--- /dev/null
+++ b/enc/compressor.cc
@@ -0,0 +1,138 @@
+/* Copyright 2016 Google Inc. All Rights Reserved.
+
+ Distributed under MIT license.
+ See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* Brotli compressor API C++ wrapper and utilities. */
+
+#include "./compressor.h"
+
+#include <cstdlib> /* exit */
+
+namespace brotli {
+
+static void ConvertParams(const BrotliParams* from, BrotliEncoderParams* to) {
+ BrotliEncoderParamsSetDefault(to);
+ if (from->mode == BrotliParams::MODE_TEXT) {
+ to->mode = BROTLI_MODE_TEXT;
+ } else if (from->mode == BrotliParams::MODE_FONT) {
+ to->mode = BROTLI_MODE_FONT;
+ }
+ to->quality = from->quality;
+ to->lgwin = from->lgwin;
+ to->lgblock = from->lgblock;
+}
+
+BrotliCompressor::BrotliCompressor(BrotliParams params) {
+ BrotliEncoderParams encoder_params;
+ ConvertParams(&params, &encoder_params);
+ state_ = BrotliEncoderCreateState(&encoder_params, 0, 0, 0);
+ if (state_ == 0) std::exit(EXIT_FAILURE); /* OOM */
+}
+
+BrotliCompressor::~BrotliCompressor(void) { BrotliEncoderDestroyState(state_); }
+
+bool BrotliCompressor::WriteMetaBlock(const size_t input_size,
+ const uint8_t* input_buffer,
+ const bool is_last, size_t* encoded_size,
+ uint8_t* encoded_buffer) {
+ return !!BrotliEncoderWriteMetaBlock(state_, input_size, input_buffer,
+ is_last ? 1 : 0, encoded_size,
+ encoded_buffer);
+}
+
+bool BrotliCompressor::WriteMetadata(const size_t input_size,
+ const uint8_t* input_buffer,
+ const bool is_last, size_t* encoded_size,
+ uint8_t* encoded_buffer) {
+ return !!BrotliEncoderWriteMetadata(state_, input_size, input_buffer,
+ is_last ? 1 : 0, encoded_size,
+ encoded_buffer);
+}
+
+bool BrotliCompressor::FinishStream(size_t* encoded_size,
+ uint8_t* encoded_buffer) {
+ return !!BrotliEncoderFinishStream(state_, encoded_size, encoded_buffer);
+}
+
+void BrotliCompressor::CopyInputToRingBuffer(const size_t input_size,
+ const uint8_t* input_buffer) {
+ BrotliEncoderCopyInputToRingBuffer(state_, input_size, input_buffer);
+}
+
+bool BrotliCompressor::WriteBrotliData(const bool is_last,
+ const bool force_flush, size_t* out_size,
+ uint8_t** output) {
+ return !!BrotliEncoderWriteData(
+ state_, is_last ? 1 : 0, force_flush ? 1 : 0, out_size, output);
+}
+
+void BrotliCompressor::BrotliSetCustomDictionary(size_t size,
+ const uint8_t* dict) {
+ BrotliEncoderSetCustomDictionary(state_, size, dict);
+}
+
+int BrotliCompressBuffer(BrotliParams params, size_t input_size,
+ const uint8_t* input_buffer, size_t* encoded_size,
+ uint8_t* encoded_buffer) {
+ return BrotliEncoderCompress(params.quality, params.lgwin,
+ (BrotliEncoderMode)params.mode, input_size, input_buffer,
+ encoded_size, encoded_buffer);
+}
+
+int BrotliCompress(BrotliParams params, BrotliIn* in, BrotliOut* out) {
+ return BrotliCompressWithCustomDictionary(0, 0, params, in, out);
+}
+
+int BrotliCompressWithCustomDictionary(size_t dictsize, const uint8_t* dict,
+ BrotliParams params, BrotliIn* in,
+ BrotliOut* out) {
+ const size_t kOutputBufferSize = 65536;
+ uint8_t* output_buffer;
+ bool result = true;
+ size_t available_in = 0;
+ const uint8_t* next_in = NULL;
+ size_t total_out = 0;
+ bool end_of_input = false;
+ BrotliEncoderParams encoder_params;
+ BrotliEncoderState* s;
+
+ ConvertParams(&params, &encoder_params);
+ s = BrotliEncoderCreateState(&encoder_params, 0, 0, 0);
+ if (!s) return 0;
+ BrotliEncoderSetCustomDictionary(s, dictsize, dict);
+ output_buffer = new uint8_t[kOutputBufferSize];
+
+ while (true) {
+ if (available_in == 0 && !end_of_input) {
+ next_in = reinterpret_cast<const uint8_t*>(
+ in->Read(BrotliEncoderInputBlockSize(s), &available_in));
+ if (!next_in) {
+ end_of_input = true;
+ available_in = 0;
+ } else if (available_in == 0) {
+ continue;
+ }
+ }
+ size_t available_out = kOutputBufferSize;
+ uint8_t* next_out = output_buffer;
+ result = !!BrotliEncoderCompressStream(
+ s, end_of_input ? BROTLI_OPERATION_FINISH : BROTLI_OPERATION_PROCESS,
+ &available_in, &next_in, &available_out, &next_out, &total_out);
+ if (!result) break;
+ size_t used_output = kOutputBufferSize - available_out;
+ if (used_output != 0) {
+ result = out->Write(output_buffer, used_output);
+ if (!result) break;
+ }
+ if (BrotliEncoderIsFinished(s)) break;
+ }
+
+ delete[] output_buffer;
+ BrotliEncoderDestroyState(s);
+ return result ? 1 : 0;
+}
+
+
+} /* namespace brotli */
diff --git a/enc/compressor.h b/enc/compressor.h
index 1dfc42e..9f981bb 100644
--- a/enc/compressor.h
+++ b/enc/compressor.h
@@ -12,4 +12,145 @@
#include "./encode.h"
#include "./streams.h"
+namespace brotli {
+
+struct BrotliParams {
+ BrotliParams(void)
+ : mode(MODE_GENERIC),
+ quality(11),
+ lgwin(22),
+ lgblock(0),
+ enable_dictionary(true),
+ enable_transforms(false),
+ greedy_block_split(false),
+ enable_context_modeling(true) {}
+
+ enum Mode {
+ /* Default compression mode. The compressor does not know anything in
+ advance about the properties of the input. */
+ MODE_GENERIC = 0,
+ /* Compression mode for UTF-8 format text input. */
+ MODE_TEXT = 1,
+ /* Compression mode used in WOFF 2.0. */
+ MODE_FONT = 2
+ };
+ Mode mode;
+
+ /* Controls the compression-speed vs compression-density tradeoffs. The higher
+ the |quality|, the slower the compression. Range is 0 to 11. */
+ int quality;
+ /* Base 2 logarithm of the sliding window size. Range is 10 to 24. */
+ int lgwin;
+ /* Base 2 logarithm of the maximum input block size. Range is 16 to 24.
+ If set to 0, the value will be set based on the quality. */
+ int lgblock;
+
+ /* These settings are deprecated and will be ignored.
+ All speed vs. size compromises are controlled by the |quality| param. */
+ bool enable_dictionary;
+ bool enable_transforms;
+ bool greedy_block_split;
+ bool enable_context_modeling;
+};
+
+/* An instance cannot be reused for multiple brotli streams. */
+class BrotliCompressor {
+ public:
+ explicit BrotliCompressor(BrotliParams params);
+ ~BrotliCompressor(void);
+
+ /* The maximum input size that can be processed at once. */
+ size_t input_block_size(void) const {
+ return BrotliEncoderInputBlockSize(state_);
+ }
+
+ /* Encodes the data in |input_buffer| as a meta-block and writes it to
+     |encoded_buffer| (|*encoded_size| should be set to the size of
+     |encoded_buffer|) and sets |*encoded_size| to the number of bytes that
+     were written. |input_size| must not be greater than input_block_size().
+ Returns false if there was an error and true otherwise. */
+ bool WriteMetaBlock(const size_t input_size,
+ const uint8_t* input_buffer,
+ const bool is_last,
+ size_t* encoded_size,
+ uint8_t* encoded_buffer);
+
+  /* Writes a metadata meta-block containing the given input to
+     |encoded_buffer|. |*encoded_size| should be set to the size of
+     |encoded_buffer| and is set to the number of bytes that were written.
+     Note that the given input data will not be part of the sliding window and
+     thus no backward references can be made to this data from subsequent
+     meta-blocks. |input_size| must not be greater than 2^24 and the provided
+     |*encoded_size| must not be less than |input_size| + 6.
+     Returns false if there was an error and true otherwise. */
+ bool WriteMetadata(const size_t input_size,
+ const uint8_t* input_buffer,
+ const bool is_last,
+ size_t* encoded_size,
+ uint8_t* encoded_buffer);
+
+ /* Writes a zero-length meta-block with end-of-input bit set to the
+ internal output buffer and copies the output buffer to |encoded_buffer|
+ (|*encoded_size| should be set to the size of |encoded_buffer|) and sets
+ |*encoded_size| to the number of bytes written.
+ Returns false if there was an error and true otherwise. */
+ bool FinishStream(size_t* encoded_size, uint8_t* encoded_buffer);
+
+ /* Copies the given input data to the internal ring buffer of the compressor.
+ No processing of the data occurs at this time and this function can be
+ called multiple times before calling WriteBrotliData() to process the
+ accumulated input. At most input_block_size() bytes of input data can be
+ copied to the ring buffer, otherwise the next WriteBrotliData() will fail.
+ */
+ void CopyInputToRingBuffer(const size_t input_size,
+ const uint8_t* input_buffer);
+
+ /* Processes the accumulated input data and sets |*out_size| to the length of
+ the new output meta-block, or to zero if no new output meta-block has been
+ created (in this case the processed input data is buffered internally).
+ If |*out_size| is positive, |*output| points to the start of the output
+ data. If |is_last| or |force_flush| is true, an output meta-block is always
+     created. However, until |is_last| is true the encoder may retain up to
+     7 bits of the last byte of output. To force the encoder to dump the
+     remaining bits, use WriteMetadata() to append an empty meta-data block.
+ Returns false if the size of the input data is larger than
+ input_block_size(). */
+ bool WriteBrotliData(const bool is_last, const bool force_flush,
+ size_t* out_size, uint8_t** output);
+
+ /* Fills the new state with a dictionary for LZ77, warming up the ringbuffer,
+ e.g. for custom static dictionaries for data formats.
+ Not to be confused with the built-in transformable dictionary of Brotli.
+ To decode, use BrotliSetCustomDictionary() of the decoder with the same
+ dictionary. */
+ void BrotliSetCustomDictionary(size_t size, const uint8_t* dict);
+
+ /* No-op, but we keep it here for API backward-compatibility. */
+ void WriteStreamHeader(void) {}
+
+ private:
+ BrotliEncoderState* state_;
+};
+
+/* Compresses the data in |input_buffer| into |encoded_buffer| (|*encoded_size|
+   should be set to the size of |encoded_buffer|), and sets |*encoded_size|
+   to the compressed length.
+   Returns 0 if there was an error and 1 otherwise. */
+int BrotliCompressBuffer(BrotliParams params,
+ size_t input_size,
+ const uint8_t* input_buffer,
+ size_t* encoded_size,
+ uint8_t* encoded_buffer);
+
+/* Same as above, but uses the specified input and output classes instead
+ of reading from and writing to pre-allocated memory buffers. */
+int BrotliCompress(BrotliParams params, BrotliIn* in, BrotliOut* out);
+
+/* Before compressing the data, sets a custom LZ77 dictionary with
+ BrotliCompressor::BrotliSetCustomDictionary. */
+int BrotliCompressWithCustomDictionary(size_t dictsize, const uint8_t* dict,
+ BrotliParams params,
+ BrotliIn* in, BrotliOut* out);
+
+} /* namespace brotli */
+
#endif /* BROTLI_ENC_COMPRESSOR_H_ */
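Everything in this header is now a thin veneer over the C entry points in
encode.h. A minimal sketch of the equivalent one-shot call in plain C; the
helper name OneShotSketch is illustrative, and as with the wrapper above,
|*encoded_size| carries the buffer capacity in and the compressed length out.

    #include "./encode.h"

    /* Illustrative one-shot helper over the C API that BrotliCompressBuffer
       delegates to. *encoded_size: capacity in, compressed length out. */
    static int OneShotSketch(const uint8_t* input, size_t input_size,
                             uint8_t* encoded, size_t* encoded_size) {
      return BrotliEncoderCompress(
          BROTLI_DEFAULT_QUALITY, BROTLI_DEFAULT_WINDOW, BROTLI_DEFAULT_MODE,
          input_size, input, encoded_size, encoded);
    }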
diff --git a/enc/context.h b/enc/context.h
index 1edc9e1..c50f6be 100644
--- a/enc/context.h
+++ b/enc/context.h
@@ -10,8 +10,11 @@
#define BROTLI_ENC_CONTEXT_H_
#include "../common/types.h"
+#include "../common/port.h"
-namespace brotli {
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
/* Second-order context lookup table for UTF8 byte streams.
@@ -151,29 +154,31 @@ static const uint8_t kSigned3BitContextLookup[] = {
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7,
};
-enum ContextType {
+typedef enum ContextType {
CONTEXT_LSB6 = 0,
CONTEXT_MSB6 = 1,
CONTEXT_UTF8 = 2,
CONTEXT_SIGNED = 3
-};
+} ContextType;
-static inline uint8_t Context(uint8_t p1, uint8_t p2, ContextType mode) {
+static BROTLI_INLINE uint8_t Context(uint8_t p1, uint8_t p2, ContextType mode) {
switch (mode) {
case CONTEXT_LSB6:
return p1 & 0x3f;
case CONTEXT_MSB6:
- return static_cast<uint8_t>(p1 >> 2);
+ return (uint8_t)(p1 >> 2);
case CONTEXT_UTF8:
return kUTF8ContextLookup[p1] | kUTF8ContextLookup[p2 + 256];
case CONTEXT_SIGNED:
- return static_cast<uint8_t>((kSigned3BitContextLookup[p1] << 3) +
- kSigned3BitContextLookup[p2]);
+ return (uint8_t)((kSigned3BitContextLookup[p1] << 3) +
+ kSigned3BitContextLookup[p2]);
default:
return 0;
}
}
-} // namespace brotli
+#if defined(__cplusplus) || defined(c_plusplus)
+} /* extern "C" */
+#endif
#endif /* BROTLI_ENC_CONTEXT_H_ */
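Each mode reduces the previous two bytes to a 6-bit literal-context id:
CONTEXT_LSB6 and CONTEXT_MSB6 look only at |p1|, while the UTF-8 and signed
modes combine table lookups on both bytes. A small, self-contained
demonstration; the byte values are arbitrary examples.

    #include <stdio.h>
    #include "./context.h"

    int main(void) {
      uint8_t p1 = 'e';  /* Most recent byte. */
      uint8_t p2 = 'h';  /* Byte before that. */
      printf("LSB6:   %d\n", Context(p1, p2, CONTEXT_LSB6));    /* p1 & 0x3f */
      printf("MSB6:   %d\n", Context(p1, p2, CONTEXT_MSB6));    /* p1 >> 2 */
      printf("UTF8:   %d\n", Context(p1, p2, CONTEXT_UTF8));
      printf("SIGNED: %d\n", Context(p1, p2, CONTEXT_SIGNED));
      return 0;
    }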
diff --git a/enc/dictionary_hash.h b/enc/dictionary_hash.h
index a6b3dd3..92576b3 100644
--- a/enc/dictionary_hash.h
+++ b/enc/dictionary_hash.h
@@ -11,7 +11,9 @@
#include "../common/types.h"
-namespace brotli {
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
static const uint16_t kStaticDictionaryHash[] = {
0x7d48, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
@@ -4112,6 +4114,8 @@ static const uint16_t kStaticDictionaryHash[] = {
0x0000, 0x0000, 0x0d88, 0x4ac5, 0x0000, 0x0000, 0x0000, 0x0000,
};
-} // namespace brotli
+#if defined(__cplusplus) || defined(c_plusplus)
+} /* extern "C" */
+#endif
#endif /* BROTLI_ENC_DICTIONARY_HASH_H_ */
diff --git a/enc/encode.c b/enc/encode.c
index 86165fd..d234213 100644
--- a/enc/encode.c
+++ b/enc/encode.c
@@ -8,16 +8,12 @@
#include "./encode.h"
-#include <algorithm>
-#include <cstdlib> /* free, malloc */
-#include <cstring> /* memset */
-#include <limits>
+#include <stdlib.h> /* free, malloc */
+#include <string.h> /* memcpy, memset */
#include "./backward_references.h"
#include "./bit_cost.h"
-#include "./block_splitter.h"
#include "./brotli_bit_stream.h"
-#include "./cluster.h"
#include "./compress_fragment.h"
#include "./compress_fragment_two_pass.h"
#include "./context.h"
@@ -25,13 +21,17 @@
#include "./fast_log.h"
#include "./hash.h"
#include "./histogram.h"
+#include "./memory.h"
#include "./metablock.h"
+#include "./port.h"
#include "./prefix.h"
-#include "./transform.h"
+#include "./ringbuffer.h"
#include "./utf8_util.h"
#include "./write_bits.h"
-namespace brotli {
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
static const int kMinQualityForBlockSplit = 4;
static const int kMinQualityForContextModeling = 5;
@@ -42,17 +42,134 @@ static const size_t kMaxNumDelayedSymbols = 0x2fff;
#define COPY_ARRAY(dst, src) memcpy(dst, src, sizeof(src));
+void BrotliEncoderParamsSetDefault(BrotliEncoderParams* self) {
+ self->mode = BROTLI_DEFAULT_MODE;
+ self->quality = BROTLI_DEFAULT_QUALITY;
+ self->lgwin = BROTLI_DEFAULT_WINDOW;
+ self->lgblock = 0;
+}
+
+typedef enum BrotliEncoderStreamState {
+ /* Default state. */
+ BROTLI_STREAM_PROCESSING = 0,
+ /* Intermediate state; after next block is emitted, byte-padding should be
+ performed before getting back to default state. */
+ BROTLI_STREAM_FLUSH_REQUESTED = 1,
+ /* Last metablock was produced; no more input is acceptable. */
+ BROTLI_STREAM_FINISHED = 2
+} BrotliEncoderStreamState;
+
+typedef struct BrotliEncoderStateStruct {
+ BrotliEncoderParams params_;
+ MemoryManager memory_manager_;
+
+ Hashers hashers_;
+ int hash_type_;
+ uint64_t input_pos_;
+ RingBuffer ringbuffer_;
+ size_t cmd_alloc_size_;
+ Command* commands_;
+ size_t num_commands_;
+ size_t num_literals_;
+ size_t last_insert_len_;
+ uint64_t last_flush_pos_;
+ uint64_t last_processed_pos_;
+ int dist_cache_[4];
+ int saved_dist_cache_[4];
+ uint8_t last_byte_;
+ uint8_t last_byte_bits_;
+ uint8_t prev_byte_;
+ uint8_t prev_byte2_;
+ size_t storage_size_;
+ uint8_t* storage_;
+ /* Hash table for |quality| 0 mode. */
+ int small_table_[1 << 10]; /* 4KiB */
+ int* large_table_; /* Allocated only when needed */
+ size_t large_table_size_;
+ /* Command and distance prefix codes (each 64 symbols, stored back-to-back)
+ used for the next block in |quality| 0. The command prefix code is over a
+ smaller alphabet with the following 64 symbols:
+ 0 - 15: insert length code 0, copy length code 0 - 15, same distance
+ 16 - 39: insert length code 0, copy length code 0 - 23
+ 40 - 63: insert length code 0 - 23, copy length code 0
+ Note that symbols 16 and 40 represent the same code in the full alphabet,
+ but we do not use either of them in |quality| 0. */
+ uint8_t cmd_depths_[128];
+ uint16_t cmd_bits_[128];
+ /* The compressed form of the command and distance prefix codes for the next
+ block in |quality| 0. */
+ uint8_t cmd_code_[512];
+ size_t cmd_code_numbits_;
+ /* Command and literal buffers for quality 1. */
+ uint32_t* command_buf_;
+ uint8_t* literal_buf_;
+
+ uint8_t* next_out_;
+ size_t available_out_;
+ size_t total_out_;
+ uint8_t flush_buf_[2];
+ BrotliEncoderStreamState stream_state_;
+
+ int is_last_block_emitted_;
+ int is_initialized_;
+} BrotliEncoderStateStruct;
+
+static int EnsureInitialized(BrotliEncoderState* s);
+
+size_t BrotliEncoderInputBlockSize(BrotliEncoderState* s) {
+ if (!EnsureInitialized(s)) return 0;
+ return 1u << s->params_.lgblock;
+}
+
+static uint64_t UnprocessedInputSize(BrotliEncoderState* s) {
+ return s->input_pos_ - s->last_processed_pos_;
+}
+
+static size_t RemainingInputBlockSize(BrotliEncoderState* s) {
+ const uint64_t delta = UnprocessedInputSize(s);
+ size_t block_size = BrotliEncoderInputBlockSize(s);
+ if (delta >= block_size) return 0;
+ return block_size - (size_t)delta;
+}
+
+int BrotliEncoderSetParameter(
+ BrotliEncoderState* state, BrotliEncoderParameter p, uint32_t value) {
+ /* Changing parameters on the fly is not implemented yet. */
+ if (state->is_initialized_) return 0;
+ /* TODO: Validate/clamp params here. */
+ switch (p) {
+ case BROTLI_PARAM_MODE:
+ state->params_.mode = (BrotliEncoderMode)value;
+ return 1;
+
+ case BROTLI_PARAM_QUALITY:
+ state->params_.quality = (int)value;
+ return 1;
+
+ case BROTLI_PARAM_LGWIN:
+ state->params_.lgwin = (int)value;
+ return 1;
+
+ case BROTLI_PARAM_LGBLOCK:
+ state->params_.lgblock = (int)value;
+ return 1;
+
+ default: return 0;
+ }
+}
+
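A minimal sketch of driving the new setter, assuming the instance-management
functions defined later in this file: NULL callbacks make
BrotliEncoderCreateInstance fall back to malloc/free, and, per the guard
above, setters succeed only before the state is first used.

    static void ConfigureSketch(void) {
      BrotliEncoderState* s = BrotliEncoderCreateInstance(0, 0, 0);
      if (!s) return;
      BrotliEncoderSetParameter(s, BROTLI_PARAM_QUALITY, 5);
      BrotliEncoderSetParameter(s, BROTLI_PARAM_LGWIN, 18);
      /* ... feed input via BrotliEncoderCompressStream ... */
      BrotliEncoderDestroyInstance(s);
    }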
static void RecomputeDistancePrefixes(Command* cmds,
size_t num_commands,
uint32_t num_direct_distance_codes,
uint32_t distance_postfix_bits) {
+ size_t i;
if (num_direct_distance_codes == 0 && distance_postfix_bits == 0) {
return;
}
- for (size_t i = 0; i < num_commands; ++i) {
+ for (i = 0; i < num_commands; ++i) {
Command* cmd = &cmds[i];
- if (cmd->copy_len() && cmd->cmd_prefix_ >= 128) {
- PrefixEncodeCopyDistance(cmd->DistanceCode(),
+ if (CommandCopyLen(cmd) && cmd->cmd_prefix_ >= 128) {
+ PrefixEncodeCopyDistance(CommandDistanceCode(cmd),
num_direct_distance_codes,
distance_postfix_bits,
&cmd->dist_prefix_,
@@ -64,20 +181,22 @@ static void RecomputeDistancePrefixes(Command* cmds,
/* Wraps 64-bit input position to 32-bit ringbuffer position preserving
"not-a-first-lap" feature. */
static uint32_t WrapPosition(uint64_t position) {
- uint32_t result = static_cast<uint32_t>(position);
+ uint32_t result = (uint32_t)position;
if (position > (1u << 30)) {
result = (result & ((1u << 30) - 1)) | (1u << 30);
}
return result;
}
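The effect of the pinned bit is easiest to see with concrete values:
positions within the first 2^30 bytes pass through unchanged, while every
later position keeps its low 30 bits and has bit 30 forced on, so a wrapped
position can never be mistaken for an equal offset on the first lap. A
sketch, assuming WrapPosition as defined above:

    #include <assert.h>

    static void WrapPositionExamples(void) {
      assert(WrapPosition(5) == 5u);                        /* First lap. */
      assert(WrapPosition((1ull << 30) + 5) == ((1u << 30) | 5u));
      assert(WrapPosition((3ull << 30) + 7) == ((1u << 30) | 7u));
    }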
-uint8_t* BrotliCompressor::GetBrotliStorage(size_t size) {
- if (storage_size_ < size) {
- delete[] storage_;
- storage_ = new uint8_t[size];
- storage_size_ = size;
+static uint8_t* GetBrotliStorage(BrotliEncoderState* s, size_t size) {
+ MemoryManager* m = &s->memory_manager_;
+ if (s->storage_size_ < size) {
+ BROTLI_FREE(m, s->storage_);
+ s->storage_ = BROTLI_ALLOC(m, uint8_t, size);
+ if (BROTLI_IS_OOM(m)) return NULL;
+ s->storage_size_ = size;
}
- return storage_;
+ return s->storage_;
}
static size_t MaxHashTableSize(int quality) {
@@ -92,25 +211,28 @@ static size_t HashTableSize(size_t max_table_size, size_t input_size) {
return htsize;
}
-int* BrotliCompressor::GetHashTable(int quality,
- size_t input_size,
- size_t* table_size) {
+static int* GetHashTable(BrotliEncoderState* s, int quality,
+ size_t input_size, size_t* table_size) {
   /* Use a smaller hash table when |input_size| is smaller, since we
fill the table, incurring O(hash table size) overhead for
compression, and if the input is short, we won't need that
many hash table entries anyway. */
+ MemoryManager* m = &s->memory_manager_;
const size_t max_table_size = MaxHashTableSize(quality);
- assert(max_table_size >= 256);
size_t htsize = HashTableSize(max_table_size, input_size);
-
int* table;
- if (htsize <= sizeof(small_table_) / sizeof(small_table_[0])) {
- table = small_table_;
+ assert(max_table_size >= 256);
+
+ if (htsize <= sizeof(s->small_table_) / sizeof(s->small_table_[0])) {
+ table = s->small_table_;
} else {
- if (large_table_ == NULL) {
- large_table_ = new int[max_table_size];
+ if (htsize > s->large_table_size_) {
+ s->large_table_size_ = htsize;
+ BROTLI_FREE(m, s->large_table_);
+ s->large_table_ = BROTLI_ALLOC(m, int, htsize);
+ if (BROTLI_IS_OOM(m)) return 0;
}
- table = large_table_;
+ table = s->large_table_;
}
*table_size = htsize;
@@ -119,7 +241,7 @@ int* BrotliCompressor::GetHashTable(int quality,
}
static void EncodeWindowBits(int lgwin, uint8_t* last_byte,
- uint8_t* last_byte_bits) {
+ uint8_t* last_byte_bits) {
if (lgwin == 16) {
*last_byte = 0;
*last_byte_bits = 1;
@@ -127,10 +249,10 @@ static void EncodeWindowBits(int lgwin, uint8_t* last_byte,
*last_byte = 1;
*last_byte_bits = 7;
} else if (lgwin > 17) {
- *last_byte = static_cast<uint8_t>(((lgwin - 17) << 1) | 1);
+ *last_byte = (uint8_t)(((lgwin - 17) << 1) | 1);
*last_byte_bits = 4;
} else {
- *last_byte = static_cast<uint8_t>(((lgwin - 8) << 4) | 1);
+ *last_byte = (uint8_t)(((lgwin - 8) << 4) | 1);
*last_byte_bits = 7;
}
}
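Concretely, the branches above yield the following stream-header values.
Since EncodeWindowBits is file-static, a check like this would live in the
same translation unit; a sketch using assert from <assert.h>:

    static void EncodeWindowBitsExamples(void) {
      uint8_t byte;
      uint8_t bits;
      EncodeWindowBits(22, &byte, &bits);  /* Default window. */
      assert(byte == 0x0B && bits == 4);   /* ((22 - 17) << 1) | 1 */
      EncodeWindowBits(10, &byte, &bits);  /* Smallest window. */
      assert(byte == 0x21 && bits == 7);   /* ((10 - 8) << 4) | 1 */
    }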
@@ -164,11 +286,6 @@ static void InitCommandPrefixCodes(uint8_t cmd_depths[128],
2, 10, 6, 21, 13, 29, 3, 19, 11, 15, 47, 31, 95, 63, 127, 255,
767, 2815, 1791, 3839, 511, 2559, 1535, 3583, 1023, 3071, 2047, 4095,
};
- COPY_ARRAY(cmd_depths, kDefaultCommandDepths);
- COPY_ARRAY(cmd_bits, kDefaultCommandBits);
-
- /* Initialize the pre-compressed form of the command and distance prefix
- codes. */
static const uint8_t kDefaultCommandCode[] = {
0xff, 0x77, 0xd5, 0xbf, 0xe7, 0xde, 0xea, 0x9e, 0x51, 0x5d, 0xde, 0xc6,
0x70, 0x57, 0xbc, 0x58, 0x58, 0x58, 0xd8, 0xd8, 0x58, 0xd5, 0xcb, 0x8c,
@@ -176,7 +293,12 @@ static void InitCommandPrefixCodes(uint8_t cmd_depths[128],
0x06, 0x83, 0xc1, 0x60, 0x30, 0x18, 0xcc, 0xa1, 0xce, 0x88, 0x54, 0x94,
0x46, 0xe1, 0xb0, 0xd0, 0x4e, 0xb2, 0xf7, 0x04, 0x00,
};
- static const int kDefaultCommandCodeNumBits = 448;
+ static const size_t kDefaultCommandCodeNumBits = 448;
+ COPY_ARRAY(cmd_depths, kDefaultCommandDepths);
+ COPY_ARRAY(cmd_bits, kDefaultCommandBits);
+
+ /* Initialize the pre-compressed form of the command and distance prefix
+ codes. */
COPY_ARRAY(cmd_code, kDefaultCommandCode);
*cmd_code_numbits = kDefaultCommandCodeNumBits;
}
@@ -192,55 +314,58 @@ static void ChooseContextMap(int quality,
uint32_t* bigram_histo,
size_t* num_literal_contexts,
const uint32_t** literal_context_map) {
+ static const uint32_t kStaticContextMapContinuation[64] = {
+ 1, 1, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ };
+ static const uint32_t kStaticContextMapSimpleUTF8[64] = {
+ 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ };
+
uint32_t monogram_histo[3] = { 0 };
uint32_t two_prefix_histo[6] = { 0 };
size_t total = 0;
- for (size_t i = 0; i < 9; ++i) {
+ size_t i;
+ size_t dummy;
+ double entropy[4];
+ for (i = 0; i < 9; ++i) {
+ size_t j = i;
total += bigram_histo[i];
monogram_histo[i % 3] += bigram_histo[i];
- size_t j = i;
if (j >= 6) {
j -= 6;
}
two_prefix_histo[j] += bigram_histo[i];
}
- size_t dummy;
- double entropy1 = ShannonEntropy(monogram_histo, 3, &dummy);
- double entropy2 = (ShannonEntropy(two_prefix_histo, 3, &dummy) +
- ShannonEntropy(two_prefix_histo + 3, 3, &dummy));
- double entropy3 = 0;
- for (size_t k = 0; k < 3; ++k) {
- entropy3 += ShannonEntropy(bigram_histo + 3 * k, 3, &dummy);
+ entropy[1] = ShannonEntropy(monogram_histo, 3, &dummy);
+ entropy[2] = (ShannonEntropy(two_prefix_histo, 3, &dummy) +
+ ShannonEntropy(two_prefix_histo + 3, 3, &dummy));
+ entropy[3] = 0;
+ for (i = 0; i < 3; ++i) {
+ entropy[3] += ShannonEntropy(bigram_histo + 3 * i, 3, &dummy);
}
assert(total != 0);
- double scale = 1.0 / static_cast<double>(total);
- entropy1 *= scale;
- entropy2 *= scale;
- entropy3 *= scale;
+ entropy[0] = 1.0 / (double)total;
+ entropy[1] *= entropy[0];
+ entropy[2] *= entropy[0];
+ entropy[3] *= entropy[0];
- static const uint32_t kStaticContextMapContinuation[64] = {
- 1, 1, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- };
- static const uint32_t kStaticContextMapSimpleUTF8[64] = {
- 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- };
if (quality < 7) {
/* 3 context models is a bit slower, don't use it at lower qualities. */
- entropy3 = entropy1 * 10;
+ entropy[3] = entropy[1] * 10;
}
/* If expected savings by symbol are less than 0.2 bits, skip the
context modeling -- in exchange for faster decoding speed. */
- if (entropy1 - entropy2 < 0.2 &&
- entropy1 - entropy3 < 0.2) {
+ if (entropy[1] - entropy[2] < 0.2 &&
+ entropy[1] - entropy[3] < 0.2) {
*num_literal_contexts = 1;
- } else if (entropy2 - entropy3 < 0.02) {
+ } else if (entropy[2] - entropy[3] < 0.02) {
*num_literal_contexts = 2;
*literal_context_map = kStaticContextMapSimpleUTF8;
} else {
@@ -249,72 +374,71 @@ static void ChooseContextMap(int quality,
}
}
-static void DecideOverLiteralContextModeling(
- const uint8_t* input,
- size_t start_pos,
- size_t length,
- size_t mask,
- int quality,
- ContextType* literal_context_mode,
- size_t* num_literal_contexts,
+static void DecideOverLiteralContextModeling(const uint8_t* input,
+ size_t start_pos, size_t length, size_t mask, int quality,
+ ContextType* literal_context_mode, size_t* num_literal_contexts,
const uint32_t** literal_context_map) {
if (quality < kMinQualityForContextModeling || length < 64) {
return;
- }
+ } else {
/* Gather bigram data of the UTF8 byte prefixes. To make the analysis of
UTF8 data faster we only examine 64 byte long strides at every 4kB
intervals. */
- const size_t end_pos = start_pos + length;
- uint32_t bigram_prefix_histo[9] = { 0 };
- for (; start_pos + 64 <= end_pos; start_pos += 4096) {
+ const size_t end_pos = start_pos + length;
+ uint32_t bigram_prefix_histo[9] = { 0 };
+ for (; start_pos + 64 <= end_pos; start_pos += 4096) {
static const int lut[4] = { 0, 0, 1, 2 };
- const size_t stride_end_pos = start_pos + 64;
- int prev = lut[input[start_pos & mask] >> 6] * 3;
- for (size_t pos = start_pos + 1; pos < stride_end_pos; ++pos) {
- const uint8_t literal = input[pos & mask];
- ++bigram_prefix_histo[prev + lut[literal >> 6]];
- prev = lut[literal >> 6] * 3;
+ const size_t stride_end_pos = start_pos + 64;
+ int prev = lut[input[start_pos & mask] >> 6] * 3;
+ size_t pos;
+ for (pos = start_pos + 1; pos < stride_end_pos; ++pos) {
+ const uint8_t literal = input[pos & mask];
+ ++bigram_prefix_histo[prev + lut[literal >> 6]];
+ prev = lut[literal >> 6] * 3;
+ }
}
+ *literal_context_mode = CONTEXT_UTF8;
+ ChooseContextMap(quality, &bigram_prefix_histo[0], num_literal_contexts,
+ literal_context_map);
}
- *literal_context_mode = CONTEXT_UTF8;
- ChooseContextMap(quality, &bigram_prefix_histo[0], num_literal_contexts,
- literal_context_map);
}
-static bool ShouldCompress(const uint8_t* data,
- const size_t mask,
- const uint64_t last_flush_pos,
- const size_t bytes,
- const size_t num_literals,
- const size_t num_commands) {
+static int ShouldCompress(const uint8_t* data,
+ const size_t mask,
+ const uint64_t last_flush_pos,
+ const size_t bytes,
+ const size_t num_literals,
+ const size_t num_commands) {
if (num_commands < (bytes >> 8) + 2) {
- if (num_literals > 0.99 * static_cast<double>(bytes)) {
+ if (num_literals > 0.99 * (double)bytes) {
uint32_t literal_histo[256] = { 0 };
static const uint32_t kSampleRate = 13;
static const double kMinEntropy = 7.92;
const double bit_cost_threshold =
- static_cast<double>(bytes) * kMinEntropy / kSampleRate;
+ (double)bytes * kMinEntropy / kSampleRate;
size_t t = (bytes + kSampleRate - 1) / kSampleRate;
- uint32_t pos = static_cast<uint32_t>(last_flush_pos);
- for (size_t i = 0; i < t; i++) {
+ uint32_t pos = (uint32_t)last_flush_pos;
+ size_t i;
+ for (i = 0; i < t; i++) {
++literal_histo[data[pos & mask]];
pos += kSampleRate;
}
if (BitsEntropy(literal_histo, 256) > bit_cost_threshold) {
- return false;
+ return 0;
}
}
}
- return true;
+ return 1;
}
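In other words: sample roughly one literal in thirteen and give up on entropy
coding when the sample looks nearly random (more than 7.92 of the 8 possible
bits per byte). A standalone sketch of the same test over a plain buffer
rather than the masked ring buffer, recomputing Shannon entropy with <math.h>
instead of the encoder's BitsEntropy helper; the function name is
illustrative.

    #include <math.h>

    /* Estimated Shannon entropy, in bits per sampled byte, of every 13th
       byte; ShouldCompress rejects when the estimate exceeds 7.92. */
    static double SampledBitsPerByte(const uint8_t* data, size_t bytes) {
      static const size_t kSampleRate = 13;
      uint32_t histo[256] = { 0 };
      size_t pos;
      size_t n = 0;
      size_t i;
      double entropy = 0.0;
      for (pos = 0; pos < bytes; pos += kSampleRate) {
        ++histo[data[pos]];
        ++n;
      }
      for (i = 0; i < 256; ++i) {
        if (histo[i] != 0) {
          double p = (double)histo[i] / (double)n;
          entropy -= p * log2(p);
        }
      }
      return entropy;
    }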
-static void WriteMetaBlockInternal(const uint8_t* data,
+static void WriteMetaBlockInternal(MemoryManager* m,
+ const uint8_t* data,
const size_t mask,
const uint64_t last_flush_pos,
const size_t bytes,
- const bool is_last,
+ const int is_last,
const int quality,
- const bool font_mode,
+ const int is_font_mode,
const uint8_t prev_byte,
const uint8_t prev_byte2,
const size_t num_literals,
@@ -324,9 +448,14 @@ static void WriteMetaBlockInternal(const uint8_t* data,
int* dist_cache,
size_t* storage_ix,
uint8_t* storage) {
+ uint8_t last_byte;
+ uint8_t last_byte_bits;
+ uint32_t num_direct_distance_codes = 0;
+ uint32_t distance_postfix_bits = 0;
+
if (bytes == 0) {
/* Write the ISLAST and ISEMPTY bits. */
- WriteBits(2, 3, storage_ix, storage);
+ BrotliWriteBits(2, 3, storage_ix, storage);
*storage_ix = (*storage_ix + 7u) & ~7u;
return;
}
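BrotliWriteBits emits values least-significant-bit first, so writing the
value 3 in two bits sets bit 0 (ISLAST) and bit 1 (ISEMPTY); the next line
then rounds *storage_ix up to a byte boundary. A toy bit writer showing only
the bit order (the real helper writes whole 64-bit words, and the name
PutBitsLsbFirst is illustrative):

    /* Toy LSB-first bit writer; illustrative only. Assumes the byte at the
       current position was already initialized (storage[0] = last_byte). */
    static void PutBitsLsbFirst(size_t n_bits, uint64_t bits,
                                size_t* pos, uint8_t* array) {
      size_t i;
      for (i = 0; i < n_bits; ++i) {
        size_t p = *pos + i;
        if ((p & 7) == 0) array[p >> 3] = 0;  /* Start new bytes clean. */
        array[p >> 3] |= (uint8_t)(((bits >> i) & 1u) << (p & 7));
      }
      *pos += n_bits;
    }
    /* PutBitsLsbFirst(2, 3, &ix, buf), then ix = (ix + 7) & ~(size_t)7,
       reproduces the empty-meta-block header written above. */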
@@ -336,17 +465,15 @@ static void WriteMetaBlockInternal(const uint8_t* data,
/* Restore the distance cache, as its last update by
CreateBackwardReferences is now unused. */
memcpy(dist_cache, saved_dist_cache, 4 * sizeof(dist_cache[0]));
- StoreUncompressedMetaBlock(is_last, data,
- WrapPosition(last_flush_pos), mask, bytes,
- storage_ix, storage);
+ BrotliStoreUncompressedMetaBlock(is_last, data,
+ WrapPosition(last_flush_pos), mask, bytes,
+ storage_ix, storage);
return;
}
- const uint8_t last_byte = storage[0];
- const uint8_t last_byte_bits = static_cast<uint8_t>(*storage_ix & 0xff);
- uint32_t num_direct_distance_codes = 0;
- uint32_t distance_postfix_bits = 0;
- if (quality > 9 && font_mode) {
+ last_byte = storage[0];
+ last_byte_bits = (uint8_t)(*storage_ix & 0xff);
+ if (quality > 9 && is_font_mode) {
num_direct_distance_codes = 12;
distance_postfix_bits = 1;
RecomputeDistancePrefixes(commands,
@@ -355,18 +482,21 @@ static void WriteMetaBlockInternal(const uint8_t* data,
distance_postfix_bits);
}
if (quality == 2) {
- StoreMetaBlockFast(data, WrapPosition(last_flush_pos),
- bytes, mask, is_last,
- commands, num_commands,
- storage_ix, storage);
+ BrotliStoreMetaBlockFast(m, data, WrapPosition(last_flush_pos),
+ bytes, mask, is_last,
+ commands, num_commands,
+ storage_ix, storage);
+ if (BROTLI_IS_OOM(m)) return;
} else if (quality < kMinQualityForBlockSplit) {
- StoreMetaBlockTrivial(data, WrapPosition(last_flush_pos),
- bytes, mask, is_last,
- commands, num_commands,
- storage_ix, storage);
+ BrotliStoreMetaBlockTrivial(m, data, WrapPosition(last_flush_pos),
+ bytes, mask, is_last,
+ commands, num_commands,
+ storage_ix, storage);
+ if (BROTLI_IS_OOM(m)) return;
} else {
- MetaBlockSplit mb;
ContextType literal_context_mode = CONTEXT_UTF8;
+ MetaBlockSplit mb;
+ InitMetaBlockSplit(&mb);
if (quality <= 9) {
size_t num_literal_contexts = 1;
const uint32_t* literal_context_map = NULL;
@@ -377,92 +507,86 @@ static void WriteMetaBlockInternal(const uint8_t* data,
&num_literal_contexts,
&literal_context_map);
if (literal_context_map == NULL) {
- BuildMetaBlockGreedy(data, WrapPosition(last_flush_pos), mask,
- commands, num_commands, &mb);
+ BrotliBuildMetaBlockGreedy(m, data, WrapPosition(last_flush_pos), mask,
+ commands, num_commands, &mb);
+ if (BROTLI_IS_OOM(m)) return;
} else {
- BuildMetaBlockGreedyWithContexts(data, WrapPosition(last_flush_pos),
- mask,
- prev_byte, prev_byte2,
- literal_context_mode,
- num_literal_contexts,
- literal_context_map,
- commands, num_commands,
- &mb);
+ BrotliBuildMetaBlockGreedyWithContexts(m, data,
+ WrapPosition(last_flush_pos),
+ mask,
+ prev_byte, prev_byte2,
+ literal_context_mode,
+ num_literal_contexts,
+ literal_context_map,
+ commands, num_commands,
+ &mb);
+ if (BROTLI_IS_OOM(m)) return;
}
} else {
- if (!IsMostlyUTF8(data, WrapPosition(last_flush_pos), mask, bytes,
- kMinUTF8Ratio)) {
+ if (!BrotliIsMostlyUTF8(data, WrapPosition(last_flush_pos), mask, bytes,
+ kMinUTF8Ratio)) {
literal_context_mode = CONTEXT_SIGNED;
}
- BuildMetaBlock(data, WrapPosition(last_flush_pos), mask,
- prev_byte, prev_byte2,
- commands, num_commands,
- literal_context_mode,
- &mb);
+ BrotliBuildMetaBlock(m, data, WrapPosition(last_flush_pos), mask, quality,
+ prev_byte, prev_byte2,
+ commands, num_commands,
+ literal_context_mode,
+ &mb);
+ if (BROTLI_IS_OOM(m)) return;
}
if (quality >= kMinQualityForOptimizeHistograms) {
- OptimizeHistograms(num_direct_distance_codes,
- distance_postfix_bits,
- &mb);
+ BrotliOptimizeHistograms(num_direct_distance_codes,
+ distance_postfix_bits,
+ &mb);
}
- StoreMetaBlock(data, WrapPosition(last_flush_pos), bytes, mask,
- prev_byte, prev_byte2,
- is_last,
- num_direct_distance_codes,
- distance_postfix_bits,
- literal_context_mode,
- commands, num_commands,
- mb,
- storage_ix, storage);
+ BrotliStoreMetaBlock(m, data, WrapPosition(last_flush_pos), bytes, mask,
+ prev_byte, prev_byte2,
+ is_last,
+ num_direct_distance_codes,
+ distance_postfix_bits,
+ literal_context_mode,
+ commands, num_commands,
+ &mb,
+ storage_ix, storage);
+ if (BROTLI_IS_OOM(m)) return;
+ DestroyMetaBlockSplit(m, &mb);
}
if (bytes + 4 < (*storage_ix >> 3)) {
/* Restore the distance cache and last byte. */
memcpy(dist_cache, saved_dist_cache, 4 * sizeof(dist_cache[0]));
storage[0] = last_byte;
*storage_ix = last_byte_bits;
- StoreUncompressedMetaBlock(is_last, data,
- WrapPosition(last_flush_pos), mask,
- bytes, storage_ix, storage);
+ BrotliStoreUncompressedMetaBlock(is_last, data,
+ WrapPosition(last_flush_pos), mask,
+ bytes, storage_ix, storage);
}
}
-BrotliCompressor::BrotliCompressor(BrotliParams params)
- : params_(params),
- hashers_(new Hashers()),
- input_pos_(0),
- num_commands_(0),
- num_literals_(0),
- last_insert_len_(0),
- last_flush_pos_(0),
- last_processed_pos_(0),
- prev_byte_(0),
- prev_byte2_(0),
- storage_size_(0),
- storage_(0),
- large_table_(NULL),
- cmd_code_numbits_(0),
- command_buf_(NULL),
- literal_buf_(NULL),
- is_last_block_emitted_(0) {
+static int EnsureInitialized(BrotliEncoderState* s) {
+ BrotliEncoderParams* params = &s->params_;
+
+ if (BROTLI_IS_OOM(&s->memory_manager_)) return 0;
+ if (s->is_initialized_) return 1;
+
/* Sanitize params. */
- params_.quality = std::max(0, params_.quality);
- if (params_.lgwin < kMinWindowBits) {
- params_.lgwin = kMinWindowBits;
- } else if (params_.lgwin > kMaxWindowBits) {
- params_.lgwin = kMaxWindowBits;
+ params->quality = BROTLI_MAX(int, 0, params->quality);
+ if (params->lgwin < kBrotliMinWindowBits) {
+ params->lgwin = kBrotliMinWindowBits;
+ } else if (params->lgwin > kBrotliMaxWindowBits) {
+ params->lgwin = kBrotliMaxWindowBits;
}
- if (params_.quality <= 1) {
- params_.lgblock = params_.lgwin;
- } else if (params_.quality < kMinQualityForBlockSplit) {
- params_.lgblock = 14;
- } else if (params_.lgblock == 0) {
- params_.lgblock = 16;
- if (params_.quality >= 9 && params_.lgwin > params_.lgblock) {
- params_.lgblock = std::min(18, params_.lgwin);
+ if (params->quality <= 1) {
+ params->lgblock = params->lgwin;
+ } else if (params->quality < kMinQualityForBlockSplit) {
+ params->lgblock = 14;
+ } else if (params->lgblock == 0) {
+ params->lgblock = 16;
+ if (params->quality >= 9 && params->lgwin > params->lgblock) {
+ params->lgblock = BROTLI_MIN(int, 18, params->lgwin);
}
} else {
- params_.lgblock = std::min(kMaxInputBlockBits,
- std::max(kMinInputBlockBits, params_.lgblock));
+ params->lgblock = BROTLI_MIN(int, kBrotliMaxInputBlockBits,
+ BROTLI_MAX(int, kBrotliMinInputBlockBits, params->lgblock));
}
/* Initialize input and literal cost ring buffers.
@@ -470,51 +594,126 @@ BrotliCompressor::BrotliCompressor(BrotliParams params)
added block fits there completely and we still get lgwin bits and at least
read_block_size_bits + 1 bits because the copy tail length needs to be
smaller than ringbuffer size. */
- int ringbuffer_bits = std::max(params_.lgwin + 1, params_.lgblock + 1);
- ringbuffer_ = new RingBuffer(ringbuffer_bits, params_.lgblock);
-
- commands_ = 0;
- cmd_alloc_size_ = 0;
+ {
+ int ringbuffer_bits =
+ BROTLI_MAX(int, params->lgwin + 1, params->lgblock + 1);
+ RingBufferSetup(ringbuffer_bits, params->lgblock, &s->ringbuffer_);
+ }
/* Initialize last byte with stream header. */
- EncodeWindowBits(params_.lgwin, &last_byte_, &last_byte_bits_);
-
- // Initialize distance cache.
- dist_cache_[0] = 4;
- dist_cache_[1] = 11;
- dist_cache_[2] = 15;
- dist_cache_[3] = 16;
- // Save the state of the distance cache in case we need to restore it for
- // emitting an uncompressed block.
- memcpy(saved_dist_cache_, dist_cache_, sizeof(dist_cache_));
-
- if (params_.quality == 0) {
- InitCommandPrefixCodes(cmd_depths_, cmd_bits_,
- cmd_code_, &cmd_code_numbits_);
- } else if (params_.quality == 1) {
- command_buf_ = new uint32_t[kCompressFragmentTwoPassBlockSize];
- literal_buf_ = new uint8_t[kCompressFragmentTwoPassBlockSize];
+ EncodeWindowBits(params->lgwin, &s->last_byte_, &s->last_byte_bits_);
+
+ if (params->quality == 0) {
+ InitCommandPrefixCodes(s->cmd_depths_, s->cmd_bits_,
+ s->cmd_code_, &s->cmd_code_numbits_);
}
/* Initialize hashers. */
- hash_type_ = std::min(10, params_.quality);
- hashers_->Init(hash_type_);
+ s->hash_type_ = BROTLI_MIN(int, 10, params->quality);
+ HashersSetup(&s->memory_manager_, &s->hashers_, s->hash_type_);
+ if (BROTLI_IS_OOM(&s->memory_manager_)) return 0;
+
+ s->is_initialized_ = 1;
+ return 1;
}
-BrotliCompressor::~BrotliCompressor(void) {
- delete[] storage_;
- free(commands_);
- delete ringbuffer_;
- delete hashers_;
- delete[] large_table_;
- delete[] command_buf_;
- delete[] literal_buf_;
+static void BrotliEncoderInitState(BrotliEncoderState* s) {
+ BrotliEncoderParamsSetDefault(&s->params_);
+ s->input_pos_ = 0;
+ s->num_commands_ = 0;
+ s->num_literals_ = 0;
+ s->last_insert_len_ = 0;
+ s->last_flush_pos_ = 0;
+ s->last_processed_pos_ = 0;
+ s->prev_byte_ = 0;
+ s->prev_byte2_ = 0;
+ s->storage_size_ = 0;
+ s->storage_ = 0;
+ s->large_table_ = NULL;
+ s->large_table_size_ = 0;
+ s->cmd_code_numbits_ = 0;
+ s->command_buf_ = NULL;
+ s->literal_buf_ = NULL;
+ s->next_out_ = NULL;
+ s->available_out_ = 0;
+ s->total_out_ = 0;
+ s->stream_state_ = BROTLI_STREAM_PROCESSING;
+ s->is_last_block_emitted_ = 0;
+ s->is_initialized_ = 0;
+
+ InitHashers(&s->hashers_);
+
+ RingBufferInit(&s->ringbuffer_);
+
+ s->commands_ = 0;
+ s->cmd_alloc_size_ = 0;
+
+ /* Initialize distance cache. */
+ s->dist_cache_[0] = 4;
+ s->dist_cache_[1] = 11;
+ s->dist_cache_[2] = 15;
+ s->dist_cache_[3] = 16;
+ /* Save the state of the distance cache in case we need to restore it for
+ emitting an uncompressed block. */
+ memcpy(s->saved_dist_cache_, s->dist_cache_, sizeof(s->dist_cache_));
}
-void BrotliCompressor::CopyInputToRingBuffer(const size_t input_size,
- const uint8_t* input_buffer) {
- ringbuffer_->Write(input_buffer, input_size);
- input_pos_ += input_size;
+BrotliEncoderState* BrotliEncoderCreateInstance(brotli_alloc_func alloc_func,
+ brotli_free_func free_func,
+ void* opaque) {
+ BrotliEncoderState* state = 0;
+ if (!alloc_func && !free_func) {
+ state = (BrotliEncoderState*)malloc(sizeof(BrotliEncoderState));
+ } else if (alloc_func && free_func) {
+ state = (BrotliEncoderState*)alloc_func(opaque, sizeof(BrotliEncoderState));
+ }
+ if (state == 0) {
+ /* BROTLI_DUMP(); */
+ return 0;
+ }
+ BrotliInitMemoryManager(
+ &state->memory_manager_, alloc_func, free_func, opaque);
+ BrotliEncoderInitState(state);
+ return state;
+}
+
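The callbacks come as an all-or-nothing pair, as the checks above enforce. A
sketch of supplying a counting allocator; the AllocStats struct and both
function names are illustrative, while the callback shapes follow the
alloc_func(opaque, size) and free_func(opaque, address) uses in this file.

    #include <stdlib.h>

    typedef struct { size_t live_allocations; } AllocStats;

    static void* CountingAlloc(void* opaque, size_t size) {
      ((AllocStats*)opaque)->live_allocations++;
      return malloc(size);
    }

    static void CountingFree(void* opaque, void* address) {
      if (address) ((AllocStats*)opaque)->live_allocations--;
      free(address);
    }

    /* Usage:
         AllocStats stats = { 0 };
         BrotliEncoderState* s =
             BrotliEncoderCreateInstance(CountingAlloc, CountingFree, &stats);
    */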
+static void BrotliEncoderCleanupState(BrotliEncoderState* s) {
+ MemoryManager* m = &s->memory_manager_;
+ if (BROTLI_IS_OOM(m)) {
+ BrotliWipeOutMemoryManager(m);
+ return;
+ }
+ BROTLI_FREE(m, s->storage_);
+ BROTLI_FREE(m, s->commands_);
+ RingBufferFree(m, &s->ringbuffer_);
+ DestroyHashers(m, &s->hashers_);
+ BROTLI_FREE(m, s->large_table_);
+ BROTLI_FREE(m, s->command_buf_);
+ BROTLI_FREE(m, s->literal_buf_);
+}
+
+/* Deinitializes and frees BrotliEncoderState instance. */
+void BrotliEncoderDestroyInstance(BrotliEncoderState* state) {
+ if (!state) {
+ return;
+ } else {
+ MemoryManager* m = &state->memory_manager_;
+ brotli_free_func free_func = m->free_func;
+ void* opaque = m->opaque;
+ BrotliEncoderCleanupState(state);
+ free_func(opaque, state);
+ }
+}
+
+void BrotliEncoderCopyInputToRingBuffer(BrotliEncoderState* s,
+ const size_t input_size,
+ const uint8_t* input_buffer) {
+ RingBuffer* ringbuffer_ = &s->ringbuffer_;
+ MemoryManager* m = &s->memory_manager_;
+ if (!EnsureInitialized(s)) return;
+ RingBufferWrite(m, input_buffer, input_size, ringbuffer_);
+ if (BROTLI_IS_OOM(m)) return;
+ s->input_pos_ += input_size;
/* TL;DR: If needed, initialize 7 more bytes in the ring buffer to make the
hashing not depend on uninitialized data. This makes compression
@@ -545,8 +744,7 @@ void BrotliCompressor::CopyInputToRingBuffer(const size_t input_size,
Only clear during the first round of ringbuffer writes. On
subsequent rounds data in the ringbuffer would be affected. */
- size_t pos = ringbuffer_->position();
- if (pos <= ringbuffer_->mask()) {
+ if (ringbuffer_->pos_ <= ringbuffer_->mask_) {
/* This is the first time when the ring buffer is being written.
We clear 7 bytes just after the bytes that have been copied from
the input buffer.
@@ -559,209 +757,259 @@ void BrotliCompressor::CopyInputToRingBuffer(const size_t input_size,
      memory. For performance reasons, hashing reads data using a
LOAD64, which can go 7 bytes beyond the bytes written in the
ringbuffer. */
- memset(ringbuffer_->start() + pos, 0, 7);
+ memset(ringbuffer_->buffer_ + ringbuffer_->pos_, 0, 7);
}
}
-void BrotliCompressor::BrotliSetCustomDictionary(
- const size_t size, const uint8_t* dict) {
- CopyInputToRingBuffer(size, dict);
- last_flush_pos_ = size;
- last_processed_pos_ = size;
- if (size > 0) {
- prev_byte_ = dict[size - 1];
+void BrotliEncoderSetCustomDictionary(BrotliEncoderState* s, const size_t size,
+ const uint8_t* dict) {
+ size_t max_dict_size = MaxBackwardLimit(s->params_.lgwin);
+ size_t dict_size = size;
+ MemoryManager* m = &s->memory_manager_;
+
+ if (!EnsureInitialized(s)) return;
+
+ if (dict_size == 0 || s->params_.quality <= 1) {
+ return;
}
- if (size > 1) {
- prev_byte2_ = dict[size - 2];
+ if (size > max_dict_size) {
+ dict += size - max_dict_size;
+ dict_size = max_dict_size;
}
- hashers_->PrependCustomDictionary(hash_type_, params_.lgwin, size, dict);
+ BrotliEncoderCopyInputToRingBuffer(s, dict_size, dict);
+ s->last_flush_pos_ = dict_size;
+ s->last_processed_pos_ = dict_size;
+ if (dict_size > 0) {
+ s->prev_byte_ = dict[dict_size - 1];
+ }
+ if (dict_size > 1) {
+ s->prev_byte2_ = dict[dict_size - 2];
+ }
+ HashersPrependCustomDictionary(m, &s->hashers_,
+ s->hash_type_, s->params_.lgwin, dict_size, dict);
+ if (BROTLI_IS_OOM(m)) return;
}
-bool BrotliCompressor::WriteBrotliData(const bool is_last,
- const bool force_flush,
- size_t* out_size,
- uint8_t** output) {
- const uint64_t delta = input_pos_ - last_processed_pos_;
- const uint8_t* data = ringbuffer_->start();
- const uint32_t mask = ringbuffer_->mask();
-
- /* Adding more blocks after "last" block is forbidden. */
- if (is_last_block_emitted_) return false;
- if (is_last) is_last_block_emitted_ = 1;
-
- if (delta > input_block_size()) {
- return false;
+int BrotliEncoderWriteData(BrotliEncoderState* s, const int is_last,
+ const int force_flush, size_t* out_size,
+ uint8_t** output) {
+ const uint64_t delta = UnprocessedInputSize(s);
+ const uint32_t bytes = (uint32_t)delta;
+ size_t max_length;
+ uint8_t* data;
+ uint32_t mask;
+ MemoryManager* m = &s->memory_manager_;
+
+ if (!EnsureInitialized(s)) return 0;
+ data = s->ringbuffer_.buffer_;
+ mask = s->ringbuffer_.mask_;
+
+ /* Adding more blocks after "last" block is forbidden. */
+ if (s->is_last_block_emitted_) return 0;
+ if (is_last) s->is_last_block_emitted_ = 1;
+
+ if (delta > BrotliEncoderInputBlockSize(s)) {
+ return 0;
+ }
+ if (s->params_.quality == 1 && !s->command_buf_) {
+ s->command_buf_ =
+ BROTLI_ALLOC(m, uint32_t, kCompressFragmentTwoPassBlockSize);
+ s->literal_buf_ =
+ BROTLI_ALLOC(m, uint8_t, kCompressFragmentTwoPassBlockSize);
+ if (BROTLI_IS_OOM(m)) return 0;
}
- const uint32_t bytes = static_cast<uint32_t>(delta);
- if (params_.quality <= 1) {
+ if (s->params_.quality <= 1) {
+ uint8_t* storage;
+ size_t storage_ix = s->last_byte_bits_;
+ size_t table_size;
+ int* table;
+
if (delta == 0 && !is_last) {
/* We have no new input data and we don't have to finish the stream, so
nothing to do. */
*out_size = 0;
- return true;
+ return 1;
}
- const size_t max_out_size = 2 * bytes + 500;
- uint8_t* storage = GetBrotliStorage(max_out_size);
- storage[0] = last_byte_;
- size_t storage_ix = last_byte_bits_;
- size_t table_size;
- int* table = GetHashTable(params_.quality, bytes, &table_size);
- if (params_.quality == 0) {
+ storage = GetBrotliStorage(s, 2 * bytes + 500);
+ if (BROTLI_IS_OOM(m)) return 0;
+ storage[0] = s->last_byte_;
+ table = GetHashTable(s, s->params_.quality, bytes, &table_size);
+ if (BROTLI_IS_OOM(m)) return 0;
+ if (s->params_.quality == 0) {
BrotliCompressFragmentFast(
- &data[WrapPosition(last_processed_pos_) & mask],
+ m, &data[WrapPosition(s->last_processed_pos_) & mask],
bytes, is_last,
table, table_size,
- cmd_depths_, cmd_bits_,
- &cmd_code_numbits_, cmd_code_,
+ s->cmd_depths_, s->cmd_bits_,
+ &s->cmd_code_numbits_, s->cmd_code_,
&storage_ix, storage);
+ if (BROTLI_IS_OOM(m)) return 0;
} else {
BrotliCompressFragmentTwoPass(
- &data[WrapPosition(last_processed_pos_) & mask],
+ m, &data[WrapPosition(s->last_processed_pos_) & mask],
bytes, is_last,
- command_buf_, literal_buf_,
+ s->command_buf_, s->literal_buf_,
table, table_size,
&storage_ix, storage);
+ if (BROTLI_IS_OOM(m)) return 0;
}
- last_byte_ = storage[storage_ix >> 3];
- last_byte_bits_ = storage_ix & 7u;
- last_processed_pos_ = input_pos_;
+ s->last_byte_ = storage[storage_ix >> 3];
+ s->last_byte_bits_ = storage_ix & 7u;
+ s->last_processed_pos_ = s->input_pos_;
*output = &storage[0];
*out_size = storage_ix >> 3;
- return true;
+ return 1;
}
+ {
/* Theoretical max number of commands is 1 per 2 bytes. */
- size_t newsize = num_commands_ + bytes / 2 + 1;
- if (newsize > cmd_alloc_size_) {
+ size_t newsize = s->num_commands_ + bytes / 2 + 1;
+ if (newsize > s->cmd_alloc_size_) {
+ Command* new_commands;
/* Reserve a bit more memory to allow merging with a next block
without realloc: that would impact speed. */
- newsize += (bytes / 4) + 16;
- cmd_alloc_size_ = newsize;
- commands_ =
- static_cast<Command*>(realloc(commands_, sizeof(Command) * newsize));
+ newsize += (bytes / 4) + 16;
+ s->cmd_alloc_size_ = newsize;
+ new_commands = BROTLI_ALLOC(m, Command, newsize);
+ if (BROTLI_IS_OOM(m)) return 0;
+ if (s->commands_) {
+ memcpy(new_commands, s->commands_, sizeof(Command) * s->num_commands_);
+ BROTLI_FREE(m, s->commands_);
+ }
+ s->commands_ = new_commands;
+ }
}
- CreateBackwardReferences(bytes, WrapPosition(last_processed_pos_),
- is_last, data, mask,
- params_.quality,
- params_.lgwin,
- hashers_,
- hash_type_,
- dist_cache_,
- &last_insert_len_,
- &commands_[num_commands_],
- &num_commands_,
- &num_literals_);
-
- size_t max_length = std::min<size_t>(mask + 1, 1u << kMaxInputBlockBits);
- const size_t max_literals = max_length / 8;
- const size_t max_commands = max_length / 8;
- if (!is_last && !force_flush &&
- (params_.quality >= kMinQualityForBlockSplit ||
- (num_literals_ + num_commands_ < kMaxNumDelayedSymbols)) &&
- num_literals_ < max_literals &&
- num_commands_ < max_commands &&
- input_pos_ + input_block_size() <= last_flush_pos_ + max_length) {
+ BrotliCreateBackwardReferences(m, bytes, WrapPosition(s->last_processed_pos_),
+ is_last, data, mask,
+ s->params_.quality,
+ s->params_.lgwin,
+ &s->hashers_,
+ s->hash_type_,
+ s->dist_cache_,
+ &s->last_insert_len_,
+ &s->commands_[s->num_commands_],
+ &s->num_commands_,
+ &s->num_literals_);
+ if (BROTLI_IS_OOM(m)) return 0;
+
+ max_length = BROTLI_MIN(size_t, mask + 1, 1u << kBrotliMaxInputBlockBits);
+ {
+ const size_t max_literals = max_length / 8;
+ const size_t max_commands = max_length / 8;
+ const uint64_t input_limit = s->input_pos_ + BrotliEncoderInputBlockSize(s);
+ if (!is_last && !force_flush &&
+ (s->params_.quality >= kMinQualityForBlockSplit ||
+ (s->num_literals_ + s->num_commands_ < kMaxNumDelayedSymbols)) &&
+ s->num_literals_ < max_literals &&
+ s->num_commands_ < max_commands &&
+ input_limit <= s->last_flush_pos_ + max_length) {
/* Merge with next input block. Everything will happen later. */
- last_processed_pos_ = input_pos_;
- *out_size = 0;
- return true;
+ s->last_processed_pos_ = s->input_pos_;
+ *out_size = 0;
+ return 1;
+ }
}
/* Create the last insert-only command. */
- if (last_insert_len_ > 0) {
- brotli::Command cmd(last_insert_len_);
- commands_[num_commands_++] = cmd;
- num_literals_ += last_insert_len_;
- last_insert_len_ = 0;
+ if (s->last_insert_len_ > 0) {
+ InitInsertCommand(&s->commands_[s->num_commands_++], s->last_insert_len_);
+ s->num_literals_ += s->last_insert_len_;
+ s->last_insert_len_ = 0;
}
- if (!is_last && input_pos_ == last_flush_pos_) {
+ if (!is_last && s->input_pos_ == s->last_flush_pos_) {
/* We have no new input data and we don't have to finish the stream, so
nothing to do. */
*out_size = 0;
- return true;
- }
- assert(input_pos_ >= last_flush_pos_);
- assert(input_pos_ > last_flush_pos_ || is_last);
- assert(input_pos_ - last_flush_pos_ <= 1u << 24);
- const uint32_t metablock_size =
- static_cast<uint32_t>(input_pos_ - last_flush_pos_);
- const size_t max_out_size = 2 * metablock_size + 500;
- uint8_t* storage = GetBrotliStorage(max_out_size);
- storage[0] = last_byte_;
- size_t storage_ix = last_byte_bits_;
- bool font_mode = params_.mode == BrotliParams::MODE_FONT;
- WriteMetaBlockInternal(
- data, mask, last_flush_pos_, metablock_size, is_last, params_.quality,
- font_mode, prev_byte_, prev_byte2_, num_literals_, num_commands_,
- commands_, saved_dist_cache_, dist_cache_, &storage_ix, storage);
- last_byte_ = storage[storage_ix >> 3];
- last_byte_bits_ = storage_ix & 7u;
- last_flush_pos_ = input_pos_;
- last_processed_pos_ = input_pos_;
- if (last_flush_pos_ > 0) {
- prev_byte_ = data[(static_cast<uint32_t>(last_flush_pos_) - 1) & mask];
- }
- if (last_flush_pos_ > 1) {
- prev_byte2_ = data[(static_cast<uint32_t>(last_flush_pos_) - 2) & mask];
+ return 1;
}
- num_commands_ = 0;
- num_literals_ = 0;
+ assert(s->input_pos_ >= s->last_flush_pos_);
+ assert(s->input_pos_ > s->last_flush_pos_ || is_last);
+ assert(s->input_pos_ - s->last_flush_pos_ <= 1u << 24);
+ {
+ const uint32_t metablock_size =
+ (uint32_t)(s->input_pos_ - s->last_flush_pos_);
+ uint8_t* storage = GetBrotliStorage(s, 2 * metablock_size + 500);
+ size_t storage_ix = s->last_byte_bits_;
+ int is_font_mode = (s->params_.mode == BROTLI_MODE_FONT) ? 1 : 0;
+ if (BROTLI_IS_OOM(m)) return 0;
+ storage[0] = s->last_byte_;
+ WriteMetaBlockInternal(
+ m, data, mask, s->last_flush_pos_, metablock_size, is_last,
+ s->params_.quality, is_font_mode, s->prev_byte_, s->prev_byte2_,
+ s->num_literals_, s->num_commands_, s->commands_, s->saved_dist_cache_,
+ s->dist_cache_, &storage_ix, storage);
+ if (BROTLI_IS_OOM(m)) return 0;
+ s->last_byte_ = storage[storage_ix >> 3];
+ s->last_byte_bits_ = storage_ix & 7u;
+ s->last_flush_pos_ = s->input_pos_;
+ s->last_processed_pos_ = s->input_pos_;
+ if (s->last_flush_pos_ > 0) {
+ s->prev_byte_ = data[((uint32_t)s->last_flush_pos_ - 1) & mask];
+ }
+ if (s->last_flush_pos_ > 1) {
+ s->prev_byte2_ = data[(uint32_t)(s->last_flush_pos_ - 2) & mask];
+ }
+ s->num_commands_ = 0;
+ s->num_literals_ = 0;
/* Save the state of the distance cache in case we need to restore it for
emitting an uncompressed block. */
- memcpy(saved_dist_cache_, dist_cache_, sizeof(dist_cache_));
- *output = &storage[0];
- *out_size = storage_ix >> 3;
- return true;
+ memcpy(s->saved_dist_cache_, s->dist_cache_, sizeof(s->dist_cache_));
+ *output = &storage[0];
+ *out_size = storage_ix >> 3;
+ return 1;
+ }
}
-bool BrotliCompressor::WriteMetaBlock(const size_t input_size,
- const uint8_t* input_buffer,
- const bool is_last,
- size_t* encoded_size,
- uint8_t* encoded_buffer) {
- CopyInputToRingBuffer(input_size, input_buffer);
+int BrotliEncoderWriteMetaBlock(BrotliEncoderState* s, const size_t input_size,
+ const uint8_t* input_buffer, const int is_last,
+ size_t* encoded_size, uint8_t* encoded_buffer) {
size_t out_size = 0;
uint8_t* output;
- if (!WriteBrotliData(is_last, /* force_flush = */ true, &out_size, &output) ||
- out_size > *encoded_size) {
- return false;
+ int result;
+ if (!EnsureInitialized(s)) return 0;
+ BrotliEncoderCopyInputToRingBuffer(s, input_size, input_buffer);
+ result = BrotliEncoderWriteData(
+ s, is_last, /* force_flush */ 1, &out_size, &output);
+ if (!result || out_size > *encoded_size) {
+ return 0;
}
if (out_size > 0) {
memcpy(encoded_buffer, output, out_size);
}
*encoded_size = out_size;
- return true;
+ return 1;
}
-bool BrotliCompressor::WriteMetadata(const size_t input_size,
- const uint8_t* input_buffer,
- const bool is_last,
- size_t* encoded_size,
- uint8_t* encoded_buffer) {
+int BrotliEncoderWriteMetadata(BrotliEncoderState* s, const size_t input_size,
+ const uint8_t* input_buffer, const int is_last,
+ size_t* encoded_size, uint8_t* encoded_buffer) {
+ uint64_t hdr_buffer_data[2];
+ uint8_t* hdr_buffer = (uint8_t*)&hdr_buffer_data[0];
+ size_t storage_ix;
+ if (!EnsureInitialized(s)) return 0;
if (input_size > (1 << 24) || input_size + 6 > *encoded_size) {
- return false;
+ return 0;
}
- uint64_t hdr_buffer_data[2];
- uint8_t* hdr_buffer = reinterpret_cast<uint8_t*>(&hdr_buffer_data[0]);
- size_t storage_ix = last_byte_bits_;
- hdr_buffer[0] = last_byte_;
- WriteBits(1, 0, &storage_ix, hdr_buffer);
- WriteBits(2, 3, &storage_ix, hdr_buffer);
- WriteBits(1, 0, &storage_ix, hdr_buffer);
+ storage_ix = s->last_byte_bits_;
+ hdr_buffer[0] = s->last_byte_;
+ BrotliWriteBits(1, 0, &storage_ix, hdr_buffer);
+ BrotliWriteBits(2, 3, &storage_ix, hdr_buffer);
+ BrotliWriteBits(1, 0, &storage_ix, hdr_buffer);
if (input_size == 0) {
- WriteBits(2, 0, &storage_ix, hdr_buffer);
+ BrotliWriteBits(2, 0, &storage_ix, hdr_buffer);
*encoded_size = (storage_ix + 7u) >> 3;
memcpy(encoded_buffer, hdr_buffer, *encoded_size);
} else {
- uint32_t nbits = (input_size == 1) ? 0 : (Log2FloorNonZero(
- static_cast<uint32_t>(input_size) - 1) + 1);
+ uint32_t nbits = (input_size == 1) ? 0 :
+ (Log2FloorNonZero((uint32_t)input_size - 1) + 1);
uint32_t nbytes = (nbits + 7) / 8;
- WriteBits(2, nbytes, &storage_ix, hdr_buffer);
- WriteBits(8 * nbytes, input_size - 1, &storage_ix, hdr_buffer);
- size_t hdr_size = (storage_ix + 7u) >> 3;
+ size_t hdr_size;
+ BrotliWriteBits(2, nbytes, &storage_ix, hdr_buffer);
+ BrotliWriteBits(8 * nbytes, input_size - 1, &storage_ix, hdr_buffer);
+ hdr_size = (storage_ix + 7u) >> 3;
memcpy(encoded_buffer, hdr_buffer, hdr_size);
memcpy(&encoded_buffer[hdr_size], input_buffer, input_size);
*encoded_size = hdr_size + input_size;
@@ -769,14 +1017,16 @@ bool BrotliCompressor::WriteMetadata(const size_t input_size,
if (is_last) {
encoded_buffer[(*encoded_size)++] = 3;
}
- last_byte_ = 0;
- last_byte_bits_ = 0;
- return true;
+ s->last_byte_ = 0;
+ s->last_byte_bits_ = 0;
+ return 1;
}
-bool BrotliCompressor::FinishStream(
- size_t* encoded_size, uint8_t* encoded_buffer) {
- return WriteMetaBlock(0, NULL, true, encoded_size, encoded_buffer);
+int BrotliEncoderFinishStream(BrotliEncoderState* s, size_t* encoded_size,
+ uint8_t* encoded_buffer) {
+ if (!EnsureInitialized(s)) return 0;
+ return BrotliEncoderWriteMetaBlock(
+ s, 0, NULL, 1, encoded_size, encoded_buffer);
}
static int BrotliCompressBufferQuality10(int lgwin,
@@ -784,9 +1034,11 @@ static int BrotliCompressBufferQuality10(int lgwin,
const uint8_t* input_buffer,
size_t* encoded_size,
uint8_t* encoded_buffer) {
- const size_t mask = std::numeric_limits<size_t>::max() >> 1;
- assert(input_size <= mask + 1);
- const size_t max_backward_limit = (1 << lgwin) - 16;
+ MemoryManager memory_manager;
+ MemoryManager* m = &memory_manager;
+
+ const size_t mask = BROTLI_SIZE_MAX >> 1;
+ const size_t max_backward_limit = MaxBackwardLimit(lgwin);
int dist_cache[4] = { 4, 11, 15, 16 };
int saved_dist_cache[4] = { 4, 11, 15, 16 };
int ok = 1;
@@ -794,24 +1046,35 @@ static int BrotliCompressBufferQuality10(int lgwin,
size_t total_out_size = 0;
uint8_t last_byte;
uint8_t last_byte_bits;
- EncodeWindowBits(lgwin, &last_byte, &last_byte_bits);
+ H10* hasher;
- Hashers::H10* hasher = new Hashers::H10;
- const size_t hasher_eff_size = std::min(input_size, max_backward_limit + 16);
- hasher->Init(lgwin, 0, hasher_eff_size, true);
+ const size_t hasher_eff_size =
+ BROTLI_MIN(size_t, input_size, max_backward_limit + 16);
- const int lgblock = std::min(18, lgwin);
- const int lgmetablock = std::min(24, lgwin + 1);
- const size_t max_block_size = static_cast<size_t>(1) << lgblock;
- const size_t max_metablock_size = static_cast<size_t>(1) << lgmetablock;
+ const int quality = 10;
+ const int lgblock = BROTLI_MIN(int, 18, lgwin);
+ const int lgmetablock = BROTLI_MIN(int, 24, lgwin + 1);
+ const size_t max_block_size = (size_t)1 << lgblock;
+ const size_t max_metablock_size = (size_t)1 << lgmetablock;
const size_t max_literals_per_metablock = max_metablock_size / 8;
const size_t max_commands_per_metablock = max_metablock_size / 8;
size_t metablock_start = 0;
uint8_t prev_byte = 0;
uint8_t prev_byte2 = 0;
+
+ BrotliInitMemoryManager(m, 0, 0, 0);
+
+ assert(input_size <= mask + 1);
+ EncodeWindowBits(lgwin, &last_byte, &last_byte_bits);
+ hasher = BROTLI_ALLOC(m, H10, 1);
+ if (BROTLI_IS_OOM(m)) goto oom;
+ InitializeH10(hasher);
+ InitH10(m, hasher, input_buffer, lgwin, 0, hasher_eff_size, 1);
+ if (BROTLI_IS_OOM(m)) goto oom;
+
while (ok && metablock_start < input_size) {
const size_t metablock_end =
- std::min(input_size, metablock_start + max_metablock_size);
+ BROTLI_MIN(size_t, input_size, metablock_start + max_metablock_size);
const size_t expected_num_commands =
(metablock_end - metablock_start) / 12 + 16;
Command* commands = 0;
@@ -820,16 +1083,25 @@ static int BrotliCompressBufferQuality10(int lgwin,
size_t num_literals = 0;
size_t metablock_size = 0;
size_t cmd_alloc_size = 0;
-
- for (size_t block_start = metablock_start; block_start < metablock_end; ) {
- size_t block_size = std::min(metablock_end - block_start, max_block_size);
- ZopfliNode* nodes = new ZopfliNode[block_size + 1];
- std::vector<uint32_t> path;
- hasher->StitchToPreviousBlock(block_size, block_start,
- input_buffer, mask);
- ZopfliComputeShortestPath(block_size, block_start, input_buffer, mask,
- max_backward_limit, dist_cache,
- hasher, nodes, &path);
+ int is_last;
+ uint8_t* storage;
+ size_t storage_ix;
+
+ size_t block_start;
+ for (block_start = metablock_start; block_start < metablock_end; ) {
+ size_t block_size =
+ BROTLI_MIN(size_t, metablock_end - block_start, max_block_size);
+ ZopfliNode* nodes = BROTLI_ALLOC(m, ZopfliNode, block_size + 1);
+ size_t path_size;
+ size_t new_cmd_alloc_size;
+ if (BROTLI_IS_OOM(m)) goto oom;
+ BrotliInitZopfliNodes(nodes, block_size + 1);
+ StitchToPreviousBlockH10(hasher, block_size, block_start,
+ input_buffer, mask);
+ path_size = BrotliZopfliComputeShortestPath(
+ m, block_size, block_start, input_buffer, mask, quality,
+ max_backward_limit, dist_cache, hasher, nodes);
+ if (BROTLI_IS_OOM(m)) goto oom;
/* We allocate a command buffer in the first iteration of this loop that
       will likely be big enough for the whole metablock, so that for most
inputs we will not have to reallocate in later iterations. We do the
@@ -838,20 +1110,25 @@ static int BrotliCompressBufferQuality10(int lgwin,
will not increase peak memory usage.
TODO: If the first allocation is too small, increase command
buffer size exponentially. */
- size_t new_cmd_alloc_size = std::max(expected_num_commands,
- num_commands + path.size() + 1);
+ new_cmd_alloc_size = BROTLI_MAX(size_t, expected_num_commands,
+ num_commands + path_size + 1);
if (cmd_alloc_size != new_cmd_alloc_size) {
+ Command* new_commands = BROTLI_ALLOC(m, Command, new_cmd_alloc_size);
+ if (BROTLI_IS_OOM(m)) goto oom;
cmd_alloc_size = new_cmd_alloc_size;
- commands = static_cast<Command*>(
- realloc(commands, cmd_alloc_size * sizeof(Command)));
+ if (commands) {
+ memcpy(new_commands, commands, sizeof(Command) * num_commands);
+ BROTLI_FREE(m, commands);
+ }
+ commands = new_commands;
}
- ZopfliCreateCommands(block_size, block_start, max_backward_limit, path,
- &nodes[0], dist_cache, &last_insert_len,
- &commands[num_commands], &num_literals);
- num_commands += path.size();
+ BrotliZopfliCreateCommands(block_size, block_start, max_backward_limit,
+ &nodes[0], dist_cache, &last_insert_len,
+ &commands[num_commands], &num_literals);
+ num_commands += path_size;
block_start += block_size;
metablock_size += block_size;
- delete[] nodes;
+ BROTLI_FREE(m, nodes);
if (num_literals > max_literals_per_metablock ||
num_commands > max_commands_per_metablock) {
break;
@@ -859,70 +1136,74 @@ static int BrotliCompressBufferQuality10(int lgwin,
}
if (last_insert_len > 0) {
- Command cmd(last_insert_len);
- commands[num_commands++] = cmd;
+ InitInsertCommand(&commands[num_commands++], last_insert_len);
num_literals += last_insert_len;
}
- const bool is_last = (metablock_start + metablock_size == input_size);
- uint8_t* storage = NULL;
- size_t storage_ix = last_byte_bits;
+ is_last = (metablock_start + metablock_size == input_size) ? 1 : 0;
+ storage = NULL;
+ storage_ix = last_byte_bits;
if (metablock_size == 0) {
/* Write the ISLAST and ISEMPTY bits. */
- storage = new uint8_t[16];
+ storage = BROTLI_ALLOC(m, uint8_t, 16);
+ if (BROTLI_IS_OOM(m)) goto oom;
storage[0] = last_byte;
- WriteBits(2, 3, &storage_ix, storage);
+ BrotliWriteBits(2, 3, &storage_ix, storage);
storage_ix = (storage_ix + 7u) & ~7u;
} else if (!ShouldCompress(input_buffer, mask, metablock_start,
metablock_size, num_literals, num_commands)) {
/* Restore the distance cache, as its last update by
CreateBackwardReferences is now unused. */
memcpy(dist_cache, saved_dist_cache, 4 * sizeof(dist_cache[0]));
- storage = new uint8_t[metablock_size + 16];
+ storage = BROTLI_ALLOC(m, uint8_t, metablock_size + 16);
+ if (BROTLI_IS_OOM(m)) goto oom;
storage[0] = last_byte;
- StoreUncompressedMetaBlock(is_last, input_buffer,
- metablock_start, mask, metablock_size,
- &storage_ix, storage);
+ BrotliStoreUncompressedMetaBlock(is_last, input_buffer,
+ metablock_start, mask, metablock_size,
+ &storage_ix, storage);
} else {
uint32_t num_direct_distance_codes = 0;
uint32_t distance_postfix_bits = 0;
- MetaBlockSplit mb;
ContextType literal_context_mode = CONTEXT_UTF8;
- if (!IsMostlyUTF8(
- input_buffer, metablock_start, mask, metablock_size,
- kMinUTF8Ratio)) {
+ MetaBlockSplit mb;
+ InitMetaBlockSplit(&mb);
+ if (!BrotliIsMostlyUTF8(input_buffer, metablock_start, mask,
+ metablock_size, kMinUTF8Ratio)) {
literal_context_mode = CONTEXT_SIGNED;
}
- BuildMetaBlock(input_buffer, metablock_start, mask,
- prev_byte, prev_byte2,
- commands, num_commands,
- literal_context_mode,
- &mb);
- OptimizeHistograms(num_direct_distance_codes,
- distance_postfix_bits,
- &mb);
- const size_t max_out_metablock_size = 2 * metablock_size + 500;
- storage = new uint8_t[max_out_metablock_size];
+ BrotliBuildMetaBlock(m, input_buffer, metablock_start, mask, quality,
+ prev_byte, prev_byte2,
+ commands, num_commands,
+ literal_context_mode,
+ &mb);
+ if (BROTLI_IS_OOM(m)) goto oom;
+ BrotliOptimizeHistograms(num_direct_distance_codes,
+ distance_postfix_bits,
+ &mb);
+ storage = BROTLI_ALLOC(m, uint8_t, 2 * metablock_size + 500);
+ if (BROTLI_IS_OOM(m)) goto oom;
storage[0] = last_byte;
- StoreMetaBlock(input_buffer, metablock_start, metablock_size, mask,
- prev_byte, prev_byte2,
- is_last,
- num_direct_distance_codes,
- distance_postfix_bits,
- literal_context_mode,
- commands, num_commands,
- mb,
- &storage_ix, storage);
+ BrotliStoreMetaBlock(m, input_buffer, metablock_start, metablock_size,
+ mask, prev_byte, prev_byte2,
+ is_last,
+ num_direct_distance_codes,
+ distance_postfix_bits,
+ literal_context_mode,
+ commands, num_commands,
+ &mb,
+ &storage_ix, storage);
+ if (BROTLI_IS_OOM(m)) goto oom;
if (metablock_size + 4 < (storage_ix >> 3)) {
/* Restore the distance cache and last byte. */
memcpy(dist_cache, saved_dist_cache, 4 * sizeof(dist_cache[0]));
storage[0] = last_byte;
storage_ix = last_byte_bits;
- StoreUncompressedMetaBlock(is_last, input_buffer,
- metablock_start, mask,
- metablock_size, &storage_ix, storage);
+ BrotliStoreUncompressedMetaBlock(is_last, input_buffer,
+ metablock_start, mask,
+ metablock_size, &storage_ix, storage);
}
+ DestroyMetaBlockSplit(m, &mb);
}
last_byte = storage[storage_ix >> 3];
last_byte_bits = storage_ix & 7u;
@@ -933,29 +1214,89 @@ static int BrotliCompressBufferQuality10(int lgwin,
emitting an uncompressed block. */
memcpy(saved_dist_cache, dist_cache, 4 * sizeof(dist_cache[0]));
- const size_t out_size = storage_ix >> 3;
- total_out_size += out_size;
- if (total_out_size <= max_out_size) {
- memcpy(encoded_buffer, storage, out_size);
- encoded_buffer += out_size;
- } else {
- ok = 0;
+ {
+ const size_t out_size = storage_ix >> 3;
+ total_out_size += out_size;
+ if (total_out_size <= max_out_size) {
+ memcpy(encoded_buffer, storage, out_size);
+ encoded_buffer += out_size;
+ } else {
+ ok = 0;
+ }
}
- delete[] storage;
- free(commands);
+ BROTLI_FREE(m, storage);
+ BROTLI_FREE(m, commands);
}
*encoded_size = total_out_size;
- delete hasher;
+ CleanupH10(m, hasher);
+ BROTLI_FREE(m, hasher);
return ok;
+
+oom:
+ BrotliWipeOutMemoryManager(m);
+ return 0;
+}
+
+size_t BrotliEncoderMaxCompressedSize(size_t input_size) {
+ /* [window bits / empty metadata] + N * [uncompressed] + [last empty] */
+ size_t num_large_blocks = input_size >> 24;
+ size_t tail = input_size - (num_large_blocks << 24);
+ size_t tail_overhead = (tail > (1 << 20)) ? 4 : 3;
+ size_t overhead = 2 + (4 * num_large_blocks) + tail_overhead + 1;
+ size_t result = input_size + overhead;
+ if (input_size == 0) return 1;
+ return (result < input_size) ? 0 : result;
}
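
As a worked example of this bound (the input size below is arbitrary, chosen only to
exercise the large-block branch): for input_size = (1 << 24) + 100, num_large_blocks = 1,
tail = 100, tail_overhead = 3, so overhead = 2 + 4*1 + 3 + 1 = 10 and the function
returns input_size + 10. A caller would typically size its output buffer with it:

    size_t input_size = ((size_t)1 << 24) + 100;
    size_t bound = BrotliEncoderMaxCompressedSize(input_size);  /* input_size + 10 */
    uint8_t* encoded = bound ? (uint8_t*)malloc(bound) : NULL;  /* fits any input */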
-int BrotliCompressBuffer(BrotliParams params,
- size_t input_size,
- const uint8_t* input_buffer,
- size_t* encoded_size,
- uint8_t* encoded_buffer) {
- if (*encoded_size == 0) {
+/* Wraps data into an uncompressed brotli stream with minimal window size.
+ |output| should point at a region with at least
+ BrotliEncoderMaxCompressedSize(input_size) addressable bytes.
+ Returns the length of the stream. */
+static size_t MakeUncompressedStream(
+ const uint8_t* input, size_t input_size, uint8_t* output) {
+ size_t size = input_size;
+ size_t result = 0;
+ size_t offset = 0;
+ if (input_size == 0) {
+ output[0] = 6;
+ return 1;
+ }
+ output[result++] = 0x21; /* window bits = 10, is_last = false */
+ output[result++] = 0x03; /* empty metadata, padding */
+ while (size > 0) {
+ uint32_t nibbles = 0;
+ uint32_t chunk_size;
+ uint32_t bits;
+ chunk_size = (size > (1u << 24)) ? (1u << 24) : (uint32_t)size;
+ if (chunk_size > (1u << 16)) nibbles = (chunk_size > (1u << 20)) ? 2 : 1;
+ bits =
+ (nibbles << 1) | ((chunk_size - 1) << 3) | (1u << (19 + 4 * nibbles));
+ output[result++] = (uint8_t)bits;
+ output[result++] = (uint8_t)(bits >> 8);
+ output[result++] = (uint8_t)(bits >> 16);
+ if (nibbles == 2) output[result++] = (uint8_t)(bits >> 24);
+ memcpy(&output[result], &input[offset], chunk_size);
+ result += chunk_size;
+ offset += chunk_size;
+ size -= chunk_size;
+ }
+ output[result++] = 3;
+ return result;
+}
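
To illustrate the framing above, here is a worked trace under an assumed 10-byte input:

    /* 0x21 0x03             window bits = 10, empty metadata block
       0x48 0x00 0x08        chunk header: nibbles = 0, so
                             bits = (0 << 1) | (9 << 3) | (1u << 19) = 0x80048,
                             emitted least-significant byte first
       <10 raw input bytes>  the uncompressed chunk
       0x03                  final empty "last" meta-block
       Total: 16 bytes = input_size + 6, which matches the overhead counted
       by BrotliEncoderMaxCompressedSize(10). */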
+
+int BrotliEncoderCompress(int quality, int lgwin, BrotliEncoderMode mode,
+ size_t input_size,
+ const uint8_t* input_buffer,
+ size_t* encoded_size,
+ uint8_t* encoded_buffer) {
+ BrotliEncoderState* s;
+ BrotliEncoderParams params;
+ size_t out_size = *encoded_size;
+ const uint8_t* input_start = input_buffer;
+ uint8_t* output_start = encoded_buffer;
+ size_t max_out_size = BrotliEncoderMaxCompressedSize(input_size);
+ if (out_size == 0) {
/* Output buffer needs at least one byte. */
return 0;
}
@@ -965,217 +1306,268 @@ int BrotliCompressBuffer(BrotliParams params,
*encoded_buffer = 6;
return 1;
}
- if (params.quality == 10) {
+ if (quality == 10) {
/* TODO: Implement this direct path for all quality levels. */
- const int lgwin = std::min(24, std::max(16, params.lgwin));
- return BrotliCompressBufferQuality10(lgwin, input_size, input_buffer,
- encoded_size, encoded_buffer);
- }
- BrotliMemIn in(input_buffer, input_size);
- BrotliMemOut out(encoded_buffer, *encoded_size);
- if (!BrotliCompress(params, &in, &out)) {
- return 0;
+ const int lg_win = BROTLI_MIN(int, 24, BROTLI_MAX(int, 16, lgwin));
+ int ok = BrotliCompressBufferQuality10(lg_win, input_size, input_buffer,
+ encoded_size, encoded_buffer);
+ if (!ok || (max_out_size && *encoded_size > max_out_size)) {
+ goto fallback;
+ }
+ return 1;
}
- *encoded_size = out.position();
- return 1;
-}
-
-static bool BrotliInIsFinished(BrotliIn* r) {
- size_t read_bytes;
- return r->Read(0, &read_bytes) == NULL;
-}
-static const uint8_t* BrotliInReadAndCheckEnd(const size_t block_size,
- BrotliIn* r,
- size_t* bytes_read,
- bool* is_last) {
- *bytes_read = 0;
- const uint8_t* data = reinterpret_cast<const uint8_t*>(
- r->Read(block_size, bytes_read));
- assert((data == NULL) == (*bytes_read == 0));
- *is_last = BrotliInIsFinished(r);
- return data;
-}
-
-static bool CopyOneBlockToRingBuffer(BrotliIn* r,
- BrotliCompressor* compressor,
- size_t* bytes_read,
- bool* is_last) {
- const size_t block_size = compressor->input_block_size();
- const uint8_t* data = BrotliInReadAndCheckEnd(block_size, r,
- bytes_read, is_last);
- if (data == NULL) {
- return *is_last;
- }
- compressor->CopyInputToRingBuffer(*bytes_read, data);
-
- // Read more bytes until block_size is filled or an EOF (data == NULL) is
- // received. This is useful to get deterministic compressed output for the
- // same input no matter how r->Read splits the input to chunks.
- for (size_t remaining = block_size - *bytes_read; remaining > 0; ) {
- size_t more_bytes_read = 0;
- data = BrotliInReadAndCheckEnd(remaining, r, &more_bytes_read, is_last);
- if (data == NULL) {
- return *is_last;
+ BrotliEncoderParamsSetDefault(&params);
+ params.quality = quality;
+ params.lgwin = lgwin;
+ params.mode = mode;
+ s = BrotliEncoderCreateState(&params, 0, 0, 0);
+ if (!s) {
+ return 0;
+ } else {
+ size_t available_in = input_size;
+ const uint8_t* next_in = input_buffer;
+ size_t available_out = *encoded_size;
+ uint8_t* next_out = encoded_buffer;
+ size_t total_out = 0;
+ int result = BrotliEncoderCompressStream(s, BROTLI_OPERATION_FINISH,
+ &available_in, &next_in, &available_out, &next_out, &total_out);
+ if (!BrotliEncoderIsFinished(s)) result = 0;
+ *encoded_size = total_out;
+ BrotliEncoderDestroyState(s);
+ if (!result || (max_out_size && *encoded_size > max_out_size)) {
+ goto fallback;
}
- compressor->CopyInputToRingBuffer(more_bytes_read, data);
- *bytes_read += more_bytes_read;
- remaining -= more_bytes_read;
+ return 1;
}
- return true;
+fallback:
+ *encoded_size = 0;
+ if (!max_out_size) return 0;
+ if (out_size >= max_out_size) {
+ *encoded_size =
+ MakeUncompressedStream(input_start, input_size, output_start);
+ return 1;
+ }
+ return 0;
}
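
A minimal one-shot caller sketch for the function above (|input| and |input_size| are
assumed to exist; error handling is trimmed):

    size_t encoded_size = BrotliEncoderMaxCompressedSize(input_size);
    uint8_t* encoded = (uint8_t*)malloc(encoded_size);
    if (encoded != NULL &&
        BrotliEncoderCompress(BROTLI_DEFAULT_QUALITY, BROTLI_DEFAULT_WINDOW,
                              BROTLI_DEFAULT_MODE, input_size, input,
                              &encoded_size, encoded)) {
      /* success: encoded_size holds the compressed length */
    } else {
      /* failure: encoded_size was set to 0 */
    }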
-
-int BrotliCompress(BrotliParams params, BrotliIn* in, BrotliOut* out) {
- return BrotliCompressWithCustomDictionary(0, 0, params, in, out);
+static void InjectBytePaddingBlock(BrotliEncoderState* s) {
+ uint32_t seal = s->last_byte_;
+ size_t seal_bits = s->last_byte_bits_;
+ s->last_byte_ = 0;
+ s->last_byte_bits_ = 0;
+  /* is_last = 0, data_nibbles = 11, reserved = 0, meta_nibbles = 00 */
+ seal |= 0x6u << seal_bits;
+ seal_bits += 6;
+ s->flush_buf_[0] = (uint8_t)seal;
+ if (seal_bits > 8) s->flush_buf_[1] = (uint8_t)(seal >> 8);
+ s->next_out_ = s->flush_buf_;
+ s->available_out_ = (seal_bits + 7) >> 3;
}
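
A worked example of the seal arithmetic, with arbitrary starting values: if
last_byte_ = 0x15 and last_byte_bits_ = 5, then:

    /* seal           = 0x15 | (0x6 << 5) = 0xD5
       seal_bits      = 5 + 6 = 11
       flush_buf_[0]  = 0xD5, flush_buf_[1] = 0x00
       available_out_ = (11 + 7) >> 3 = 2
       After these two bytes are drained the stream is byte-aligned again. */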
-// Reads the provided input in 'block_size' blocks. Only the last read can be
-// smaller than 'block_size'.
-class BrotliBlockReader {
- public:
- explicit BrotliBlockReader(size_t block_size)
- : block_size_(block_size), buf_(NULL) {}
- ~BrotliBlockReader(void) { delete[] buf_; }
-
- const uint8_t* Read(BrotliIn* in, size_t* bytes_read, bool* is_last) {
- *bytes_read = 0;
- const uint8_t* data = BrotliInReadAndCheckEnd(block_size_, in,
- bytes_read, is_last);
- if (data == NULL || *bytes_read == block_size_ || *is_last) {
- // If we could get the whole block in one read, or it is the last block,
- // we just return the pointer to the data without copying.
- return data;
+static int BrotliEncoderCompressStreamFast(
+ BrotliEncoderState* s, BrotliEncoderOperation op, size_t* available_in,
+ const uint8_t** next_in, size_t* available_out, uint8_t** next_out,
+ size_t* total_out) {
+ const size_t block_size_limit = 1u << s->params_.lgwin;
+ const size_t buf_size = BROTLI_MIN(size_t, kCompressFragmentTwoPassBlockSize,
+ BROTLI_MIN(size_t, *available_in, block_size_limit));
+ uint32_t* tmp_command_buf = NULL;
+ uint32_t* command_buf = NULL;
+ uint8_t* tmp_literal_buf = NULL;
+ uint8_t* literal_buf = NULL;
+ MemoryManager* m = &s->memory_manager_;
+ if (s->params_.quality == 1) {
+ if (!s->command_buf_ && buf_size == kCompressFragmentTwoPassBlockSize) {
+ s->command_buf_ =
+ BROTLI_ALLOC(m, uint32_t, kCompressFragmentTwoPassBlockSize);
+ s->literal_buf_ =
+ BROTLI_ALLOC(m, uint8_t, kCompressFragmentTwoPassBlockSize);
+ if (BROTLI_IS_OOM(m)) return 0;
}
- // If the data comes in smaller chunks, we need to copy it into an internal
- // buffer until we get a whole block or reach the last chunk.
- if (buf_ == NULL) {
- buf_ = new uint8_t[block_size_];
+ if (s->command_buf_) {
+ command_buf = s->command_buf_;
+ literal_buf = s->literal_buf_;
+ } else {
+ tmp_command_buf = BROTLI_ALLOC(m, uint32_t, buf_size);
+ tmp_literal_buf = BROTLI_ALLOC(m, uint8_t, buf_size);
+ if (BROTLI_IS_OOM(m)) return 0;
+ command_buf = tmp_command_buf;
+ literal_buf = tmp_literal_buf;
}
- memcpy(buf_, data, *bytes_read);
- do {
- size_t cur_bytes_read = 0;
- data = BrotliInReadAndCheckEnd(block_size_ - *bytes_read, in,
- &cur_bytes_read, is_last);
- if (data == NULL) {
- return *is_last ? buf_ : NULL;
- }
- memcpy(&buf_[*bytes_read], data, cur_bytes_read);
- *bytes_read += cur_bytes_read;
- } while (*bytes_read < block_size_ && !*is_last);
- return buf_;
}
- private:
- const size_t block_size_;
- uint8_t* buf_;
-};
-
-int BrotliCompressWithCustomDictionary(size_t dictsize, const uint8_t* dict,
- BrotliParams params,
- BrotliIn* in, BrotliOut* out) {
- if (params.quality <= 1) {
- const int quality = std::max(0, params.quality);
- const int lgwin = std::min(kMaxWindowBits,
- std::max(kMinWindowBits, params.lgwin));
- uint8_t* storage = NULL;
- int* table = NULL;
- uint32_t* command_buf = NULL;
- uint8_t* literal_buf = NULL;
- uint8_t cmd_depths[128];
- uint16_t cmd_bits[128];
- uint8_t cmd_code[512];
- size_t cmd_code_numbits;
- if (quality == 0) {
- InitCommandPrefixCodes(cmd_depths, cmd_bits, cmd_code, &cmd_code_numbits);
+ while (1) {
+ if (s->available_out_ == 0 &&
+ s->stream_state_ == BROTLI_STREAM_FLUSH_REQUESTED) {
+ s->stream_state_ = BROTLI_STREAM_PROCESSING;
+ if (s->last_byte_bits_ == 0) break;
+ InjectBytePaddingBlock(s);
+ continue;
}
- uint8_t last_byte;
- uint8_t last_byte_bits;
- EncodeWindowBits(lgwin, &last_byte, &last_byte_bits);
- BrotliBlockReader r(1u << lgwin);
- int ok = 1;
- bool is_last = false;
- while (ok && !is_last) {
- // Read next block of input.
- size_t bytes;
- const uint8_t* data = r.Read(in, &bytes, &is_last);
- if (data == NULL) {
- if (!is_last) {
- ok = 0;
- break;
- }
- assert(bytes == 0);
- }
- // Set up output storage.
- const size_t max_out_size = 2 * bytes + 500;
- if (storage == NULL) {
- storage = new uint8_t[max_out_size];
- }
- storage[0] = last_byte;
- size_t storage_ix = last_byte_bits;
- // Set up hash table.
- size_t htsize = HashTableSize(MaxHashTableSize(quality), bytes);
- if (table == NULL) {
- table = new int[htsize];
+
+ if (s->available_out_ != 0 && *available_out != 0) {
+ size_t copy_output_size =
+ BROTLI_MIN(size_t, s->available_out_, *available_out);
+ memcpy(*next_out, s->next_out_, copy_output_size);
+ *next_out += copy_output_size;
+ *available_out -= copy_output_size;
+ s->next_out_ += copy_output_size;
+ s->available_out_ -= copy_output_size;
+ s->total_out_ += copy_output_size;
+ if (total_out) *total_out = s->total_out_;
+ continue;
+ }
+
+ /* Compress block only when internal output buffer is empty, stream is not
+ finished, there is no pending flush request, and there is either
+      additional input or a pending operation. */
+ if (s->available_out_ == 0 &&
+ s->stream_state_ == BROTLI_STREAM_PROCESSING &&
+ (*available_in != 0 || op != BROTLI_OPERATION_PROCESS)) {
+ size_t block_size = BROTLI_MIN(size_t, block_size_limit, *available_in);
+ int is_last =
+ (*available_in == block_size) && (op == BROTLI_OPERATION_FINISH);
+ int force_flush =
+ (*available_in == block_size) && (op == BROTLI_OPERATION_FLUSH);
+ size_t max_out_size = 2 * block_size + 500;
+ int inplace = 1;
+ uint8_t* storage = NULL;
+ size_t storage_ix = s->last_byte_bits_;
+ size_t table_size;
+ int* table;
+
+ if (force_flush && block_size == 0) {
+ s->stream_state_ = BROTLI_STREAM_FLUSH_REQUESTED;
+ continue;
}
- memset(table, 0, htsize * sizeof(table[0]));
- // Set up command and literal buffers for two pass mode.
- if (quality == 1 && command_buf == NULL) {
- size_t buf_size = std::min(bytes, kCompressFragmentTwoPassBlockSize);
- command_buf = new uint32_t[buf_size];
- literal_buf = new uint8_t[buf_size];
+ if (max_out_size <= *available_out) {
+ storage = *next_out;
+ } else {
+ inplace = 0;
+ storage = GetBrotliStorage(s, max_out_size);
+ if (BROTLI_IS_OOM(m)) return 0;
}
- // Do the actual compression.
- if (quality == 0) {
- BrotliCompressFragmentFast(data, bytes, is_last, table, htsize,
- cmd_depths, cmd_bits,
- &cmd_code_numbits, cmd_code,
- &storage_ix, storage);
+ storage[0] = s->last_byte_;
+ table = GetHashTable(s, s->params_.quality, block_size, &table_size);
+ if (BROTLI_IS_OOM(m)) return 0;
+
+ if (s->params_.quality == 0) {
+ BrotliCompressFragmentFast(m, *next_in, block_size, is_last, table,
+ table_size, s->cmd_depths_, s->cmd_bits_, &s->cmd_code_numbits_,
+ s->cmd_code_, &storage_ix, storage);
+ if (BROTLI_IS_OOM(m)) return 0;
} else {
- BrotliCompressFragmentTwoPass(data, bytes, is_last,
- command_buf, literal_buf,
- table, htsize,
- &storage_ix, storage);
+ BrotliCompressFragmentTwoPass(m, *next_in, block_size, is_last,
+ command_buf, literal_buf, table, table_size,
+ &storage_ix, storage);
+ if (BROTLI_IS_OOM(m)) return 0;
}
- // Save last bytes to stitch it together with the next output block.
- last_byte = storage[storage_ix >> 3];
- last_byte_bits = storage_ix & 7u;
- // Write output block.
- size_t out_bytes = storage_ix >> 3;
- if (out_bytes > 0 && !out->Write(storage, out_bytes)) {
- ok = 0;
- break;
+ *next_in += block_size;
+ *available_in -= block_size;
+ if (inplace) {
+ size_t out_bytes = storage_ix >> 3;
+ assert(out_bytes <= *available_out);
+ assert((storage_ix & 7) == 0 || out_bytes < *available_out);
+ *next_out += out_bytes;
+ *available_out -= out_bytes;
+ s->total_out_ += out_bytes;
+ if (total_out) *total_out = s->total_out_;
+ } else {
+ size_t out_bytes = storage_ix >> 3;
+ s->next_out_ = storage;
+ s->available_out_ = out_bytes;
}
+ s->last_byte_ = storage[storage_ix >> 3];
+ s->last_byte_bits_ = storage_ix & 7u;
+
+ if (force_flush) s->stream_state_ = BROTLI_STREAM_FLUSH_REQUESTED;
+ if (is_last) s->stream_state_ = BROTLI_STREAM_FINISHED;
+ continue;
}
- delete[] storage;
- delete[] table;
- delete[] command_buf;
- delete[] literal_buf;
- return ok;
+ break;
+ }
+ BROTLI_FREE(m, tmp_command_buf);
+ BROTLI_FREE(m, tmp_literal_buf);
+ return 1;
+}
+
+int BrotliEncoderCompressStream(BrotliEncoderState* s,
+ BrotliEncoderOperation op, size_t* available_in,
+ const uint8_t** next_in, size_t* available_out,
+ uint8_t** next_out, size_t* total_out) {
+ if (!EnsureInitialized(s)) return 0;
+
+ if (s->stream_state_ != BROTLI_STREAM_PROCESSING && *available_in != 0) {
+ return 0;
+ }
+ if (s->params_.quality <= 1) {
+ return BrotliEncoderCompressStreamFast(s, op, available_in, next_in,
+ available_out, next_out, total_out);
}
+ while (1) {
+ size_t remaining_block_size = RemainingInputBlockSize(s);
+
+ if (remaining_block_size != 0 && *available_in != 0) {
+ size_t copy_input_size =
+ BROTLI_MIN(size_t, remaining_block_size, *available_in);
+ BrotliEncoderCopyInputToRingBuffer(s, copy_input_size, *next_in);
+ *next_in += copy_input_size;
+ *available_in -= copy_input_size;
+ continue;
+ }
- size_t in_bytes = 0;
- size_t out_bytes = 0;
- uint8_t* output = NULL;
- bool final_block = false;
- BrotliCompressor compressor(params);
- if (dictsize != 0) compressor.BrotliSetCustomDictionary(dictsize, dict);
- while (!final_block) {
- if (!CopyOneBlockToRingBuffer(in, &compressor, &in_bytes, &final_block)) {
- return false;
+ if (s->available_out_ == 0 &&
+ s->stream_state_ == BROTLI_STREAM_FLUSH_REQUESTED) {
+ s->stream_state_ = BROTLI_STREAM_PROCESSING;
+ if (s->last_byte_bits_ == 0) break;
+ InjectBytePaddingBlock(s);
+ continue;
}
- out_bytes = 0;
- if (!compressor.WriteBrotliData(final_block,
- /* force_flush = */ false,
- &out_bytes, &output)) {
- return false;
+
+ if (s->available_out_ != 0 && *available_out != 0) {
+ size_t copy_output_size =
+ BROTLI_MIN(size_t, s->available_out_, *available_out);
+ memcpy(*next_out, s->next_out_, copy_output_size);
+ *next_out += copy_output_size;
+ *available_out -= copy_output_size;
+ s->next_out_ += copy_output_size;
+ s->available_out_ -= copy_output_size;
+ s->total_out_ += copy_output_size;
+ if (total_out) *total_out = s->total_out_;
+ continue;
}
- if (out_bytes > 0 && !out->Write(output, out_bytes)) {
- return false;
+
+  /* Compress data only when internal output buffer is empty, stream is not
     finished, and there is no pending flush request. */
+ if (s->available_out_ == 0 &&
+ s->stream_state_ == BROTLI_STREAM_PROCESSING) {
+ if (remaining_block_size == 0 || op != BROTLI_OPERATION_PROCESS) {
+ int is_last = (*available_in == 0) && op == BROTLI_OPERATION_FINISH;
+ int force_flush = (*available_in == 0) && op == BROTLI_OPERATION_FLUSH;
+ int result = BrotliEncoderWriteData(s, is_last, force_flush,
+ &s->available_out_, &s->next_out_);
+ if (!result) return 0;
+ if (force_flush) s->stream_state_ = BROTLI_STREAM_FLUSH_REQUESTED;
+ if (is_last) s->stream_state_ = BROTLI_STREAM_FINISHED;
+ continue;
+ }
}
+ break;
}
- return true;
+ return 1;
+}
+
+int BrotliEncoderIsFinished(BrotliEncoderState* s) {
+ return (s->stream_state_ == BROTLI_STREAM_FINISHED &&
+ !BrotliEncoderHasMoreOutput(s)) ? 1 : 0;
+}
+
+int BrotliEncoderHasMoreOutput(BrotliEncoderState* s) {
+ return (s->available_out_ != 0) ? 1 : 0;
}
-} // namespace brotli
+#if defined(__cplusplus) || defined(c_plusplus)
+} /* extern "C" */
+#endif
diff --git a/enc/encode.h b/enc/encode.h
index e319f3f..a99f5c8 100644
--- a/enc/encode.h
+++ b/enc/encode.h
@@ -9,45 +9,44 @@
#ifndef BROTLI_ENC_ENCODE_H_
#define BROTLI_ENC_ENCODE_H_
-#include <string>
-#include <vector>
-
#include "../common/types.h"
-#include "./command.h"
-#include "./hash.h"
-#include "./ringbuffer.h"
-#include "./static_dict.h"
-#include "./streams.h"
-
-namespace brotli {
-
-static const int kMaxWindowBits = 24;
-static const int kMinWindowBits = 10;
-static const int kMinInputBlockBits = 16;
-static const int kMaxInputBlockBits = 24;
-
-struct BrotliParams {
- BrotliParams(void)
- : mode(MODE_GENERIC),
- quality(11),
- lgwin(22),
- lgblock(0),
- enable_dictionary(true),
- enable_transforms(false),
- greedy_block_split(false),
- enable_context_modeling(true) {}
-
- enum Mode {
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+static const int kBrotliMaxWindowBits = 24;
+static const int kBrotliMinWindowBits = 10;
+static const int kBrotliMinInputBlockBits = 16;
+static const int kBrotliMaxInputBlockBits = 24;
+
+typedef enum BrotliEncoderMode {
/* Default compression mode. The compressor does not know anything in
advance about the properties of the input. */
- MODE_GENERIC = 0,
+ BROTLI_MODE_GENERIC = 0,
/* Compression mode for UTF-8 format text input. */
- MODE_TEXT = 1,
+ BROTLI_MODE_TEXT = 1,
/* Compression mode used in WOFF 2.0. */
- MODE_FONT = 2
- };
- Mode mode;
+ BROTLI_MODE_FONT = 2
+} BrotliEncoderMode;
+
+#define BROTLI_DEFAULT_QUALITY 11
+#define BROTLI_DEFAULT_WINDOW 22
+#define BROTLI_DEFAULT_MODE BROTLI_MODE_GENERIC
+typedef enum BrotliEncoderOperation {
+ BROTLI_OPERATION_PROCESS = 0,
+ /* Request output stream to flush. Performed when input stream is depleted
+ and there is enough space in output stream. */
+ BROTLI_OPERATION_FLUSH = 1,
+ /* Request output stream to finish. Performed when input stream is depleted
+ and there is enough space in output stream. */
+ BROTLI_OPERATION_FINISH = 2
+} BrotliEncoderOperation;
+
+/* DEPRECATED */
+typedef struct BrotliEncoderParams {
+ BrotliEncoderMode mode;
/* Controls the compression-speed vs compression-density tradeoffs. The higher
the |quality|, the slower the compression. Range is 0 to 11. */
int quality;
@@ -56,155 +55,202 @@ struct BrotliParams {
/* Base 2 logarithm of the maximum input block size. Range is 16 to 24.
If set to 0, the value will be set based on the quality. */
int lgblock;
+} BrotliEncoderParams;
+
+typedef enum BrotliEncoderParameter {
+ BROTLI_PARAM_MODE = 0,
+ /* Controls the compression-speed vs compression-density tradeoffs. The higher
+ the quality, the slower the compression. Range is 0 to 11. */
+ BROTLI_PARAM_QUALITY = 1,
+ /* Base 2 logarithm of the sliding window size. Range is 10 to 24. */
+ BROTLI_PARAM_LGWIN = 2,
+ /* Base 2 logarithm of the maximum input block size. Range is 16 to 24.
+ If set to 0, the value will be set based on the quality. */
+ BROTLI_PARAM_LGBLOCK = 3
+} BrotliEncoderParameter;
+
+/* DEPRECATED */
+void BrotliEncoderParamsSetDefault(BrotliEncoderParams* params);
+
+/* A state cannot be reused for multiple brotli streams. */
+typedef struct BrotliEncoderStateStruct BrotliEncoderState;
+
+int BrotliEncoderSetParameter(
+ BrotliEncoderState* state, BrotliEncoderParameter p, uint32_t value);
+
+/* Creates the instance of BrotliEncoderState and initializes it.
+ |alloc_func| and |free_func| MUST be both zero or both non-zero. In the case
+ they are both zero, default memory allocators are used. |opaque| is passed to
+ |alloc_func| and |free_func| when they are called. */
+BrotliEncoderState* BrotliEncoderCreateInstance(brotli_alloc_func alloc_func,
+ brotli_free_func free_func,
+ void* opaque);
+/* DEPRECATED */
+static inline BrotliEncoderState* BrotliEncoderCreateState(
+ const BrotliEncoderParams* params, brotli_alloc_func alloc_func,
+ brotli_free_func free_func, void* opaque) {
+ BrotliEncoderState* result = BrotliEncoderCreateInstance(
+ alloc_func, free_func, opaque);
+ if (!result) return result;
+ BrotliEncoderSetParameter(
+ result, BROTLI_PARAM_MODE, (uint32_t)params->mode);
+ BrotliEncoderSetParameter(
+ result, BROTLI_PARAM_QUALITY, (uint32_t)params->quality);
+ BrotliEncoderSetParameter(
+ result, BROTLI_PARAM_LGWIN, (uint32_t)params->lgwin);
+ BrotliEncoderSetParameter(
+ result, BROTLI_PARAM_LGBLOCK, (uint32_t)params->lgblock);
+ return result;
+}
+
+/* Deinitializes and frees BrotliEncoderState instance. */
+void BrotliEncoderDestroyInstance(BrotliEncoderState* state);
+/* DEPRECATED */
+static inline void BrotliEncoderDestroyState(BrotliEncoderState* state) {
+ BrotliEncoderDestroyInstance(state);
+}
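
For reference, a sketch of the non-deprecated creation path (the parameter values
are arbitrary):

    BrotliEncoderState* s = BrotliEncoderCreateInstance(0, 0, 0);
    if (s) {
      BrotliEncoderSetParameter(s, BROTLI_PARAM_QUALITY, 9);
      BrotliEncoderSetParameter(s, BROTLI_PARAM_LGWIN, 22);
      /* ... use the streaming API ... */
      BrotliEncoderDestroyInstance(s);
    }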
+
+/* The maximum input size that can be processed at once. */
+size_t BrotliEncoderInputBlockSize(BrotliEncoderState* state);
+
+/* Encodes the data in |input_buffer| as a meta-block and writes it to
+   |encoded_buffer| (|*encoded_size| should be set to the size of
+   |encoded_buffer|) and sets |*encoded_size| to the number of bytes written.
+   The |input_size| must not be greater than BrotliEncoderInputBlockSize().
+ Returns 0 if there was an error and 1 otherwise. */
+int BrotliEncoderWriteMetaBlock(BrotliEncoderState* state,
+ const size_t input_size,
+ const uint8_t* input_buffer, const int is_last,
+ size_t* encoded_size, uint8_t* encoded_buffer);
+
+/* Writes a metadata meta-block containing the given input to encoded_buffer.
+ |*encoded_size| should be set to the size of the encoded_buffer.
+   Sets |*encoded_size| to the number of bytes written.
+ Note that the given input data will not be part of the sliding window and
+ thus no backward references can be made to this data from subsequent
+   metablocks. |input_size| must not be greater than 2^24 and the provided
+   |*encoded_size| must not be less than |input_size| + 6.
+ Returns 0 if there was an error and 1 otherwise. */
+int BrotliEncoderWriteMetadata(BrotliEncoderState* state,
+ const size_t input_size,
+ const uint8_t* input_buffer, const int is_last,
+ size_t* encoded_size, uint8_t* encoded_buffer);
+
+/* Writes a zero-length meta-block with end-of-input bit set to the
+ internal output buffer and copies the output buffer to |encoded_buffer|
+ (|*encoded_size| should be set to the size of |encoded_buffer|) and sets
+ |*encoded_size| to the number of bytes written.
+ Returns 0 if there was an error and 1 otherwise. */
+int BrotliEncoderFinishStream(BrotliEncoderState* state, size_t* encoded_size,
+ uint8_t* encoded_buffer);
+
+/* Copies the given input data to the internal ring buffer of the compressor.
+ No processing of the data occurs at this time and this function can be
+   called multiple times before calling BrotliEncoderWriteData() to process
+   the accumulated input. At most BrotliEncoderInputBlockSize() bytes of input
+   data can be copied to the ring buffer, otherwise the next
+   BrotliEncoderWriteData() will fail.
+ */
+void BrotliEncoderCopyInputToRingBuffer(BrotliEncoderState* state,
+ const size_t input_size,
+ const uint8_t* input_buffer);
+
+/* Processes the accumulated input data and sets |*out_size| to the length of
+ the new output meta-block, or to zero if no new output meta-block has been
+ created (in this case the processed input data is buffered internally).
+ If |*out_size| is positive, |*output| points to the start of the output
+ data. If |is_last| or |force_flush| is 1, an output meta-block is always
+   created. However, until |is_last| is 1 the encoder may retain up to 7 bits
+   of the last byte of output. To force the encoder to dump the remaining
+   bits, use BrotliEncoderWriteMetadata() to append an empty meta-data block.
+   Returns 0 if the size of the input data is larger than
+   BrotliEncoderInputBlockSize(). */
+int BrotliEncoderWriteData(BrotliEncoderState* state, const int is_last,
+ const int force_flush, size_t* out_size,
+ uint8_t** output);
+
+/* Fills the new state with a dictionary for LZ77, warming up the ringbuffer,
+ e.g. for custom static dictionaries for data formats.
+ Not to be confused with the built-in transformable dictionary of Brotli.
+ To decode, use BrotliSetCustomDictionary() of the decoder with the same
+ dictionary. */
+void BrotliEncoderSetCustomDictionary(BrotliEncoderState* state, size_t size,
+ const uint8_t* dict);
+
+/* Returns buffer size that is large enough to contain BrotliEncoderCompress
+ output for any input.
+ Returns 0 if result does not fit size_t. */
+size_t BrotliEncoderMaxCompressedSize(size_t input_size);
+
+/* Compresses the data in |input_buffer| into |encoded_buffer|, and sets
+ |*encoded_size| to the compressed length.
+ BROTLI_DEFAULT_QUALITY, BROTLI_DEFAULT_WINDOW and BROTLI_DEFAULT_MODE should
+ be used as |quality|, |lgwin| and |mode| if there are no specific
+ requirements to encoder speed and compression ratio.
+ If compression fails, |*encoded_size| is set to 0.
+ If BrotliEncoderMaxCompressedSize(|input_size|) is not zero, then
+   |*encoded_size| is never set to a value bigger than that.
+ Returns 0 if there was an error and 1 otherwise. */
+int BrotliEncoderCompress(int quality, int lgwin, BrotliEncoderMode mode,
+ size_t input_size, const uint8_t* input_buffer,
+ size_t* encoded_size, uint8_t* encoded_buffer);
+
+/* Progressively compresses the input stream and pushes produced bytes to the
+   output stream. Internally the workflow consists of 3 tasks:
+ * (optional) copy input data to internal buffer
+ * actually compress data and (optionally) store it to internal buffer
+ * (optional) copy compressed bytes from internal buffer to output stream
+   Whenever all 3 tasks can't move forward anymore, or an error occurs, this
+ method returns.
+
+ |available_in| and |next_in| represent input stream; when X bytes of input
+ are consumed, X is subtracted from |available_in| and added to |next_in|.
+ |available_out| and |next_out| represent output stream; when Y bytes are
+ pushed to output, Y is subtracted from |available_out| and added to
+   |next_out|. |total_out|, if it is not a null pointer, is set to the total
+   number of bytes pushed to the output by this encoder instance.
+
+ |op| is used to perform flush or finish the stream.
+
+   Flushing the stream means forcing encoding of all input passed to the
+   encoder and completing the current output block, so that it can be fully
+   decoded by the stream decoder. To perform a flush, |op| must be set to
+   BROTLI_OPERATION_FLUSH. Under some circumstances (e.g. lack of output
+   stream capacity) this operation may require several calls to
+   BrotliEncoderCompressStream; the method must be called again until the
+   input stream is depleted and the encoder has no more output
+   (see BrotliEncoderHasMoreOutput).
+
+   Finishing the stream means encoding all input passed to the encoder and
+   adding specific "final" marks, so the stream decoder can determine that
+   the stream is complete. To perform a finish, |op| must be set to
+   BROTLI_OPERATION_FINISH. Under some circumstances (e.g. lack of output
+   stream capacity) this operation may require several calls to
+   BrotliEncoderCompressStream; the method must be called again until the
+   input stream is depleted and the encoder has no more output
+   (see BrotliEncoderHasMoreOutput).
+
+   WARNING: when flushing and finishing, |op| should not change until the
+   operation is complete, and the input stream should not be refilled.
+
+ Returns 0 if there was an error and 1 otherwise.
+*/
+int BrotliEncoderCompressStream(BrotliEncoderState* s,
+ BrotliEncoderOperation op, size_t* available_in,
+ const uint8_t** next_in, size_t* available_out,
+ uint8_t** next_out, size_t* total_out);
+
+/* Check if encoder is in "finished" state, i.e. no more input is acceptable and
+ no more output will be produced.
+ Works only with BrotliEncoderCompressStream workflow.
+ Returns 1 if stream is finished and 0 otherwise. */
+int BrotliEncoderIsFinished(BrotliEncoderState* s);
+
+/* Check if encoder has more output bytes in internal buffer.
+ Works only with BrotliEncoderCompressStream workflow.
+ Returns 1 if has more output (in internal buffer) and 0 otherwise. */
+int BrotliEncoderHasMoreOutput(BrotliEncoderState* s);
+
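
The intended calling pattern, as a hedged sketch: fill_input and write_output are
hypothetical I/O helpers, the buffer sizes are arbitrary, and |s| is assumed to come
from BrotliEncoderCreateInstance:

    /* hypothetical I/O helpers, assumed to exist: */
    extern size_t fill_input(uint8_t* buf, size_t cap);
    extern void write_output(const uint8_t* buf, size_t len);

    uint8_t in_buf[1 << 16], out_buf[1 << 16];
    size_t available_in = 0, available_out = sizeof(out_buf);
    const uint8_t* next_in = in_buf;
    uint8_t* next_out = out_buf;
    int eof = 0;
    while (1) {
      if (available_in == 0 && !eof) {
        available_in = fill_input(in_buf, sizeof(in_buf));
        next_in = in_buf;
        eof = (available_in == 0);  /* once set, op stays FINISH */
      }
      if (!BrotliEncoderCompressStream(
              s, eof ? BROTLI_OPERATION_FINISH : BROTLI_OPERATION_PROCESS,
              &available_in, &next_in, &available_out, &next_out,
              NULL /* total_out not needed */)) {
        break;  /* error */
      }
      if (available_out != sizeof(out_buf)) {  /* drain produced bytes */
        write_output(out_buf, sizeof(out_buf) - available_out);
        available_out = sizeof(out_buf);
        next_out = out_buf;
      }
      if (BrotliEncoderIsFinished(s)) break;
    }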
- // These settings are deprecated and will be ignored.
- // All speed vs. size compromises are controlled by the quality param.
- bool enable_dictionary;
- bool enable_transforms;
- bool greedy_block_split;
- bool enable_context_modeling;
-};
-
-// An instance can not be reused for multiple brotli streams.
-class BrotliCompressor {
- public:
- explicit BrotliCompressor(BrotliParams params);
- ~BrotliCompressor(void);
-
- // The maximum input size that can be processed at once.
- size_t input_block_size(void) const { return size_t(1) << params_.lgblock; }
-
- // Encodes the data in input_buffer as a meta-block and writes it to
- // encoded_buffer (*encoded_size should be set to the size of
- // encoded_buffer) and sets *encoded_size to the number of bytes that
- // was written. The input_size must be <= input_block_size().
- // Returns 0 if there was an error and 1 otherwise.
- bool WriteMetaBlock(const size_t input_size,
- const uint8_t* input_buffer,
- const bool is_last,
- size_t* encoded_size,
- uint8_t* encoded_buffer);
-
- // Writes a metadata meta-block containing the given input to encoded_buffer.
- // *encoded_size should be set to the size of the encoded_buffer.
- // Sets *encoded_size to the number of bytes that was written.
- // Note that the given input data will not be part of the sliding window and
- // thus no backward references can be made to this data from subsequent
- // metablocks.
- bool WriteMetadata(const size_t input_size,
- const uint8_t* input_buffer,
- const bool is_last,
- size_t* encoded_size,
- uint8_t* encoded_buffer);
-
- // Writes a zero-length meta-block with end-of-input bit set to the
- // internal output buffer and copies the output buffer to encoded_buffer
- // (*encoded_size should be set to the size of encoded_buffer) and sets
- // *encoded_size to the number of bytes written. Returns false if there was
- // an error and true otherwise.
- bool FinishStream(size_t* encoded_size, uint8_t* encoded_buffer);
-
- // Copies the given input data to the internal ring buffer of the compressor.
- // No processing of the data occurs at this time and this function can be
- // called multiple times before calling WriteBrotliData() to process the
- // accumulated input. At most input_block_size() bytes of input data can be
- // copied to the ring buffer, otherwise the next WriteBrotliData() will fail.
- void CopyInputToRingBuffer(const size_t input_size,
- const uint8_t* input_buffer);
-
- // Processes the accumulated input data and sets *out_size to the length of
- // the new output meta-block, or to zero if no new output meta-block was
- // created (in this case the processed input data is buffered internally).
- // If *out_size is positive, *output points to the start of the output data.
- // If is_last or force_flush is true, an output meta-block is always created.
- // Returns false if the size of the input data is larger than
- // input_block_size().
- bool WriteBrotliData(const bool is_last, const bool force_flush,
- size_t* out_size, uint8_t** output);
-
- // Fills the new state with a dictionary for LZ77, warming up the ringbuffer,
- // e.g. for custom static dictionaries for data formats.
- // Not to be confused with the built-in transformable dictionary of Brotli.
- // To decode, use BrotliSetCustomDictionary of the decoder with the same
- // dictionary.
- void BrotliSetCustomDictionary(size_t size, const uint8_t* dict);
-
- // No-op, but we keep it here for API backward-compatibility.
- void WriteStreamHeader(void) {}
-
- private:
- uint8_t* GetBrotliStorage(size_t size);
-
- // Allocates and clears a hash table using memory in "*this",
- // stores the number of buckets in "*table_size" and returns a pointer to
- // the base of the hash table.
- int* GetHashTable(int quality,
- size_t input_size, size_t* table_size);
-
- BrotliParams params_;
- Hashers* hashers_;
- int hash_type_;
- uint64_t input_pos_;
- RingBuffer* ringbuffer_;
- size_t cmd_alloc_size_;
- Command* commands_;
- size_t num_commands_;
- size_t num_literals_;
- size_t last_insert_len_;
- uint64_t last_flush_pos_;
- uint64_t last_processed_pos_;
- int dist_cache_[4];
- int saved_dist_cache_[4];
- uint8_t last_byte_;
- uint8_t last_byte_bits_;
- uint8_t prev_byte_;
- uint8_t prev_byte2_;
- size_t storage_size_;
- uint8_t* storage_;
- // Hash table for quality 0 mode.
- int small_table_[1 << 10]; // 2KB
- int* large_table_; // Allocated only when needed
- // Command and distance prefix codes (each 64 symbols, stored back-to-back)
- // used for the next block in quality 0. The command prefix code is over a
- // smaller alphabet with the following 64 symbols:
- // 0 - 15: insert length code 0, copy length code 0 - 15, same distance
- // 16 - 39: insert length code 0, copy length code 0 - 23
- // 40 - 63: insert length code 0 - 23, copy length code 0
- // Note that symbols 16 and 40 represent the same code in the full alphabet,
- // but we do not use either of them in quality 0.
- uint8_t cmd_depths_[128];
- uint16_t cmd_bits_[128];
- // The compressed form of the command and distance prefix codes for the next
- // block in quality 0.
- uint8_t cmd_code_[512];
- size_t cmd_code_numbits_;
- // Command and literal buffers for quality 1.
- uint32_t* command_buf_;
- uint8_t* literal_buf_;
-
- int is_last_block_emitted_;
-};
-
-// Compresses the data in input_buffer into encoded_buffer, and sets
-// *encoded_size to the compressed length.
-// Returns 0 if there was an error and 1 otherwise.
-int BrotliCompressBuffer(BrotliParams params,
- size_t input_size,
- const uint8_t* input_buffer,
- size_t* encoded_size,
- uint8_t* encoded_buffer);
-
-// Same as above, but uses the specified input and output classes instead
-// of reading from and writing to pre-allocated memory buffers.
-int BrotliCompress(BrotliParams params, BrotliIn* in, BrotliOut* out);
-
-// Before compressing the data, sets a custom LZ77 dictionary with
-// BrotliCompressor::BrotliSetCustomDictionary.
-int BrotliCompressWithCustomDictionary(size_t dictsize, const uint8_t* dict,
- BrotliParams params,
- BrotliIn* in, BrotliOut* out);
-
-
-} // namespace brotli
+#if defined(__cplusplus) || defined(c_plusplus)
+} /* extern "C" */
+#endif
#endif /* BROTLI_ENC_ENCODE_H_ */
diff --git a/enc/encode_parallel.cc b/enc/encode_parallel.cc
index 6e4e8d5..6d7a4df 100644
--- a/enc/encode_parallel.cc
+++ b/enc/encode_parallel.cc
@@ -8,40 +8,34 @@
#include "./encode_parallel.h"
-#include <algorithm>
-#include <limits>
+#include <vector>
#include "./backward_references.h"
-#include "./bit_cost.h"
-#include "./block_splitter.h"
#include "./brotli_bit_stream.h"
-#include "./cluster.h"
#include "./context.h"
#include "./entropy_encode.h"
#include "./fast_log.h"
#include "./hash.h"
-#include "./histogram.h"
#include "./metablock.h"
+#include "./port.h"
#include "./prefix.h"
-#include "./transform.h"
#include "./utf8_util.h"
-#include "./write_bits.h"
namespace brotli {
namespace {
-void RecomputeDistancePrefixes(Command* cmds, size_t num_commands,
- uint32_t num_direct_distance_codes,
- uint32_t distance_postfix_bits) {
+static void RecomputeDistancePrefixes(Command* cmds, size_t num_commands,
+ uint32_t num_direct_distance_codes,
+ uint32_t distance_postfix_bits) {
if (num_direct_distance_codes == 0 &&
distance_postfix_bits == 0) {
return;
}
for (size_t i = 0; i < num_commands; ++i) {
Command* cmd = &cmds[i];
- if (cmd->copy_len() && cmd->cmd_prefix_ >= 128) {
- PrefixEncodeCopyDistance(cmd->DistanceCode(),
+ if (CommandCopyLen(cmd) && cmd->cmd_prefix_ >= 128) {
+ PrefixEncodeCopyDistance(CommandDistanceCode(cmd),
num_direct_distance_codes,
distance_postfix_bits,
&cmd->dist_prefix_,
@@ -50,102 +44,115 @@ void RecomputeDistancePrefixes(Command* cmds, size_t num_commands,
}
}
-bool WriteMetaBlockParallel(const BrotliParams& params,
- const uint32_t input_size,
- const uint8_t* input_buffer,
- const uint32_t prefix_size,
- const uint8_t* prefix_buffer,
- const bool is_first,
- const bool is_last,
- size_t* encoded_size,
- uint8_t* encoded_buffer) {
+/* Returns 1 on success, otherwise 0. */
+int WriteMetaBlockParallel(const BrotliParams& params,
+ const uint32_t input_size,
+ const uint8_t* input_buffer,
+ const uint32_t prefix_size,
+ const uint8_t* prefix_buffer,
+ const int is_first,
+ const int is_last,
+ size_t* encoded_size,
+ uint8_t* encoded_buffer) {
if (input_size == 0) {
- return false;
+ return 0;
}
+ MemoryManager memory_manager;
+ MemoryManager* m = &memory_manager;
+ BrotliInitMemoryManager(m, 0, 0, 0);
+
+ uint8_t* storage;
+ size_t storage_ix;
+ uint8_t first_byte;
+ size_t first_byte_bits;
+ size_t output_size;
+ uint32_t num_direct_distance_codes;
+ uint32_t distance_postfix_bits;
+ ContextType literal_context_mode;
+ size_t last_insert_len = 0;
+ size_t num_commands = 0;
+ size_t num_literals = 0;
+ int dist_cache[4] = { -4, -4, -4, -4 };
+ Command* commands;
+ int hash_type = BROTLI_MIN(int, 10, params.quality);
+ Hashers* hashers;
+ int use_utf8_mode;
+ uint8_t prev_byte;
+ uint8_t prev_byte2;
+ const uint32_t mask = BROTLI_UINT32_MAX >> 1;
+
  /* Copy prefix + next input block into a contiguous area. */
uint32_t input_pos = prefix_size;
/* CreateBackwardReferences reads up to 3 bytes past the end of input if the
mask points past the end of input.
FindMatchLengthWithLimit could do another 8 bytes look-forward. */
- std::vector<uint8_t> input(prefix_size + input_size + 4 + 8);
- memcpy(&input[0], prefix_buffer, prefix_size);
- memcpy(&input[input_pos], input_buffer, input_size);
+ uint8_t* input = BROTLI_ALLOC(m, uint8_t, prefix_size + input_size + 4 + 8);
+ if (BROTLI_IS_OOM(m)) goto oom;
+ memcpy(input, prefix_buffer, prefix_size);
+ memcpy(input + input_pos, input_buffer, input_size);
/* Since we don't have a ringbuffer, masking is a no-op.
We use one less bit than the full range because some of the code uses
mask + 1 as the size of the ringbuffer. */
- const uint32_t mask = std::numeric_limits<uint32_t>::max() >> 1;
- uint8_t prev_byte = input_pos > 0 ? input[(input_pos - 1) & mask] : 0;
- uint8_t prev_byte2 = input_pos > 1 ? input[(input_pos - 2) & mask] : 0;
+ prev_byte = input_pos > 0 ? input[(input_pos - 1) & mask] : 0;
+ prev_byte2 = input_pos > 1 ? input[(input_pos - 2) & mask] : 0;
/* Decide about UTF8 mode. */
static const double kMinUTF8Ratio = 0.75;
- bool utf8_mode = IsMostlyUTF8(&input[0], input_pos, mask, input_size,
- kMinUTF8Ratio);
+ use_utf8_mode = BrotliIsMostlyUTF8(
+ input, input_pos, mask, input_size, kMinUTF8Ratio);
/* Initialize hashers. */
- int hash_type = std::min(10, params.quality);
- Hashers* hashers = new Hashers();
- hashers->Init(hash_type);
+ hashers = BROTLI_ALLOC(m, Hashers, 1);
+ if (BROTLI_IS_OOM(m)) goto oom;
+ InitHashers(hashers);
+ HashersSetup(m, hashers, hash_type);
+ if (BROTLI_IS_OOM(m)) goto oom;
/* Compute backward references. */
- size_t last_insert_len = 0;
- size_t num_commands = 0;
- size_t num_literals = 0;
- int dist_cache[4] = { -4, -4, -4, -4 };
- Command* commands = static_cast<Command*>(
- malloc(sizeof(Command) * ((input_size + 1) >> 1)));
- if (commands == 0) {
- delete hashers;
- return false;
- }
- CreateBackwardReferences(
- input_size, input_pos, is_last,
- &input[0], mask,
- params.quality,
- params.lgwin,
- hashers,
- hash_type,
- dist_cache,
- &last_insert_len,
- commands,
- &num_commands,
- &num_literals);
- delete hashers;
+ commands = BROTLI_ALLOC(m, Command, ((input_size + 1) >> 1));
+ if (BROTLI_IS_OOM(m)) goto oom;
+ BrotliCreateBackwardReferences(m, input_size, input_pos, is_last, input,
+ mask, params.quality, params.lgwin, hashers, hash_type, dist_cache,
+ &last_insert_len, commands, &num_commands, &num_literals);
+ if (BROTLI_IS_OOM(m)) goto oom;
+ DestroyHashers(m, hashers);
+ BROTLI_FREE(m, hashers);
if (last_insert_len > 0) {
- commands[num_commands++] = Command(last_insert_len);
+ InitInsertCommand(&commands[num_commands++], last_insert_len);
num_literals += last_insert_len;
}
assert(num_commands != 0);
/* Build the meta-block. */
MetaBlockSplit mb;
- uint32_t num_direct_distance_codes =
- params.mode == BrotliParams::MODE_FONT ? 12 : 0;
- uint32_t distance_postfix_bits =
- params.mode == BrotliParams::MODE_FONT ? 1 : 0;
- ContextType literal_context_mode = utf8_mode ? CONTEXT_UTF8 : CONTEXT_SIGNED;
+ InitMetaBlockSplit(&mb);
+ num_direct_distance_codes = params.mode == BrotliParams::MODE_FONT ? 12 : 0;
+ distance_postfix_bits = params.mode == BrotliParams::MODE_FONT ? 1 : 0;
+ literal_context_mode = use_utf8_mode ? CONTEXT_UTF8 : CONTEXT_SIGNED;
RecomputeDistancePrefixes(commands, num_commands,
num_direct_distance_codes,
distance_postfix_bits);
if (params.quality <= 9) {
- BuildMetaBlockGreedy(&input[0], input_pos, mask,
+ BrotliBuildMetaBlockGreedy(m, input, input_pos, mask,
+ commands, num_commands,
+ &mb);
+ if (BROTLI_IS_OOM(m)) goto oom;
+ } else {
+ BrotliBuildMetaBlock(m, input, input_pos, mask, params.quality,
+ prev_byte, prev_byte2,
commands, num_commands,
+ literal_context_mode,
&mb);
- } else {
- BuildMetaBlock(&input[0], input_pos, mask,
- prev_byte, prev_byte2,
- commands, num_commands,
- literal_context_mode,
- &mb);
+ if (BROTLI_IS_OOM(m)) goto oom;
}
/* Set up the temporary output storage. */
- const size_t max_out_size = 2 * input_size + 500;
- std::vector<uint8_t> storage(max_out_size);
- uint8_t first_byte = 0;
- size_t first_byte_bits = 0;
+ storage = BROTLI_ALLOC(m, uint8_t, 2 * input_size + 500);
+ if (BROTLI_IS_OOM(m)) goto oom;
+ first_byte = 0;
+ first_byte_bits = 0;
if (is_first) {
if (params.lgwin == 16) {
first_byte = 0;
@@ -159,45 +166,55 @@ bool WriteMetaBlockParallel(const BrotliParams& params,
}
}
storage[0] = static_cast<uint8_t>(first_byte);
- size_t storage_ix = first_byte_bits;
+ storage_ix = first_byte_bits;
/* Store the meta-block to the temporary output. */
- StoreMetaBlock(&input[0], input_pos, input_size, mask,
- prev_byte, prev_byte2,
- is_last,
- num_direct_distance_codes,
- distance_postfix_bits,
- literal_context_mode,
- commands, num_commands,
- mb,
- &storage_ix, &storage[0]);
- free(commands);
+ BrotliStoreMetaBlock(m, input, input_pos, input_size, mask,
+ prev_byte, prev_byte2,
+ is_last,
+ num_direct_distance_codes,
+ distance_postfix_bits,
+ literal_context_mode,
+ commands, num_commands,
+ &mb,
+ &storage_ix, storage);
+ if (BROTLI_IS_OOM(m)) goto oom;
+ DestroyMetaBlockSplit(m, &mb);
+ BROTLI_FREE(m, commands);
/* If this is not the last meta-block, store an empty metadata
meta-block so that the meta-block will end at a byte boundary. */
if (!is_last) {
- StoreSyncMetaBlock(&storage_ix, &storage[0]);
+ BrotliStoreSyncMetaBlock(&storage_ix, storage);
}
/* If the compressed data is too large, fall back to an uncompressed
meta-block. */
- size_t output_size = storage_ix >> 3;
+ output_size = storage_ix >> 3;
if (input_size + 4 < output_size) {
storage[0] = static_cast<uint8_t>(first_byte);
storage_ix = first_byte_bits;
- StoreUncompressedMetaBlock(is_last, &input[0], input_pos, mask,
- input_size,
- &storage_ix, &storage[0]);
+ BrotliStoreUncompressedMetaBlock(is_last, input, input_pos, mask,
+ input_size,
+ &storage_ix, storage);
output_size = storage_ix >> 3;
}
/* Copy the temporary output with size-check to the output. */
if (output_size > *encoded_size) {
- return false;
+ BROTLI_FREE(m, storage);
+ BROTLI_FREE(m, input);
+ return 0;
}
- memcpy(encoded_buffer, &storage[0], output_size);
+ memcpy(encoded_buffer, storage, output_size);
*encoded_size = output_size;
- return true;
+ BROTLI_FREE(m, storage);
+ BROTLI_FREE(m, input);
+ return 1;
+
+oom:
+ BrotliWipeOutMemoryManager(m);
+ return 0;
}
} /* namespace */
@@ -217,20 +234,20 @@ int BrotliCompressBufferParallel(BrotliParams params,
}
/* Sanitize params. */
- if (params.lgwin < kMinWindowBits) {
- params.lgwin = kMinWindowBits;
- } else if (params.lgwin > kMaxWindowBits) {
- params.lgwin = kMaxWindowBits;
+ if (params.lgwin < kBrotliMinWindowBits) {
+ params.lgwin = kBrotliMinWindowBits;
+ } else if (params.lgwin > kBrotliMaxWindowBits) {
+ params.lgwin = kBrotliMaxWindowBits;
}
if (params.lgblock == 0) {
params.lgblock = 16;
if (params.quality >= 9 && params.lgwin > params.lgblock) {
- params.lgblock = std::min(21, params.lgwin);
+ params.lgblock = BROTLI_MIN(int, 21, params.lgwin);
}
- } else if (params.lgblock < kMinInputBlockBits) {
- params.lgblock = kMinInputBlockBits;
- } else if (params.lgblock > kMaxInputBlockBits) {
- params.lgblock = kMaxInputBlockBits;
+ } else if (params.lgblock < kBrotliMinInputBlockBits) {
+ params.lgblock = kBrotliMinInputBlockBits;
+ } else if (params.lgblock > kBrotliMaxInputBlockBits) {
+ params.lgblock = kBrotliMaxInputBlockBits;
}
size_t max_input_block_size = 1 << params.lgblock;
size_t max_prefix_size = 1u << params.lgwin;
@@ -239,10 +256,10 @@ int BrotliCompressBufferParallel(BrotliParams params,
/* Compress block-by-block independently. */
for (size_t pos = 0; pos < input_size; ) {
- uint32_t input_block_size =
- static_cast<uint32_t>(std::min(max_input_block_size, input_size - pos));
+ uint32_t input_block_size = static_cast<uint32_t>(
+ BROTLI_MIN(size_t, max_input_block_size, input_size - pos));
uint32_t prefix_size =
- static_cast<uint32_t>(std::min(max_prefix_size, pos));
+ static_cast<uint32_t>(BROTLI_MIN(size_t, max_prefix_size, pos));
size_t out_size = input_block_size + (input_block_size >> 3) + 1024;
std::vector<uint8_t> out(out_size);
if (!WriteMetaBlockParallel(params,
@@ -250,11 +267,11 @@ int BrotliCompressBufferParallel(BrotliParams params,
&input_buffer[pos],
prefix_size,
&input_buffer[pos - prefix_size],
- pos == 0,
- pos + input_block_size == input_size,
+ (pos == 0) ? 1 : 0,
+ (pos + input_block_size == input_size) ? 1 : 0,
&out_size,
&out[0])) {
- return false;
+ return 0;
}
out.resize(out_size);
compressed_pieces.push_back(out);
@@ -266,14 +283,14 @@ int BrotliCompressBufferParallel(BrotliParams params,
for (size_t i = 0; i < compressed_pieces.size(); ++i) {
const std::vector<uint8_t>& out = compressed_pieces[i];
if (out_pos + out.size() > *encoded_size) {
- return false;
+ return 0;
}
memcpy(&encoded_buffer[out_pos], &out[0], out.size());
out_pos += out.size();
}
*encoded_size = out_pos;
- return true;
+ return 1;
}
} /* namespace brotli */
diff --git a/enc/encode_parallel.h b/enc/encode_parallel.h
index b85d961..b7649b1 100644
--- a/enc/encode_parallel.h
+++ b/enc/encode_parallel.h
@@ -12,7 +12,7 @@
#define BROTLI_ENC_ENCODE_PARALLEL_H_
#include "../common/types.h"
-#include "./encode.h"
+#include "./compressor.h"
namespace brotli {
diff --git a/enc/entropy_encode.c b/enc/entropy_encode.c
index ef7361c..1c110c2 100644
--- a/enc/entropy_encode.c
+++ b/enc/entropy_encode.c
@@ -8,36 +8,46 @@
#include "./entropy_encode.h"
-#include <algorithm>
-#include <cstdlib>
-#include <limits>
+#include <string.h> /* memset */
+#include "../common/constants.h"
#include "../common/types.h"
-#include "./histogram.h"
#include "./port.h"
-namespace brotli {
-
-void SetDepth(const HuffmanTree &p,
- HuffmanTree *pool,
- uint8_t *depth,
- uint8_t level) {
- if (p.index_left_ >= 0) {
- ++level;
- SetDepth(pool[p.index_left_], pool, depth, level);
- SetDepth(pool[p.index_right_or_value_], pool, depth, level);
- } else {
- depth[p.index_right_or_value_] = level;
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+int BrotliSetDepth(int p0, HuffmanTree* pool, uint8_t* depth, int max_depth) {
+ int stack[16];
+ int level = 0;
+ int p = p0;
+ assert(max_depth <= 15);
+ stack[0] = -1;
+ while (1) {
+ if (pool[p].index_left_ >= 0) {
+ level++;
+ if (level > max_depth) return 0;
+ stack[level] = pool[p].index_right_or_value_;
+ p = pool[p].index_left_;
+ continue;
+ } else {
+ depth[pool[p].index_right_or_value_] = (uint8_t)level;
+ }
+ while (level >= 0 && stack[level] == -1) level--;
+ if (level < 0) return 1;
+ p = stack[level];
+ stack[level] = -1;
}
}
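
The function replaces the old recursive SetDepth with an explicit stack; since
max_depth <= 15, the 16-entry stack always suffices. A small sketch with a
hand-built tree (node indices and counts arbitrary, for illustration only):

    HuffmanTree pool[5];
    uint8_t depth[3] = { 0 };
    InitHuffmanTree(&pool[0], 1, -1, 0);  /* leaf, symbol 0 */
    InitHuffmanTree(&pool[1], 1, -1, 1);  /* leaf, symbol 1 */
    InitHuffmanTree(&pool[2], 2, -1, 2);  /* leaf, symbol 2 */
    InitHuffmanTree(&pool[3], 2, 0, 1);   /* internal: pool[0] + pool[1] */
    InitHuffmanTree(&pool[4], 4, 3, 2);   /* root: pool[3] + pool[2] */
    if (BrotliSetDepth(4, pool, depth, 15)) {
      /* depth is now { 2, 2, 1 }: symbols 0 and 1 sit two levels below
         the root, symbol 2 one level below. */
    }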
/* Sort the root nodes, least popular first. */
-static inline bool SortHuffmanTree(const HuffmanTree& v0,
- const HuffmanTree& v1) {
- if (v0.total_count_ != v1.total_count_) {
- return v0.total_count_ < v1.total_count_;
+static inline int SortHuffmanTree(const HuffmanTree* v0,
+ const HuffmanTree* v1) {
+ if (v0->total_count_ != v1->total_count_) {
+ return (v0->total_count_ < v1->total_count_) ? 1 : 0;
}
- return v0.index_right_or_value_ > v1.index_right_or_value_;
+ return (v0->index_right_or_value_ > v1->index_right_or_value_) ? 1 : 0;
}
/* This function will create a Huffman tree.
@@ -55,31 +65,37 @@ static inline bool SortHuffmanTree(const HuffmanTree& v0,
we are not planning to use this with extremely long blocks.
See http://en.wikipedia.org/wiki/Huffman_coding */
-void CreateHuffmanTree(const uint32_t *data,
- const size_t length,
- const int tree_limit,
- HuffmanTree* tree,
- uint8_t *depth) {
+void BrotliCreateHuffmanTree(const uint32_t *data,
+ const size_t length,
+ const int tree_limit,
+ HuffmanTree* tree,
+ uint8_t *depth) {
+ uint32_t count_limit;
+ HuffmanTree sentinel;
+ InitHuffmanTree(&sentinel, BROTLI_UINT32_MAX, -1, -1);
/* For block sizes below 64 kB, we never need to do a second iteration
of this loop. Probably all of our block sizes will be smaller than
that, so this loop is mostly of academic interest. If we actually
would need this, we would be better off with the Katajainen algorithm. */
- for (uint32_t count_limit = 1; ; count_limit *= 2) {
+ for (count_limit = 1; ; count_limit *= 2) {
size_t n = 0;
- for (size_t i = length; i != 0;) {
+ size_t i;
+ size_t j;
+ size_t k;
+ for (i = length; i != 0;) {
--i;
if (data[i]) {
- const uint32_t count = std::max(data[i], count_limit);
- tree[n++] = HuffmanTree(count, -1, static_cast<int16_t>(i));
+ const uint32_t count = BROTLI_MAX(uint32_t, data[i], count_limit);
+ InitHuffmanTree(&tree[n++], count, -1, (int16_t)i);
}
}
if (n == 1) {
- depth[tree[0].index_right_or_value_] = 1; // Only one element.
+ depth[tree[0].index_right_or_value_] = 1; /* Only one element. */
break;
}
- std::sort(tree, tree + n, SortHuffmanTree);
+ SortHuffmanTreeItems(tree, n, SortHuffmanTree);
/* The nodes are:
[0, n): the sorted leaf nodes that we start with.
@@ -88,13 +104,12 @@ void CreateHuffmanTree(const uint32_t *data,
(n+1). These are naturally in ascending order.
[2n]: we add a sentinel at the end as well.
There will be (2n+1) elements at the end. */
- const HuffmanTree sentinel(std::numeric_limits<uint32_t>::max(), -1, -1);
tree[n] = sentinel;
tree[n + 1] = sentinel;
- size_t i = 0; /* Points to the next leaf node. */
- size_t j = n + 1; /* Points to the next non-leaf node. */
- for (size_t k = n - 1; k != 0; --k) {
+ i = 0; /* Points to the next leaf node. */
+ j = n + 1; /* Points to the next non-leaf node. */
+ for (k = n - 1; k != 0; --k) {
size_t left, right;
if (tree[i].total_count_ <= tree[j].total_count_) {
left = i;
@@ -111,21 +126,21 @@ void CreateHuffmanTree(const uint32_t *data,
++j;
}
+ {
/* The sentinel node becomes the parent node. */
- size_t j_end = 2 * n - k;
- tree[j_end].total_count_ =
- tree[left].total_count_ + tree[right].total_count_;
- tree[j_end].index_left_ = static_cast<int16_t>(left);
- tree[j_end].index_right_or_value_ = static_cast<int16_t>(right);
+ size_t j_end = 2 * n - k;
+ tree[j_end].total_count_ =
+ tree[left].total_count_ + tree[right].total_count_;
+ tree[j_end].index_left_ = (int16_t)left;
+ tree[j_end].index_right_or_value_ = (int16_t)right;
/* Add back the last sentinel node. */
- tree[j_end + 1] = sentinel;
+ tree[j_end + 1] = sentinel;
+ }
}
- SetDepth(tree[2 * n - 1], &tree[0], depth, 0);
-
+ if (BrotliSetDepth((int)(2 * n - 1), &tree[0], depth, tree_limit)) {
/* We need to pack the Huffman tree in tree_limit bits. If this was not
successful, add fake entities to the lowest values and retry. */
- if (*std::max_element(&depth[0], &depth[length]) <= tree_limit) {
break;
}
}
@@ -142,7 +157,7 @@ static void Reverse(uint8_t* v, size_t start, size_t end) {
}
}
-static void WriteHuffmanTreeRepetitions(
+static void BrotliWriteHuffmanTreeRepetitions(
const uint8_t previous_value,
const uint8_t value,
size_t repetitions,
@@ -163,16 +178,17 @@ static void WriteHuffmanTreeRepetitions(
--repetitions;
}
if (repetitions < 3) {
- for (size_t i = 0; i < repetitions; ++i) {
+ size_t i;
+ for (i = 0; i < repetitions; ++i) {
tree[*tree_size] = value;
extra_bits_data[*tree_size] = 0;
++(*tree_size);
}
} else {
- repetitions -= 3;
size_t start = *tree_size;
- while (true) {
- tree[*tree_size] = 16;
+ repetitions -= 3;
+ while (1) {
+ tree[*tree_size] = BROTLI_REPEAT_PREVIOUS_CODE_LENGTH;
extra_bits_data[*tree_size] = repetitions & 0x3;
++(*tree_size);
repetitions >>= 2;
@@ -186,7 +202,7 @@ static void WriteHuffmanTreeRepetitions(
}
}
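
Both repetition writers emit the run length in a skewed base: code 16 (BROTLI_REPEAT_PREVIOUS_CODE_LENGTH) carries a 2-bit extra, code 17 (the zeros variant below) a 3-bit extra, and the start index captured above marks where the digit run begins so it can be reversed afterwards, since digits come out least-significant first. A standalone trace of the emission arithmetic follows; the loop tail, elided from the hunk above, follows the upstream source in breaking once the remainder reaches zero and otherwise decrementing it before the next digit.

    #include <stdio.h>
    #include <stddef.h>

    /* Prints the (code, extra_bits) pairs emitted for a run of `reps`
       equal code lengths, after any explicit leading value byte. */
    static void TraceRepeats(size_t reps, int code) {
      const int shift = (code == 16) ? 2 : 3;
      const size_t mask = (code == 16) ? 0x3 : 0x7;
      if (reps < 3) {  /* short runs are written out literally */
        while (reps--) printf("emit the length itself\n");
        return;
      }
      reps -= 3;
      while (1) {
        printf("emit %d, extra=%u\n", code, (unsigned)(reps & mask));
        reps >>= shift;
        if (reps == 0) break;
        --reps;
      }
    }

    int main(void) {
      TraceRepeats(10, 16);  /* two code-16s: extras 3, then 0 */
      TraceRepeats(70, 17);  /* two code-17s: extras 3, then 7 */
      return 0;
    }
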
-static void WriteHuffmanTreeRepetitionsZeros(
+static void BrotliWriteHuffmanTreeRepetitionsZeros(
size_t repetitions,
size_t* tree_size,
uint8_t* tree,
@@ -198,16 +214,17 @@ static void WriteHuffmanTreeRepetitionsZeros(
--repetitions;
}
if (repetitions < 3) {
- for (size_t i = 0; i < repetitions; ++i) {
+ size_t i;
+ for (i = 0; i < repetitions; ++i) {
tree[*tree_size] = 0;
extra_bits_data[*tree_size] = 0;
++(*tree_size);
}
} else {
- repetitions -= 3;
size_t start = *tree_size;
- while (true) {
- tree[*tree_size] = 17;
+ repetitions -= 3;
+ while (1) {
+ tree[*tree_size] = BROTLI_REPEAT_ZERO_CODE_LENGTH;
extra_bits_data[*tree_size] = repetitions & 0x7;
++(*tree_size);
repetitions >>= 3;
@@ -221,8 +238,8 @@ static void WriteHuffmanTreeRepetitionsZeros(
}
}
-void OptimizeHuffmanCountsForRle(size_t length, uint32_t* counts,
- uint8_t* good_for_rle) {
+void BrotliOptimizeHuffmanCountsForRle(size_t length, uint32_t* counts,
+ uint8_t* good_for_rle) {
size_t nonzero_count = 0;
size_t stride;
size_t limit;
@@ -260,8 +277,8 @@ void OptimizeHuffmanCountsForRle(size_t length, uint32_t* counts,
/* Small histogram will model it well. */
return;
}
- size_t zeros = length - nonzeros;
if (smallest_nonzero < 4) {
+ size_t zeros = length - nonzeros;
if (zeros < 6) {
for (i = 1; i < length - 1; ++i) {
if (counts[i - 1] != 0 && counts[i] == 0 && counts[i + 1] != 0) {
@@ -324,7 +341,7 @@ void OptimizeHuffmanCountsForRle(size_t length, uint32_t* counts,
for (k = 0; k < stride; ++k) {
/* We don't want to change value at counts[i],
       that already belongs to the next stride. Thus - 1. */
- counts[i - k - 1] = static_cast<uint32_t>(count);
+ counts[i - k - 1] = (uint32_t)count;
}
}
stride = 0;
@@ -353,16 +370,18 @@ void OptimizeHuffmanCountsForRle(size_t length, uint32_t* counts,
}
static void DecideOverRleUse(const uint8_t* depth, const size_t length,
- bool *use_rle_for_non_zero,
- bool *use_rle_for_zero) {
+ int *use_rle_for_non_zero,
+ int *use_rle_for_zero) {
size_t total_reps_zero = 0;
size_t total_reps_non_zero = 0;
size_t count_reps_zero = 1;
size_t count_reps_non_zero = 1;
- for (size_t i = 0; i < length;) {
+ size_t i;
+ for (i = 0; i < length;) {
const uint8_t value = depth[i];
size_t reps = 1;
- for (size_t k = i + 1; k < length && depth[k] == value; ++k) {
+ size_t k;
+ for (k = i + 1; k < length && depth[k] == value; ++k) {
++reps;
}
if (reps >= 3 && value == 0) {
@@ -375,20 +394,24 @@ static void DecideOverRleUse(const uint8_t* depth, const size_t length,
}
i += reps;
}
- *use_rle_for_non_zero = total_reps_non_zero > count_reps_non_zero * 2;
- *use_rle_for_zero = total_reps_zero > count_reps_zero * 2;
+ *use_rle_for_non_zero =
+ (total_reps_non_zero > count_reps_non_zero * 2) ? 1 : 0;
+ *use_rle_for_zero = (total_reps_zero > count_reps_zero * 2) ? 1 : 0;
}
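
Because both run counters start at 1 and only runs of three or more qualify, the decision total_reps > count_reps * 2 enables RLE only when qualifying runs are long relative to their number: a single run of 3 loses (3 > 4 is false), while a single run of 5 wins. A standalone evaluation of the same loop on a toy depth array:

    #include <stdio.h>
    #include <stddef.h>
    #include <stdint.h>

    int main(void) {
      /* One run of five zeros, one run of three fours, short runs else. */
      static const uint8_t depth[12] = {3, 3, 0, 0, 0, 0, 0, 4, 4, 4, 5, 5};
      size_t total_reps_zero = 0, total_reps_non_zero = 0;
      size_t count_reps_zero = 1, count_reps_non_zero = 1;
      size_t i;
      for (i = 0; i < 12;) {
        const uint8_t value = depth[i];
        size_t reps = 1, k;
        for (k = i + 1; k < 12 && depth[k] == value; ++k) ++reps;
        if (reps >= 3 && value == 0) {
          total_reps_zero += reps;
          ++count_reps_zero;
        }
        if (reps >= 3 && value != 0) {
          total_reps_non_zero += reps;
          ++count_reps_non_zero;
        }
        i += reps;
      }
      printf("rle for zeros: %d\n", total_reps_zero > count_reps_zero * 2);
      printf("rle for non-zeros: %d\n",
             total_reps_non_zero > count_reps_non_zero * 2);
      return 0;
    }

This prints 1 for zeros (one run of five: 5 > 4) and 0 for non-zeros (one run of three: 3 > 4).
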
-void WriteHuffmanTree(const uint8_t* depth,
- size_t length,
- size_t* tree_size,
- uint8_t* tree,
- uint8_t* extra_bits_data) {
- uint8_t previous_value = 8;
+void BrotliWriteHuffmanTree(const uint8_t* depth,
+ size_t length,
+ size_t* tree_size,
+ uint8_t* tree,
+ uint8_t* extra_bits_data) {
+ uint8_t previous_value = BROTLI_INITIAL_REPEATED_CODE_LENGTH;
+ size_t i;
+ int use_rle_for_non_zero = 0;
+ int use_rle_for_zero = 0;
/* Throw away trailing zeros. */
size_t new_length = length;
- for (size_t i = 0; i < length; ++i) {
+ for (i = 0; i < length; ++i) {
if (depth[length - i - 1] == 0) {
--new_length;
} else {
@@ -397,8 +420,6 @@ void WriteHuffmanTree(const uint8_t* depth,
}
   /* First gather statistics on whether it is a good idea to do rle. */
- bool use_rle_for_non_zero = false;
- bool use_rle_for_zero = false;
if (length > 50) {
/* Find rle coding for longer codes.
Shorter codes seem not to benefit from rle. */
@@ -407,73 +428,73 @@ void WriteHuffmanTree(const uint8_t* depth,
}
/* Actual rle coding. */
- for (size_t i = 0; i < new_length;) {
+ for (i = 0; i < new_length;) {
const uint8_t value = depth[i];
size_t reps = 1;
if ((value != 0 && use_rle_for_non_zero) ||
(value == 0 && use_rle_for_zero)) {
- for (size_t k = i + 1; k < new_length && depth[k] == value; ++k) {
+ size_t k;
+ for (k = i + 1; k < new_length && depth[k] == value; ++k) {
++reps;
}
}
if (value == 0) {
- WriteHuffmanTreeRepetitionsZeros(reps, tree_size, tree, extra_bits_data);
+ BrotliWriteHuffmanTreeRepetitionsZeros(
+ reps, tree_size, tree, extra_bits_data);
} else {
- WriteHuffmanTreeRepetitions(previous_value,
- value, reps, tree_size,
- tree, extra_bits_data);
+ BrotliWriteHuffmanTreeRepetitions(previous_value,
+ value, reps, tree_size,
+ tree, extra_bits_data);
previous_value = value;
}
i += reps;
}
}
-namespace {
-
-uint16_t ReverseBits(int num_bits, uint16_t bits) {
+static uint16_t BrotliReverseBits(size_t num_bits, uint16_t bits) {
static const size_t kLut[16] = { /* Pre-reversed 4-bit values. */
0x0, 0x8, 0x4, 0xc, 0x2, 0xa, 0x6, 0xe,
0x1, 0x9, 0x5, 0xd, 0x3, 0xb, 0x7, 0xf
};
size_t retval = kLut[bits & 0xf];
- for (int i = 4; i < num_bits; i += 4) {
+ size_t i;
+ for (i = 4; i < num_bits; i += 4) {
retval <<= 4;
- bits = static_cast<uint16_t>(bits >> 4);
+ bits = (uint16_t)(bits >> 4);
retval |= kLut[bits & 0xf];
}
retval >>= (-num_bits & 0x3);
- return static_cast<uint16_t>(retval);
+ return (uint16_t)retval;
}
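
Worked example for the routine above: with num_bits = 3 and bits = 0b011, kLut[3] = 0xc, the 4-bit loop never runs, and the final shift by (-3 & 0x3) = 1 discards the padding bit, giving 0b110. A naive bit-by-bit cross-check, useful for convincing oneself the LUT path is right:

    #include <stdint.h>
    #include <stdio.h>
    #include <stddef.h>

    /* Reverse the low num_bits of `bits`, one bit at a time. */
    static uint16_t NaiveReverse(size_t num_bits, uint16_t bits) {
      uint16_t r = 0;
      size_t i;
      for (i = 0; i < num_bits; ++i) {
        r = (uint16_t)((r << 1) | ((bits >> i) & 1));
      }
      return r;
    }

    int main(void) {
      /* 011 -> 110 and 00000001 -> 10000000; prints "6 80". */
      printf("%x %x\n", NaiveReverse(3, 0x3), NaiveReverse(8, 0x01));
      return 0;
    }
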
-} // namespace
+/* 0..15 are values for bits */
+#define MAX_HUFFMAN_BITS 16
-void ConvertBitDepthsToSymbols(const uint8_t *depth,
- size_t len,
- uint16_t *bits) {
+void BrotliConvertBitDepthsToSymbols(const uint8_t *depth,
+ size_t len,
+ uint16_t *bits) {
  /* In Brotli, all bit depths are [1..15].
     A bit depth of 0 means that the symbol does not exist. */
- const int kMaxBits = 16; // 0..15 are values for bits
- uint16_t bl_count[kMaxBits] = { 0 };
- {
- for (size_t i = 0; i < len; ++i) {
- ++bl_count[depth[i]];
- }
- bl_count[0] = 0;
+ uint16_t bl_count[MAX_HUFFMAN_BITS] = { 0 };
+ uint16_t next_code[MAX_HUFFMAN_BITS];
+ size_t i;
+ int code = 0;
+ for (i = 0; i < len; ++i) {
+ ++bl_count[depth[i]];
}
- uint16_t next_code[kMaxBits];
+ bl_count[0] = 0;
next_code[0] = 0;
- {
- int code = 0;
- for (int bits = 1; bits < kMaxBits; ++bits) {
- code = (code + bl_count[bits - 1]) << 1;
- next_code[bits] = static_cast<uint16_t>(code);
- }
+ for (i = 1; i < MAX_HUFFMAN_BITS; ++i) {
+ code = (code + bl_count[i - 1]) << 1;
+ next_code[i] = (uint16_t)code;
}
- for (size_t i = 0; i < len; ++i) {
+ for (i = 0; i < len; ++i) {
if (depth[i]) {
- bits[i] = ReverseBits(depth[i], next_code[depth[i]]++);
+ bits[i] = BrotliReverseBits(depth[i], next_code[depth[i]]++);
}
}
}
-} // namespace brotli
+#if defined(__cplusplus) || defined(c_plusplus)
+} /* extern "C" */
+#endif
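
BrotliConvertBitDepthsToSymbols is the canonical-code construction of RFC 1951 section 3.2.2 (the bl_count/next_code pass), plus a bit reversal because Brotli's bit writer emits the least-significant bit first. A self-contained run over depths {1, 2, 3, 3}:

    #include <stdint.h>
    #include <stdio.h>
    #include <stddef.h>

    #define MAX_HUFFMAN_BITS 16

    static uint16_t ReverseBits(size_t num_bits, uint16_t bits) {
      static const size_t kLut[16] = { /* Pre-reversed 4-bit values. */
        0x0, 0x8, 0x4, 0xc, 0x2, 0xa, 0x6, 0xe,
        0x1, 0x9, 0x5, 0xd, 0x3, 0xb, 0x7, 0xf
      };
      size_t retval = kLut[bits & 0xf];
      size_t i;
      for (i = 4; i < num_bits; i += 4) {
        retval <<= 4;
        bits = (uint16_t)(bits >> 4);
        retval |= kLut[bits & 0xf];
      }
      retval >>= (-num_bits & 0x3);
      return (uint16_t)retval;
    }

    static void ConvertBitDepthsToSymbols(const uint8_t* depth, size_t len,
                                          uint16_t* bits) {
      uint16_t bl_count[MAX_HUFFMAN_BITS] = { 0 };
      uint16_t next_code[MAX_HUFFMAN_BITS];
      size_t i;
      int code = 0;
      for (i = 0; i < len; ++i) ++bl_count[depth[i]];
      bl_count[0] = 0;
      next_code[0] = 0;
      /* First code of each depth: the previous depth's first code plus
         its population, shifted left by one. */
      for (i = 1; i < MAX_HUFFMAN_BITS; ++i) {
        code = (code + bl_count[i - 1]) << 1;
        next_code[i] = (uint16_t)code;
      }
      for (i = 0; i < len; ++i) {
        if (depth[i]) bits[i] = ReverseBits(depth[i], next_code[depth[i]]++);
      }
    }

    int main(void) {
      static const uint8_t depth[4] = {1, 2, 3, 3};
      uint16_t bits[4] = {0};
      size_t i;
      ConvertBitDepthsToSymbols(depth, 4, bits);
      /* Canonical codes 0, 10, 110, 111 print bit-reversed: 0, 1, 3, 7. */
      for (i = 0; i < 4; ++i) {
        printf("symbol %u: %x\n", (unsigned)i, bits[i]);
      }
      return 0;
    }
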
diff --git a/enc/entropy_encode.h b/enc/entropy_encode.h
index 9757930..c9474f7 100644
--- a/enc/entropy_encode.h
+++ b/enc/entropy_encode.h
@@ -9,29 +9,30 @@
#ifndef BROTLI_ENC_ENTROPY_ENCODE_H_
#define BROTLI_ENC_ENTROPY_ENCODE_H_
-#include <string.h>
-
#include "../common/types.h"
-#include "./histogram.h"
-#include "./prefix.h"
+#include "./port.h"
-namespace brotli {
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
/* A node of a Huffman tree. */
-struct HuffmanTree {
- HuffmanTree() {}
- HuffmanTree(uint32_t count, int16_t left, int16_t right)
- : total_count_(count),
- index_left_(left),
- index_right_or_value_(right) {
- }
+typedef struct HuffmanTree {
uint32_t total_count_;
int16_t index_left_;
int16_t index_right_or_value_;
-};
+} HuffmanTree;
+
+static BROTLI_INLINE void InitHuffmanTree(HuffmanTree* self, uint32_t count,
+ int16_t left, int16_t right) {
+ self->total_count_ = count;
+ self->index_left_ = left;
+ self->index_right_or_value_ = right;
+}
-void SetDepth(const HuffmanTree &p, HuffmanTree *pool,
- uint8_t *depth, uint8_t level);
+/* Returns 1 if assignment of depths succeeded, otherwise 0. */
+BROTLI_INTERNAL int BrotliSetDepth(
+ int p, HuffmanTree* pool, uint8_t* depth, int max_depth);
/* This function will create a Huffman tree.
@@ -45,11 +46,11 @@ void SetDepth(const HuffmanTree &p, HuffmanTree *pool,
be at least 2 * length + 1 long.
See http://en.wikipedia.org/wiki/Huffman_coding */
-void CreateHuffmanTree(const uint32_t *data,
- const size_t length,
- const int tree_limit,
- HuffmanTree* tree,
- uint8_t *depth);
+BROTLI_INTERNAL void BrotliCreateHuffmanTree(const uint32_t *data,
+ const size_t length,
+ const int tree_limit,
+ HuffmanTree* tree,
+ uint8_t *depth);
/* Change the population counts in a way that the consequent
Huffman tree compression, especially its rle-part will be more
@@ -58,48 +59,63 @@ void CreateHuffmanTree(const uint32_t *data,
length contains the size of the histogram.
counts contains the population counts.
good_for_rle is a buffer of at least length size */
-void OptimizeHuffmanCountsForRle(size_t length, uint32_t* counts,
- uint8_t* good_for_rle);
+BROTLI_INTERNAL void BrotliOptimizeHuffmanCountsForRle(
+ size_t length, uint32_t* counts, uint8_t* good_for_rle);
/* Write a Huffman tree from bit depths into the bitstream representation
of a Huffman tree. The generated Huffman tree is to be compressed once
more using a Huffman tree */
-void WriteHuffmanTree(const uint8_t* depth,
- size_t num,
- size_t* tree_size,
- uint8_t* tree,
- uint8_t* extra_bits_data);
+BROTLI_INTERNAL void BrotliWriteHuffmanTree(const uint8_t* depth,
+ size_t num,
+ size_t* tree_size,
+ uint8_t* tree,
+ uint8_t* extra_bits_data);
/* Get the actual bit values for a tree of bit depths. */
-void ConvertBitDepthsToSymbols(const uint8_t *depth,
- size_t len,
- uint16_t *bits);
-
-template<int kSize>
-struct EntropyCode {
- // How many bits for symbol.
- uint8_t depth_[kSize];
- // Actual bits used to represent the symbol.
- uint16_t bits_[kSize];
- // How many non-zero depth.
- int count_;
- // First four symbols with non-zero depth.
- int symbols_[4];
-};
-
-static const int kCodeLengthCodes = 18;
-
-// Literal entropy code.
-typedef EntropyCode<256> EntropyCodeLiteral;
-// Prefix entropy codes.
-typedef EntropyCode<kNumCommandPrefixes> EntropyCodeCommand;
-typedef EntropyCode<kNumDistancePrefixes> EntropyCodeDistance;
-typedef EntropyCode<kNumBlockLenPrefixes> EntropyCodeBlockLength;
-// Context map entropy code, 256 Huffman tree indexes + 16 run length codes.
-typedef EntropyCode<272> EntropyCodeContextMap;
-// Block type entropy code, 256 block types + 2 special symbols.
-typedef EntropyCode<258> EntropyCodeBlockType;
-
-} // namespace brotli
+BROTLI_INTERNAL void BrotliConvertBitDepthsToSymbols(const uint8_t *depth,
+ size_t len,
+ uint16_t *bits);
+
+/* Input-size-optimized Shell sort. */
+typedef int (*HuffmanTreeComparator)(const HuffmanTree*, const HuffmanTree*);
+static BROTLI_INLINE void SortHuffmanTreeItems(HuffmanTree* items,
+ const size_t n, HuffmanTreeComparator comparator) {
+ static const size_t gaps[] = {132, 57, 23, 10, 4, 1};
+ if (n < 13) {
+ /* Insertion sort. */
+ size_t i;
+ for (i = 1; i < n; ++i) {
+ HuffmanTree tmp = items[i];
+ size_t k = i;
+ size_t j = i - 1;
+ while (comparator(&tmp, &items[j])) {
+ items[k] = items[j];
+ k = j;
+ if (!j--) break;
+ }
+ items[k] = tmp;
+ }
+ return;
+ } else {
+ /* Shell sort. */
+ int g = n < 57 ? 2 : 0;
+ for (; g < 6; ++g) {
+ size_t gap = gaps[g];
+ size_t i;
+ for (i = gap; i < n; ++i) {
+ size_t j = i;
+ HuffmanTree tmp = items[i];
+ for (; j >= gap && comparator(&tmp, &items[j - gap]); j -= gap) {
+ items[j] = items[j - gap];
+ }
+ items[j] = tmp;
+ }
+ }
+ }
+}
+
+#if defined(__cplusplus) || defined(c_plusplus)
+} /* extern "C" */
+#endif
#endif /* BROTLI_ENC_ENTROPY_ENCODE_H_ */
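
SortHuffmanTreeItems above switches on input size: plain insertion sort below 13 elements, otherwise Shell sort starting at gap 23 (g = 2) for fewer than 57 elements and at gap 132 for larger inputs. A usage sketch with an illustrative count-only comparator; the library's actual SortHuffmanTree also breaks ties, which is omitted here.

    #include <stdint.h>
    #include <stdio.h>
    #include <stddef.h>

    typedef struct HuffmanTree {
      uint32_t total_count_;
      int16_t index_left_;
      int16_t index_right_or_value_;
    } HuffmanTree;

    typedef int (*HuffmanTreeComparator)(
        const HuffmanTree*, const HuffmanTree*);

    /* Copy of the header's sorter. */
    static void SortHuffmanTreeItems(HuffmanTree* items, const size_t n,
                                     HuffmanTreeComparator comparator) {
      static const size_t gaps[] = {132, 57, 23, 10, 4, 1};
      if (n < 13) {
        /* Insertion sort. */
        size_t i;
        for (i = 1; i < n; ++i) {
          HuffmanTree tmp = items[i];
          size_t k = i, j = i - 1;
          while (comparator(&tmp, &items[j])) {
            items[k] = items[j];
            k = j;
            if (!j--) break;
          }
          items[k] = tmp;
        }
      } else {
        /* Shell sort, skipping the large gaps for mid-sized inputs. */
        int g = n < 57 ? 2 : 0;
        for (; g < 6; ++g) {
          size_t gap = gaps[g], i;
          for (i = gap; i < n; ++i) {
            size_t j = i;
            HuffmanTree tmp = items[i];
            for (; j >= gap && comparator(&tmp, &items[j - gap]); j -= gap) {
              items[j] = items[j - gap];
            }
            items[j] = tmp;
          }
        }
      }
    }

    /* Illustrative comparator: ascending by count only. */
    static int ByCount(const HuffmanTree* a, const HuffmanTree* b) {
      return a->total_count_ < b->total_count_;
    }

    int main(void) {
      HuffmanTree items[5] = {
        {40, -1, 0}, {7, -1, 1}, {19, -1, 2}, {7, -1, 3}, {2, -1, 4}
      };
      size_t i;
      SortHuffmanTreeItems(items, 5, ByCount);
      for (i = 0; i < 5; ++i) printf("%u ", items[i].total_count_);
      printf("\n");  /* 2 7 7 19 40 */
      return 0;
    }
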
diff --git a/enc/entropy_encode_static.h b/enc/entropy_encode_static.h
index 0e1cfa7..1b4a2a1 100644
--- a/enc/entropy_encode_static.h
+++ b/enc/entropy_encode_static.h
@@ -9,17 +9,20 @@
#ifndef BROTLI_ENC_ENTROPY_ENCODE_STATIC_H_
#define BROTLI_ENC_ENTROPY_ENCODE_STATIC_H_
+#include "../common/constants.h"
+#include "../common/port.h"
#include "../common/types.h"
-#include "./prefix.h"
#include "./write_bits.h"
-namespace brotli {
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
static const uint8_t kCodeLengthDepth[18] = {
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 0, 4, 4,
};
-static const uint8_t kStaticCommandCodeDepth[kNumCommandPrefixes] = {
+static const uint8_t kStaticCommandCodeDepth[BROTLI_NUM_COMMAND_SYMBOLS] = {
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
@@ -77,11 +80,13 @@ static const uint32_t kCodeLengthBits[18] = {
0, 8, 4, 12, 2, 10, 6, 14, 1, 9, 5, 13, 3, 15, 31, 0, 11, 7,
};
-inline void StoreStaticCodeLengthCode(size_t* storage_ix, uint8_t* storage) {
- WriteBits(40, MAKE_UINT64_T(0xff, 0x55555554), storage_ix, storage);
+static BROTLI_INLINE void StoreStaticCodeLengthCode(
+ size_t* storage_ix, uint8_t* storage) {
+ BrotliWriteBits(
+ 40, MAKE_UINT64_T(0x0000ffU, 0x55555554U), storage_ix, storage);
}
-static const uint64_t kZeroRepsBits[704] = {
+static const uint64_t kZeroRepsBits[BROTLI_NUM_COMMAND_SYMBOLS] = {
0x00000000, 0x00000000, 0x00000000, 0x00000007, 0x00000017, 0x00000027,
0x00000037, 0x00000047, 0x00000057, 0x00000067, 0x00000077, 0x00000770,
0x00000b87, 0x00001387, 0x00001b87, 0x00002387, 0x00002b87, 0x00003387,
@@ -202,7 +207,7 @@ static const uint64_t kZeroRepsBits[704] = {
0x06f9cb87, 0x08f9cb87,
};
-static const uint32_t kZeroRepsDepth[704] = {
+static const uint32_t kZeroRepsDepth[BROTLI_NUM_COMMAND_SYMBOLS] = {
0, 4, 8, 7, 7, 7, 7, 7, 7, 7, 7, 11, 14, 14, 14, 14,
14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
@@ -249,7 +254,7 @@ static const uint32_t kZeroRepsDepth[704] = {
28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
};
-static const uint64_t kNonZeroRepsBits[704] = {
+static const uint64_t kNonZeroRepsBits[BROTLI_NUM_COMMAND_SYMBOLS] = {
0x0000000b, 0x0000001b, 0x0000002b, 0x0000003b, 0x000002cb, 0x000006cb,
0x00000acb, 0x00000ecb, 0x000002db, 0x000006db, 0x00000adb, 0x00000edb,
0x000002eb, 0x000006eb, 0x00000aeb, 0x00000eeb, 0x000002fb, 0x000006fb,
@@ -370,7 +375,7 @@ static const uint64_t kNonZeroRepsBits[704] = {
0x2baeb6db, 0x3baeb6db,
};
-static const uint32_t kNonZeroRepsDepth[704] = {
+static const uint32_t kNonZeroRepsDepth[BROTLI_NUM_COMMAND_SYMBOLS] = {
6, 6, 6, 6, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
12, 12, 12, 12, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18,
18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18,
@@ -417,47 +422,7 @@ static const uint32_t kNonZeroRepsDepth[704] = {
30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
};
-static const uint16_t kStaticLiteralCodeBits[256] = {
- 0, 128, 64, 192, 32, 160, 96, 224,
- 16, 144, 80, 208, 48, 176, 112, 240,
- 8, 136, 72, 200, 40, 168, 104, 232,
- 24, 152, 88, 216, 56, 184, 120, 248,
- 4, 132, 68, 196, 36, 164, 100, 228,
- 20, 148, 84, 212, 52, 180, 116, 244,
- 12, 140, 76, 204, 44, 172, 108, 236,
- 28, 156, 92, 220, 60, 188, 124, 252,
- 2, 130, 66, 194, 34, 162, 98, 226,
- 18, 146, 82, 210, 50, 178, 114, 242,
- 10, 138, 74, 202, 42, 170, 106, 234,
- 26, 154, 90, 218, 58, 186, 122, 250,
- 6, 134, 70, 198, 38, 166, 102, 230,
- 22, 150, 86, 214, 54, 182, 118, 246,
- 14, 142, 78, 206, 46, 174, 110, 238,
- 30, 158, 94, 222, 62, 190, 126, 254,
- 1, 129, 65, 193, 33, 161, 97, 225,
- 17, 145, 81, 209, 49, 177, 113, 241,
- 9, 137, 73, 201, 41, 169, 105, 233,
- 25, 153, 89, 217, 57, 185, 121, 249,
- 5, 133, 69, 197, 37, 165, 101, 229,
- 21, 149, 85, 213, 53, 181, 117, 245,
- 13, 141, 77, 205, 45, 173, 109, 237,
- 29, 157, 93, 221, 61, 189, 125, 253,
- 3, 131, 67, 195, 35, 163, 99, 227,
- 19, 147, 83, 211, 51, 179, 115, 243,
- 11, 139, 75, 203, 43, 171, 107, 235,
- 27, 155, 91, 219, 59, 187, 123, 251,
- 7, 135, 71, 199, 39, 167, 103, 231,
- 23, 151, 87, 215, 55, 183, 119, 247,
- 15, 143, 79, 207, 47, 175, 111, 239,
- 31, 159, 95, 223, 63, 191, 127, 255,
-};
-
-inline void StoreStaticLiteralHuffmanTree(size_t* storage_ix,
- uint8_t* storage) {
- WriteBits(32, 0x00010003U, storage_ix, storage);
-}
-
-static const uint16_t kStaticCommandCodeBits[kNumCommandPrefixes] = {
+static const uint16_t kStaticCommandCodeBits[BROTLI_NUM_COMMAND_SYMBOLS] = {
0, 256, 128, 384, 64, 320, 192, 448,
32, 288, 160, 416, 96, 352, 224, 480,
16, 272, 144, 400, 80, 336, 208, 464,
@@ -548,10 +513,11 @@ static const uint16_t kStaticCommandCodeBits[kNumCommandPrefixes] = {
255, 1279, 767, 1791, 511, 1535, 1023, 2047,
};
-inline void StoreStaticCommandHuffmanTree(size_t* storage_ix,
- uint8_t* storage) {
- WriteBits(28, 0x0000000006307003U, storage_ix, storage);
- WriteBits(31, 0x0000000009262441U, storage_ix, storage);
+static BROTLI_INLINE void StoreStaticCommandHuffmanTree(
+ size_t* storage_ix, uint8_t* storage) {
+ BrotliWriteBits(
+ 56, MAKE_UINT64_T(0x926244U, 0x16307003U), storage_ix, storage);
+ BrotliWriteBits(3, 0x00000000U, storage_ix, storage);
}
static const uint16_t kStaticDistanceCodeBits[64] = {
@@ -561,12 +527,13 @@ static const uint16_t kStaticDistanceCodeBits[64] = {
3, 35, 19, 51, 11, 43, 27, 59, 7, 39, 23, 55, 15, 47, 31, 63,
};
-inline void StoreStaticDistanceHuffmanTree(size_t* storage_ix,
- uint8_t* storage) {
- WriteBits(18, 0x000000000001dc03U, storage_ix, storage);
- WriteBits(10, 0x00000000000000daU, storage_ix, storage);
+static BROTLI_INLINE void StoreStaticDistanceHuffmanTree(
+ size_t* storage_ix, uint8_t* storage) {
+ BrotliWriteBits(28, 0x0369dc03U, storage_ix, storage);
}
-} // namespace brotli
+#if defined(__cplusplus) || defined(c_plusplus)
+} /* extern "C" */
+#endif
#endif /* BROTLI_ENC_ENTROPY_ENCODE_STATIC_H_ */
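
A depth table such as kCodeLengthDepth must describe a complete prefix code, which is easy to sanity-check with a scaled Kraft sum: over the non-zero depths, the values 2^(max_depth - depth) must add up to exactly 2^max_depth. A small check for the table above:

    #include <stdint.h>
    #include <stdio.h>
    #include <stddef.h>

    static const uint8_t kCodeLengthDepth[18] = {
      4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 0, 4, 4,
    };

    int main(void) {
      /* Kraft sum scaled by 2^5 (the deepest code in this table). */
      unsigned sum = 0;
      size_t i;
      for (i = 0; i < 18; ++i) {
        if (kCodeLengthDepth[i]) sum += 1u << (5 - kCodeLengthDepth[i]);
      }
      printf("kraft sum = %u (expect %u)\n", sum, 1u << 5);
      return 0;
    }

The fifteen depth-4 entries contribute 30 and the two depth-5 entries contribute 2, so the sum is exactly 32: the code is complete.
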
diff --git a/enc/fast_log.h b/enc/fast_log.h
index 6b2d8ff..ef3e73d 100644
--- a/enc/fast_log.h
+++ b/enc/fast_log.h
@@ -9,16 +9,18 @@
#ifndef BROTLI_ENC_FAST_LOG_H_
#define BROTLI_ENC_FAST_LOG_H_
-#include <assert.h>
#include <math.h>
#include "../common/types.h"
+#include "../common/port.h"
-namespace brotli {
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
-static inline uint32_t Log2FloorNonZero(size_t n) {
+static BROTLI_INLINE uint32_t Log2FloorNonZero(size_t n) {
#ifdef __GNUC__
- return 31u ^ static_cast<uint32_t>(__builtin_clz(static_cast<uint32_t>(n)));
+ return 31u ^ (uint32_t)__builtin_clz((uint32_t)n);
#else
uint32_t result = 0;
while (n >>= 1) result++;
@@ -120,7 +122,7 @@ static const float kLog2Table[] = {
};
/* Faster logarithm for small integers, with the property of log2(0) == 0. */
-static inline double FastLog2(size_t v) {
+static BROTLI_INLINE double FastLog2(size_t v) {
if (v < sizeof(kLog2Table) / sizeof(kLog2Table[0])) {
return kLog2Table[v];
}
@@ -129,12 +131,14 @@ static inline double FastLog2(size_t v) {
/* Visual Studio 2010 and Android API levels < 18 do not have the log2()
* function defined, so we use log() and a multiplication instead. */
static const double kLog2Inv = 1.4426950408889634f;
- return log(static_cast<double>(v)) * kLog2Inv;
+ return log((double)v) * kLog2Inv;
#else
- return log2(static_cast<double>(v));
+ return log2((double)v);
#endif
}
-} // namespace brotli
+#if defined(__cplusplus) || defined(c_plusplus)
+} /* extern "C" */
+#endif
#endif /* BROTLI_ENC_FAST_LOG_H_ */
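
Log2FloorNonZero returns the position of the highest set bit, so 31 ^ clz(n) on the GCC path and the portable shift loop agree for non-zero 32-bit inputs. A standalone copy of the fallback:

    #include <stdint.h>
    #include <stdio.h>
    #include <stddef.h>

    /* Portable fallback from the header: highest set bit position. */
    static uint32_t Log2FloorNonZero(size_t n) {
      uint32_t result = 0;
      while (n >>= 1) result++;
      return result;
    }

    int main(void) {
      /* Prints "0 1 7 8". */
      printf("%u %u %u %u\n", Log2FloorNonZero(1), Log2FloorNonZero(2),
             Log2FloorNonZero(255), Log2FloorNonZero(256));
      return 0;
    }
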
diff --git a/enc/find_match_length.h b/enc/find_match_length.h
index ed143d1..2b38abf 100644
--- a/enc/find_match_length.h
+++ b/enc/find_match_length.h
@@ -12,14 +12,16 @@
#include "../common/types.h"
#include "./port.h"
-namespace brotli {
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
/* Separate implementation for little-endian 64-bit targets, for speed. */
#if defined(__GNUC__) && defined(_LP64) && defined(IS_LITTLE_ENDIAN)
-static inline size_t FindMatchLengthWithLimit(const uint8_t* s1,
- const uint8_t* s2,
- size_t limit) {
+static BROTLI_INLINE size_t FindMatchLengthWithLimit(const uint8_t* s1,
+ const uint8_t* s2,
+ size_t limit) {
size_t matched = 0;
size_t limit2 = (limit >> 3) + 1; /* + 1 is for pre-decrement in while */
while (PREDICT_TRUE(--limit2)) {
@@ -30,7 +32,7 @@ static inline size_t FindMatchLengthWithLimit(const uint8_t* s1,
} else {
uint64_t x =
BROTLI_UNALIGNED_LOAD64(s2) ^ BROTLI_UNALIGNED_LOAD64(s1 + matched);
- size_t matching_bits = static_cast<size_t>(__builtin_ctzll(x));
+ size_t matching_bits = (size_t)__builtin_ctzll(x);
matched += matching_bits >> 3;
return matched;
}
@@ -47,9 +49,9 @@ static inline size_t FindMatchLengthWithLimit(const uint8_t* s1,
return matched;
}
#else
-static inline size_t FindMatchLengthWithLimit(const uint8_t* s1,
- const uint8_t* s2,
- size_t limit) {
+static BROTLI_INLINE size_t FindMatchLengthWithLimit(const uint8_t* s1,
+ const uint8_t* s2,
+ size_t limit) {
size_t matched = 0;
const uint8_t* s2_limit = s2 + limit;
const uint8_t* s2_ptr = s2;
@@ -71,6 +73,8 @@ static inline size_t FindMatchLengthWithLimit(const uint8_t* s1,
}
#endif
-} // namespace brotli
+#if defined(__cplusplus) || defined(c_plusplus)
+} /* extern "C" */
+#endif
#endif /* BROTLI_ENC_FIND_MATCH_LENGTH_H_ */
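
Both branches share one contract: return the number of leading bytes at which s1 and s2 agree, capped at limit; the 64-bit path merely compares 8 bytes at a time and locates the first differing byte with ctzll on the XOR. A byte-wise sketch with the same contract:

    #include <stdint.h>
    #include <stdio.h>
    #include <stddef.h>

    /* Byte-wise equivalent of the header's portable branch. */
    static size_t FindMatchLengthWithLimit(const uint8_t* s1,
                                           const uint8_t* s2,
                                           size_t limit) {
      size_t matched = 0;
      while (matched < limit && s1[matched] == s2[matched]) ++matched;
      return matched;
    }

    int main(void) {
      const uint8_t a[] = "abcdefgh";
      const uint8_t b[] = "abcdxfgh";
      printf("%zu\n", FindMatchLengthWithLimit(a, b, 8));  /* 4 */
      return 0;
    }
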
diff --git a/enc/hash.h b/enc/hash.h
index 1b34f08..e872efb 100644
--- a/enc/hash.h
+++ b/enc/hash.h
@@ -10,25 +10,23 @@
#ifndef BROTLI_ENC_HASH_H_
#define BROTLI_ENC_HASH_H_
-#include <sys/types.h>
-
-#include <algorithm>
-#include <cstring>
-#include <limits>
+#include <string.h> /* memcmp, memset */
+#include "../common/dictionary.h"
#include "../common/types.h"
#include "./dictionary_hash.h"
#include "./fast_log.h"
#include "./find_match_length.h"
+#include "./memory.h"
#include "./port.h"
-#include "./prefix.h"
#include "./static_dict.h"
-#include "./transform.h"
-namespace brotli {
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
-static const size_t kMaxTreeSearchDepth = 64;
-static const size_t kMaxTreeCompLength = 128;
+#define MAX_TREE_SEARCH_DEPTH 64
+#define MAX_TREE_COMP_LENGTH 128
static const uint32_t kDistanceCacheIndex[] = {
0, 1, 2, 3, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1,
@@ -50,12 +48,11 @@ static const uint8_t kCutoffTransforms[] = {
* The number has been tuned heuristically against compression benchmarks. */
static const uint32_t kHashMul32 = 0x1e35a7bd;
-template<int kShiftBits>
-inline uint32_t Hash(const uint8_t *data) {
+static BROTLI_INLINE uint32_t Hash14(const uint8_t* data) {
uint32_t h = BROTLI_UNALIGNED_LOAD32(data) * kHashMul32;
/* The higher bits contain more mixture from the multiplication,
so we take our results from there. */
- return h >> (32 - kShiftBits);
+ return h >> (32 - 14);
}
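
Hash14 keeps the top 14 bits of a 32-bit multiplicative hash, since the high half of the product is where the multiplier mixes best. A portable sketch; Load32 is a hypothetical memcpy-based stand-in for BROTLI_UNALIGNED_LOAD32, so values are byte-order dependent exactly as in the original.

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    static const uint32_t kHashMul32 = 0x1e35a7bd;

    static uint32_t Load32(const uint8_t* p) {
      uint32_t v;
      memcpy(&v, p, sizeof(v));
      return v;
    }

    static uint32_t Hash14(const uint8_t* data) {
      uint32_t h = Load32(data) * kHashMul32;
      return h >> (32 - 14);  /* keep the well-mixed high bits */
    }

    int main(void) {
      const uint8_t buf[] = "abcdefg";
      /* Two overlapping 4-byte windows: values in [0, 2^14). */
      printf("%u %u\n", Hash14(buf), Hash14(buf + 1));
      return 0;
    }
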
/* Usually, we always choose the longest backward reference. This function
@@ -74,442 +71,233 @@ inline uint32_t Hash(const uint8_t *data) {
than the saved literals.
backward_reference_offset MUST be positive. */
-inline double BackwardReferenceScore(size_t copy_length,
- size_t backward_reference_offset) {
- return 5.4 * static_cast<double>(copy_length) -
+static BROTLI_INLINE double BackwardReferenceScore(
+ size_t copy_length, size_t backward_reference_offset) {
+ return 5.4 * (double)copy_length -
1.20 * Log2FloorNonZero(backward_reference_offset);
}
-inline double BackwardReferenceScoreUsingLastDistance(size_t copy_length,
- size_t distance_short_code) {
+static BROTLI_INLINE double BackwardReferenceScoreUsingLastDistance(
+ size_t copy_length, size_t distance_short_code) {
static const double kDistanceShortCodeBitCost[16] = {
-0.6, 0.95, 1.17, 1.27,
0.93, 0.93, 0.96, 0.96, 0.99, 0.99,
1.05, 1.05, 1.15, 1.15, 1.25, 1.25
};
- return 5.4 * static_cast<double>(copy_length) -
+ return 5.4 * (double)copy_length -
kDistanceShortCodeBitCost[distance_short_code];
}
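
The score trades roughly 5.4 points per copied byte against 1.2 points per doubling of distance, so a slightly shorter but much closer match can win. A worked comparison:

    #include <stdio.h>
    #include <stdint.h>
    #include <stddef.h>

    static uint32_t Log2FloorNonZero(size_t n) {
      uint32_t r = 0;
      while (n >>= 1) r++;
      return r;
    }

    static double Score(size_t copy_length, size_t offset) {
      return 5.4 * (double)copy_length - 1.20 * Log2FloorNonZero(offset);
    }

    int main(void) {
      /* len 8 at distance 1024: 43.2 - 12.0 = 31.2;
         len 7 at distance 8:    37.8 -  3.6 = 34.2 -- nearer wins. */
      printf("%.1f %.1f\n", Score(8, 1024), Score(7, 8));
      return 0;
    }
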
-struct BackwardMatch {
- BackwardMatch(void) : distance(0), length_and_code(0) {}
+typedef struct BackwardMatch {
+ uint32_t distance;
+ uint32_t length_and_code;
+} BackwardMatch;
- BackwardMatch(size_t dist, size_t len)
- : distance(static_cast<uint32_t>(dist))
- , length_and_code(static_cast<uint32_t>(len << 5)) {}
+static BROTLI_INLINE void InitBackwardMatch(BackwardMatch* self,
+ size_t dist, size_t len) {
+ self->distance = (uint32_t)dist;
+ self->length_and_code = (uint32_t)(len << 5);
+}
- BackwardMatch(size_t dist, size_t len, size_t len_code)
- : distance(static_cast<uint32_t>(dist))
- , length_and_code(static_cast<uint32_t>(
- (len << 5) | (len == len_code ? 0 : len_code))) {}
+static BROTLI_INLINE void InitDictionaryBackwardMatch(BackwardMatch* self,
+ size_t dist, size_t len, size_t len_code) {
+ self->distance = (uint32_t)dist;
+ self->length_and_code =
+ (uint32_t)((len << 5) | (len == len_code ? 0 : len_code));
+}
- size_t length(void) const {
- return length_and_code >> 5;
- }
- size_t length_code(void) const {
- size_t code = length_and_code & 31;
- return code ? code : length();
- }
+static BROTLI_INLINE size_t BackwardMatchLength(const BackwardMatch* self) {
+ return self->length_and_code >> 5;
+}
- uint32_t distance;
- uint32_t length_and_code;
-};
+static BROTLI_INLINE size_t BackwardMatchLengthCode(const BackwardMatch* self) {
+ size_t code = self->length_and_code & 31;
+ return code ? code : BackwardMatchLength(self);
+}
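
length_and_code packs the copy length in the upper bits and keeps the low 5 bits for a dictionary word length that differs from it (0 otherwise, so the accessor falls back to the plain length). A round-trip check of the packing, using the structs and accessors above:

    #include <stdint.h>
    #include <stdio.h>
    #include <stddef.h>

    typedef struct BackwardMatch {
      uint32_t distance;
      uint32_t length_and_code;
    } BackwardMatch;

    static void InitDictionaryBackwardMatch(BackwardMatch* self, size_t dist,
                                            size_t len, size_t len_code) {
      self->distance = (uint32_t)dist;
      self->length_and_code =
          (uint32_t)((len << 5) | (len == len_code ? 0 : len_code));
    }

    static size_t BackwardMatchLength(const BackwardMatch* self) {
      return self->length_and_code >> 5;
    }

    static size_t BackwardMatchLengthCode(const BackwardMatch* self) {
      size_t code = self->length_and_code & 31;
      return code ? code : BackwardMatchLength(self);
    }

    int main(void) {
      BackwardMatch m;
      /* An assumed dictionary match: 9 bytes produced from a 12-byte
         dictionary word after a cutoff transform. */
      InitDictionaryBackwardMatch(&m, 123456, 9, 12);
      printf("len=%u code=%u\n", (unsigned)BackwardMatchLength(&m),
             (unsigned)BackwardMatchLengthCode(&m));  /* len=9 code=12 */
      return 0;
    }
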
-// A (forgetful) hash table to the data seen by the compressor, to
-// help create backward references to previous data.
-//
-// This is a hash map of fixed size (kBucketSize). Starting from the
-// given index, kBucketSweep buckets are used to store values of a key.
-template <int kBucketBits, int kBucketSweep, bool kUseDictionary>
-class HashLongestMatchQuickly {
- public:
- HashLongestMatchQuickly(void) {
- Reset();
- }
- void Reset(void) {
- need_init_ = true;
- num_dict_lookups_ = 0;
- num_dict_matches_ = 0;
- }
- void Init(void) {
- if (need_init_) {
- // It is not strictly necessary to fill this buffer here, but
- // not filling will make the results of the compression stochastic
- // (but correct). This is because random data would cause the
- // system to find accidentally good backward references here and there.
- memset(&buckets_[0], 0, sizeof(buckets_));
- need_init_ = false;
- }
- }
- void InitForData(const uint8_t* data, size_t num) {
- for (size_t i = 0; i < num; ++i) {
- const uint32_t key = HashBytes(&data[i]);
- memset(&buckets_[key], 0, kBucketSweep * sizeof(buckets_[0]));
- need_init_ = false;
- }
- }
- // Look at 4 bytes at data.
- // Compute a hash from these, and store the value somewhere within
- // [ix .. ix+3].
- inline void Store(const uint8_t *data, const uint32_t ix) {
- const uint32_t key = HashBytes(data);
- // Wiggle the value with the bucket sweep range.
- const uint32_t off = (ix >> 3) % kBucketSweep;
- buckets_[key + off] = ix;
- }
+#define EXPAND_CAT(a, b) CAT(a, b)
+#define CAT(a, b) a ## b
+#define FN(X) EXPAND_CAT(X, HASHER())
- // Find a longest backward match of &ring_buffer[cur_ix & ring_buffer_mask]
- // up to the length of max_length and stores the position cur_ix in the
- // hash table.
- //
- // Does not look for matches longer than max_length.
- // Does not look for matches further away than max_backward.
- // Writes the best found match length into best_len_out.
- // Writes the index (&data[index]) of the start of the best match into
- // best_distance_out.
- inline bool FindLongestMatch(const uint8_t * __restrict ring_buffer,
- const size_t ring_buffer_mask,
- const int* __restrict distance_cache,
- const size_t cur_ix,
- const size_t max_length,
- const size_t max_backward,
- size_t * __restrict best_len_out,
- size_t * __restrict best_len_code_out,
- size_t * __restrict best_distance_out,
- double* __restrict best_score_out) {
- const size_t best_len_in = *best_len_out;
- const size_t cur_ix_masked = cur_ix & ring_buffer_mask;
- const uint32_t key = HashBytes(&ring_buffer[cur_ix_masked]);
- int compare_char = ring_buffer[cur_ix_masked + best_len_in];
- double best_score = *best_score_out;
- size_t best_len = best_len_in;
- size_t cached_backward = static_cast<size_t>(distance_cache[0]);
- size_t prev_ix = cur_ix - cached_backward;
- bool match_found = false;
- if (prev_ix < cur_ix) {
- prev_ix &= static_cast<uint32_t>(ring_buffer_mask);
- if (compare_char == ring_buffer[prev_ix + best_len]) {
- size_t len = FindMatchLengthWithLimit(&ring_buffer[prev_ix],
- &ring_buffer[cur_ix_masked],
- max_length);
- if (len >= 4) {
- best_score = BackwardReferenceScoreUsingLastDistance(len, 0);
- best_len = len;
- *best_len_out = len;
- *best_len_code_out = len;
- *best_distance_out = cached_backward;
- *best_score_out = best_score;
- compare_char = ring_buffer[cur_ix_masked + best_len];
- if (kBucketSweep == 1) {
- buckets_[key] = static_cast<uint32_t>(cur_ix);
- return true;
- } else {
- match_found = true;
- }
- }
- }
- }
- if (kBucketSweep == 1) {
- // Only one to look for, don't bother to prepare for a loop.
- prev_ix = buckets_[key];
- buckets_[key] = static_cast<uint32_t>(cur_ix);
- size_t backward = cur_ix - prev_ix;
- prev_ix &= static_cast<uint32_t>(ring_buffer_mask);
- if (compare_char != ring_buffer[prev_ix + best_len_in]) {
- return false;
- }
- if (PREDICT_FALSE(backward == 0 || backward > max_backward)) {
- return false;
- }
- const size_t len = FindMatchLengthWithLimit(&ring_buffer[prev_ix],
- &ring_buffer[cur_ix_masked],
- max_length);
- if (len >= 4) {
- *best_len_out = len;
- *best_len_code_out = len;
- *best_distance_out = backward;
- *best_score_out = BackwardReferenceScore(len, backward);
- return true;
- }
- } else {
- uint32_t *bucket = buckets_ + key;
- prev_ix = *bucket++;
- for (int i = 0; i < kBucketSweep; ++i, prev_ix = *bucket++) {
- const size_t backward = cur_ix - prev_ix;
- prev_ix &= static_cast<uint32_t>(ring_buffer_mask);
- if (compare_char != ring_buffer[prev_ix + best_len]) {
- continue;
- }
- if (PREDICT_FALSE(backward == 0 || backward > max_backward)) {
- continue;
- }
- const size_t len = FindMatchLengthWithLimit(&ring_buffer[prev_ix],
- &ring_buffer[cur_ix_masked],
- max_length);
- if (len >= 4) {
- const double score = BackwardReferenceScore(len, backward);
- if (best_score < score) {
- best_score = score;
- best_len = len;
- *best_len_out = best_len;
- *best_len_code_out = best_len;
- *best_distance_out = backward;
- *best_score_out = score;
- compare_char = ring_buffer[cur_ix_masked + best_len];
- match_found = true;
- }
- }
- }
- }
- if (kUseDictionary && !match_found &&
- num_dict_matches_ >= (num_dict_lookups_ >> 7)) {
- ++num_dict_lookups_;
- const uint32_t dict_key = Hash<14>(&ring_buffer[cur_ix_masked]) << 1;
- const uint16_t v = kStaticDictionaryHash[dict_key];
- if (v > 0) {
- const uint32_t len = v & 31;
- const uint32_t dist = v >> 5;
- const size_t offset =
- kBrotliDictionaryOffsetsByLength[len] + len * dist;
- if (len <= max_length) {
- const size_t matchlen =
- FindMatchLengthWithLimit(&ring_buffer[cur_ix_masked],
- &kBrotliDictionary[offset], len);
- if (matchlen + kCutoffTransformsCount > len && matchlen > 0) {
- const size_t transform_id = kCutoffTransforms[len - matchlen];
- const size_t word_id =
- transform_id * (1u << kBrotliDictionarySizeBitsByLength[len]) +
- dist;
- const size_t backward = max_backward + word_id + 1;
- const double score = BackwardReferenceScore(matchlen, backward);
- if (best_score < score) {
- ++num_dict_matches_;
- best_score = score;
- best_len = matchlen;
- *best_len_out = best_len;
- *best_len_code_out = len;
- *best_distance_out = backward;
- *best_score_out = best_score;
- match_found = true;
- }
- }
- }
- }
- }
- const uint32_t off = (cur_ix >> 3) % kBucketSweep;
- buckets_[key + off] = static_cast<uint32_t>(cur_ix);
- return match_found;
- }
+#define MAX_NUM_MATCHES_H10 (64 + MAX_TREE_SEARCH_DEPTH)
- enum { kHashLength = 5 };
- enum { kHashTypeLength = 8 };
- // HashBytes is the function that chooses the bucket to place
- // the address in. The HashLongestMatch and HashLongestMatchQuickly
- // classes have separate, different implementations of hashing.
- static uint32_t HashBytes(const uint8_t *data) {
- // Computing a hash based on 5 bytes works much better for
- // qualities 1 and 3, where the next hash value is likely to replace
- uint64_t h = (BROTLI_UNALIGNED_LOAD64(data) << 24) * kHashMul32;
- // The higher bits contain more mixture from the multiplication,
- // so we take our results from there.
- return static_cast<uint32_t>(h >> (64 - kBucketBits));
- }
+#define HASHER() H10
+#define HashToBinaryTree HASHER()
- enum { kHashMapSize = 4 << kBucketBits };
+#define BUCKET_BITS 17
+#define BUCKET_SIZE (1 << BUCKET_BITS)
- private:
- static const uint32_t kBucketSize = 1 << kBucketBits;
- uint32_t buckets_[kBucketSize + kBucketSweep];
- // True if buckets_ array needs to be initialized.
- bool need_init_;
- size_t num_dict_lookups_;
- size_t num_dict_matches_;
-};
+static size_t FN(HashTypeLength)(void) { return 4; }
+static size_t FN(StoreLookahead)(void) { return MAX_TREE_COMP_LENGTH; }
-// A (forgetful) hash table to the data seen by the compressor, to
-// help create backward references to previous data.
-//
-// This is a hash map of fixed size (kBucketSize) to a ring buffer of
-// fixed size (kBlockSize). The ring buffer contains the last kBlockSize
-// index positions of the given hash key in the compressed data.
-template <int kBucketBits,
- int kBlockBits,
- int kNumLastDistancesToCheck>
-class HashLongestMatch {
- public:
- HashLongestMatch(void) {
- Reset();
- }
+static uint32_t FN(HashBytes)(const uint8_t *data) {
+ uint32_t h = BROTLI_UNALIGNED_LOAD32(data) * kHashMul32;
+ /* The higher bits contain more mixture from the multiplication,
+ so we take our results from there. */
+ return h >> (32 - BUCKET_BITS);
+}
- void Reset(void) {
- need_init_ = true;
- num_dict_lookups_ = 0;
- num_dict_matches_ = 0;
- }
+/* A (forgetful) hash table where each hash bucket contains a binary tree of
+ sequences whose first 4 bytes share the same hash code.
+ Each sequence is MAX_TREE_COMP_LENGTH long and is identified by its starting
+ position in the input data. The binary tree is sorted by the lexicographic
+ order of the sequences, and it is also a max-heap with respect to the
+ starting positions. */
+typedef struct HashToBinaryTree {
+ /* The window size minus 1 */
+ size_t window_mask_;
- void Init(void) {
- if (need_init_) {
- memset(&num_[0], 0, sizeof(num_));
- need_init_ = false;
- }
- }
+ /* Hash table that maps the 4-byte hashes of the sequence to the last
+ position where this hash was found, which is the root of the binary
+ tree of sequences that share this hash bucket. */
+ uint32_t buckets_[BUCKET_SIZE];
- void InitForData(const uint8_t* data, size_t num) {
- for (size_t i = 0; i < num; ++i) {
- const uint32_t key = HashBytes(&data[i]);
- num_[key] = 0;
- need_init_ = false;
+ /* The union of the binary trees of each hash bucket. The root of the tree
+ corresponding to a hash is a sequence starting at buckets_[hash] and
+ the left and right children of a sequence starting at pos are
+ forest_[2 * pos] and forest_[2 * pos + 1]. */
+ uint32_t* forest_;
+
+ /* A position used to mark a non-existent sequence, i.e. a tree is empty if
+ its root is at invalid_pos_ and a node is a leaf if both its children
+ are at invalid_pos_. */
+ uint32_t invalid_pos_;
+
+ int is_dirty_;
+} HashToBinaryTree;
+
+static void FN(Reset)(HashToBinaryTree* self) {
+ self->is_dirty_ = 1;
+}
+
+static void FN(Initialize)(HashToBinaryTree* self) {
+ self->forest_ = NULL;
+ FN(Reset)(self);
+}
+
+static void FN(Cleanup)(MemoryManager* m, HashToBinaryTree* self) {
+ BROTLI_FREE(m, self->forest_);
+}
+
+static void FN(Init)(
+ MemoryManager* m, HashToBinaryTree* self, const uint8_t* data, int lgwin,
+ size_t position, size_t bytes, int is_last) {
+ if (self->is_dirty_) {
+ uint32_t invalid_pos;
+ size_t num_nodes;
+ uint32_t i;
+ BROTLI_UNUSED(data);
+ self->window_mask_ = (1u << lgwin) - 1u;
+ invalid_pos = (uint32_t)(0 - self->window_mask_);
+ self->invalid_pos_ = invalid_pos;
+ for (i = 0; i < BUCKET_SIZE; i++) {
+ self->buckets_[i] = invalid_pos;
}
+ num_nodes = (position == 0 && is_last) ? bytes : self->window_mask_ + 1;
+ self->forest_ = BROTLI_ALLOC(m, uint32_t, 2 * num_nodes);
+ self->is_dirty_ = 0;
+ if (BROTLI_IS_OOM(m)) return;
}
+}
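
FN(Init) sizes forest_ at two uint32_t slots per node (a left and a right child), allocating window_mask_ + 1 nodes for streaming use but only `bytes` nodes when the whole input arrives at once (position == 0 && is_last). A quick sizing calculation for an assumed lgwin of 22 and a 1 MiB one-shot input:

    #include <stdio.h>
    #include <stdint.h>
    #include <stddef.h>

    int main(void) {
      const int lgwin = 22;
      const size_t window_mask = ((size_t)1 << lgwin) - 1;
      const size_t bytes = (size_t)1 << 20;  /* assumed one-shot input */
      printf("streaming: %zu forest bytes\n",
             2 * (window_mask + 1) * sizeof(uint32_t));  /* 32 MiB */
      printf("one-shot:  %zu forest bytes\n",
             2 * bytes * sizeof(uint32_t));              /* 8 MiB */
      return 0;
    }
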
- // Look at 3 bytes at data.
- // Compute a hash from these, and store the value of ix at that position.
- inline void Store(const uint8_t *data, const uint32_t ix) {
- const uint32_t key = HashBytes(data);
- const int minor_ix = num_[key] & kBlockMask;
- buckets_[key][minor_ix] = ix;
- ++num_[key];
- }
+static BROTLI_INLINE size_t FN(LeftChildIndex)(HashToBinaryTree* self,
+ const size_t pos) {
+ return 2 * (pos & self->window_mask_);
+}
- // Find a longest backward match of &data[cur_ix] up to the length of
- // max_length and stores the position cur_ix in the hash table.
- //
- // Does not look for matches longer than max_length.
- // Does not look for matches further away than max_backward.
- // Writes the best found match length into best_len_out.
- // Writes the index (&data[index]) offset from the start of the best match
- // into best_distance_out.
- // Write the score of the best match into best_score_out.
- bool FindLongestMatch(const uint8_t * __restrict data,
- const size_t ring_buffer_mask,
- const int* __restrict distance_cache,
- const size_t cur_ix,
- const size_t max_length,
- const size_t max_backward,
- size_t * __restrict best_len_out,
- size_t * __restrict best_len_code_out,
- size_t * __restrict best_distance_out,
- double * __restrict best_score_out) {
- *best_len_code_out = 0;
- const size_t cur_ix_masked = cur_ix & ring_buffer_mask;
- bool match_found = false;
- // Don't accept a short copy from far away.
- double best_score = *best_score_out;
- size_t best_len = *best_len_out;
- *best_len_out = 0;
- // Try last distance first.
- for (size_t i = 0; i < kNumLastDistancesToCheck; ++i) {
- const size_t idx = kDistanceCacheIndex[i];
- const size_t backward =
- static_cast<size_t>(distance_cache[idx] + kDistanceCacheOffset[i]);
- size_t prev_ix = static_cast<size_t>(cur_ix - backward);
- if (prev_ix >= cur_ix) {
- continue;
- }
- if (PREDICT_FALSE(backward > max_backward)) {
- continue;
- }
- prev_ix &= ring_buffer_mask;
+static BROTLI_INLINE size_t FN(RightChildIndex)(HashToBinaryTree* self,
+ const size_t pos) {
+ return 2 * (pos & self->window_mask_) + 1;
+}
- if (cur_ix_masked + best_len > ring_buffer_mask ||
- prev_ix + best_len > ring_buffer_mask ||
- data[cur_ix_masked + best_len] != data[prev_ix + best_len]) {
- continue;
- }
- const size_t len = FindMatchLengthWithLimit(&data[prev_ix],
- &data[cur_ix_masked],
- max_length);
- if (len >= 3 || (len == 2 && i < 2)) {
- // Comparing for >= 2 does not change the semantics, but just saves for
- // a few unnecessary binary logarithms in backward reference score,
- // since we are not interested in such short matches.
- double score = BackwardReferenceScoreUsingLastDistance(len, i);
- if (best_score < score) {
- best_score = score;
- best_len = len;
- *best_len_out = best_len;
- *best_len_code_out = best_len;
- *best_distance_out = backward;
- *best_score_out = best_score;
- match_found = true;
- }
+/* Stores the hash of the next 4 bytes and in a single tree-traversal, the
+ hash bucket's binary tree is searched for matches and is re-rooted at the
+ current position.
+
+ If less than MAX_TREE_COMP_LENGTH data is available, the hash bucket of the
+ current position is searched for matches, but the state of the hash table
+   is not changed, since we cannot know the final sorting order of the
+ current (incomplete) sequence.
+
+ This function must be called with increasing cur_ix positions. */
+static BROTLI_INLINE BackwardMatch* FN(StoreAndFindMatches)(
+ HashToBinaryTree* self, const uint8_t* const BROTLI_RESTRICT data,
+ const size_t cur_ix, const size_t ring_buffer_mask, const size_t max_length,
+ const size_t max_backward, size_t* const BROTLI_RESTRICT best_len,
+ BackwardMatch* BROTLI_RESTRICT matches) {
+ const size_t cur_ix_masked = cur_ix & ring_buffer_mask;
+ const size_t max_comp_len =
+ BROTLI_MIN(size_t, max_length, MAX_TREE_COMP_LENGTH);
+ const int should_reroot_tree = (max_length >= MAX_TREE_COMP_LENGTH) ? 1 : 0;
+ const uint32_t key = FN(HashBytes)(&data[cur_ix_masked]);
+ size_t prev_ix = self->buckets_[key];
+ /* The forest index of the rightmost node of the left subtree of the new
+ root, updated as we traverse and reroot the tree of the hash bucket. */
+ size_t node_left = FN(LeftChildIndex)(self, cur_ix);
+ /* The forest index of the leftmost node of the right subtree of the new
+ root, updated as we traverse and reroot the tree of the hash bucket. */
+ size_t node_right = FN(RightChildIndex)(self, cur_ix);
+ /* The match length of the rightmost node of the left subtree of the new
+ root, updated as we traverse and reroot the tree of the hash bucket. */
+ size_t best_len_left = 0;
+ /* The match length of the leftmost node of the right subtree of the new
+ root, updated as we traverse and reroot the tree of the hash bucket. */
+ size_t best_len_right = 0;
+ size_t depth_remaining;
+ if (should_reroot_tree) {
+ self->buckets_[key] = (uint32_t)cur_ix;
+ }
+ for (depth_remaining = MAX_TREE_SEARCH_DEPTH; ; --depth_remaining) {
+ const size_t backward = cur_ix - prev_ix;
+ const size_t prev_ix_masked = prev_ix & ring_buffer_mask;
+ if (backward == 0 || backward > max_backward || depth_remaining == 0) {
+ if (should_reroot_tree) {
+ self->forest_[node_left] = self->invalid_pos_;
+ self->forest_[node_right] = self->invalid_pos_;
}
+ break;
}
- const uint32_t key = HashBytes(&data[cur_ix_masked]);
- const uint32_t * __restrict const bucket = &buckets_[key][0];
- const size_t down = (num_[key] > kBlockSize) ? (num_[key] - kBlockSize) : 0;
- for (size_t i = num_[key]; i > down;) {
- --i;
- size_t prev_ix = bucket[i & kBlockMask];
- const size_t backward = cur_ix - prev_ix;
- if (PREDICT_FALSE(backward == 0 || backward > max_backward)) {
- break;
- }
- prev_ix &= ring_buffer_mask;
- if (cur_ix_masked + best_len > ring_buffer_mask ||
- prev_ix + best_len > ring_buffer_mask ||
- data[cur_ix_masked + best_len] != data[prev_ix + best_len]) {
- continue;
+ {
+ const size_t cur_len = BROTLI_MIN(size_t, best_len_left, best_len_right);
+ size_t len;
+ assert(cur_len <= MAX_TREE_COMP_LENGTH);
+ len = cur_len +
+ FindMatchLengthWithLimit(&data[cur_ix_masked + cur_len],
+ &data[prev_ix_masked + cur_len],
+ max_length - cur_len);
+ assert(0 == memcmp(&data[cur_ix_masked], &data[prev_ix_masked], len));
+ if (matches && len > *best_len) {
+ *best_len = len;
+ InitBackwardMatch(matches++, backward, len);
}
- const size_t len = FindMatchLengthWithLimit(&data[prev_ix],
- &data[cur_ix_masked],
- max_length);
- if (len >= 4) {
- // Comparing for >= 3 does not change the semantics, but just saves
- // for a few unnecessary binary logarithms in backward reference
- // score, since we are not interested in such short matches.
- double score = BackwardReferenceScore(len, backward);
- if (best_score < score) {
- best_score = score;
- best_len = len;
- *best_len_out = best_len;
- *best_len_code_out = best_len;
- *best_distance_out = backward;
- *best_score_out = best_score;
- match_found = true;
+ if (len >= max_comp_len) {
+ if (should_reroot_tree) {
+ self->forest_[node_left] =
+ self->forest_[FN(LeftChildIndex)(self, prev_ix)];
+ self->forest_[node_right] =
+ self->forest_[FN(RightChildIndex)(self, prev_ix)];
}
+ break;
}
- }
- buckets_[key][num_[key] & kBlockMask] = static_cast<uint32_t>(cur_ix);
- ++num_[key];
- if (!match_found && num_dict_matches_ >= (num_dict_lookups_ >> 7)) {
- size_t dict_key = Hash<14>(&data[cur_ix_masked]) << 1;
- for (int k = 0; k < 2; ++k, ++dict_key) {
- ++num_dict_lookups_;
- const uint16_t v = kStaticDictionaryHash[dict_key];
- if (v > 0) {
- const size_t len = v & 31;
- const size_t dist = v >> 5;
- const size_t offset =
- kBrotliDictionaryOffsetsByLength[len] + len * dist;
- if (len <= max_length) {
- const size_t matchlen =
- FindMatchLengthWithLimit(&data[cur_ix_masked],
- &kBrotliDictionary[offset], len);
- if (matchlen + kCutoffTransformsCount > len && matchlen > 0) {
- const size_t transform_id = kCutoffTransforms[len - matchlen];
- const size_t word_id =
- transform_id * (1 << kBrotliDictionarySizeBitsByLength[len]) +
- dist;
- const size_t backward = max_backward + word_id + 1;
- double score = BackwardReferenceScore(matchlen, backward);
- if (best_score < score) {
- ++num_dict_matches_;
- best_score = score;
- best_len = matchlen;
- *best_len_out = best_len;
- *best_len_code_out = len;
- *best_distance_out = backward;
- *best_score_out = best_score;
- match_found = true;
- }
- }
- }
+ if (data[cur_ix_masked + len] > data[prev_ix_masked + len]) {
+ best_len_left = len;
+ if (should_reroot_tree) {
+ self->forest_[node_left] = (uint32_t)prev_ix;
+ }
+ node_left = FN(RightChildIndex)(self, prev_ix);
+ prev_ix = self->forest_[node_left];
+ } else {
+ best_len_right = len;
+ if (should_reroot_tree) {
+ self->forest_[node_right] = (uint32_t)prev_ix;
}
+ node_right = FN(LeftChildIndex)(self, prev_ix);
+ prev_ix = self->forest_[node_right];
}
}
- return match_found;
}
+ return matches;
+}
/* Finds all backward matches of &data[cur_ix & ring_buffer_mask] up to the
length of max_length and stores the position cur_ix in the hash table.
@@ -518,447 +306,194 @@ class HashLongestMatch {
matches in matches[0] to matches[*num_matches - 1]. The matches will be
sorted by strictly increasing length and (non-strictly) increasing
distance. */
- size_t FindAllMatches(const uint8_t* data,
- const size_t ring_buffer_mask,
- const size_t cur_ix,
- const size_t max_length,
- const size_t max_backward,
- BackwardMatch* matches) {
- BackwardMatch* const orig_matches = matches;
- const size_t cur_ix_masked = cur_ix & ring_buffer_mask;
- size_t best_len = 1;
- size_t stop = cur_ix - 64;
- if (cur_ix < 64) { stop = 0; }
- for (size_t i = cur_ix - 1; i > stop && best_len <= 2; --i) {
- size_t prev_ix = i;
- const size_t backward = cur_ix - prev_ix;
- if (PREDICT_FALSE(backward > max_backward)) {
- break;
- }
- prev_ix &= ring_buffer_mask;
- if (data[cur_ix_masked] != data[prev_ix] ||
- data[cur_ix_masked + 1] != data[prev_ix + 1]) {
- continue;
- }
- const size_t len =
- FindMatchLengthWithLimit(&data[prev_ix], &data[cur_ix_masked],
- max_length);
- if (len > best_len) {
- best_len = len;
- *matches++ = BackwardMatch(backward, len);
- }
+static BROTLI_INLINE size_t FN(FindAllMatches)(HashToBinaryTree* self,
+ const uint8_t* data, const size_t ring_buffer_mask, const size_t cur_ix,
+ const size_t max_length, const size_t max_backward, const int quality,
+ BackwardMatch* matches) {
+ BackwardMatch* const orig_matches = matches;
+ const size_t cur_ix_masked = cur_ix & ring_buffer_mask;
+ size_t best_len = 1;
+ const size_t short_match_max_backward = quality <= 10 ? 16 : 64;
+ size_t stop = cur_ix - short_match_max_backward;
+ uint32_t dict_matches[BROTLI_MAX_STATIC_DICTIONARY_MATCH_LEN + 1];
+ size_t i;
+ if (cur_ix < short_match_max_backward) { stop = 0; }
+ for (i = cur_ix - 1; i > stop && best_len <= 2; --i) {
+ size_t prev_ix = i;
+ const size_t backward = cur_ix - prev_ix;
+ if (PREDICT_FALSE(backward > max_backward)) {
+ break;
}
- const uint32_t key = HashBytes(&data[cur_ix_masked]);
- const uint32_t * __restrict const bucket = &buckets_[key][0];
- const size_t down = (num_[key] > kBlockSize) ? (num_[key] - kBlockSize) : 0;
- for (size_t i = num_[key]; i > down;) {
- --i;
- size_t prev_ix = bucket[i & kBlockMask];
- const size_t backward = cur_ix - prev_ix;
- if (PREDICT_FALSE(backward == 0 || backward > max_backward)) {
- break;
- }
- prev_ix &= ring_buffer_mask;
- if (cur_ix_masked + best_len > ring_buffer_mask ||
- prev_ix + best_len > ring_buffer_mask ||
- data[cur_ix_masked + best_len] != data[prev_ix + best_len]) {
- continue;
- }
- const size_t len =
- FindMatchLengthWithLimit(&data[prev_ix], &data[cur_ix_masked],
- max_length);
- if (len > best_len) {
- best_len = len;
- *matches++ = BackwardMatch(backward, len);
- }
- }
- buckets_[key][num_[key] & kBlockMask] = static_cast<uint32_t>(cur_ix);
- ++num_[key];
- uint32_t dict_matches[kMaxDictionaryMatchLen + 1];
- for (size_t i = 0; i <= kMaxDictionaryMatchLen; ++i) {
- dict_matches[i] = kInvalidMatch;
+ prev_ix &= ring_buffer_mask;
+ if (data[cur_ix_masked] != data[prev_ix] ||
+ data[cur_ix_masked + 1] != data[prev_ix + 1]) {
+ continue;
}
- size_t minlen = std::max<size_t>(4, best_len + 1);
- if (FindAllStaticDictionaryMatches(&data[cur_ix_masked], minlen, max_length,
- &dict_matches[0])) {
- size_t maxlen = std::min<size_t>(kMaxDictionaryMatchLen, max_length);
- for (size_t l = minlen; l <= maxlen; ++l) {
- uint32_t dict_id = dict_matches[l];
- if (dict_id < kInvalidMatch) {
- *matches++ = BackwardMatch(max_backward + (dict_id >> 5) + 1, l,
- dict_id & 31);
- }
- }
- }
- return static_cast<size_t>(matches - orig_matches);
- }
-
- enum { kHashLength = 4 };
- enum { kHashTypeLength = 4 };
-
- // HashBytes is the function that chooses the bucket to place
- // the address in. The HashLongestMatch and HashLongestMatchQuickly
- // classes have separate, different implementations of hashing.
- static uint32_t HashBytes(const uint8_t *data) {
- uint32_t h = BROTLI_UNALIGNED_LOAD32(data) * kHashMul32;
- // The higher bits contain more mixture from the multiplication,
- // so we take our results from there.
- return h >> (32 - kBucketBits);
- }
-
- enum { kHashMapSize = 2 << kBucketBits };
-
- static const size_t kMaxNumMatches = 64 + (1 << kBlockBits);
-
- private:
- // Number of hash buckets.
- static const uint32_t kBucketSize = 1 << kBucketBits;
-
- // Only kBlockSize newest backward references are kept,
- // and the older are forgotten.
- static const uint32_t kBlockSize = 1 << kBlockBits;
-
- // Mask for accessing entries in a block (in a ringbuffer manner).
- static const uint32_t kBlockMask = (1 << kBlockBits) - 1;
-
- // Number of entries in a particular bucket.
- uint16_t num_[kBucketSize];
-
- // Buckets containing kBlockSize of backward references.
- uint32_t buckets_[kBucketSize][kBlockSize];
-
- // True if num_ array needs to be initialized.
- bool need_init_;
-
- size_t num_dict_lookups_;
- size_t num_dict_matches_;
-};
-
-// A (forgetful) hash table where each hash bucket contains a binary tree of
-// sequences whose first 4 bytes share the same hash code.
-// Each sequence is kMaxTreeCompLength long and is identified by its starting
-// position in the input data. The binary tree is sorted by the lexicographic
-// order of the sequences, and it is also a max-heap with respect to the
-// starting positions.
-class HashToBinaryTree {
- public:
- HashToBinaryTree() : forest_(NULL) {
- Reset();
- }
-
- ~HashToBinaryTree() {
- delete[] forest_;
- }
-
- void Reset() {
- need_init_ = true;
- }
-
- void Init(int lgwin, size_t position, size_t bytes, bool is_last) {
- if (need_init_) {
- window_mask_ = (1u << lgwin) - 1u;
- invalid_pos_ = static_cast<uint32_t>(-window_mask_);
- for (uint32_t i = 0; i < kBucketSize; i++) {
- buckets_[i] = invalid_pos_;
- }
- size_t num_nodes = (position == 0 && is_last) ? bytes : window_mask_ + 1;
- forest_ = new uint32_t[2 * num_nodes];
- need_init_ = false;
- }
- }
-
- // Finds all backward matches of &data[cur_ix & ring_buffer_mask] up to the
- // length of max_length and stores the position cur_ix in the hash table.
- //
- // Sets *num_matches to the number of matches found, and stores the found
- // matches in matches[0] to matches[*num_matches - 1]. The matches will be
- // sorted by strictly increasing length and (non-strictly) increasing
- // distance.
- size_t FindAllMatches(const uint8_t* data,
- const size_t ring_buffer_mask,
- const size_t cur_ix,
- const size_t max_length,
- const size_t max_backward,
- BackwardMatch* matches) {
- BackwardMatch* const orig_matches = matches;
- const size_t cur_ix_masked = cur_ix & ring_buffer_mask;
- size_t best_len = 1;
- size_t stop = cur_ix - 64;
- if (cur_ix < 64) { stop = 0; }
- for (size_t i = cur_ix - 1; i > stop && best_len <= 2; --i) {
- size_t prev_ix = i;
- const size_t backward = cur_ix - prev_ix;
- if (PREDICT_FALSE(backward > max_backward)) {
- break;
- }
- prev_ix &= ring_buffer_mask;
- if (data[cur_ix_masked] != data[prev_ix] ||
- data[cur_ix_masked + 1] != data[prev_ix + 1]) {
- continue;
- }
+ {
const size_t len =
FindMatchLengthWithLimit(&data[prev_ix], &data[cur_ix_masked],
max_length);
if (len > best_len) {
best_len = len;
- *matches++ = BackwardMatch(backward, len);
+ InitBackwardMatch(matches++, backward, len);
}
}
- if (best_len < max_length) {
- matches = StoreAndFindMatches(data, cur_ix, ring_buffer_mask,
- max_length, &best_len, matches);
- }
- uint32_t dict_matches[kMaxDictionaryMatchLen + 1];
- for (size_t i = 0; i <= kMaxDictionaryMatchLen; ++i) {
- dict_matches[i] = kInvalidMatch;
- }
- size_t minlen = std::max<size_t>(4, best_len + 1);
- if (FindAllStaticDictionaryMatches(&data[cur_ix_masked], minlen, max_length,
- &dict_matches[0])) {
- size_t maxlen = std::min<size_t>(kMaxDictionaryMatchLen, max_length);
- for (size_t l = minlen; l <= maxlen; ++l) {
+ }
+ if (best_len < max_length) {
+ matches = FN(StoreAndFindMatches)(self, data, cur_ix, ring_buffer_mask,
+ max_length, max_backward, &best_len, matches);
+ }
+ for (i = 0; i <= BROTLI_MAX_STATIC_DICTIONARY_MATCH_LEN; ++i) {
+ dict_matches[i] = kInvalidMatch;
+ }
+ {
+ size_t minlen = BROTLI_MAX(size_t, 4, best_len + 1);
+ if (BrotliFindAllStaticDictionaryMatches(&data[cur_ix_masked], minlen,
+ max_length, &dict_matches[0])) {
+ size_t maxlen = BROTLI_MIN(
+ size_t, BROTLI_MAX_STATIC_DICTIONARY_MATCH_LEN, max_length);
+ size_t l;
+ for (l = minlen; l <= maxlen; ++l) {
uint32_t dict_id = dict_matches[l];
if (dict_id < kInvalidMatch) {
- *matches++ = BackwardMatch(max_backward + (dict_id >> 5) + 1, l,
- dict_id & 31);
- }
- }
- }
- return static_cast<size_t>(matches - orig_matches);
- }
-
- // Stores the hash of the next 4 bytes and re-roots the binary tree at the
- // current sequence, without returning any matches.
- // REQUIRES: cur_ix + kMaxTreeCompLength <= end-of-current-block
- void Store(const uint8_t* data,
- const size_t ring_buffer_mask,
- const size_t cur_ix) {
- size_t best_len = 0;
- StoreAndFindMatches(data, cur_ix, ring_buffer_mask, kMaxTreeCompLength,
- &best_len, NULL);
- }
-
- void StitchToPreviousBlock(size_t num_bytes,
- size_t position,
- const uint8_t* ringbuffer,
- size_t ringbuffer_mask) {
- if (num_bytes >= 3 && position >= kMaxTreeCompLength) {
- // Store the last `kMaxTreeCompLength - 1` positions in the hasher.
- // These could not be calculated before, since they require knowledge
- // of both the previous and the current block.
- const size_t i_start = position - kMaxTreeCompLength + 1;
- const size_t i_end = std::min(position, i_start + num_bytes);
- for (size_t i = i_start; i < i_end; ++i) {
- // We know that i + kMaxTreeCompLength <= position + num_bytes, i.e. the
- // end of the current block and that we have at least
- // kMaxTreeCompLength tail in the ringbuffer.
- Store(ringbuffer, ringbuffer_mask, i);
- }
- }
- }
-
- static const size_t kMaxNumMatches = 64 + kMaxTreeSearchDepth;
-
- private:
- // Stores the hash of the next 4 bytes and in a single tree-traversal, the
- // hash bucket's binary tree is searched for matches and is re-rooted at the
- // current position.
- //
- // If less than kMaxTreeCompLength data is available, the hash bucket of the
- // current position is searched for matches, but the state of the hash table
- // is not changed, since we can not know the final sorting order of the
- // current (incomplete) sequence.
- //
- // This function must be called with increasing cur_ix positions.
- BackwardMatch* StoreAndFindMatches(const uint8_t* const __restrict data,
- const size_t cur_ix,
- const size_t ring_buffer_mask,
- const size_t max_length,
- size_t* const __restrict best_len,
- BackwardMatch* __restrict matches) {
- const size_t cur_ix_masked = cur_ix & ring_buffer_mask;
- const size_t max_backward = window_mask_ - 15;
- const size_t max_comp_len = std::min(max_length, kMaxTreeCompLength);
- const bool reroot_tree = max_length >= kMaxTreeCompLength;
- const uint32_t key = HashBytes(&data[cur_ix_masked]);
- size_t prev_ix = buckets_[key];
- // The forest index of the rightmost node of the left subtree of the new
- // root, updated as we traverse and reroot the tree of the hash bucket.
- size_t node_left = LeftChildIndex(cur_ix);
- // The forest index of the leftmost node of the right subtree of the new
- // root, updated as we traverse and reroot the tree of the hash bucket.
- size_t node_right = RightChildIndex(cur_ix);
- // The match length of the rightmost node of the left subtree of the new
- // root, updated as we traverse and reroot the tree of the hash bucket.
- size_t best_len_left = 0;
- // The match length of the leftmost node of the right subtree of the new
- // root, updated as we traverse and reroot the tree of the hash bucket.
- size_t best_len_right = 0;
- if (reroot_tree) {
- buckets_[key] = static_cast<uint32_t>(cur_ix);
- }
- for (size_t depth_remaining = kMaxTreeSearchDepth; ; --depth_remaining) {
- const size_t backward = cur_ix - prev_ix;
- const size_t prev_ix_masked = prev_ix & ring_buffer_mask;
- if (backward == 0 || backward > max_backward || depth_remaining == 0) {
- if (reroot_tree) {
- forest_[node_left] = invalid_pos_;
- forest_[node_right] = invalid_pos_;
- }
- break;
- }
- const size_t cur_len = std::min(best_len_left, best_len_right);
- const size_t len = cur_len +
- FindMatchLengthWithLimit(&data[cur_ix_masked + cur_len],
- &data[prev_ix_masked + cur_len],
- max_length - cur_len);
- if (len > *best_len) {
- *best_len = len;
- if (matches) {
- *matches++ = BackwardMatch(backward, len);
- }
- if (len >= max_comp_len) {
- if (reroot_tree) {
- forest_[node_left] = forest_[LeftChildIndex(prev_ix)];
- forest_[node_right] = forest_[RightChildIndex(prev_ix)];
- }
- break;
+ InitDictionaryBackwardMatch(matches++,
+ max_backward + (dict_id >> 5) + 1, l, dict_id & 31);
}
}
- if (data[cur_ix_masked + len] > data[prev_ix_masked + len]) {
- best_len_left = len;
- if (reroot_tree) {
- forest_[node_left] = static_cast<uint32_t>(prev_ix);
- }
- node_left = RightChildIndex(prev_ix);
- prev_ix = forest_[node_left];
- } else {
- best_len_right = len;
- if (reroot_tree) {
- forest_[node_right] = static_cast<uint32_t>(prev_ix);
- }
- node_right = LeftChildIndex(prev_ix);
- prev_ix = forest_[node_right];
- }
}
- return matches;
- }
-
- inline size_t LeftChildIndex(const size_t pos) {
- return 2 * (pos & window_mask_);
- }
-
- inline size_t RightChildIndex(const size_t pos) {
- return 2 * (pos & window_mask_) + 1;
- }
-
- static uint32_t HashBytes(const uint8_t *data) {
- uint32_t h = BROTLI_UNALIGNED_LOAD32(data) * kHashMul32;
- // The higher bits contain more mixture from the multiplication,
- // so we take our results from there.
- return h >> (32 - kBucketBits);
- }
-
- static const int kBucketBits = 17;
- static const size_t kBucketSize = 1 << kBucketBits;
-
- // The window size minus 1
- size_t window_mask_;
-
- // Hash table that maps the 4-byte hashes of the sequence to the last
- // position where this hash was found, which is the root of the binary
- // tree of sequences that share this hash bucket.
- uint32_t buckets_[kBucketSize];
-
- // The union of the binary trees of each hash bucket. The root of the tree
- // corresponding to a hash is a sequence starting at buckets_[hash] and
- // the left and right children of a sequence starting at pos are
- // forest_[2 * pos] and forest_[2 * pos + 1].
- uint32_t* forest_;
-
- // A position used to mark a non-existent sequence, i.e. a tree is empty if
- // its root is at invalid_pos_ and a node is a leaf if both its children
- // are at invalid_pos_.
- uint32_t invalid_pos_;
-
- bool need_init_;
-};
-
-struct Hashers {
- // For kBucketSweep == 1, enabling the dictionary lookup makes compression
- // a little faster (0.5% - 1%) and it compresses 0.15% better on small text
- // and html inputs.
- typedef HashLongestMatchQuickly<16, 1, true> H2;
- typedef HashLongestMatchQuickly<16, 2, false> H3;
- typedef HashLongestMatchQuickly<17, 4, true> H4;
- typedef HashLongestMatch<14, 4, 4> H5;
- typedef HashLongestMatch<14, 5, 4> H6;
- typedef HashLongestMatch<15, 6, 10> H7;
- typedef HashLongestMatch<15, 7, 10> H8;
- typedef HashLongestMatch<15, 8, 16> H9;
- typedef HashToBinaryTree H10;
-
- Hashers(void) : hash_h2(0), hash_h3(0), hash_h4(0), hash_h5(0),
- hash_h6(0), hash_h7(0), hash_h8(0), hash_h9(0), hash_h10(0) {}
-
- ~Hashers(void) {
- delete hash_h2;
- delete hash_h3;
- delete hash_h4;
- delete hash_h5;
- delete hash_h6;
- delete hash_h7;
- delete hash_h8;
- delete hash_h9;
- delete hash_h10;
}
+ return (size_t)(matches - orig_matches);
+}
- void Init(int type) {
- switch (type) {
- case 2: hash_h2 = new H2; break;
- case 3: hash_h3 = new H3; break;
- case 4: hash_h4 = new H4; break;
- case 5: hash_h5 = new H5; break;
- case 6: hash_h6 = new H6; break;
- case 7: hash_h7 = new H7; break;
- case 8: hash_h8 = new H8; break;
- case 9: hash_h9 = new H9; break;
- case 10: hash_h10 = new H10; break;
- default: break;
- }
- }
+/* Stores the hash of the next 4 bytes and re-roots the binary tree at the
+ current sequence, without returning any matches.
+ REQUIRES: ix + MAX_TREE_COMP_LENGTH <= end-of-current-block */
+static BROTLI_INLINE void FN(Store)(HashToBinaryTree* self, const uint8_t *data,
+ const size_t mask, const size_t ix) {
+  /* Maximum distance is window size - 16, see section 9.1 of the spec. */
+ const size_t max_backward = self->window_mask_ - 15;
+ FN(StoreAndFindMatches)(self, data, ix, mask, MAX_TREE_COMP_LENGTH,
+ max_backward, NULL, NULL);
+}
- template<typename Hasher>
- void WarmupHash(const size_t size, const uint8_t* dict, Hasher* hasher) {
- hasher->Init();
- for (size_t i = 0; i + Hasher::kHashTypeLength - 1 < size; i++) {
- hasher->Store(&dict[i], static_cast<uint32_t>(i));
- }
+static BROTLI_INLINE void FN(StoreRange)(HashToBinaryTree* self,
+ const uint8_t *data, const size_t mask, const size_t ix_start,
+ const size_t ix_end) {
+ size_t i = ix_start + 63 <= ix_end ? ix_end - 63 : ix_start;
+ for (; i < ix_end; ++i) {
+ FN(Store)(self, data, mask, i);
}
+}
-/* Custom LZ77 window. */
- void PrependCustomDictionary(
- int type, int lgwin, const size_t size, const uint8_t* dict) {
- switch (type) {
- case 2: WarmupHash(size, dict, hash_h2); break;
- case 3: WarmupHash(size, dict, hash_h3); break;
- case 4: WarmupHash(size, dict, hash_h4); break;
- case 5: WarmupHash(size, dict, hash_h5); break;
- case 6: WarmupHash(size, dict, hash_h6); break;
- case 7: WarmupHash(size, dict, hash_h7); break;
- case 8: WarmupHash(size, dict, hash_h8); break;
- case 9: WarmupHash(size, dict, hash_h9); break;
- case 10:
- hash_h10->Init(lgwin, 0, size, false);
- for (size_t i = 0; i + kMaxTreeCompLength - 1 < size; ++i) {
- hash_h10->Store(dict, std::numeric_limits<size_t>::max(), i);
- }
- break;
- default: break;
+static BROTLI_INLINE void FN(StitchToPreviousBlock)(HashToBinaryTree* self,
+ size_t num_bytes, size_t position, const uint8_t* ringbuffer,
+ size_t ringbuffer_mask) {
+ if (num_bytes >= FN(HashTypeLength)() - 1 &&
+ position >= MAX_TREE_COMP_LENGTH) {
+ /* Store the last `MAX_TREE_COMP_LENGTH - 1` positions in the hasher.
+ These could not be calculated before, since they require knowledge
+ of both the previous and the current block. */
+ const size_t i_start = position - MAX_TREE_COMP_LENGTH + 1;
+ const size_t i_end = BROTLI_MIN(size_t, position, i_start + num_bytes);
+ size_t i;
+ for (i = i_start; i < i_end; ++i) {
+      /* Maximum distance is window size - 16, see section 9.1 of the spec.
+ Furthermore, we have to make sure that we don't look further back
+ from the start of the next block than the window size, otherwise we
+ could access already overwritten areas of the ringbuffer. */
+ const size_t max_backward =
+ self->window_mask_ - BROTLI_MAX(size_t, 15, position - i);
+ /* We know that i + MAX_TREE_COMP_LENGTH <= position + num_bytes, i.e. the
+ end of the current block and that we have at least
+ MAX_TREE_COMP_LENGTH tail in the ringbuffer. */
+ FN(StoreAndFindMatches)(self, ringbuffer, i, ringbuffer_mask,
+ MAX_TREE_COMP_LENGTH, max_backward, NULL, NULL);
}
}
+}
-
+#undef BUCKET_SIZE
+#undef BUCKET_BITS
+
+#undef HASHER
+
+/* For BUCKET_SWEEP == 1, enabling the dictionary lookup makes compression
+ a little faster (0.5% - 1%) and it compresses 0.15% better on small text
+ and html inputs. */
+
+#define HASHER() H2
+#define BUCKET_BITS 16
+#define BUCKET_SWEEP 1
+#define USE_DICTIONARY 1
+#include "./hash_longest_match_quickly_inc.h" /* NOLINT(build/include) */
+#undef BUCKET_SWEEP
+#undef USE_DICTIONARY
+#undef HASHER
+
+#define HASHER() H3
+#define BUCKET_SWEEP 2
+#define USE_DICTIONARY 0
+#include "./hash_longest_match_quickly_inc.h" /* NOLINT(build/include) */
+#undef USE_DICTIONARY
+#undef BUCKET_SWEEP
+#undef BUCKET_BITS
+#undef HASHER
+
+#define HASHER() H4
+#define BUCKET_BITS 17
+#define BUCKET_SWEEP 4
+#define USE_DICTIONARY 1
+#include "./hash_longest_match_quickly_inc.h" /* NOLINT(build/include) */
+#undef USE_DICTIONARY
+#undef BUCKET_SWEEP
+#undef BUCKET_BITS
+#undef HASHER
+
+#define HASHER() H5
+#define BUCKET_BITS 14
+#define BLOCK_BITS 4
+#define NUM_LAST_DISTANCES_TO_CHECK 4
+#include "./hash_longest_match_inc.h" /* NOLINT(build/include) */
+#undef BLOCK_BITS
+#undef HASHER
+
+#define HASHER() H6
+#define BLOCK_BITS 5
+#include "./hash_longest_match_inc.h" /* NOLINT(build/include) */
+#undef NUM_LAST_DISTANCES_TO_CHECK
+#undef BLOCK_BITS
+#undef BUCKET_BITS
+#undef HASHER
+
+#define HASHER() H7
+#define BUCKET_BITS 15
+#define BLOCK_BITS 6
+#define NUM_LAST_DISTANCES_TO_CHECK 10
+#include "./hash_longest_match_inc.h" /* NOLINT(build/include) */
+#undef BLOCK_BITS
+#undef HASHER
+
+#define HASHER() H8
+#define BLOCK_BITS 7
+#include "./hash_longest_match_inc.h" /* NOLINT(build/include) */
+#undef NUM_LAST_DISTANCES_TO_CHECK
+#undef BLOCK_BITS
+#undef HASHER
+
+#define HASHER() H9
+#define BLOCK_BITS 8
+#define NUM_LAST_DISTANCES_TO_CHECK 16
+#include "./hash_longest_match_inc.h" /* NOLINT(build/include) */
+#undef NUM_LAST_DISTANCES_TO_CHECK
+#undef BLOCK_BITS
+#undef BUCKET_BITS
+#undef HASHER
+
+#undef FN
+#undef CAT
+#undef EXPAND_CAT
+
+typedef struct Hashers {
H2* hash_h2;
H3* hash_h3;
H4* hash_h4;
@@ -968,8 +503,131 @@ struct Hashers {
H8* hash_h8;
H9* hash_h9;
H10* hash_h10;
-};
+} Hashers;
+
+static BROTLI_INLINE void InitHashers(Hashers* self) {
+ self->hash_h2 = 0;
+ self->hash_h3 = 0;
+ self->hash_h4 = 0;
+ self->hash_h5 = 0;
+ self->hash_h6 = 0;
+ self->hash_h7 = 0;
+ self->hash_h8 = 0;
+ self->hash_h9 = 0;
+ self->hash_h10 = 0;
+}
+
+static BROTLI_INLINE void DestroyHashers(MemoryManager* m, Hashers* self) {
+ BROTLI_FREE(m, self->hash_h2);
+ BROTLI_FREE(m, self->hash_h3);
+ BROTLI_FREE(m, self->hash_h4);
+ BROTLI_FREE(m, self->hash_h5);
+ BROTLI_FREE(m, self->hash_h6);
+ BROTLI_FREE(m, self->hash_h7);
+ BROTLI_FREE(m, self->hash_h8);
+ BROTLI_FREE(m, self->hash_h9);
+ if (self->hash_h10) CleanupH10(m, self->hash_h10);
+ BROTLI_FREE(m, self->hash_h10);
+}
+
+static BROTLI_INLINE void HashersSetup(
+ MemoryManager* m, Hashers* self, int type) {
+ switch (type) {
+ case 2:
+ self->hash_h2 = BROTLI_ALLOC(m, H2, 1);
+ if (BROTLI_IS_OOM(m)) return;
+ ResetH2(self->hash_h2);
+ break;
+
+ case 3:
+ self->hash_h3 = BROTLI_ALLOC(m, H3, 1);
+ if (BROTLI_IS_OOM(m)) return;
+ ResetH3(self->hash_h3);
+ break;
+
+ case 4:
+ self->hash_h4 = BROTLI_ALLOC(m, H4, 1);
+ if (BROTLI_IS_OOM(m)) return;
+ ResetH4(self->hash_h4);
+ break;
+
+ case 5:
+ self->hash_h5 = BROTLI_ALLOC(m, H5, 1);
+ if (BROTLI_IS_OOM(m)) return;
+ ResetH5(self->hash_h5);
+ break;
+
+ case 6:
+ self->hash_h6 = BROTLI_ALLOC(m, H6, 1);
+ if (BROTLI_IS_OOM(m)) return;
+ ResetH6(self->hash_h6);
+ break;
+
+ case 7:
+ self->hash_h7 = BROTLI_ALLOC(m, H7, 1);
+ if (BROTLI_IS_OOM(m)) return;
+ ResetH7(self->hash_h7);
+ break;
+
+ case 8:
+ self->hash_h8 = BROTLI_ALLOC(m, H8, 1);
+ if (BROTLI_IS_OOM(m)) return;
+ ResetH8(self->hash_h8);
+ break;
+
+ case 9:
+ self->hash_h9 = BROTLI_ALLOC(m, H9, 1);
+ if (BROTLI_IS_OOM(m)) return;
+ ResetH9(self->hash_h9);
+ break;
+
+ case 10:
+ self->hash_h10 = BROTLI_ALLOC(m, H10, 1);
+ if (BROTLI_IS_OOM(m)) return;
+ InitializeH10(self->hash_h10);
+ break;
+
+ default: break;
+ }
+}
+
+#define _TEMPLATE(Hasher) \
+static BROTLI_INLINE void WarmupHash ## Hasher(MemoryManager* m, \
+ const int lgwin, const size_t size, const uint8_t* dict, Hasher* hasher) { \
+ size_t overlap = (StoreLookahead ## Hasher()) - 1; \
+ size_t i; \
+ Init ## Hasher(m, hasher, dict, lgwin, 0, size, 0); \
+ if (BROTLI_IS_OOM(m)) return; \
+ for (i = 0; i + overlap < size; i++) { \
+ Store ## Hasher(hasher, dict, ~(size_t)0, i); \
+ } \
+}
+_TEMPLATE(H2) _TEMPLATE(H3) _TEMPLATE(H4) _TEMPLATE(H5) _TEMPLATE(H6)
+_TEMPLATE(H7) _TEMPLATE(H8) _TEMPLATE(H9) _TEMPLATE(H10)
+#undef _TEMPLATE
+
+/* Custom LZ77 window. */
+static BROTLI_INLINE void HashersPrependCustomDictionary(
+ MemoryManager* m, Hashers* self, int type, int lgwin, const size_t size,
+ const uint8_t* dict) {
+ switch (type) {
+ case 2: WarmupHashH2(m, lgwin, size, dict, self->hash_h2); break;
+ case 3: WarmupHashH3(m, lgwin, size, dict, self->hash_h3); break;
+ case 4: WarmupHashH4(m, lgwin, size, dict, self->hash_h4); break;
+ case 5: WarmupHashH5(m, lgwin, size, dict, self->hash_h5); break;
+ case 6: WarmupHashH6(m, lgwin, size, dict, self->hash_h6); break;
+ case 7: WarmupHashH7(m, lgwin, size, dict, self->hash_h7); break;
+ case 8: WarmupHashH8(m, lgwin, size, dict, self->hash_h8); break;
+ case 9: WarmupHashH9(m, lgwin, size, dict, self->hash_h9); break;
+ case 10: WarmupHashH10(m, lgwin, size, dict, self->hash_h10); break;
+ default: break;
+ }
+ if (BROTLI_IS_OOM(m)) return;
+}
+
-} // namespace brotli
+#if defined(__cplusplus) || defined(c_plusplus)
+} /* extern "C" */
+#endif
#endif /* BROTLI_ENC_HASH_H_ */
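
The run of HASHER()/BUCKET_BITS definitions and #undef lines above is how the
plain-C encoder emulates the former C++ templates: each hash_*_inc.h body is
included once per parameter set, and FN() pastes the hasher name onto every
identifier it declares. A minimal, self-contained sketch of the idiom follows,
mirroring the FN/CAT/EXPAND_CAT helpers defined earlier in hash.h; the Bits
functions and main() harness are illustrative only, not brotli API.

    /* Token-pasting helpers, mirroring enc/hash.h. */
    #include <stdio.h>

    #define CAT(a, b) a ## b
    #define EXPAND_CAT(a, b) CAT(a, b)        /* expands HASHER() first */
    #define FN(X) EXPAND_CAT(X, HASHER())

    /* "Template body" -- in brotli this lives in a *_inc.h file that is
       #included once per hasher. */
    #define HASHER() H5
    static int FN(Bits)(void) { return 14; }  /* becomes BitsH5 */
    #undef HASHER

    #define HASHER() H6
    static int FN(Bits)(void) { return 15; }  /* becomes BitsH6 */
    #undef HASHER

    int main(void) {
      printf("%d %d\n", BitsH5(), BitsH6());  /* prints: 14 15 */
      return 0;
    }

The EXPAND_CAT indirection is essential: pasting with CAT directly would glue
"Bits" onto the unexpanded token "HASHER" instead of onto H5.
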
diff --git a/enc/hash_longest_match_inc.h b/enc/hash_longest_match_inc.h
new file mode 100644
index 0000000..3c2fbbd
--- /dev/null
+++ b/enc/hash_longest_match_inc.h
@@ -0,0 +1,285 @@
+/* NOLINT(build/header_guard) */
+/* Copyright 2010 Google Inc. All Rights Reserved.
+
+ Distributed under MIT license.
+ See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* template parameters: FN, BUCKET_BITS, BLOCK_BITS,
+ NUM_LAST_DISTANCES_TO_CHECK */
+
+/* A (forgetful) hash table to the data seen by the compressor, to
+ help create backward references to previous data.
+
+ This is a hash map of fixed size (BUCKET_SIZE) to a ring buffer of
+ fixed size (BLOCK_SIZE). The ring buffer contains the last BLOCK_SIZE
+ index positions of the given hash key in the compressed data. */
+
+#define HashLongestMatch HASHER()
+
+/* Number of hash buckets. */
+#define BUCKET_SIZE (1 << BUCKET_BITS)
+
+/* Only BLOCK_SIZE newest backward references are kept,
+ and the older are forgotten. */
+#define BLOCK_SIZE (1u << BLOCK_BITS)
+
+/* Mask for accessing entries in a block (in a ringbuffer manner). */
+#define BLOCK_MASK ((1 << BLOCK_BITS) - 1)
+
+#define HASH_MAP_SIZE (2 << BUCKET_BITS)
+
+static BROTLI_INLINE size_t FN(HashTypeLength)(void) { return 4; }
+static BROTLI_INLINE size_t FN(StoreLookahead)(void) { return 4; }
+
+/* HashBytes is the function that chooses the bucket to place
+ the address in. The HashLongestMatch and HashLongestMatchQuickly
+   hashers have separate, different implementations of hashing. */
+static uint32_t FN(HashBytes)(const uint8_t *data) {
+ uint32_t h = BROTLI_UNALIGNED_LOAD32(data) * kHashMul32;
+ /* The higher bits contain more mixture from the multiplication,
+ so we take our results from there. */
+ return h >> (32 - BUCKET_BITS);
+}
+
+typedef struct HashLongestMatch {
+ /* Number of entries in a particular bucket. */
+ uint16_t num_[BUCKET_SIZE];
+
+ /* Buckets containing BLOCK_SIZE of backward references. */
+ uint32_t buckets_[BLOCK_SIZE << BUCKET_BITS];
+
+ /* True if num_ array needs to be initialized. */
+ int is_dirty_;
+
+ size_t num_dict_lookups_;
+ size_t num_dict_matches_;
+} HashLongestMatch;
+
+static void FN(Reset)(HashLongestMatch* self) {
+ self->is_dirty_ = 1;
+ self->num_dict_lookups_ = 0;
+ self->num_dict_matches_ = 0;
+}
+
+static void FN(InitEmpty)(HashLongestMatch* self) {
+ if (self->is_dirty_) {
+ memset(self->num_, 0, sizeof(self->num_));
+ self->is_dirty_ = 0;
+ }
+}
+
+static void FN(InitForData)(HashLongestMatch* self, const uint8_t* data,
+ size_t num) {
+ size_t i;
+ for (i = 0; i < num; ++i) {
+ const uint32_t key = FN(HashBytes)(&data[i]);
+ self->num_[key] = 0;
+ }
+ if (num != 0) {
+ self->is_dirty_ = 0;
+ }
+}
+
+static void FN(Init)(
+ MemoryManager* m, HashLongestMatch* self, const uint8_t* data, int lgwin,
+ size_t position, size_t bytes, int is_last) {
+  /* Choose which init method is faster: for small inputs, InitForData()
+     clears only the buckets that are actually touched and is roughly 100
+     times faster than a full InitEmpty(). */
+ const size_t kMaxBytesForPartialHashInit = HASH_MAP_SIZE >> 7;
+ BROTLI_UNUSED(m);
+ BROTLI_UNUSED(lgwin);
+ if (position == 0 && is_last && bytes <= kMaxBytesForPartialHashInit) {
+ FN(InitForData)(self, data, bytes);
+ } else {
+ FN(InitEmpty)(self);
+ }
+}
+
+/* Look at 4 bytes at &data[ix & mask].
+ Compute a hash from these, and store the value of ix at that position. */
+static BROTLI_INLINE void FN(Store)(HashLongestMatch* self, const uint8_t *data,
+ const size_t mask, const size_t ix) {
+ const uint32_t key = FN(HashBytes)(&data[ix & mask]);
+ const size_t minor_ix = self->num_[key] & BLOCK_MASK;
+ self->buckets_[minor_ix + (key << BLOCK_BITS)] = (uint32_t)ix;
+ ++self->num_[key];
+}
+
+static BROTLI_INLINE void FN(StoreRange)(HashLongestMatch* self,
+ const uint8_t *data, const size_t mask, const size_t ix_start,
+ const size_t ix_end) {
+ size_t i;
+ for (i = ix_start; i < ix_end; ++i) {
+ FN(Store)(self, data, mask, i);
+ }
+}
+
+static BROTLI_INLINE void FN(StitchToPreviousBlock)(HashLongestMatch* self,
+ size_t num_bytes, size_t position, const uint8_t* ringbuffer,
+ size_t ringbuffer_mask) {
+ if (num_bytes >= FN(HashTypeLength)() - 1 && position >= 3) {
+ /* Prepare the hashes for three last bytes of the last write.
+ These could not be calculated before, since they require knowledge
+ of both the previous and the current block. */
+ FN(Store)(self, ringbuffer, ringbuffer_mask, position - 3);
+ FN(Store)(self, ringbuffer, ringbuffer_mask, position - 2);
+ FN(Store)(self, ringbuffer, ringbuffer_mask, position - 1);
+ }
+}
+
+/* Finds the longest backward match of &data[cur_ix] up to the length of
+   max_length and stores the position cur_ix in the hash table.
+
+   Does not look for matches longer than max_length.
+   Does not look for matches further away than max_backward.
+   Writes the best found match length into best_len_out.
+   Writes the backward distance of the best match (for a static dictionary
+   match, a synthetic distance beyond max_backward) into best_distance_out.
+   Writes the score of the best match into best_score_out.
+   Returns 1 when a match is found, otherwise 0. */
+static BROTLI_INLINE int FN(FindLongestMatch)(HashLongestMatch* self,
+ const uint8_t* BROTLI_RESTRICT data, const size_t ring_buffer_mask,
+ const int* BROTLI_RESTRICT distance_cache, const size_t cur_ix,
+ const size_t max_length, const size_t max_backward,
+ size_t* BROTLI_RESTRICT best_len_out,
+ size_t* BROTLI_RESTRICT best_len_code_out,
+ size_t* BROTLI_RESTRICT best_distance_out,
+ double* BROTLI_RESTRICT best_score_out) {
+ const size_t cur_ix_masked = cur_ix & ring_buffer_mask;
+ int is_match_found = 0;
+ /* Don't accept a short copy from far away. */
+ double best_score = *best_score_out;
+ size_t best_len = *best_len_out;
+ size_t i;
+ *best_len_code_out = 0;
+ *best_len_out = 0;
+ /* Try last distance first. */
+ for (i = 0; i < NUM_LAST_DISTANCES_TO_CHECK; ++i) {
+ const size_t idx = kDistanceCacheIndex[i];
+ const size_t backward =
+ (size_t)(distance_cache[idx] + kDistanceCacheOffset[i]);
+ size_t prev_ix = (size_t)(cur_ix - backward);
+ if (prev_ix >= cur_ix) {
+ continue;
+ }
+ if (PREDICT_FALSE(backward > max_backward)) {
+ continue;
+ }
+ prev_ix &= ring_buffer_mask;
+
+ if (cur_ix_masked + best_len > ring_buffer_mask ||
+ prev_ix + best_len > ring_buffer_mask ||
+ data[cur_ix_masked + best_len] != data[prev_ix + best_len]) {
+ continue;
+ }
+ {
+ const size_t len = FindMatchLengthWithLimit(&data[prev_ix],
+ &data[cur_ix_masked],
+ max_length);
+ if (len >= 3 || (len == 2 && i < 2)) {
+        /* Comparing for >= 2 does not change the semantics, but just saves
+           a few unnecessary binary logarithms in the backward reference score,
+ since we are not interested in such short matches. */
+ double score = BackwardReferenceScoreUsingLastDistance(len, i);
+ if (best_score < score) {
+ best_score = score;
+ best_len = len;
+ *best_len_out = best_len;
+ *best_len_code_out = best_len;
+ *best_distance_out = backward;
+ *best_score_out = best_score;
+ is_match_found = 1;
+ }
+ }
+ }
+ }
+ {
+ const uint32_t key = FN(HashBytes)(&data[cur_ix_masked]);
+ const uint32_t * BROTLI_RESTRICT const bucket =
+ &self->buckets_[key << BLOCK_BITS];
+ const size_t down =
+ (self->num_[key] > BLOCK_SIZE) ? (self->num_[key] - BLOCK_SIZE) : 0u;
+ for (i = self->num_[key]; i > down;) {
+ size_t prev_ix = bucket[--i & BLOCK_MASK];
+ const size_t backward = cur_ix - prev_ix;
+ if (PREDICT_FALSE(backward == 0 || backward > max_backward)) {
+ break;
+ }
+ prev_ix &= ring_buffer_mask;
+ if (cur_ix_masked + best_len > ring_buffer_mask ||
+ prev_ix + best_len > ring_buffer_mask ||
+ data[cur_ix_masked + best_len] != data[prev_ix + best_len]) {
+ continue;
+ }
+ {
+ const size_t len = FindMatchLengthWithLimit(&data[prev_ix],
+ &data[cur_ix_masked],
+ max_length);
+ if (len >= 4) {
+ /* Comparing for >= 3 does not change the semantics, but just saves
+             a few unnecessary binary logarithms in the backward reference
+ score, since we are not interested in such short matches. */
+ double score = BackwardReferenceScore(len, backward);
+ if (best_score < score) {
+ best_score = score;
+ best_len = len;
+ *best_len_out = best_len;
+ *best_len_code_out = best_len;
+ *best_distance_out = backward;
+ *best_score_out = best_score;
+ is_match_found = 1;
+ }
+ }
+ }
+ }
+ self->buckets_[(key << BLOCK_BITS) + (self->num_[key] & BLOCK_MASK)] =
+ (uint32_t)cur_ix;
+ ++self->num_[key];
+ }
+ if (!is_match_found &&
+ self->num_dict_matches_ >= (self->num_dict_lookups_ >> 7)) {
+ size_t dict_key = Hash14(&data[cur_ix_masked]) << 1;
+ int k;
+ for (k = 0; k < 2; ++k, ++dict_key) {
+ const uint16_t v = kStaticDictionaryHash[dict_key];
+ ++self->num_dict_lookups_;
+ if (v > 0) {
+ const size_t len = v & 31;
+ const size_t dist = v >> 5;
+ const size_t offset =
+ kBrotliDictionaryOffsetsByLength[len] + len * dist;
+ if (len <= max_length) {
+ const size_t matchlen =
+ FindMatchLengthWithLimit(&data[cur_ix_masked],
+ &kBrotliDictionary[offset], len);
+ if (matchlen + kCutoffTransformsCount > len && matchlen > 0) {
+ const size_t transform_id = kCutoffTransforms[len - matchlen];
+ const size_t word_id = dist +
+ transform_id * (1u << kBrotliDictionarySizeBitsByLength[len]);
+ const size_t backward = max_backward + word_id + 1;
+ double score = BackwardReferenceScore(matchlen, backward);
+ if (best_score < score) {
+ ++self->num_dict_matches_;
+ best_score = score;
+ best_len = matchlen;
+ *best_len_out = best_len;
+ *best_len_code_out = len;
+ *best_distance_out = backward;
+ *best_score_out = best_score;
+ is_match_found = 1;
+ }
+ }
+ }
+ }
+ }
+ }
+ return is_match_found;
+}
+
+#undef HASH_MAP_SIZE
+#undef BLOCK_MASK
+#undef BLOCK_SIZE
+#undef BUCKET_SIZE
+
+#undef HashLongestMatch
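
To make the bookkeeping concrete: with the H5 parameters (BUCKET_BITS = 14,
BLOCK_BITS = 4) the table above holds 16384 buckets of 16 slots each, i.e.
262144 uint32_t entries (1 MiB) plus 16384 uint16_t counters. Below is a
standalone sketch of the slot arithmetic used by FN(Store); SlotIndex and the
main() harness are illustrative, not brotli API.

    #include <stddef.h>
    #include <stdint.h>
    #include <stdio.h>

    enum { BUCKET_BITS = 14, BLOCK_BITS = 4 };
    enum { BLOCK_SIZE = 1 << BLOCK_BITS, BLOCK_MASK = BLOCK_SIZE - 1 };

    /* Each bucket is a tiny ring buffer: slot (num & BLOCK_MASK) is the one
       overwritten next, so only the BLOCK_SIZE newest positions survive. */
    static size_t SlotIndex(uint32_t key, uint16_t num) {
      return ((size_t)key << BLOCK_BITS) + (num & BLOCK_MASK);
    }

    int main(void) {
      /* The 17th store to bucket 3 wraps and evicts its oldest entry. */
      printf("%zu %zu\n", SlotIndex(3, 0), SlotIndex(3, 16));  /* 48 48 */
      printf("%d\n", BLOCK_SIZE << BUCKET_BITS);               /* 262144 */
      return 0;
    }
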
diff --git a/enc/hash_longest_match_quickly_inc.h b/enc/hash_longest_match_quickly_inc.h
new file mode 100644
index 0000000..dedc36c
--- /dev/null
+++ b/enc/hash_longest_match_quickly_inc.h
@@ -0,0 +1,268 @@
+/* NOLINT(build/header_guard) */
+/* Copyright 2010 Google Inc. All Rights Reserved.
+
+ Distributed under MIT license.
+ See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* template parameters: FN, BUCKET_BITS, BUCKET_SWEEP, USE_DICTIONARY */
+
+#define HashLongestMatchQuickly HASHER()
+
+#define BUCKET_SIZE (1 << BUCKET_BITS)
+
+#define HASH_MAP_SIZE (4 << BUCKET_BITS)
+
+static BROTLI_INLINE size_t FN(HashTypeLength)(void) { return 8; }
+static BROTLI_INLINE size_t FN(StoreLookahead)(void) { return 8; }
+
+/* HashBytes is the function that chooses the bucket to place
+ the address in. The HashLongestMatch and HashLongestMatchQuickly
+   hashers have separate, different implementations of hashing. */
+static uint32_t FN(HashBytes)(const uint8_t *data) {
+ /* Computing a hash based on 5 bytes works much better for
+     qualities 1 and 3, where the next hash value is likely to replace
+     the one just stored. */
+ uint64_t h = (BROTLI_UNALIGNED_LOAD64(data) << 24) * kHashMul32;
+ /* The higher bits contain more mixture from the multiplication,
+ so we take our results from there. */
+ return (uint32_t)(h >> (64 - BUCKET_BITS));
+}
+
+/* A (forgetful) hash table to the data seen by the compressor, to
+ help create backward references to previous data.
+
+ This is a hash map of fixed size (BUCKET_SIZE). Starting from the
+ given index, BUCKET_SWEEP buckets are used to store values of a key. */
+typedef struct HashLongestMatchQuickly {
+ uint32_t buckets_[BUCKET_SIZE + BUCKET_SWEEP];
+ /* True if buckets_ array needs to be initialized. */
+ int is_dirty_;
+ size_t num_dict_lookups_;
+ size_t num_dict_matches_;
+} HashLongestMatchQuickly;
+
+static void FN(Reset)(HashLongestMatchQuickly* self) {
+ self->is_dirty_ = 1;
+ self->num_dict_lookups_ = 0;
+ self->num_dict_matches_ = 0;
+}
+
+static void FN(InitEmpty)(HashLongestMatchQuickly* self) {
+ if (self->is_dirty_) {
+ /* It is not strictly necessary to fill this buffer here, but
+ not filling will make the results of the compression stochastic
+ (but correct). This is because random data would cause the
+ system to find accidentally good backward references here and there. */
+ memset(&self->buckets_[0], 0, sizeof(self->buckets_));
+ self->is_dirty_ = 0;
+ }
+}
+
+static void FN(InitForData)(HashLongestMatchQuickly* self, const uint8_t* data,
+ size_t num) {
+ size_t i;
+ for (i = 0; i < num; ++i) {
+ const uint32_t key = FN(HashBytes)(&data[i]);
+ memset(&self->buckets_[key], 0, BUCKET_SWEEP * sizeof(self->buckets_[0]));
+ }
+ if (num != 0) {
+ self->is_dirty_ = 0;
+ }
+}
+
+static void FN(Init)(
+ MemoryManager* m, HashLongestMatchQuickly* self, const uint8_t* data,
+ int lgwin, size_t position, size_t bytes, int is_last) {
+  /* Choose which init method is faster: for small inputs, InitForData()
+     clears only the buckets that are actually touched and is roughly 100
+     times faster than a full InitEmpty(). */
+ const size_t kMaxBytesForPartialHashInit = HASH_MAP_SIZE >> 7;
+ BROTLI_UNUSED(m);
+ BROTLI_UNUSED(lgwin);
+ if (position == 0 && is_last && bytes <= kMaxBytesForPartialHashInit) {
+ FN(InitForData)(self, data, bytes);
+ } else {
+ FN(InitEmpty)(self);
+ }
+}
+
+/* Look at 5 bytes at &data[ix & mask].
+   Compute a hash from these, and store the value of ix in one of the
+   BUCKET_SWEEP slots of the hashed bucket. */
+static BROTLI_INLINE void FN(Store)(HashLongestMatchQuickly* self,
+ const uint8_t *data, const size_t mask, const size_t ix) {
+ const uint32_t key = FN(HashBytes)(&data[ix & mask]);
+ /* Wiggle the value with the bucket sweep range. */
+ const uint32_t off = (ix >> 3) % BUCKET_SWEEP;
+ self->buckets_[key + off] = (uint32_t)ix;
+}
+
+static BROTLI_INLINE void FN(StoreRange)(HashLongestMatchQuickly* self,
+ const uint8_t *data, const size_t mask, const size_t ix_start,
+ const size_t ix_end) {
+ size_t i;
+ for (i = ix_start; i < ix_end; ++i) {
+ FN(Store)(self, data, mask, i);
+ }
+}
+
+static BROTLI_INLINE void FN(StitchToPreviousBlock)(
+ HashLongestMatchQuickly* self, size_t num_bytes, size_t position,
+ const uint8_t* ringbuffer, size_t ringbuffer_mask) {
+ if (num_bytes >= FN(HashTypeLength)() - 1 && position >= 3) {
+ /* Prepare the hashes for three last bytes of the last write.
+ These could not be calculated before, since they require knowledge
+ of both the previous and the current block. */
+ FN(Store)(self, ringbuffer, ringbuffer_mask, position - 3);
+ FN(Store)(self, ringbuffer, ringbuffer_mask, position - 2);
+ FN(Store)(self, ringbuffer, ringbuffer_mask, position - 1);
+ }
+}
+
+/* Finds the longest backward match of &ring_buffer[cur_ix & ring_buffer_mask]
+   up to the length of max_length and stores the position cur_ix in the
+   hash table.
+
+   Does not look for matches longer than max_length.
+   Does not look for matches further away than max_backward.
+   Writes the best found match length into best_len_out.
+   Writes the backward distance of the best match into best_distance_out.
+   Writes the score of the best match into best_score_out.
+   Returns 1 if a match is found, otherwise 0. */
+static BROTLI_INLINE int FN(FindLongestMatch)(HashLongestMatchQuickly* self,
+ const uint8_t* BROTLI_RESTRICT ring_buffer, const size_t ring_buffer_mask,
+ const int* BROTLI_RESTRICT distance_cache, const size_t cur_ix,
+ const size_t max_length, const size_t max_backward,
+ size_t* BROTLI_RESTRICT best_len_out,
+ size_t* BROTLI_RESTRICT best_len_code_out,
+ size_t* BROTLI_RESTRICT best_distance_out,
+ double* BROTLI_RESTRICT best_score_out) {
+ const size_t best_len_in = *best_len_out;
+ const size_t cur_ix_masked = cur_ix & ring_buffer_mask;
+ const uint32_t key = FN(HashBytes)(&ring_buffer[cur_ix_masked]);
+ int compare_char = ring_buffer[cur_ix_masked + best_len_in];
+ double best_score = *best_score_out;
+ size_t best_len = best_len_in;
+ size_t cached_backward = (size_t)distance_cache[0];
+ size_t prev_ix = cur_ix - cached_backward;
+ int is_match_found = 0;
+ if (prev_ix < cur_ix) {
+ prev_ix &= (uint32_t)ring_buffer_mask;
+ if (compare_char == ring_buffer[prev_ix + best_len]) {
+ size_t len = FindMatchLengthWithLimit(&ring_buffer[prev_ix],
+ &ring_buffer[cur_ix_masked],
+ max_length);
+ if (len >= 4) {
+ best_score = BackwardReferenceScoreUsingLastDistance(len, 0);
+ best_len = len;
+ *best_len_out = len;
+ *best_len_code_out = len;
+ *best_distance_out = cached_backward;
+ *best_score_out = best_score;
+ compare_char = ring_buffer[cur_ix_masked + best_len];
+ if (BUCKET_SWEEP == 1) {
+ self->buckets_[key] = (uint32_t)cur_ix;
+ return 1;
+ } else {
+ is_match_found = 1;
+ }
+ }
+ }
+ }
+ if (BUCKET_SWEEP == 1) {
+ size_t backward;
+ size_t len;
+ /* Only one to look for, don't bother to prepare for a loop. */
+ prev_ix = self->buckets_[key];
+ self->buckets_[key] = (uint32_t)cur_ix;
+ backward = cur_ix - prev_ix;
+ prev_ix &= (uint32_t)ring_buffer_mask;
+ if (compare_char != ring_buffer[prev_ix + best_len_in]) {
+ return 0;
+ }
+ if (PREDICT_FALSE(backward == 0 || backward > max_backward)) {
+ return 0;
+ }
+ len = FindMatchLengthWithLimit(&ring_buffer[prev_ix],
+ &ring_buffer[cur_ix_masked],
+ max_length);
+ if (len >= 4) {
+ *best_len_out = len;
+ *best_len_code_out = len;
+ *best_distance_out = backward;
+ *best_score_out = BackwardReferenceScore(len, backward);
+ return 1;
+ }
+ } else {
+ uint32_t *bucket = self->buckets_ + key;
+ int i;
+ prev_ix = *bucket++;
+ for (i = 0; i < BUCKET_SWEEP; ++i, prev_ix = *bucket++) {
+ const size_t backward = cur_ix - prev_ix;
+ size_t len;
+ prev_ix &= (uint32_t)ring_buffer_mask;
+ if (compare_char != ring_buffer[prev_ix + best_len]) {
+ continue;
+ }
+ if (PREDICT_FALSE(backward == 0 || backward > max_backward)) {
+ continue;
+ }
+ len = FindMatchLengthWithLimit(&ring_buffer[prev_ix],
+ &ring_buffer[cur_ix_masked],
+ max_length);
+ if (len >= 4) {
+ const double score = BackwardReferenceScore(len, backward);
+ if (best_score < score) {
+ best_score = score;
+ best_len = len;
+ *best_len_out = best_len;
+ *best_len_code_out = best_len;
+ *best_distance_out = backward;
+ *best_score_out = score;
+ compare_char = ring_buffer[cur_ix_masked + best_len];
+ is_match_found = 1;
+ }
+ }
+ }
+ }
+ if (USE_DICTIONARY && !is_match_found &&
+ self->num_dict_matches_ >= (self->num_dict_lookups_ >> 7)) {
+ const uint32_t dict_key = Hash14(&ring_buffer[cur_ix_masked]) << 1;
+ const uint16_t v = kStaticDictionaryHash[dict_key];
+ ++self->num_dict_lookups_;
+ if (v > 0) {
+ const uint32_t len = v & 31;
+ const uint32_t dist = v >> 5;
+ const size_t offset =
+ kBrotliDictionaryOffsetsByLength[len] + len * dist;
+ if (len <= max_length) {
+ const size_t matchlen =
+ FindMatchLengthWithLimit(&ring_buffer[cur_ix_masked],
+ &kBrotliDictionary[offset], len);
+ if (matchlen + kCutoffTransformsCount > len && matchlen > 0) {
+ const size_t transform_id = kCutoffTransforms[len - matchlen];
+ const size_t word_id = dist +
+ transform_id * (1u << kBrotliDictionarySizeBitsByLength[len]);
+ const size_t backward = max_backward + word_id + 1;
+ const double score = BackwardReferenceScore(matchlen, backward);
+ if (best_score < score) {
+ ++self->num_dict_matches_;
+ best_score = score;
+ best_len = matchlen;
+ *best_len_out = best_len;
+ *best_len_code_out = len;
+ *best_distance_out = backward;
+ *best_score_out = best_score;
+ is_match_found = 1;
+ }
+ }
+ }
+ }
+ }
+ self->buckets_[key + ((cur_ix >> 3) % BUCKET_SWEEP)] = (uint32_t)cur_ix;
+ return is_match_found;
+}
+
+#undef HASH_MAP_SIZE
+#undef BUCKET_SIZE
+
+#undef HashLongestMatchQuickly
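
The "wiggle" in FN(Store) above is worth a worked example. With
BUCKET_SWEEP = 4 (the H4 configuration), (ix >> 3) % BUCKET_SWEEP keeps each
run of 8 consecutive positions in one slot and rotates to the next slot for
the following run, so a lookup scanning [key .. key + BUCKET_SWEEP - 1] sees
several distinct recent candidates. A standalone sketch (the harness is
illustrative, not brotli API):

    #include <stddef.h>
    #include <stdint.h>
    #include <stdio.h>

    enum { BUCKET_SWEEP = 4 };

    static uint32_t SlotOffset(size_t ix) {
      return (uint32_t)((ix >> 3) % BUCKET_SWEEP);
    }

    int main(void) {
      size_t ix;
      for (ix = 0; ix < 64; ix += 8) {
        /* positions 0,8,16,... rotate through offsets 0,1,2,3,0,... */
        printf("ix=%2zu -> slot offset %u\n", ix, SlotOffset(ix));
      }
      return 0;
    }
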
diff --git a/enc/histogram.c b/enc/histogram.c
index 537a275..a8f0413 100644
--- a/enc/histogram.c
+++ b/enc/histogram.c
@@ -8,60 +8,88 @@
#include "./histogram.h"
-#include <cmath>
-
#include "./block_splitter.h"
#include "./command.h"
#include "./context.h"
-#include "./prefix.h"
-namespace brotli {
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+typedef struct BlockSplitIterator {
+ const BlockSplit* split_; /* Not owned. */
+ size_t idx_;
+ size_t type_;
+ size_t length_;
+} BlockSplitIterator;
+
+static void InitBlockSplitIterator(BlockSplitIterator* self,
+ const BlockSplit* split) {
+ self->split_ = split;
+ self->idx_ = 0;
+ self->type_ = 0;
+ self->length_ = split->lengths ? split->lengths[0] : 0;
+}
+
+static void BlockSplitIteratorNext(BlockSplitIterator* self) {
+ if (self->length_ == 0) {
+ ++self->idx_;
+ self->type_ = self->split_->types[self->idx_];
+ self->length_ = self->split_->lengths[self->idx_];
+ }
+ --self->length_;
+}
-void BuildHistograms(
- const Command* cmds,
- const size_t num_commands,
- const BlockSplit& literal_split,
- const BlockSplit& insert_and_copy_split,
- const BlockSplit& dist_split,
- const uint8_t* ringbuffer,
- size_t start_pos,
- size_t mask,
- uint8_t prev_byte,
- uint8_t prev_byte2,
- const std::vector<ContextType>& context_modes,
- std::vector<HistogramLiteral>* literal_histograms,
- std::vector<HistogramCommand>* insert_and_copy_histograms,
- std::vector<HistogramDistance>* copy_dist_histograms) {
+void BrotliBuildHistogramsWithContext(
+ const Command* cmds, const size_t num_commands,
+ const BlockSplit* literal_split, const BlockSplit* insert_and_copy_split,
+ const BlockSplit* dist_split, const uint8_t* ringbuffer, size_t start_pos,
+ size_t mask, uint8_t prev_byte, uint8_t prev_byte2,
+ const ContextType* context_modes, HistogramLiteral* literal_histograms,
+ HistogramCommand* insert_and_copy_histograms,
+ HistogramDistance* copy_dist_histograms) {
size_t pos = start_pos;
- BlockSplitIterator literal_it(literal_split);
- BlockSplitIterator insert_and_copy_it(insert_and_copy_split);
- BlockSplitIterator dist_it(dist_split);
- for (size_t i = 0; i < num_commands; ++i) {
- const Command &cmd = cmds[i];
- insert_and_copy_it.Next();
- (*insert_and_copy_histograms)[insert_and_copy_it.type_].Add(
- cmd.cmd_prefix_);
- for (size_t j = cmd.insert_len_; j != 0; --j) {
- literal_it.Next();
- size_t context = (literal_it.type_ << kLiteralContextBits) +
+ BlockSplitIterator literal_it;
+ BlockSplitIterator insert_and_copy_it;
+ BlockSplitIterator dist_it;
+ size_t i;
+
+ InitBlockSplitIterator(&literal_it, literal_split);
+ InitBlockSplitIterator(&insert_and_copy_it, insert_and_copy_split);
+ InitBlockSplitIterator(&dist_it, dist_split);
+ for (i = 0; i < num_commands; ++i) {
+ const Command* cmd = &cmds[i];
+ size_t j;
+ BlockSplitIteratorNext(&insert_and_copy_it);
+ HistogramAddCommand(&insert_and_copy_histograms[insert_and_copy_it.type_],
+ cmd->cmd_prefix_);
+ for (j = cmd->insert_len_; j != 0; --j) {
+ size_t context;
+ BlockSplitIteratorNext(&literal_it);
+ context = (literal_it.type_ << BROTLI_LITERAL_CONTEXT_BITS) +
Context(prev_byte, prev_byte2, context_modes[literal_it.type_]);
- (*literal_histograms)[context].Add(ringbuffer[pos & mask]);
+ HistogramAddLiteral(&literal_histograms[context],
+ ringbuffer[pos & mask]);
prev_byte2 = prev_byte;
prev_byte = ringbuffer[pos & mask];
++pos;
}
- pos += cmd.copy_len();
- if (cmd.copy_len()) {
+ pos += CommandCopyLen(cmd);
+ if (CommandCopyLen(cmd)) {
prev_byte2 = ringbuffer[(pos - 2) & mask];
prev_byte = ringbuffer[(pos - 1) & mask];
- if (cmd.cmd_prefix_ >= 128) {
- dist_it.Next();
- size_t context = (dist_it.type_ << kDistanceContextBits) +
- cmd.DistanceContext();
- (*copy_dist_histograms)[context].Add(cmd.dist_prefix_);
+ if (cmd->cmd_prefix_ >= 128) {
+ size_t context;
+ BlockSplitIteratorNext(&dist_it);
+ context = (dist_it.type_ << BROTLI_DISTANCE_CONTEXT_BITS) +
+ CommandDistanceContext(cmd);
+ HistogramAddDistance(&copy_dist_histograms[context],
+ cmd->dist_prefix_);
}
}
}
}
-} // namespace brotli
+#if defined(__cplusplus) || defined(c_plusplus)
+} /* extern "C" */
+#endif
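
BlockSplitIterator above replays a run-length encoded block split: types[]
gives the block type of each run and lengths[] its length, and every call to
BlockSplitIteratorNext() advances one position. A standalone sketch of the
same replay loop over made-up data (the arrays and harness are illustrative):

    #include <stddef.h>
    #include <stdint.h>
    #include <stdio.h>

    int main(void) {
      const uint8_t types[] = {0, 1, 0};      /* three runs */
      const uint32_t lengths[] = {3, 2, 4};   /* of lengths 3, 2, 4 */
      size_t idx = 0, type = 0, length = lengths[0];
      size_t pos;
      for (pos = 0; pos < 9; ++pos) {
        if (length == 0) {                    /* advance to the next run */
          ++idx;
          type = types[idx];
          length = lengths[idx];
        }
        --length;
        printf("%zu", type);                  /* prints 000110000 */
      }
      printf("\n");
      return 0;
    }
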
diff --git a/enc/histogram.h b/enc/histogram.h
index 2287b59..c9bcead 100644
--- a/enc/histogram.h
+++ b/enc/histogram.h
@@ -9,87 +9,52 @@
#ifndef BROTLI_ENC_HISTOGRAM_H_
#define BROTLI_ENC_HISTOGRAM_H_
-#include <cstring>
-#include <limits>
-#include <vector>
+#include <string.h> /* memset */
+#include "../common/constants.h"
#include "../common/types.h"
+#include "./block_splitter.h"
#include "./command.h"
#include "./context.h"
-#include "./fast_log.h"
-#include "./prefix.h"
-
-namespace brotli {
-
-struct BlockSplit;
-
-// A simple container for histograms of data in blocks.
-template<int kDataSize>
-struct Histogram {
- Histogram(void) {
- Clear();
- }
- void Clear(void) {
- memset(data_, 0, sizeof(data_));
- total_count_ = 0;
- bit_cost_ = std::numeric_limits<double>::infinity();
- }
- void Add(size_t val) {
- ++data_[val];
- ++total_count_;
- }
- void Remove(size_t val) {
- --data_[val];
- --total_count_;
- }
- template<typename DataType>
- void Add(const DataType *p, size_t n) {
- total_count_ += n;
- n += 1;
- while(--n) ++data_[*p++];
- }
- void AddHistogram(const Histogram& v) {
- total_count_ += v.total_count_;
- for (size_t i = 0; i < kDataSize; ++i) {
- data_[i] += v.data_[i];
- }
- }
-
- uint32_t data_[kDataSize];
- size_t total_count_;
- double bit_cost_;
-};
-
-// Literal histogram.
-typedef Histogram<256> HistogramLiteral;
-// Prefix histograms.
-typedef Histogram<kNumCommandPrefixes> HistogramCommand;
-typedef Histogram<kNumDistancePrefixes> HistogramDistance;
-typedef Histogram<kNumBlockLenPrefixes> HistogramBlockLength;
-// Context map histogram, 256 Huffman tree indexes + 16 run length codes.
-typedef Histogram<272> HistogramContextMap;
-// Block type histogram, 256 block types + 2 special symbols.
-typedef Histogram<258> HistogramBlockType;
-
-static const size_t kLiteralContextBits = 6;
-static const size_t kDistanceContextBits = 2;
-
-void BuildHistograms(
- const Command* cmds,
- const size_t num_commands,
- const BlockSplit& literal_split,
- const BlockSplit& insert_and_copy_split,
- const BlockSplit& dist_split,
- const uint8_t* ringbuffer,
- size_t pos,
- size_t mask,
- uint8_t prev_byte,
- uint8_t prev_byte2,
- const std::vector<ContextType>& context_modes,
- std::vector<HistogramLiteral>* literal_histograms,
- std::vector<HistogramCommand>* insert_and_copy_histograms,
- std::vector<HistogramDistance>* copy_dist_histograms);
-
-} // namespace brotli
+#include "./port.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+#define FN(X) X ## Literal
+#define DATA_SIZE BROTLI_NUM_LITERAL_SYMBOLS
+#define DataType uint8_t
+#include "./histogram_inc.h" /* NOLINT(build/include) */
+#undef DataType
+#undef DATA_SIZE
+#undef FN
+
+#define FN(X) X ## Command
+#define DataType uint16_t
+#define DATA_SIZE BROTLI_NUM_COMMAND_SYMBOLS
+#include "./histogram_inc.h" /* NOLINT(build/include) */
+#undef DATA_SIZE
+#undef FN
+
+#define FN(X) X ## Distance
+#define DATA_SIZE BROTLI_NUM_DISTANCE_SYMBOLS
+#include "./histogram_inc.h" /* NOLINT(build/include) */
+#undef DataType
+#undef DATA_SIZE
+#undef FN
+
+BROTLI_INTERNAL void BrotliBuildHistogramsWithContext(
+ const Command* cmds, const size_t num_commands,
+ const BlockSplit* literal_split, const BlockSplit* insert_and_copy_split,
+ const BlockSplit* dist_split, const uint8_t* ringbuffer, size_t pos,
+ size_t mask, uint8_t prev_byte, uint8_t prev_byte2,
+ const ContextType* context_modes, HistogramLiteral* literal_histograms,
+ HistogramCommand* insert_and_copy_histograms,
+ HistogramDistance* copy_dist_histograms);
+
+#if defined(__cplusplus) || defined(c_plusplus)
+} /* extern "C" */
+#endif
#endif /* BROTLI_ENC_HISTOGRAM_H_ */
diff --git a/enc/histogram_inc.h b/enc/histogram_inc.h
new file mode 100644
index 0000000..7807036
--- /dev/null
+++ b/enc/histogram_inc.h
@@ -0,0 +1,51 @@
+/* NOLINT(build/header_guard) */
+/* Copyright 2013 Google Inc. All Rights Reserved.
+
+ Distributed under MIT license.
+ See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* template parameters: Histogram, DATA_SIZE, DataType */
+
+/* A simple container for histograms of data in blocks. */
+
+typedef struct FN(Histogram) {
+ uint32_t data_[DATA_SIZE];
+ size_t total_count_;
+ double bit_cost_;
+} FN(Histogram);
+
+static BROTLI_INLINE void FN(HistogramClear)(FN(Histogram)* self) {
+ memset(self->data_, 0, sizeof(self->data_));
+ self->total_count_ = 0;
+ self->bit_cost_ = HUGE_VAL;
+}
+
+static BROTLI_INLINE void FN(ClearHistograms)(
+ FN(Histogram)* array, size_t length) {
+ size_t i;
+ for (i = 0; i < length; ++i) FN(HistogramClear)(array + i);
+}
+
+static BROTLI_INLINE void FN(HistogramAdd)(FN(Histogram)* self, size_t val) {
+ ++self->data_[val];
+ ++self->total_count_;
+}
+
+static BROTLI_INLINE void FN(HistogramAddVector)(FN(Histogram)* self,
+ const DataType *p, size_t n) {
+ self->total_count_ += n;
+ n += 1;
+ while (--n) ++self->data_[*p++];
+}
+
+static BROTLI_INLINE void FN(HistogramAddHistogram)(FN(Histogram)* self,
+ const FN(Histogram)* v) {
+ size_t i;
+ self->total_count_ += v->total_count_;
+ for (i = 0; i < DATA_SIZE; ++i) {
+ self->data_[i] += v->data_[i];
+ }
+}
+
+static BROTLI_INLINE size_t FN(HistogramDataSize)(void) { return DATA_SIZE; }
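
histogram_inc.h is another instance of the include-as-template pattern:
histogram.h defines FN(X) as e.g. X ## Literal plus a DATA_SIZE before
including it, stamping out HistogramLiteral, HistogramAddLiteral, and so on
for each alphabet. A compilable sketch of one expansion (the struct is
trimmed -- the real container also carries bit_cost_ -- and the main()
harness is illustrative):

    #include <stddef.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    #define FN(X) X ## Literal
    #define DATA_SIZE 256

    typedef struct FN(Histogram) {     /* -> struct HistogramLiteral */
      uint32_t data_[DATA_SIZE];
      size_t total_count_;
    } FN(Histogram);

    static void FN(HistogramAdd)(FN(Histogram)* self, size_t val) {
      ++self->data_[val];              /* -> HistogramAddLiteral */
      ++self->total_count_;
    }

    #undef DATA_SIZE
    #undef FN

    int main(void) {
      HistogramLiteral h;
      memset(&h, 0, sizeof(h));
      HistogramAddLiteral(&h, 'a');
      printf("%u %zu\n", h.data_['a'], h.total_count_);  /* 1 1 */
      return 0;
    }
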
diff --git a/enc/literal_cost.c b/enc/literal_cost.c
index 2560ee7..301edc8 100644
--- a/enc/literal_cost.c
+++ b/enc/literal_cost.c
@@ -9,27 +9,26 @@
#include "./literal_cost.h"
-#include <math.h>
-
-#include <algorithm>
-
#include "../common/types.h"
#include "./fast_log.h"
+#include "./port.h"
#include "./utf8_util.h"
-namespace brotli {
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
static size_t UTF8Position(size_t last, size_t c, size_t clamp) {
if (c < 128) {
return 0; /* Next one is the 'Byte 1' again. */
} else if (c >= 192) { /* Next one is the 'Byte 2' of utf-8 encoding. */
- return std::min<size_t>(1, clamp);
+ return BROTLI_MIN(size_t, 1, clamp);
} else {
/* Let's decide over the last byte if this ends the sequence. */
if (last < 0xe0) {
return 0; /* Completed two or three byte coding. */
} else { /* Next one is the 'Byte 3' of utf-8 encoding. */
- return std::min<size_t>(2, clamp);
+ return BROTLI_MIN(size_t, 2, clamp);
}
}
}
@@ -40,7 +39,8 @@ static size_t DecideMultiByteStatsLevel(size_t pos, size_t len, size_t mask,
size_t max_utf8 = 1; /* should be 2, but 1 compresses better. */
size_t last_c = 0;
size_t utf8_pos = 0;
- for (size_t i = 0; i < len; ++i) {
+ size_t i;
+ for (i = 0; i < len; ++i) {
size_t c = data[(pos + i) & mask];
utf8_pos = UTF8Position(last_c, c, 2);
++counts[utf8_pos];
@@ -62,28 +62,31 @@ static void EstimateBitCostsForLiteralsUTF8(size_t pos, size_t len, size_t mask,
const size_t max_utf8 = DecideMultiByteStatsLevel(pos, len, mask, data);
size_t histogram[3][256] = { { 0 } };
size_t window_half = 495;
- size_t in_window = std::min(window_half, len);
+ size_t in_window = BROTLI_MIN(size_t, window_half, len);
size_t in_window_utf8[3] = { 0 };
- /* Bootstrap histograms. */
- size_t last_c = 0;
- size_t utf8_pos = 0;
- for (size_t i = 0; i < in_window; ++i) {
- size_t c = data[(pos + i) & mask];
- ++histogram[utf8_pos][c];
- ++in_window_utf8[utf8_pos];
- utf8_pos = UTF8Position(last_c, c, max_utf8);
- last_c = c;
+
+ size_t i;
+ { /* Bootstrap histograms. */
+ size_t last_c = 0;
+ size_t utf8_pos = 0;
+ for (i = 0; i < in_window; ++i) {
+ size_t c = data[(pos + i) & mask];
+ ++histogram[utf8_pos][c];
+ ++in_window_utf8[utf8_pos];
+ utf8_pos = UTF8Position(last_c, c, max_utf8);
+ last_c = c;
+ }
}
/* Compute bit costs with sliding window. */
- for (size_t i = 0; i < len; ++i) {
+ for (i = 0; i < len; ++i) {
if (i >= window_half) {
/* Remove a byte in the past. */
- size_t c = i < window_half + 1 ?
- 0 : data[(pos + i - window_half - 1) & mask];
- size_t last_c = i < window_half + 2 ?
- 0 : data[(pos + i - window_half - 2) & mask];
+ size_t c =
+ i < window_half + 1 ? 0 : data[(pos + i - window_half - 1) & mask];
+ size_t last_c =
+ i < window_half + 2 ? 0 : data[(pos + i - window_half - 2) & mask];
size_t utf8_pos2 = UTF8Position(last_c, c, max_utf8);
--histogram[utf8_pos2][data[(pos + i - window_half) & mask]];
--in_window_utf8[utf8_pos2];
@@ -96,71 +99,80 @@ static void EstimateBitCostsForLiteralsUTF8(size_t pos, size_t len, size_t mask,
++histogram[utf8_pos2][data[(pos + i + window_half) & mask]];
++in_window_utf8[utf8_pos2];
}
- size_t c = i < 1 ? 0 : data[(pos + i - 1) & mask];
- size_t last_c = i < 2 ? 0 : data[(pos + i - 2) & mask];
- size_t utf8_pos = UTF8Position(last_c, c, max_utf8);
- size_t masked_pos = (pos + i) & mask;
- size_t histo = histogram[utf8_pos][data[masked_pos]];
- if (histo == 0) {
- histo = 1;
- }
- double lit_cost = FastLog2(in_window_utf8[utf8_pos]) - FastLog2(histo);
- lit_cost += 0.02905;
- if (lit_cost < 1.0) {
- lit_cost *= 0.5;
- lit_cost += 0.5;
- }
+ {
+ size_t c = i < 1 ? 0 : data[(pos + i - 1) & mask];
+ size_t last_c = i < 2 ? 0 : data[(pos + i - 2) & mask];
+ size_t utf8_pos = UTF8Position(last_c, c, max_utf8);
+ size_t masked_pos = (pos + i) & mask;
+ size_t histo = histogram[utf8_pos][data[masked_pos]];
+ double lit_cost;
+ if (histo == 0) {
+ histo = 1;
+ }
+ lit_cost = FastLog2(in_window_utf8[utf8_pos]) - FastLog2(histo);
+ lit_cost += 0.02905;
+ if (lit_cost < 1.0) {
+ lit_cost *= 0.5;
+ lit_cost += 0.5;
+ }
/* Make the first bytes more expensive -- seems to help, not sure why.
Perhaps because the entropy source is changing its properties
rapidly in the beginning of the file, perhaps because the beginning
of the data is a statistical "anomaly". */
- if (i < 2000) {
- lit_cost += 0.7 - (static_cast<double>(2000 - i) / 2000.0 * 0.35);
+ if (i < 2000) {
+ lit_cost += 0.7 - ((double)(2000 - i) / 2000.0 * 0.35);
+ }
+ cost[i] = (float)lit_cost;
}
- cost[i] = static_cast<float>(lit_cost);
}
}
-void EstimateBitCostsForLiterals(size_t pos, size_t len, size_t mask,
- const uint8_t *data, float *cost) {
- if (IsMostlyUTF8(data, pos, mask, len, kMinUTF8Ratio)) {
+void BrotliEstimateBitCostsForLiterals(size_t pos, size_t len, size_t mask,
+ const uint8_t *data, float *cost) {
+ if (BrotliIsMostlyUTF8(data, pos, mask, len, kMinUTF8Ratio)) {
EstimateBitCostsForLiteralsUTF8(pos, len, mask, data, cost);
return;
- }
- size_t histogram[256] = { 0 };
- size_t window_half = 2000;
- size_t in_window = std::min(window_half, len);
+ } else {
+ size_t histogram[256] = { 0 };
+ size_t window_half = 2000;
+ size_t in_window = BROTLI_MIN(size_t, window_half, len);
/* Bootstrap histogram. */
- for (size_t i = 0; i < in_window; ++i) {
- ++histogram[data[(pos + i) & mask]];
- }
+ size_t i;
+ for (i = 0; i < in_window; ++i) {
+ ++histogram[data[(pos + i) & mask]];
+ }
/* Compute bit costs with sliding window. */
- for (size_t i = 0; i < len; ++i) {
- if (i >= window_half) {
+ for (i = 0; i < len; ++i) {
+ size_t histo;
+ if (i >= window_half) {
/* Remove a byte in the past. */
- --histogram[data[(pos + i - window_half) & mask]];
- --in_window;
- }
- if (i + window_half < len) {
+ --histogram[data[(pos + i - window_half) & mask]];
+ --in_window;
+ }
+ if (i + window_half < len) {
/* Add a byte in the future. */
- ++histogram[data[(pos + i + window_half) & mask]];
- ++in_window;
- }
- size_t histo = histogram[data[(pos + i) & mask]];
- if (histo == 0) {
- histo = 1;
+ ++histogram[data[(pos + i + window_half) & mask]];
+ ++in_window;
+ }
+ histo = histogram[data[(pos + i) & mask]];
+ if (histo == 0) {
+ histo = 1;
+ }
+ {
+ double lit_cost = FastLog2(in_window) - FastLog2(histo);
+ lit_cost += 0.029;
+ if (lit_cost < 1.0) {
+ lit_cost *= 0.5;
+ lit_cost += 0.5;
+ }
+ cost[i] = (float)lit_cost;
+ }
}
- double lit_cost = FastLog2(in_window) - FastLog2(histo);
- lit_cost += 0.029;
- if (lit_cost < 1.0) {
- lit_cost *= 0.5;
- lit_cost += 0.5;
- }
- cost[i] = static_cast<float>(lit_cost);
}
}
-
-} // namespace brotli
+#if defined(__cplusplus) || defined(c_plusplus)
+} /* extern "C" */
+#endif
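
Numerically, the estimate above is the self-information of each byte under a
sliding-window empirical distribution, -log2(count / window), plus small
hand-tuned corrections. A hedged standalone illustration (LiteralCost and the
harness are made up; log2() stands in for brotli's FastLog2 approximation):

    #include <math.h>
    #include <stddef.h>
    #include <stdio.h>

    static double LiteralCost(size_t count, size_t in_window) {
      double lit_cost;
      if (count == 0) count = 1;            /* avoid log2(0) */
      lit_cost = log2((double)in_window) - log2((double)count);
      lit_cost += 0.029;                    /* empirical tuning offset */
      if (lit_cost < 1.0) {                 /* soften very cheap literals */
        lit_cost = 0.5 * lit_cost + 0.5;
      }
      return lit_cost;
    }

    int main(void) {
      /* A byte seen 125 times in a 4000-byte window costs about
         log2(4000 / 125) + 0.029 = 5.03 bits. */
      printf("%.2f\n", LiteralCost(125, 4000));
      return 0;
    }
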
diff --git a/enc/literal_cost.h b/enc/literal_cost.h
index c00f83d..fa96c9e 100644
--- a/enc/literal_cost.h
+++ b/enc/literal_cost.h
@@ -11,15 +11,20 @@
#define BROTLI_ENC_LITERAL_COST_H_
#include "../common/types.h"
+#include "./port.h"
-namespace brotli {
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
/* Estimates how many bits the literals in the interval [pos, pos + len) in the
ringbuffer (data, mask) will take entropy coded and writes these estimates
to the cost[0..len) array. */
-void EstimateBitCostsForLiterals(size_t pos, size_t len, size_t mask,
- const uint8_t *data, float *cost);
+BROTLI_INTERNAL void BrotliEstimateBitCostsForLiterals(
+ size_t pos, size_t len, size_t mask, const uint8_t *data, float *cost);
-} // namespace brotli
+#if defined(__cplusplus) || defined(c_plusplus)
+} /* extern "C" */
+#endif
#endif /* BROTLI_ENC_LITERAL_COST_H_ */
diff --git a/enc/memory.c b/enc/memory.c
new file mode 100644
index 0000000..de4a649
--- /dev/null
+++ b/enc/memory.c
@@ -0,0 +1,181 @@
+/* Copyright 2015 Google Inc. All Rights Reserved.
+
+ Distributed under MIT license.
+ See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* Dynamic memory management: malloc/free wrappers with optional tracking
+   of allocated blocks so they can be released after an OOM condition. */
+
+#include "./memory.h"
+
+#include <assert.h>
+#include <stdlib.h> /* exit, free, malloc */
+#include <string.h> /* memcpy */
+
+#include "../common/types.h"
+#include "./port.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+#define MAX_PERM_ALLOCATED 128
+#define MAX_NEW_ALLOCATED 64
+#define MAX_NEW_FREED 64
+
+#define PERM_ALLOCATED_OFFSET 0
+#define NEW_ALLOCATED_OFFSET MAX_PERM_ALLOCATED
+#define NEW_FREED_OFFSET (MAX_PERM_ALLOCATED + MAX_NEW_ALLOCATED)
+
+static void* DefaultAllocFunc(void* opaque, size_t size) {
+ BROTLI_UNUSED(opaque);
+ return malloc(size);
+}
+
+static void DefaultFreeFunc(void* opaque, void* address) {
+ BROTLI_UNUSED(opaque);
+ free(address);
+}
+
+void BrotliInitMemoryManager(
+ MemoryManager* m, brotli_alloc_func alloc_func, brotli_free_func free_func,
+ void* opaque) {
+ if (!alloc_func) {
+ m->alloc_func = DefaultAllocFunc;
+ m->free_func = DefaultFreeFunc;
+ m->opaque = 0;
+ } else {
+ m->alloc_func = alloc_func;
+ m->free_func = free_func;
+ m->opaque = opaque;
+ }
+#if !defined(BROTLI_ENCODER_EXIT_ON_OOM)
+ m->is_oom = 0;
+ m->perm_allocated = 0;
+ m->new_allocated = 0;
+ m->new_freed = 0;
+#endif /* BROTLI_ENCODER_EXIT_ON_OOM */
+}
+
+#if defined(BROTLI_ENCODER_EXIT_ON_OOM)
+
+void* BrotliAllocate(MemoryManager* m, size_t n) {
+ void* result = m->alloc_func(m->opaque, n);
+ if (!result) exit(EXIT_FAILURE);
+ return result;
+}
+
+void BrotliFree(MemoryManager* m, void* p) {
+ m->free_func(m->opaque, p);
+}
+
+void BrotliWipeOutMemoryManager(MemoryManager* m) {
+ BROTLI_UNUSED(m);
+}
+
+#else /* BROTLI_ENCODER_EXIT_ON_OOM */
+
+static void SortPointers(void** items, const size_t n) {
+ /* Shell sort. */
+ static const size_t gaps[] = {23, 10, 4, 1};
+ int g = 0;
+ for (; g < 4; ++g) {
+ size_t gap = gaps[g];
+ size_t i;
+ for (i = gap; i < n; ++i) {
+ size_t j = i;
+ void* tmp = items[i];
+ for (; j >= gap && tmp < items[j - gap]; j -= gap) {
+ items[j] = items[j - gap];
+ }
+ items[j] = tmp;
+ }
+ }
+}
+
+static size_t Annihilate(void** a, size_t a_len, void** b, size_t b_len) {
+ size_t a_read_index = 0;
+ size_t b_read_index = 0;
+ size_t a_write_index = 0;
+ size_t b_write_index = 0;
+ size_t annihilated = 0;
+ while (a_read_index < a_len && b_read_index < b_len) {
+ if (a[a_read_index] == b[b_read_index]) {
+ a_read_index++;
+ b_read_index++;
+ annihilated++;
+ } else if (a[a_read_index] < b[b_read_index]) {
+ a[a_write_index++] = a[a_read_index++];
+ } else {
+ b[b_write_index++] = b[b_read_index++];
+ }
+ }
+ while (a_read_index < a_len) a[a_write_index++] = a[a_read_index++];
+ while (b_read_index < b_len) b[b_write_index++] = b[b_read_index++];
+ return annihilated;
+}
+
+static void CollectGarbagePointers(MemoryManager* m) {
+ size_t annihilated;
+ SortPointers(m->pointers + NEW_ALLOCATED_OFFSET, m->new_allocated);
+ SortPointers(m->pointers + NEW_FREED_OFFSET, m->new_freed);
+ annihilated = Annihilate(
+ m->pointers + NEW_ALLOCATED_OFFSET, m->new_allocated,
+ m->pointers + NEW_FREED_OFFSET, m->new_freed);
+ m->new_allocated -= annihilated;
+ m->new_freed -= annihilated;
+
+ if (m->new_freed != 0) {
+ annihilated = Annihilate(
+ m->pointers + PERM_ALLOCATED_OFFSET, m->perm_allocated,
+ m->pointers + NEW_FREED_OFFSET, m->new_freed);
+ m->perm_allocated -= annihilated;
+ m->new_freed -= annihilated;
+ assert(m->new_freed == 0);
+ }
+
+ if (m->new_allocated != 0) {
+ assert(m->perm_allocated + m->new_allocated <= MAX_PERM_ALLOCATED);
+ memcpy(m->pointers + PERM_ALLOCATED_OFFSET + m->perm_allocated,
+ m->pointers + NEW_ALLOCATED_OFFSET,
+ sizeof(void*) * m->new_allocated);
+ m->perm_allocated += m->new_allocated;
+ m->new_allocated = 0;
+ SortPointers(m->pointers + PERM_ALLOCATED_OFFSET, m->perm_allocated);
+ }
+}
+
+void* BrotliAllocate(MemoryManager* m, size_t n) {
+ void* result = m->alloc_func(m->opaque, n);
+ if (!result) {
+ m->is_oom = 1;
+ return NULL;
+ }
+ if (m->new_allocated == MAX_NEW_ALLOCATED) CollectGarbagePointers(m);
+ m->pointers[NEW_ALLOCATED_OFFSET + (m->new_allocated++)] = result;
+ return result;
+}
+
+void BrotliFree(MemoryManager* m, void* p) {
+ if (!p) return;
+ m->free_func(m->opaque, p);
+ if (m->new_freed == MAX_NEW_FREED) CollectGarbagePointers(m);
+ m->pointers[NEW_FREED_OFFSET + (m->new_freed++)] = p;
+}
+
+void BrotliWipeOutMemoryManager(MemoryManager* m) {
+ size_t i;
+ CollectGarbagePointers(m);
+ /* Now all unfreed pointers are in perm-allocated list. */
+ for (i = 0; i < m->perm_allocated; ++i) {
+ m->free_func(m->opaque, m->pointers[PERM_ALLOCATED_OFFSET + i]);
+ }
+ m->perm_allocated = 0;
+}
+
+#endif /* BROTLI_ENCODER_EXIT_ON_OOM */
+
+#if defined(__cplusplus) || defined(c_plusplus)
+} /* extern "C" */
+#endif
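
The "annihilation" step above is the heart of the tracker: once both lists
are sorted, one linear pass cancels every pointer that appears in both the
newly-allocated and the newly-freed list, and the survivors migrate to the
permanent list. A simplified standalone sketch over ints (the real Annihilate
operates on sorted void* arrays):

    #include <stddef.h>
    #include <stdio.h>

    static size_t Annihilate(int* a, size_t a_len, int* b, size_t b_len) {
      size_t ar = 0, br = 0, aw = 0, bw = 0, annihilated = 0;
      while (ar < a_len && br < b_len) {
        if (a[ar] == b[br]) { ++ar; ++br; ++annihilated; }
        else if (a[ar] < b[br]) a[aw++] = a[ar++];
        else b[bw++] = b[br++];
      }
      while (ar < a_len) a[aw++] = a[ar++];
      while (br < b_len) b[bw++] = b[br++];
      return annihilated;
    }

    int main(void) {
      int allocated[] = {10, 20, 30, 40};  /* sorted "new allocations" */
      int freed[] = {20, 40};              /* sorted "new frees" */
      size_t n = Annihilate(allocated, 4, freed, 2);
      /* Two pairs cancel; survivors {10, 30} stay in the allocated list. */
      printf("annihilated=%zu survivors=%d,%d\n",
             n, allocated[0], allocated[1]);
      return 0;
    }
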
diff --git a/enc/memory.h b/enc/memory.h
new file mode 100644
index 0000000..0cda0c1
--- /dev/null
+++ b/enc/memory.h
@@ -0,0 +1,62 @@
+/* Copyright 2016 Google Inc. All Rights Reserved.
+
+ Distributed under MIT license.
+ See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* Macros for memory management. */
+
+#ifndef BROTLI_ENC_MEMORY_H_
+#define BROTLI_ENC_MEMORY_H_
+
+#include "../common/types.h"
+#include "./port.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+#if !defined(BROTLI_ENCODER_CLEANUP_ON_OOM) && \
+ !defined(BROTLI_ENCODER_EXIT_ON_OOM)
+#define BROTLI_ENCODER_EXIT_ON_OOM
+#endif
+
+typedef struct MemoryManager {
+ brotli_alloc_func alloc_func;
+ brotli_free_func free_func;
+ void* opaque;
+#if !defined(BROTLI_ENCODER_EXIT_ON_OOM)
+ int is_oom;
+ size_t perm_allocated;
+ size_t new_allocated;
+ size_t new_freed;
+ void* pointers[256];
+#endif /* BROTLI_ENCODER_EXIT_ON_OOM */
+} MemoryManager;
+
+BROTLI_INTERNAL void BrotliInitMemoryManager(
+ MemoryManager* m, brotli_alloc_func alloc_func, brotli_free_func free_func,
+ void* opaque);
+
+BROTLI_INTERNAL void* BrotliAllocate(MemoryManager* m, size_t n);
+#define BROTLI_ALLOC(M, T, N) ((T*)BrotliAllocate((M), (N) * sizeof(T)))
+
+BROTLI_INTERNAL void BrotliFree(MemoryManager* m, void* p);
+#define BROTLI_FREE(M, P) { \
+ BrotliFree((M), (P)); \
+ P = NULL; \
+}
+
+#if defined(BROTLI_ENCODER_EXIT_ON_OOM)
+#define BROTLI_IS_OOM(M) (!!0)
+#else /* BROTLI_ENCODER_EXIT_ON_OOM */
+#define BROTLI_IS_OOM(M) (!!(M)->is_oom)
+#endif /* BROTLI_ENCODER_EXIT_ON_OOM */
+
+BROTLI_INTERNAL void BrotliWipeOutMemoryManager(MemoryManager* m);
+
+#if defined(__cplusplus) || defined(c_plusplus)
+} /* extern "C" */
+#endif
+
+#endif /* BROTLI_ENC_MEMORY_H_ */
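
A hedged sketch of how this header's API is meant to be used by encoder
internals (ExampleWork and its workload are made up; the allocate/check/free
shape mirrors real callers such as BrotliBuildMetaBlock below):

    #include "./memory.h"  /* MemoryManager, BROTLI_ALLOC, BROTLI_FREE */

    static void ExampleWork(MemoryManager* m, size_t n) {
      uint32_t* tmp = BROTLI_ALLOC(m, uint32_t, n);
      if (BROTLI_IS_OOM(m)) return;  /* compiles to (!!0) in EXIT_ON_OOM builds */
      /* ... use tmp ... */
      BROTLI_FREE(m, tmp);           /* frees and resets tmp to NULL */
    }

With BROTLI_ENCODER_CLEANUP_ON_OOM defined, a top-level caller that observes
BROTLI_IS_OOM(m) can call BrotliWipeOutMemoryManager(m) to release every
allocation the manager still tracks.
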
diff --git a/enc/metablock.c b/enc/metablock.c
index 1391eb7..8670bcd 100644
--- a/enc/metablock.c
+++ b/enc/metablock.c
@@ -9,212 +9,199 @@
#include "./metablock.h"
+#include "../common/constants.h"
#include "../common/types.h"
+#include "./bit_cost.h"
#include "./block_splitter.h"
#include "./cluster.h"
#include "./context.h"
+#include "./entropy_encode.h"
#include "./histogram.h"
-
-namespace brotli {
-
-void BuildMetaBlock(const uint8_t* ringbuffer,
- const size_t pos,
- const size_t mask,
- uint8_t prev_byte,
- uint8_t prev_byte2,
- const Command* cmds,
- size_t num_commands,
- ContextType literal_context_mode,
- MetaBlockSplit* mb) {
- SplitBlock(cmds, num_commands,
- ringbuffer, pos, mask,
- &mb->literal_split,
- &mb->command_split,
- &mb->distance_split);
-
- std::vector<ContextType> literal_context_modes(mb->literal_split.num_types,
- literal_context_mode);
-
- size_t num_literal_contexts =
- mb->literal_split.num_types << kLiteralContextBits;
- size_t num_distance_contexts =
- mb->distance_split.num_types << kDistanceContextBits;
- std::vector<HistogramLiteral> literal_histograms(num_literal_contexts);
- mb->command_histograms.resize(mb->command_split.num_types);
- std::vector<HistogramDistance> distance_histograms(num_distance_contexts);
- BuildHistograms(cmds, num_commands,
- mb->literal_split,
- mb->command_split,
- mb->distance_split,
- ringbuffer,
- pos,
- mask,
- prev_byte,
- prev_byte2,
- literal_context_modes,
- &literal_histograms,
- &mb->command_histograms,
- &distance_histograms);
-
+#include "./memory.h"
+#include "./port.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+void BrotliBuildMetaBlock(MemoryManager* m,
+ const uint8_t* ringbuffer,
+ const size_t pos,
+ const size_t mask,
+ const int quality,
+ uint8_t prev_byte,
+ uint8_t prev_byte2,
+ const Command* cmds,
+ size_t num_commands,
+ ContextType literal_context_mode,
+ MetaBlockSplit* mb) {
/* Histogram ids need to fit in one byte. */
static const size_t kMaxNumberOfHistograms = 256;
+ HistogramDistance* distance_histograms;
+ HistogramLiteral* literal_histograms;
+ ContextType* literal_context_modes;
+ size_t num_literal_contexts;
+ size_t num_distance_contexts;
+ size_t i;
+
+ BrotliSplitBlock(m, cmds, num_commands,
+ ringbuffer, pos, mask, quality,
+ &mb->literal_split,
+ &mb->command_split,
+ &mb->distance_split);
+ if (BROTLI_IS_OOM(m)) return;
+
+ literal_context_modes =
+ BROTLI_ALLOC(m, ContextType, mb->literal_split.num_types);
+ if (BROTLI_IS_OOM(m)) return;
+ for (i = 0; i < mb->literal_split.num_types; ++i) {
+ literal_context_modes[i] = literal_context_mode;
+ }
- ClusterHistograms(literal_histograms,
- 1u << kLiteralContextBits,
- mb->literal_split.num_types,
- kMaxNumberOfHistograms,
- &mb->literal_histograms,
- &mb->literal_context_map);
-
- ClusterHistograms(distance_histograms,
- 1u << kDistanceContextBits,
- mb->distance_split.num_types,
- kMaxNumberOfHistograms,
- &mb->distance_histograms,
- &mb->distance_context_map);
+ num_literal_contexts =
+ mb->literal_split.num_types << BROTLI_LITERAL_CONTEXT_BITS;
+ num_distance_contexts =
+ mb->distance_split.num_types << BROTLI_DISTANCE_CONTEXT_BITS;
+ literal_histograms = BROTLI_ALLOC(m, HistogramLiteral, num_literal_contexts);
+ if (BROTLI_IS_OOM(m)) return;
+ ClearHistogramsLiteral(literal_histograms, num_literal_contexts);
+
+ assert(mb->command_histograms == 0);
+ mb->command_histograms_size = mb->command_split.num_types;
+ mb->command_histograms =
+ BROTLI_ALLOC(m, HistogramCommand, mb->command_histograms_size);
+ if (BROTLI_IS_OOM(m)) return;
+ ClearHistogramsCommand(mb->command_histograms, mb->command_histograms_size);
+ distance_histograms =
+ BROTLI_ALLOC(m, HistogramDistance, num_distance_contexts);
+ if (BROTLI_IS_OOM(m)) return;
+ ClearHistogramsDistance(distance_histograms, num_distance_contexts);
+ BrotliBuildHistogramsWithContext(cmds, num_commands,
+ &mb->literal_split, &mb->command_split, &mb->distance_split,
+ ringbuffer, pos, mask, prev_byte, prev_byte2, literal_context_modes,
+ literal_histograms, mb->command_histograms, distance_histograms);
+ BROTLI_FREE(m, literal_context_modes);
+
+ assert(mb->literal_context_map == 0);
+ mb->literal_context_map_size =
+ mb->literal_split.num_types << BROTLI_LITERAL_CONTEXT_BITS;
+ mb->literal_context_map =
+ BROTLI_ALLOC(m, uint32_t, mb->literal_context_map_size);
+ if (BROTLI_IS_OOM(m)) return;
+ assert(mb->literal_histograms == 0);
+ mb->literal_histograms_size = mb->literal_context_map_size;
+ mb->literal_histograms =
+ BROTLI_ALLOC(m, HistogramLiteral, mb->literal_histograms_size);
+ if (BROTLI_IS_OOM(m)) return;
+ BrotliClusterHistogramsLiteral(m, literal_histograms,
+ mb->literal_context_map_size,
+ kMaxNumberOfHistograms,
+ mb->literal_histograms,
+ &mb->literal_histograms_size,
+ mb->literal_context_map);
+ if (BROTLI_IS_OOM(m)) return;
+ BROTLI_FREE(m, literal_histograms);
+
+ assert(mb->distance_context_map == 0);
+ mb->distance_context_map_size =
+ mb->distance_split.num_types << BROTLI_DISTANCE_CONTEXT_BITS;
+ mb->distance_context_map =
+ BROTLI_ALLOC(m, uint32_t, mb->distance_context_map_size);
+ if (BROTLI_IS_OOM(m)) return;
+ assert(mb->distance_histograms == 0);
+ mb->distance_histograms_size = mb->distance_context_map_size;
+ mb->distance_histograms =
+ BROTLI_ALLOC(m, HistogramDistance, mb->distance_histograms_size);
+ if (BROTLI_IS_OOM(m)) return;
+ BrotliClusterHistogramsDistance(m, distance_histograms,
+ mb->distance_context_map_size,
+ kMaxNumberOfHistograms,
+ mb->distance_histograms,
+ &mb->distance_histograms_size,
+ mb->distance_context_map);
+ if (BROTLI_IS_OOM(m)) return;
+ BROTLI_FREE(m, distance_histograms);
}
-// Greedy block splitter for one block category (literal, command or distance).
-template<typename HistogramType>
-class BlockSplitter {
- public:
- BlockSplitter(size_t alphabet_size,
- size_t min_block_size,
- double split_threshold,
- size_t num_symbols,
- BlockSplit* split,
- std::vector<HistogramType>* histograms)
- : alphabet_size_(alphabet_size),
- min_block_size_(min_block_size),
- split_threshold_(split_threshold),
- num_blocks_(0),
- split_(split),
- histograms_(histograms),
- target_block_size_(min_block_size),
- block_size_(0),
- curr_histogram_ix_(0),
- merge_last_count_(0) {
- size_t max_num_blocks = num_symbols / min_block_size + 1;
- // We have to allocate one more histogram than the maximum number of block
- // types for the current histogram when the meta-block is too big.
- size_t max_num_types = std::min<size_t>(max_num_blocks, kMaxBlockTypes + 1);
- split_->lengths.resize(max_num_blocks);
- split_->types.resize(max_num_blocks);
- histograms_->resize(max_num_types);
- last_histogram_ix_[0] = last_histogram_ix_[1] = 0;
- }
-
- // Adds the next symbol to the current histogram. When the current histogram
- // reaches the target size, decides on merging the block.
- void AddSymbol(size_t symbol) {
- (*histograms_)[curr_histogram_ix_].Add(symbol);
- ++block_size_;
- if (block_size_ == target_block_size_) {
- FinishBlock(/* is_final = */ false);
- }
+#define FN(X) X ## Literal
+#include "./metablock_inc.h" /* NOLINT(build/include) */
+#undef FN
+
+#define FN(X) X ## Command
+#include "./metablock_inc.h" /* NOLINT(build/include) */
+#undef FN
+
+#define FN(X) X ## Distance
+#include "./metablock_inc.h" /* NOLINT(build/include) */
+#undef FN
+
+void BrotliBuildMetaBlockGreedy(MemoryManager* m,
+ const uint8_t* ringbuffer,
+ size_t pos,
+ size_t mask,
+ const Command *commands,
+ size_t n_commands,
+ MetaBlockSplit* mb) {
+ BlockSplitterLiteral lit_blocks;
+ BlockSplitterCommand cmd_blocks;
+ BlockSplitterDistance dist_blocks;
+ size_t num_literals = 0;
+ size_t i;
+ for (i = 0; i < n_commands; ++i) {
+ num_literals += commands[i].insert_len_;
}
- // Does either of three things:
- // (1) emits the current block with a new block type;
- // (2) emits the current block with the type of the second last block;
- // (3) merges the current block with the last block.
- void FinishBlock(bool is_final) {
- if (block_size_ < min_block_size_) {
- block_size_ = min_block_size_;
- }
- if (num_blocks_ == 0) {
- // Create first block.
- split_->lengths[0] = static_cast<uint32_t>(block_size_);
- split_->types[0] = 0;
- last_entropy_[0] =
- BitsEntropy(&(*histograms_)[0].data_[0], alphabet_size_);
- last_entropy_[1] = last_entropy_[0];
- ++num_blocks_;
- ++split_->num_types;
- ++curr_histogram_ix_;
- block_size_ = 0;
- } else if (block_size_ > 0) {
- double entropy = BitsEntropy(&(*histograms_)[curr_histogram_ix_].data_[0],
- alphabet_size_);
- HistogramType combined_histo[2];
- double combined_entropy[2];
- double diff[2];
- for (size_t j = 0; j < 2; ++j) {
- size_t last_histogram_ix = last_histogram_ix_[j];
- combined_histo[j] = (*histograms_)[curr_histogram_ix_];
- combined_histo[j].AddHistogram((*histograms_)[last_histogram_ix]);
- combined_entropy[j] = BitsEntropy(
- &combined_histo[j].data_[0], alphabet_size_);
- diff[j] = combined_entropy[j] - entropy - last_entropy_[j];
- }
-
- if (split_->num_types < kMaxBlockTypes &&
- diff[0] > split_threshold_ &&
- diff[1] > split_threshold_) {
- // Create new block.
- split_->lengths[num_blocks_] = static_cast<uint32_t>(block_size_);
- split_->types[num_blocks_] = static_cast<uint8_t>(split_->num_types);
- last_histogram_ix_[1] = last_histogram_ix_[0];
- last_histogram_ix_[0] = static_cast<uint8_t>(split_->num_types);
- last_entropy_[1] = last_entropy_[0];
- last_entropy_[0] = entropy;
- ++num_blocks_;
- ++split_->num_types;
- ++curr_histogram_ix_;
- block_size_ = 0;
- merge_last_count_ = 0;
- target_block_size_ = min_block_size_;
- } else if (diff[1] < diff[0] - 20.0) {
- // Combine this block with second last block.
- split_->lengths[num_blocks_] = static_cast<uint32_t>(block_size_);
- split_->types[num_blocks_] = split_->types[num_blocks_ - 2];
- std::swap(last_histogram_ix_[0], last_histogram_ix_[1]);
- (*histograms_)[last_histogram_ix_[0]] = combined_histo[1];
- last_entropy_[1] = last_entropy_[0];
- last_entropy_[0] = combined_entropy[1];
- ++num_blocks_;
- block_size_ = 0;
- (*histograms_)[curr_histogram_ix_].Clear();
- merge_last_count_ = 0;
- target_block_size_ = min_block_size_;
- } else {
- // Combine this block with last block.
- split_->lengths[num_blocks_ - 1] += static_cast<uint32_t>(block_size_);
- (*histograms_)[last_histogram_ix_[0]] = combined_histo[0];
- last_entropy_[0] = combined_entropy[0];
- if (split_->num_types == 1) {
- last_entropy_[1] = last_entropy_[0];
- }
- block_size_ = 0;
- (*histograms_)[curr_histogram_ix_].Clear();
- if (++merge_last_count_ > 1) {
- target_block_size_ += min_block_size_;
- }
- }
+ InitBlockSplitterLiteral(m, &lit_blocks, 256, 512, 400.0, num_literals,
+ &mb->literal_split, &mb->literal_histograms,
+ &mb->literal_histograms_size);
+ if (BROTLI_IS_OOM(m)) return;
+ InitBlockSplitterCommand(m, &cmd_blocks, BROTLI_NUM_COMMAND_SYMBOLS, 1024,
+ 500.0, n_commands, &mb->command_split, &mb->command_histograms,
+ &mb->command_histograms_size);
+ if (BROTLI_IS_OOM(m)) return;
+ InitBlockSplitterDistance(m, &dist_blocks, 64, 512, 100.0, n_commands,
+ &mb->distance_split, &mb->distance_histograms,
+ &mb->distance_histograms_size);
+ if (BROTLI_IS_OOM(m)) return;
+
+ for (i = 0; i < n_commands; ++i) {
+ const Command cmd = commands[i];
+ size_t j;
+ BlockSplitterAddSymbolCommand(&cmd_blocks, cmd.cmd_prefix_);
+ for (j = cmd.insert_len_; j != 0; --j) {
+ BlockSplitterAddSymbolLiteral(&lit_blocks, ringbuffer[pos & mask]);
+ ++pos;
}
- if (is_final) {
- (*histograms_).resize(split_->num_types);
- split_->types.resize(num_blocks_);
- split_->lengths.resize(num_blocks_);
+ pos += CommandCopyLen(&cmd);
+ if (CommandCopyLen(&cmd) && cmd.cmd_prefix_ >= 128) {
+ BlockSplitterAddSymbolDistance(&dist_blocks, cmd.dist_prefix_);
}
}
- private:
- static const uint16_t kMaxBlockTypes = 256;
+ BlockSplitterFinishBlockLiteral(&lit_blocks, /* is_final = */ 1);
+ BlockSplitterFinishBlockCommand(&cmd_blocks, /* is_final = */ 1);
+ BlockSplitterFinishBlockDistance(&dist_blocks, /* is_final = */ 1);
+}
+/* Greedy block splitter for one block category (literal, command or distance).
+ Gathers histograms for all context buckets. */
+typedef struct ContextBlockSplitter {
/* Alphabet size of particular block category. */
- const size_t alphabet_size_;
+ size_t alphabet_size_;
+ size_t num_contexts_;
+ size_t max_block_types_;
/* We collect at least this many symbols for each block. */
- const size_t min_block_size_;
+ size_t min_block_size_;
/* We merge histograms A and B if
entropy(A+B) < entropy(A) + entropy(B) + split_threshold_,
where A is the current histogram and B is the histogram of the last or the
second last block type. */
- const double split_threshold_;
+ double split_threshold_;
size_t num_blocks_;
BlockSplit* split_; /* not owned */
- std::vector<HistogramType>* histograms_; /* not owned */
+ HistogramLiteral* histograms_; /* not owned */
+ size_t* histograms_size_; /* not owned */
/* The number of symbols that we want to collect before deciding on whether
or not to merge the block with a previous one or emit a new block. */
@@ -226,315 +213,302 @@ class BlockSplitter {
/* Offset of the histograms of the previous two block types. */
size_t last_histogram_ix_[2];
/* Entropy of the previous two block types. */
- double last_entropy_[2];
+ double* last_entropy_;
/* The number of times we merged the current block with the last one. */
size_t merge_last_count_;
-};
-
-void BuildMetaBlockGreedy(const uint8_t* ringbuffer,
- size_t pos,
- size_t mask,
- const Command *commands,
- size_t n_commands,
- MetaBlockSplit* mb) {
- size_t num_literals = 0;
- for (size_t i = 0; i < n_commands; ++i) {
- num_literals += commands[i].insert_len_;
- }
-
- BlockSplitter<HistogramLiteral> lit_blocks(
- 256, 512, 400.0, num_literals,
- &mb->literal_split, &mb->literal_histograms);
- BlockSplitter<HistogramCommand> cmd_blocks(
- kNumCommandPrefixes, 1024, 500.0, n_commands,
- &mb->command_split, &mb->command_histograms);
- BlockSplitter<HistogramDistance> dist_blocks(
- 64, 512, 100.0, n_commands,
- &mb->distance_split, &mb->distance_histograms);
-
- for (size_t i = 0; i < n_commands; ++i) {
- const Command cmd = commands[i];
- cmd_blocks.AddSymbol(cmd.cmd_prefix_);
- for (size_t j = cmd.insert_len_; j != 0; --j) {
- lit_blocks.AddSymbol(ringbuffer[pos & mask]);
- ++pos;
- }
- pos += cmd.copy_len();
- if (cmd.copy_len() && cmd.cmd_prefix_ >= 128) {
- dist_blocks.AddSymbol(cmd.dist_prefix_);
- }
- }
-
- lit_blocks.FinishBlock(/* is_final = */ true);
- cmd_blocks.FinishBlock(/* is_final = */ true);
- dist_blocks.FinishBlock(/* is_final = */ true);
+} ContextBlockSplitter;
+
+static void InitContextBlockSplitter(
+ MemoryManager* m, ContextBlockSplitter* self, size_t alphabet_size,
+ size_t num_contexts, size_t min_block_size, double split_threshold,
+ size_t num_symbols, BlockSplit* split, HistogramLiteral** histograms,
+ size_t* histograms_size) {
+ size_t max_num_blocks = num_symbols / min_block_size + 1;
+ size_t max_num_types;
+
+ self->alphabet_size_ = alphabet_size;
+ self->num_contexts_ = num_contexts;
+ self->max_block_types_ = BROTLI_MAX_NUMBER_OF_BLOCK_TYPES / num_contexts;
+ self->min_block_size_ = min_block_size;
+ self->split_threshold_ = split_threshold;
+ self->num_blocks_ = 0;
+ self->split_ = split;
+ self->histograms_size_ = histograms_size;
+ self->target_block_size_ = min_block_size;
+ self->block_size_ = 0;
+ self->curr_histogram_ix_ = 0;
+ self->merge_last_count_ = 0;
+
+  /* Allocate one histogram more than the maximum number of block types; the
+     extra slot holds the histogram that is still being filled. */
+ max_num_types =
+ BROTLI_MIN(size_t, max_num_blocks, self->max_block_types_ + 1);
+ BROTLI_ENSURE_CAPACITY(m, uint8_t,
+ split->types, split->types_alloc_size, max_num_blocks);
+ BROTLI_ENSURE_CAPACITY(m, uint32_t,
+ split->lengths, split->lengths_alloc_size, max_num_blocks);
+ if (BROTLI_IS_OOM(m)) return;
+ split->num_blocks = max_num_blocks;
+ self->last_entropy_ = BROTLI_ALLOC(m, double, 2 * num_contexts);
+ if (BROTLI_IS_OOM(m)) return;
+ assert(*histograms == 0);
+ *histograms_size = max_num_types * num_contexts;
+ *histograms = BROTLI_ALLOC(m, HistogramLiteral, *histograms_size);
+ self->histograms_ = *histograms;
+ if (BROTLI_IS_OOM(m)) return;
+  /* Clear only the current histogram. */
+ ClearHistogramsLiteral(&self->histograms_[0], num_contexts);
+ self->last_histogram_ix_[0] = self->last_histogram_ix_[1] = 0;
}
-// Greedy block splitter for one block category (literal, command or distance).
-// Gathers histograms for all context buckets.
-template<typename HistogramType>
-class ContextBlockSplitter {
- public:
- ContextBlockSplitter(size_t alphabet_size,
- size_t num_contexts,
- size_t min_block_size,
- double split_threshold,
- size_t num_symbols,
- BlockSplit* split,
- std::vector<HistogramType>* histograms)
- : alphabet_size_(alphabet_size),
- num_contexts_(num_contexts),
- max_block_types_(kMaxBlockTypes / num_contexts),
- min_block_size_(min_block_size),
- split_threshold_(split_threshold),
- num_blocks_(0),
- split_(split),
- histograms_(histograms),
- target_block_size_(min_block_size),
- block_size_(0),
- curr_histogram_ix_(0),
- last_entropy_(2 * num_contexts),
- merge_last_count_(0) {
- size_t max_num_blocks = num_symbols / min_block_size + 1;
- // We have to allocate one more histogram than the maximum number of block
- // types for the current histogram when the meta-block is too big.
- size_t max_num_types = std::min(max_num_blocks, max_block_types_ + 1);
- split_->lengths.resize(max_num_blocks);
- split_->types.resize(max_num_blocks);
- histograms_->resize(max_num_types * num_contexts);
- last_histogram_ix_[0] = last_histogram_ix_[1] = 0;
- }
-
- // Adds the next symbol to the current block type and context. When the
- // current block reaches the target size, decides on merging the block.
- void AddSymbol(size_t symbol, size_t context) {
- (*histograms_)[curr_histogram_ix_ + context].Add(symbol);
- ++block_size_;
- if (block_size_ == target_block_size_) {
- FinishBlock(/* is_final = */ false);
- }
- }
+static void CleanupContextBlockSplitter(
+ MemoryManager* m, ContextBlockSplitter* self) {
+ BROTLI_FREE(m, self->last_entropy_);
+}
 /* Does one of three things:
(1) emits the current block with a new block type;
(2) emits the current block with the type of the second last block;
(3) merges the current block with the last block. */
- void FinishBlock(bool is_final) {
- if (block_size_ < min_block_size_) {
- block_size_ = min_block_size_;
+static void ContextBlockSplitterFinishBlock(
+ MemoryManager* m, ContextBlockSplitter* self, int is_final) {
+ BlockSplit* split = self->split_;
+ const size_t num_contexts = self->num_contexts_;
+ double* last_entropy = self->last_entropy_;
+ HistogramLiteral* histograms = self->histograms_;
+
+ if (self->block_size_ < self->min_block_size_) {
+ self->block_size_ = self->min_block_size_;
+ }
+ if (self->num_blocks_ == 0) {
+ size_t i;
+ /* Create first block. */
+ split->lengths[0] = (uint32_t)self->block_size_;
+ split->types[0] = 0;
+
+ for (i = 0; i < num_contexts; ++i) {
+ last_entropy[i] =
+ BitsEntropy(histograms[i].data_, self->alphabet_size_);
+ last_entropy[num_contexts + i] = last_entropy[i];
}
- if (num_blocks_ == 0) {
- // Create first block.
- split_->lengths[0] = static_cast<uint32_t>(block_size_);
- split_->types[0] = 0;
- for (size_t i = 0; i < num_contexts_; ++i) {
- last_entropy_[i] =
- BitsEntropy(&(*histograms_)[i].data_[0], alphabet_size_);
- last_entropy_[num_contexts_ + i] = last_entropy_[i];
- }
- ++num_blocks_;
- ++split_->num_types;
- curr_histogram_ix_ += num_contexts_;
- block_size_ = 0;
- } else if (block_size_ > 0) {
+ ++self->num_blocks_;
+ ++split->num_types;
+ self->curr_histogram_ix_ += num_contexts;
+ if (self->curr_histogram_ix_ < *self->histograms_size_) {
+ ClearHistogramsLiteral(
+ &self->histograms_[self->curr_histogram_ix_], self->num_contexts_);
+ }
+ self->block_size_ = 0;
+ } else if (self->block_size_ > 0) {
/* Try merging the set of histograms for the current block type with the
respective set of histograms for the last and second last block types.
Decide over the split based on the total reduction of entropy across
all contexts. */
- std::vector<double> entropy(num_contexts_);
- std::vector<HistogramType> combined_histo(2 * num_contexts_);
- std::vector<double> combined_entropy(2 * num_contexts_);
- double diff[2] = { 0.0 };
- for (size_t i = 0; i < num_contexts_; ++i) {
- size_t curr_histo_ix = curr_histogram_ix_ + i;
- entropy[i] = BitsEntropy(&(*histograms_)[curr_histo_ix].data_[0],
- alphabet_size_);
- for (size_t j = 0; j < 2; ++j) {
- size_t jx = j * num_contexts_ + i;
- size_t last_histogram_ix = last_histogram_ix_[j] + i;
- combined_histo[jx] = (*histograms_)[curr_histo_ix];
- combined_histo[jx].AddHistogram((*histograms_)[last_histogram_ix]);
- combined_entropy[jx] = BitsEntropy(
- &combined_histo[jx].data_[0], alphabet_size_);
- diff[j] += combined_entropy[jx] - entropy[i] - last_entropy_[jx];
- }
+ double* entropy = BROTLI_ALLOC(m, double, num_contexts);
+ HistogramLiteral* combined_histo =
+ BROTLI_ALLOC(m, HistogramLiteral, 2 * num_contexts);
+ double* combined_entropy = BROTLI_ALLOC(m, double, 2 * num_contexts);
+ double diff[2] = { 0.0 };
+ size_t i;
+ if (BROTLI_IS_OOM(m)) return;
+ for (i = 0; i < num_contexts; ++i) {
+ size_t curr_histo_ix = self->curr_histogram_ix_ + i;
+ size_t j;
+ entropy[i] = BitsEntropy(histograms[curr_histo_ix].data_,
+ self->alphabet_size_);
+ for (j = 0; j < 2; ++j) {
+ size_t jx = j * num_contexts + i;
+ size_t last_histogram_ix = self->last_histogram_ix_[j] + i;
+ combined_histo[jx] = histograms[curr_histo_ix];
+ HistogramAddHistogramLiteral(&combined_histo[jx],
+ &histograms[last_histogram_ix]);
+ combined_entropy[jx] = BitsEntropy(
+ &combined_histo[jx].data_[0], self->alphabet_size_);
+ diff[j] += combined_entropy[jx] - entropy[i] - last_entropy[jx];
}
+ }
- if (split_->num_types < max_block_types_ &&
- diff[0] > split_threshold_ &&
- diff[1] > split_threshold_) {
- // Create new block.
- split_->lengths[num_blocks_] = static_cast<uint32_t>(block_size_);
- split_->types[num_blocks_] = static_cast<uint8_t>(split_->num_types);
- last_histogram_ix_[1] = last_histogram_ix_[0];
- last_histogram_ix_[0] = split_->num_types * num_contexts_;
- for (size_t i = 0; i < num_contexts_; ++i) {
- last_entropy_[num_contexts_ + i] = last_entropy_[i];
- last_entropy_[i] = entropy[i];
- }
- ++num_blocks_;
- ++split_->num_types;
- curr_histogram_ix_ += num_contexts_;
- block_size_ = 0;
- merge_last_count_ = 0;
- target_block_size_ = min_block_size_;
- } else if (diff[1] < diff[0] - 20.0) {
- // Combine this block with second last block.
- split_->lengths[num_blocks_] = static_cast<uint32_t>(block_size_);
- split_->types[num_blocks_] = split_->types[num_blocks_ - 2];
- std::swap(last_histogram_ix_[0], last_histogram_ix_[1]);
- for (size_t i = 0; i < num_contexts_; ++i) {
- (*histograms_)[last_histogram_ix_[0] + i] =
- combined_histo[num_contexts_ + i];
- last_entropy_[num_contexts_ + i] = last_entropy_[i];
- last_entropy_[i] = combined_entropy[num_contexts_ + i];
- (*histograms_)[curr_histogram_ix_ + i].Clear();
- }
- ++num_blocks_;
- block_size_ = 0;
- merge_last_count_ = 0;
- target_block_size_ = min_block_size_;
- } else {
- // Combine this block with last block.
- split_->lengths[num_blocks_ - 1] += static_cast<uint32_t>(block_size_);
- for (size_t i = 0; i < num_contexts_; ++i) {
- (*histograms_)[last_histogram_ix_[0] + i] = combined_histo[i];
- last_entropy_[i] = combined_entropy[i];
- if (split_->num_types == 1) {
- last_entropy_[num_contexts_ + i] = last_entropy_[i];
- }
- (*histograms_)[curr_histogram_ix_ + i].Clear();
- }
- block_size_ = 0;
- if (++merge_last_count_ > 1) {
- target_block_size_ += min_block_size_;
+ if (split->num_types < self->max_block_types_ &&
+ diff[0] > self->split_threshold_ &&
+ diff[1] > self->split_threshold_) {
+ /* Create new block. */
+ split->lengths[self->num_blocks_] = (uint32_t)self->block_size_;
+ split->types[self->num_blocks_] = (uint8_t)split->num_types;
+ self->last_histogram_ix_[1] = self->last_histogram_ix_[0];
+ self->last_histogram_ix_[0] = split->num_types * num_contexts;
+ for (i = 0; i < num_contexts; ++i) {
+ last_entropy[num_contexts + i] = last_entropy[i];
+ last_entropy[i] = entropy[i];
+ }
+ ++self->num_blocks_;
+ ++split->num_types;
+ self->curr_histogram_ix_ += num_contexts;
+ if (self->curr_histogram_ix_ < *self->histograms_size_) {
+ ClearHistogramsLiteral(
+ &self->histograms_[self->curr_histogram_ix_], self->num_contexts_);
+ }
+ self->block_size_ = 0;
+ self->merge_last_count_ = 0;
+ self->target_block_size_ = self->min_block_size_;
+ } else if (diff[1] < diff[0] - 20.0) {
+ /* Combine this block with second last block. */
+ split->lengths[self->num_blocks_] = (uint32_t)self->block_size_;
+ split->types[self->num_blocks_] = split->types[self->num_blocks_ - 2];
+ BROTLI_SWAP(size_t, self->last_histogram_ix_, 0, 1);
+ for (i = 0; i < num_contexts; ++i) {
+ histograms[self->last_histogram_ix_[0] + i] =
+ combined_histo[num_contexts + i];
+ last_entropy[num_contexts + i] = last_entropy[i];
+ last_entropy[i] = combined_entropy[num_contexts + i];
+ HistogramClearLiteral(&histograms[self->curr_histogram_ix_ + i]);
+ }
+ ++self->num_blocks_;
+ self->block_size_ = 0;
+ self->merge_last_count_ = 0;
+ self->target_block_size_ = self->min_block_size_;
+ } else {
+ /* Combine this block with last block. */
+ split->lengths[self->num_blocks_ - 1] += (uint32_t)self->block_size_;
+ for (i = 0; i < num_contexts; ++i) {
+ histograms[self->last_histogram_ix_[0] + i] = combined_histo[i];
+ last_entropy[i] = combined_entropy[i];
+ if (split->num_types == 1) {
+ last_entropy[num_contexts + i] = last_entropy[i];
}
+ HistogramClearLiteral(&histograms[self->curr_histogram_ix_ + i]);
+ }
+ self->block_size_ = 0;
+ if (++self->merge_last_count_ > 1) {
+ self->target_block_size_ += self->min_block_size_;
}
}
- if (is_final) {
- (*histograms_).resize(split_->num_types * num_contexts_);
- split_->types.resize(num_blocks_);
- split_->lengths.resize(num_blocks_);
- }
+ BROTLI_FREE(m, combined_entropy);
+ BROTLI_FREE(m, combined_histo);
+ BROTLI_FREE(m, entropy);
}
+ if (is_final) {
+ *self->histograms_size_ = split->num_types * num_contexts;
+ split->num_blocks = self->num_blocks_;
+ }
+}
- private:
- static const int kMaxBlockTypes = 256;
-
- // Alphabet size of particular block category.
- const size_t alphabet_size_;
- const size_t num_contexts_;
- const size_t max_block_types_;
- // We collect at least this many symbols for each block.
- const size_t min_block_size_;
- // We merge histograms A and B if
- // entropy(A+B) < entropy(A) + entropy(B) + split_threshold_,
- // where A is the current histogram and B is the histogram of the last or the
- // second last block type.
- const double split_threshold_;
-
- size_t num_blocks_;
- BlockSplit* split_; // not owned
- std::vector<HistogramType>* histograms_; // not owned
+/* Adds the next symbol to the current block type and context. When the
+ current block reaches the target size, decides on merging the block. */
+static void ContextBlockSplitterAddSymbol(MemoryManager* m,
+ ContextBlockSplitter* self, size_t symbol, size_t context) {
+ HistogramAddLiteral(&self->histograms_[self->curr_histogram_ix_ + context],
+ symbol);
+ ++self->block_size_;
+ if (self->block_size_ == self->target_block_size_) {
+ ContextBlockSplitterFinishBlock(m, self, /* is_final = */ 0);
+ if (BROTLI_IS_OOM(m)) return;
+ }
+}
- // The number of symbols that we want to collect before deciding on whether
- // or not to merge the block with a previous one or emit a new block.
- size_t target_block_size_;
- // The number of symbols in the current histogram.
- size_t block_size_;
- // Offset of the current histogram.
- size_t curr_histogram_ix_;
- // Offset of the histograms of the previous two block types.
- size_t last_histogram_ix_[2];
- // Entropy of the previous two block types.
- std::vector<double> last_entropy_;
- // The number of times we merged the current block with the last one.
- size_t merge_last_count_;
-};
-
-void BuildMetaBlockGreedyWithContexts(const uint8_t* ringbuffer,
- size_t pos,
- size_t mask,
- uint8_t prev_byte,
- uint8_t prev_byte2,
- ContextType literal_context_mode,
- size_t num_contexts,
- const uint32_t* static_context_map,
- const Command *commands,
- size_t n_commands,
- MetaBlockSplit* mb) {
+void BrotliBuildMetaBlockGreedyWithContexts(MemoryManager* m,
+ const uint8_t* ringbuffer,
+ size_t pos,
+ size_t mask,
+ uint8_t prev_byte,
+ uint8_t prev_byte2,
+ ContextType literal_context_mode,
+ size_t num_contexts,
+ const uint32_t* static_context_map,
+ const Command *commands,
+ size_t n_commands,
+ MetaBlockSplit* mb) {
+ ContextBlockSplitter lit_blocks;
+ BlockSplitterCommand cmd_blocks;
+ BlockSplitterDistance dist_blocks;
size_t num_literals = 0;
- for (size_t i = 0; i < n_commands; ++i) {
+ size_t i;
+ for (i = 0; i < n_commands; ++i) {
num_literals += commands[i].insert_len_;
}
- ContextBlockSplitter<HistogramLiteral> lit_blocks(
- 256, num_contexts, 512, 400.0, num_literals,
- &mb->literal_split, &mb->literal_histograms);
- BlockSplitter<HistogramCommand> cmd_blocks(
- kNumCommandPrefixes, 1024, 500.0, n_commands,
- &mb->command_split, &mb->command_histograms);
- BlockSplitter<HistogramDistance> dist_blocks(
- 64, 512, 100.0, n_commands,
- &mb->distance_split, &mb->distance_histograms);
-
- for (size_t i = 0; i < n_commands; ++i) {
+ InitContextBlockSplitter(m, &lit_blocks, 256, num_contexts, 512, 400.0,
+ num_literals, &mb->literal_split, &mb->literal_histograms,
+ &mb->literal_histograms_size);
+ if (BROTLI_IS_OOM(m)) return;
+ InitBlockSplitterCommand(m, &cmd_blocks, BROTLI_NUM_COMMAND_SYMBOLS, 1024,
+ 500.0, n_commands, &mb->command_split, &mb->command_histograms,
+ &mb->command_histograms_size);
+ if (BROTLI_IS_OOM(m)) return;
+ InitBlockSplitterDistance(m, &dist_blocks, 64, 512, 100.0, n_commands,
+ &mb->distance_split, &mb->distance_histograms,
+ &mb->distance_histograms_size);
+ if (BROTLI_IS_OOM(m)) return;
+
+ for (i = 0; i < n_commands; ++i) {
const Command cmd = commands[i];
- cmd_blocks.AddSymbol(cmd.cmd_prefix_);
- for (size_t j = cmd.insert_len_; j != 0; --j) {
+ size_t j;
+ BlockSplitterAddSymbolCommand(&cmd_blocks, cmd.cmd_prefix_);
+ for (j = cmd.insert_len_; j != 0; --j) {
size_t context = Context(prev_byte, prev_byte2, literal_context_mode);
uint8_t literal = ringbuffer[pos & mask];
- lit_blocks.AddSymbol(literal, static_context_map[context]);
+ ContextBlockSplitterAddSymbol(
+ m, &lit_blocks, literal, static_context_map[context]);
prev_byte2 = prev_byte;
+ if (BROTLI_IS_OOM(m)) return;
prev_byte = literal;
++pos;
}
- pos += cmd.copy_len();
- if (cmd.copy_len()) {
+ pos += CommandCopyLen(&cmd);
+ if (CommandCopyLen(&cmd)) {
prev_byte2 = ringbuffer[(pos - 2) & mask];
prev_byte = ringbuffer[(pos - 1) & mask];
if (cmd.cmd_prefix_ >= 128) {
- dist_blocks.AddSymbol(cmd.dist_prefix_);
+ BlockSplitterAddSymbolDistance(&dist_blocks, cmd.dist_prefix_);
}
}
}
- lit_blocks.FinishBlock(/* is_final = */ true);
- cmd_blocks.FinishBlock(/* is_final = */ true);
- dist_blocks.FinishBlock(/* is_final = */ true);
-
- mb->literal_context_map.resize(
- mb->literal_split.num_types << kLiteralContextBits);
- for (size_t i = 0; i < mb->literal_split.num_types; ++i) {
- for (size_t j = 0; j < (1u << kLiteralContextBits); ++j) {
- mb->literal_context_map[(i << kLiteralContextBits) + j] =
- static_cast<uint32_t>(i * num_contexts) + static_context_map[j];
+ ContextBlockSplitterFinishBlock(m, &lit_blocks, /* is_final = */ 1);
+ if (BROTLI_IS_OOM(m)) return;
+ CleanupContextBlockSplitter(m, &lit_blocks);
+ BlockSplitterFinishBlockCommand(&cmd_blocks, /* is_final = */ 1);
+ BlockSplitterFinishBlockDistance(&dist_blocks, /* is_final = */ 1);
+
+ assert(mb->literal_context_map == 0);
+ mb->literal_context_map_size =
+ mb->literal_split.num_types << BROTLI_LITERAL_CONTEXT_BITS;
+ mb->literal_context_map =
+ BROTLI_ALLOC(m, uint32_t, mb->literal_context_map_size);
+ if (BROTLI_IS_OOM(m)) return;
+
+ for (i = 0; i < mb->literal_split.num_types; ++i) {
+ size_t j;
+ for (j = 0; j < (1u << BROTLI_LITERAL_CONTEXT_BITS); ++j) {
+ mb->literal_context_map[(i << BROTLI_LITERAL_CONTEXT_BITS) + j] =
+ (uint32_t)(i * num_contexts) + static_context_map[j];
}
}
}
-void OptimizeHistograms(size_t num_direct_distance_codes,
- size_t distance_postfix_bits,
- MetaBlockSplit* mb) {
- uint8_t* good_for_rle = new uint8_t[kNumCommandPrefixes];
- for (size_t i = 0; i < mb->literal_histograms.size(); ++i) {
- OptimizeHuffmanCountsForRle(256, &mb->literal_histograms[i].data_[0],
- good_for_rle);
+void BrotliOptimizeHistograms(size_t num_direct_distance_codes,
+ size_t distance_postfix_bits,
+ MetaBlockSplit* mb) {
+ uint8_t good_for_rle[BROTLI_NUM_COMMAND_SYMBOLS];
+ size_t num_distance_codes;
+ size_t i;
+ for (i = 0; i < mb->literal_histograms_size; ++i) {
+ BrotliOptimizeHuffmanCountsForRle(256, mb->literal_histograms[i].data_,
+ good_for_rle);
}
- for (size_t i = 0; i < mb->command_histograms.size(); ++i) {
- OptimizeHuffmanCountsForRle(kNumCommandPrefixes,
- &mb->command_histograms[i].data_[0],
- good_for_rle);
+ for (i = 0; i < mb->command_histograms_size; ++i) {
+ BrotliOptimizeHuffmanCountsForRle(BROTLI_NUM_COMMAND_SYMBOLS,
+ mb->command_histograms[i].data_,
+ good_for_rle);
}
- size_t num_distance_codes =
- kNumDistanceShortCodes + num_direct_distance_codes +
- (48u << distance_postfix_bits);
- for (size_t i = 0; i < mb->distance_histograms.size(); ++i) {
- OptimizeHuffmanCountsForRle(num_distance_codes,
- &mb->distance_histograms[i].data_[0],
- good_for_rle);
+ num_distance_codes = BROTLI_NUM_DISTANCE_SHORT_CODES +
+ num_direct_distance_codes + (48u << distance_postfix_bits);
+ for (i = 0; i < mb->distance_histograms_size; ++i) {
+ BrotliOptimizeHuffmanCountsForRle(num_distance_codes,
+ mb->distance_histograms[i].data_,
+ good_for_rle);
}
- delete[] good_for_rle;
}
-} // namespace brotli
+#if defined(__cplusplus) || defined(c_plusplus)
+} /* extern "C" */
+#endif
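
The merge rule shared by all of the splitters above is worth seeing in isolation: a block only opens a new type when merging it with both the last and the second-last type would cost more than split_threshold_ extra bits. A self-contained sketch, where ToyBitsEntropy is a plain Shannon-entropy stand-in for the encoder's BitsEntropy and every name is hypothetical:

#include <math.h>
#include <stddef.h>
#include <stdint.h>

static double ToyBitsEntropy(const uint32_t* counts, size_t n) {
  double total = 0.0, bits = 0.0;
  size_t i;
  for (i = 0; i < n; ++i) total += counts[i];
  if (total == 0.0) return 0.0;
  for (i = 0; i < n; ++i) {
    if (counts[i]) bits -= counts[i] * log2(counts[i] / total);
  }
  return bits;
}

/* Returns 1 when "curr" should be emitted as a new block type instead of
   being merged into the earlier type "last". Assumes n <= 256 symbols;
   last_bits caches the entropy of "last", mirroring last_entropy_. */
static int ShouldOpenNewBlockType(const uint32_t* curr, const uint32_t* last,
                                  size_t n, double last_bits,
                                  double threshold) {
  uint32_t combined[256];
  double diff;
  size_t i;
  for (i = 0; i < n; ++i) combined[i] = curr[i] + last[i];
  diff = ToyBitsEntropy(combined, n) - ToyBitsEntropy(curr, n) - last_bits;
  return diff > threshold;
}

The encoder applies this test with thresholds of 400.0 for literals, 500.0 for commands and 100.0 for distances, and requires it to pass against both of the two most recent block types before creating a new one.
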
diff --git a/enc/metablock.h b/enc/metablock.h
index 35f2c87..8721221 100644
--- a/enc/metablock.h
+++ b/enc/metablock.h
@@ -10,72 +10,100 @@
#ifndef BROTLI_ENC_METABLOCK_H_
#define BROTLI_ENC_METABLOCK_H_
-#include <vector>
-
#include "../common/types.h"
+#include "./block_splitter.h"
#include "./command.h"
+#include "./context.h"
#include "./histogram.h"
+#include "./memory.h"
+#include "./port.h"
-namespace brotli {
-
-struct BlockSplit {
- BlockSplit(void) : num_types(0) {}
-
- size_t num_types;
- std::vector<uint8_t> types;
- std::vector<uint32_t> lengths;
-};
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
-struct MetaBlockSplit {
+typedef struct MetaBlockSplit {
BlockSplit literal_split;
BlockSplit command_split;
BlockSplit distance_split;
- std::vector<uint32_t> literal_context_map;
- std::vector<uint32_t> distance_context_map;
- std::vector<HistogramLiteral> literal_histograms;
- std::vector<HistogramCommand> command_histograms;
- std::vector<HistogramDistance> distance_histograms;
-};
+ uint32_t* literal_context_map;
+ size_t literal_context_map_size;
+ uint32_t* distance_context_map;
+ size_t distance_context_map_size;
+ HistogramLiteral* literal_histograms;
+ size_t literal_histograms_size;
+ HistogramCommand* command_histograms;
+ size_t command_histograms_size;
+ HistogramDistance* distance_histograms;
+ size_t distance_histograms_size;
+} MetaBlockSplit;
+
+static BROTLI_INLINE void InitMetaBlockSplit(MetaBlockSplit* mb) {
+ BrotliInitBlockSplit(&mb->literal_split);
+ BrotliInitBlockSplit(&mb->command_split);
+ BrotliInitBlockSplit(&mb->distance_split);
+ mb->literal_context_map = 0;
+ mb->literal_context_map_size = 0;
+ mb->distance_context_map = 0;
+ mb->distance_context_map_size = 0;
+ mb->literal_histograms = 0;
+ mb->literal_histograms_size = 0;
+ mb->command_histograms = 0;
+ mb->command_histograms_size = 0;
+ mb->distance_histograms = 0;
+ mb->distance_histograms_size = 0;
+}
+
+static BROTLI_INLINE void DestroyMetaBlockSplit(
+ MemoryManager* m, MetaBlockSplit* mb) {
+ BrotliDestroyBlockSplit(m, &mb->literal_split);
+ BrotliDestroyBlockSplit(m, &mb->command_split);
+ BrotliDestroyBlockSplit(m, &mb->distance_split);
+ BROTLI_FREE(m, mb->literal_context_map);
+ BROTLI_FREE(m, mb->distance_context_map);
+ BROTLI_FREE(m, mb->literal_histograms);
+ BROTLI_FREE(m, mb->command_histograms);
+ BROTLI_FREE(m, mb->distance_histograms);
+}
/* Uses the slow shortest-path block splitter and does context clustering. */
-void BuildMetaBlock(const uint8_t* ringbuffer,
- const size_t pos,
- const size_t mask,
- uint8_t prev_byte,
- uint8_t prev_byte2,
- const Command* cmds,
- size_t num_commands,
- ContextType literal_context_mode,
- MetaBlockSplit* mb);
+BROTLI_INTERNAL void BrotliBuildMetaBlock(MemoryManager* m,
+ const uint8_t* ringbuffer,
+ const size_t pos,
+ const size_t mask,
+ const int quality,
+ uint8_t prev_byte,
+ uint8_t prev_byte2,
+ const Command* cmds,
+ size_t num_commands,
+ ContextType literal_context_mode,
+ MetaBlockSplit* mb);
/* Uses a fast greedy block splitter that tries to merge current block with the
last or the second last block and does not do any context modeling. */
-void BuildMetaBlockGreedy(const uint8_t* ringbuffer,
- size_t pos,
- size_t mask,
- const Command *commands,
- size_t n_commands,
- MetaBlockSplit* mb);
+BROTLI_INTERNAL void BrotliBuildMetaBlockGreedy(MemoryManager* m,
+ const uint8_t* ringbuffer,
+ size_t pos,
+ size_t mask,
+ const Command* commands,
+ size_t n_commands,
+ MetaBlockSplit* mb);
/* Uses a fast greedy block splitter that tries to merge current block with the
last or the second last block and uses a static context clustering which
is the same for all block types. */
-void BuildMetaBlockGreedyWithContexts(const uint8_t* ringbuffer,
- size_t pos,
- size_t mask,
- uint8_t prev_byte,
- uint8_t prev_byte2,
- ContextType literal_context_mode,
- size_t num_contexts,
- const uint32_t* static_context_map,
- const Command *commands,
- size_t n_commands,
- MetaBlockSplit* mb);
+BROTLI_INTERNAL void BrotliBuildMetaBlockGreedyWithContexts(
+ MemoryManager* m, const uint8_t* ringbuffer, size_t pos, size_t mask,
+ uint8_t prev_byte, uint8_t prev_byte2, ContextType literal_context_mode,
+ size_t num_contexts, const uint32_t* static_context_map,
+ const Command* commands, size_t n_commands, MetaBlockSplit* mb);
-void OptimizeHistograms(size_t num_direct_distance_codes,
- size_t distance_postfix_bits,
- MetaBlockSplit* mb);
+BROTLI_INTERNAL void BrotliOptimizeHistograms(size_t num_direct_distance_codes,
+ size_t distance_postfix_bits,
+ MetaBlockSplit* mb);
-} // namespace brotli
+#if defined(__cplusplus) || defined(c_plusplus)
+} /* extern "C" */
+#endif
#endif /* BROTLI_ENC_METABLOCK_H_ */
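
A minimal lifecycle sketch for this C API (hypothetical caller): the builders assert that the histogram and context-map pointers are NULL on entry, so InitMetaBlockSplit must run before every build, and DestroyMetaBlockSplit releases everything afterwards:

static void BuildAndEmit(MemoryManager* m, const uint8_t* ringbuffer,
                         size_t pos, size_t mask,
                         const Command* cmds, size_t n_cmds) {
  MetaBlockSplit mb;
  InitMetaBlockSplit(&mb);
  BrotliBuildMetaBlockGreedy(m, ringbuffer, pos, mask, cmds, n_cmds, &mb);
  if (BROTLI_IS_OOM(m)) return; /* wipe-out reclaims partial allocations */
  /* ... emit the meta-block from mb.literal_split, mb.command_split, ... */
  DestroyMetaBlockSplit(m, &mb);
}
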
diff --git a/enc/metablock_inc.h b/enc/metablock_inc.h
new file mode 100644
index 0000000..e56a4be
--- /dev/null
+++ b/enc/metablock_inc.h
@@ -0,0 +1,183 @@
+/* NOLINT(build/header_guard) */
+/* Copyright 2015 Google Inc. All Rights Reserved.
+
+ Distributed under MIT license.
+ See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* template parameters: FN */
+
+#define HistogramType FN(Histogram)
+
+/* Greedy block splitter for one block category (literal, command or distance).
+*/
+typedef struct FN(BlockSplitter) {
+ /* Alphabet size of particular block category. */
+ size_t alphabet_size_;
+ /* We collect at least this many symbols for each block. */
+ size_t min_block_size_;
+ /* We merge histograms A and B if
+ entropy(A+B) < entropy(A) + entropy(B) + split_threshold_,
+ where A is the current histogram and B is the histogram of the last or the
+ second last block type. */
+ double split_threshold_;
+
+ size_t num_blocks_;
+ BlockSplit* split_; /* not owned */
+ HistogramType* histograms_; /* not owned */
+ size_t* histograms_size_; /* not owned */
+
+ /* The number of symbols that we want to collect before deciding on whether
+ or not to merge the block with a previous one or emit a new block. */
+ size_t target_block_size_;
+ /* The number of symbols in the current histogram. */
+ size_t block_size_;
+ /* Offset of the current histogram. */
+ size_t curr_histogram_ix_;
+ /* Offset of the histograms of the previous two block types. */
+ size_t last_histogram_ix_[2];
+ /* Entropy of the previous two block types. */
+ double last_entropy_[2];
+ /* The number of times we merged the current block with the last one. */
+ size_t merge_last_count_;
+} FN(BlockSplitter);
+
+static void FN(InitBlockSplitter)(
+ MemoryManager* m, FN(BlockSplitter)* self, size_t alphabet_size,
+ size_t min_block_size, double split_threshold, size_t num_symbols,
+ BlockSplit* split, HistogramType** histograms, size_t* histograms_size) {
+ size_t max_num_blocks = num_symbols / min_block_size + 1;
+  /* Allocate one histogram more than the maximum number of block types; the
+     extra slot holds the histogram that is still being filled. */
+ size_t max_num_types =
+ BROTLI_MIN(size_t, max_num_blocks, BROTLI_MAX_NUMBER_OF_BLOCK_TYPES + 1);
+ self->alphabet_size_ = alphabet_size;
+ self->min_block_size_ = min_block_size;
+ self->split_threshold_ = split_threshold;
+ self->num_blocks_ = 0;
+ self->split_ = split;
+ self->histograms_size_ = histograms_size;
+ self->target_block_size_ = min_block_size;
+ self->block_size_ = 0;
+ self->curr_histogram_ix_ = 0;
+ self->merge_last_count_ = 0;
+ BROTLI_ENSURE_CAPACITY(m, uint8_t,
+ split->types, split->types_alloc_size, max_num_blocks);
+ BROTLI_ENSURE_CAPACITY(m, uint32_t,
+ split->lengths, split->lengths_alloc_size, max_num_blocks);
+ if (BROTLI_IS_OOM(m)) return;
+ self->split_->num_blocks = max_num_blocks;
+ assert(*histograms == 0);
+ *histograms_size = max_num_types;
+ *histograms = BROTLI_ALLOC(m, HistogramType, *histograms_size);
+ self->histograms_ = *histograms;
+ if (BROTLI_IS_OOM(m)) return;
+ /* Clear only current histogram. */
+ FN(HistogramClear)(&self->histograms_[0]);
+ self->last_histogram_ix_[0] = self->last_histogram_ix_[1] = 0;
+}
+
+/* Does one of three things:
+ (1) emits the current block with a new block type;
+ (2) emits the current block with the type of the second last block;
+ (3) merges the current block with the last block. */
+static void FN(BlockSplitterFinishBlock)(FN(BlockSplitter)* self,
+ int is_final) {
+ BlockSplit* split = self->split_;
+ double* last_entropy = self->last_entropy_;
+ HistogramType* histograms = self->histograms_;
+ self->block_size_ =
+ BROTLI_MAX(size_t, self->block_size_, self->min_block_size_);
+ if (self->num_blocks_ == 0) {
+ /* Create first block. */
+ split->lengths[0] = (uint32_t)self->block_size_;
+ split->types[0] = 0;
+ last_entropy[0] =
+ BitsEntropy(histograms[0].data_, self->alphabet_size_);
+ last_entropy[1] = last_entropy[0];
+ ++self->num_blocks_;
+ ++split->num_types;
+ ++self->curr_histogram_ix_;
+ if (self->curr_histogram_ix_ < *self->histograms_size_)
+ FN(HistogramClear)(&histograms[self->curr_histogram_ix_]);
+ self->block_size_ = 0;
+ } else if (self->block_size_ > 0) {
+ double entropy = BitsEntropy(histograms[self->curr_histogram_ix_].data_,
+ self->alphabet_size_);
+ HistogramType combined_histo[2];
+ double combined_entropy[2];
+ double diff[2];
+ size_t j;
+ for (j = 0; j < 2; ++j) {
+ size_t last_histogram_ix = self->last_histogram_ix_[j];
+ combined_histo[j] = histograms[self->curr_histogram_ix_];
+ FN(HistogramAddHistogram)(&combined_histo[j],
+ &histograms[last_histogram_ix]);
+ combined_entropy[j] = BitsEntropy(
+ &combined_histo[j].data_[0], self->alphabet_size_);
+ diff[j] = combined_entropy[j] - entropy - last_entropy[j];
+ }
+
+ if (split->num_types < BROTLI_MAX_NUMBER_OF_BLOCK_TYPES &&
+ diff[0] > self->split_threshold_ &&
+ diff[1] > self->split_threshold_) {
+ /* Create new block. */
+ split->lengths[self->num_blocks_] = (uint32_t)self->block_size_;
+ split->types[self->num_blocks_] = (uint8_t)split->num_types;
+ self->last_histogram_ix_[1] = self->last_histogram_ix_[0];
+ self->last_histogram_ix_[0] = (uint8_t)split->num_types;
+ last_entropy[1] = last_entropy[0];
+ last_entropy[0] = entropy;
+ ++self->num_blocks_;
+ ++split->num_types;
+ ++self->curr_histogram_ix_;
+ if (self->curr_histogram_ix_ < *self->histograms_size_)
+ FN(HistogramClear)(&histograms[self->curr_histogram_ix_]);
+ self->block_size_ = 0;
+ self->merge_last_count_ = 0;
+ self->target_block_size_ = self->min_block_size_;
+ } else if (diff[1] < diff[0] - 20.0) {
+ /* Combine this block with second last block. */
+ split->lengths[self->num_blocks_] = (uint32_t)self->block_size_;
+ split->types[self->num_blocks_] = split->types[self->num_blocks_ - 2];
+ BROTLI_SWAP(size_t, self->last_histogram_ix_, 0, 1);
+ histograms[self->last_histogram_ix_[0]] = combined_histo[1];
+ last_entropy[1] = last_entropy[0];
+ last_entropy[0] = combined_entropy[1];
+ ++self->num_blocks_;
+ self->block_size_ = 0;
+ FN(HistogramClear)(&histograms[self->curr_histogram_ix_]);
+ self->merge_last_count_ = 0;
+ self->target_block_size_ = self->min_block_size_;
+ } else {
+ /* Combine this block with last block. */
+ split->lengths[self->num_blocks_ - 1] += (uint32_t)self->block_size_;
+ histograms[self->last_histogram_ix_[0]] = combined_histo[0];
+ last_entropy[0] = combined_entropy[0];
+ if (split->num_types == 1) {
+ last_entropy[1] = last_entropy[0];
+ }
+ self->block_size_ = 0;
+ FN(HistogramClear)(&histograms[self->curr_histogram_ix_]);
+ if (++self->merge_last_count_ > 1) {
+ self->target_block_size_ += self->min_block_size_;
+ }
+ }
+ }
+ if (is_final) {
+ *self->histograms_size_ = split->num_types;
+ split->num_blocks = self->num_blocks_;
+ }
+}
+
+/* Adds the next symbol to the current histogram. When the current histogram
+ reaches the target size, decides on merging the block. */
+static void FN(BlockSplitterAddSymbol)(FN(BlockSplitter)* self, size_t symbol) {
+ FN(HistogramAdd)(&self->histograms_[self->curr_histogram_ix_], symbol);
+ ++self->block_size_;
+ if (self->block_size_ == self->target_block_size_) {
+ FN(BlockSplitterFinishBlock)(self, /* is_final = */ 0);
+ }
+}
+
+#undef HistogramType
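
The FN(X) token-pasting scheme is the plain-C replacement for the deleted C++ template: each inclusion of this file stamps out one concrete struct and function set per block category. The same trick in miniature, with hypothetical file and type names:

/* adder_inc.h -- template parameter: FN; deliberately has no include guard */
typedef struct FN(Adder) { int sum_; } FN(Adder);
static void FN(AdderAdd)(FN(Adder)* self, int v) { self->sum_ += v; }

/* user.c */
#define FN(X) X ## Small
#include "adder_inc.h" /* defines AdderSmall and AdderAddSmall */
#undef FN
#define FN(X) X ## Big
#include "adder_inc.h" /* defines AdderBig and AdderAddBig */
#undef FN
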
diff --git a/enc/port.h b/enc/port.h
index a9c9ffe..7c92c69 100644
--- a/enc/port.h
+++ b/enc/port.h
@@ -62,13 +62,13 @@
but note: the FPU still sends unaligned loads and stores to a trap handler!
*/
-#define BROTLI_UNALIGNED_LOAD32(_p) (*reinterpret_cast<const uint32_t *>(_p))
-#define BROTLI_UNALIGNED_LOAD64(_p) (*reinterpret_cast<const uint64_t *>(_p))
+#define BROTLI_UNALIGNED_LOAD32(_p) (*(const uint32_t *)(_p))
+#define BROTLI_UNALIGNED_LOAD64(_p) (*(const uint64_t *)(_p))
#define BROTLI_UNALIGNED_STORE32(_p, _val) \
- (*reinterpret_cast<uint32_t *>(_p) = (_val))
+ (*(uint32_t *)(_p) = (_val))
#define BROTLI_UNALIGNED_STORE64(_p, _val) \
- (*reinterpret_cast<uint64_t *>(_p) = (_val))
+ (*(uint64_t *)(_p) = (_val))
#elif defined(__arm__) && \
!defined(__ARM_ARCH_5__) && \
@@ -87,17 +87,17 @@
do an unaligned read and rotate the words around a bit, or do the reads very
slowly (trip through kernel mode). */
-#define BROTLI_UNALIGNED_LOAD32(_p) (*reinterpret_cast<const uint32_t *>(_p))
+#define BROTLI_UNALIGNED_LOAD32(_p) (*(const uint32_t *)(_p))
#define BROTLI_UNALIGNED_STORE32(_p, _val) \
- (*reinterpret_cast<uint32_t *>(_p) = (_val))
+ (*(uint32_t *)(_p) = (_val))
-static inline uint64_t BROTLI_UNALIGNED_LOAD64(const void *p) {
+static BROTLI_INLINE uint64_t BROTLI_UNALIGNED_LOAD64(const void *p) {
uint64_t t;
memcpy(&t, p, sizeof t);
return t;
}
-static inline void BROTLI_UNALIGNED_STORE64(void *p, uint64_t v) {
+static BROTLI_INLINE void BROTLI_UNALIGNED_STORE64(void *p, uint64_t v) {
memcpy(p, &v, sizeof v);
}
@@ -106,26 +106,63 @@ static inline void BROTLI_UNALIGNED_STORE64(void *p, uint64_t v) {
/* These functions are provided for architectures that don't support */
/* unaligned loads and stores. */
-static inline uint32_t BROTLI_UNALIGNED_LOAD32(const void *p) {
+static BROTLI_INLINE uint32_t BROTLI_UNALIGNED_LOAD32(const void *p) {
uint32_t t;
memcpy(&t, p, sizeof t);
return t;
}
-static inline uint64_t BROTLI_UNALIGNED_LOAD64(const void *p) {
+static BROTLI_INLINE uint64_t BROTLI_UNALIGNED_LOAD64(const void *p) {
uint64_t t;
memcpy(&t, p, sizeof t);
return t;
}
-static inline void BROTLI_UNALIGNED_STORE32(void *p, uint32_t v) {
+static BROTLI_INLINE void BROTLI_UNALIGNED_STORE32(void *p, uint32_t v) {
memcpy(p, &v, sizeof v);
}
-static inline void BROTLI_UNALIGNED_STORE64(void *p, uint64_t v) {
+static BROTLI_INLINE void BROTLI_UNALIGNED_STORE64(void *p, uint64_t v) {
memcpy(p, &v, sizeof v);
}
#endif
+#if !defined(__cplusplus) && !defined(c_plusplus) && \
+    defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
+#define BROTLI_RESTRICT restrict
+#elif BROTLI_GCC_VERSION > 295 || defined(__llvm__)
+#define BROTLI_RESTRICT __restrict
+#else
+#define BROTLI_RESTRICT
+#endif
+
+#define _TEMPLATE(T) \
+ static BROTLI_INLINE T brotli_min_ ## T (T a, T b) { return a < b ? a : b; } \
+ static BROTLI_INLINE T brotli_max_ ## T (T a, T b) { return a > b ? a : b; }
+_TEMPLATE(double) _TEMPLATE(float) _TEMPLATE(int)
+_TEMPLATE(size_t) _TEMPLATE(uint32_t) _TEMPLATE(uint8_t)
+#undef _TEMPLATE
+#define BROTLI_MIN(T, A, B) (brotli_min_ ## T((A), (B)))
+#define BROTLI_MAX(T, A, B) (brotli_max_ ## T((A), (B)))
+
+#define BROTLI_SWAP(T, A, I, J) { \
+ T __brotli_swap_tmp = (A)[(I)]; \
+ (A)[(I)] = (A)[(J)]; \
+ (A)[(J)] = __brotli_swap_tmp; \
+}
+
+#define BROTLI_ENSURE_CAPACITY(M, T, A, C, R) { \
+ if (C < (R)) { \
+ size_t _new_size = (C == 0) ? (R) : C; \
+ T* new_array; \
+ while (_new_size < (R)) _new_size *= 2; \
+ new_array = BROTLI_ALLOC((M), T, _new_size); \
+    if (!BROTLI_IS_OOM(M) && C != 0) \
+ memcpy(new_array, A, C * sizeof(T)); \
+ BROTLI_FREE((M), A); \
+ A = new_array; \
+ C = _new_size; \
+ } \
+}
+
#endif /* BROTLI_ENC_PORT_H_ */
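
BROTLI_ENSURE_CAPACITY implements the usual geometric growth strategy: the capacity starts at the first requested size and then doubles until it covers the request, so a long run of appends costs amortized O(1) allocations. A sketch of the intended call pattern with a hypothetical helper (the macro itself relies on memcpy from <string.h>):

static void AppendByte(MemoryManager* m, uint8_t** data, size_t* capacity,
                       size_t* size, uint8_t b) {
  BROTLI_ENSURE_CAPACITY(m, uint8_t, *data, *capacity, *size + 1);
  if (BROTLI_IS_OOM(m)) return;
  (*data)[(*size)++] = b;
}
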
diff --git a/enc/prefix.h b/enc/prefix.h
index 237e68f..422accd 100644
--- a/enc/prefix.h
+++ b/enc/prefix.h
@@ -10,70 +10,43 @@
#ifndef BROTLI_ENC_PREFIX_H_
#define BROTLI_ENC_PREFIX_H_
+#include "../common/constants.h"
+#include "../common/port.h"
#include "../common/types.h"
#include "./fast_log.h"
-namespace brotli {
-
-static const uint32_t kNumInsertLenPrefixes = 24;
-static const uint32_t kNumCopyLenPrefixes = 24;
-static const uint32_t kNumCommandPrefixes = 704;
-static const uint32_t kNumBlockLenPrefixes = 26;
-static const uint32_t kNumDistanceShortCodes = 16;
-static const uint32_t kNumDistancePrefixes = 520;
-
-// Represents the range of values belonging to a prefix code:
-// [offset, offset + 2^nbits)
-struct PrefixCodeRange {
- uint32_t offset;
- uint32_t nbits;
-};
-
-static const PrefixCodeRange kBlockLengthPrefixCode[kNumBlockLenPrefixes] = {
- { 1, 2}, { 5, 2}, { 9, 2}, { 13, 2},
- { 17, 3}, { 25, 3}, { 33, 3}, { 41, 3},
- { 49, 4}, { 65, 4}, { 81, 4}, { 97, 4},
- { 113, 5}, { 145, 5}, { 177, 5}, { 209, 5},
- { 241, 6}, { 305, 6}, { 369, 7}, { 497, 8},
- { 753, 9}, { 1265, 10}, {2289, 11}, {4337, 12},
- {8433, 13}, {16625, 24}
-};
-
-inline void GetBlockLengthPrefixCode(uint32_t len, uint32_t* code,
- uint32_t* n_extra, uint32_t* extra) {
- *code = 0;
- while (*code < 25 && len >= kBlockLengthPrefixCode[*code + 1].offset) {
- ++(*code);
- }
- *n_extra = kBlockLengthPrefixCode[*code].nbits;
- *extra = len - kBlockLengthPrefixCode[*code].offset;
-}
-
-inline void PrefixEncodeCopyDistance(size_t distance_code,
- size_t num_direct_codes,
- size_t postfix_bits,
- uint16_t* code,
- uint32_t* extra_bits) {
- if (distance_code < kNumDistanceShortCodes + num_direct_codes) {
- *code = static_cast<uint16_t>(distance_code);
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+static BROTLI_INLINE void PrefixEncodeCopyDistance(size_t distance_code,
+ size_t num_direct_codes,
+ size_t postfix_bits,
+ uint16_t* code,
+ uint32_t* extra_bits) {
+ if (distance_code < BROTLI_NUM_DISTANCE_SHORT_CODES + num_direct_codes) {
+ *code = (uint16_t)distance_code;
*extra_bits = 0;
return;
+ } else {
+ size_t dist = (1u << (postfix_bits + 2u)) +
+ (distance_code - BROTLI_NUM_DISTANCE_SHORT_CODES - num_direct_codes);
+ size_t bucket = Log2FloorNonZero(dist) - 1;
+ size_t postfix_mask = (1u << postfix_bits) - 1;
+ size_t postfix = dist & postfix_mask;
+ size_t prefix = (dist >> bucket) & 1;
+ size_t offset = (2 + prefix) << bucket;
+ size_t nbits = bucket - postfix_bits;
+ *code = (uint16_t)(
+ (BROTLI_NUM_DISTANCE_SHORT_CODES + num_direct_codes +
+ ((2 * (nbits - 1) + prefix) << postfix_bits) + postfix));
+ *extra_bits = (uint32_t)(
+ (nbits << 24) | ((dist - offset) >> postfix_bits));
}
- distance_code -= kNumDistanceShortCodes + num_direct_codes; /* >= 0 */
- distance_code += (1u << (postfix_bits + 2u)); /* > 0 */
- size_t bucket = Log2FloorNonZero(distance_code) - 1;
- size_t postfix_mask = (1 << postfix_bits) - 1;
- size_t postfix = distance_code & postfix_mask;
- size_t prefix = (distance_code >> bucket) & 1;
- size_t offset = (2 + prefix) << bucket;
- size_t nbits = bucket - postfix_bits;
- *code = static_cast<uint16_t>(
- (kNumDistanceShortCodes + num_direct_codes +
- ((2 * (nbits - 1) + prefix) << postfix_bits) + postfix));
- *extra_bits = static_cast<uint32_t>(
- (nbits << 24) | ((distance_code - offset) >> postfix_bits));
}
-} // namespace brotli
+#if defined(__cplusplus) || defined(c_plusplus)
+} /* extern "C" */
+#endif
#endif /* BROTLI_ENC_PREFIX_H_ */
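
A worked instance of PrefixEncodeCopyDistance, traced by hand from the code above with num_direct_codes = 0 and postfix_bits = 0:

  distance_code = 20  ->  dist    = (1 << 2) + (20 - 16) = 8
                          bucket  = Log2FloorNonZero(8) - 1 = 2
                          postfix = 0, prefix = (8 >> 2) & 1 = 0
                          offset  = (2 + 0) << 2 = 8, nbits = 2 - 0 = 2
                          *code       = 16 + ((2 * (2 - 1) + 0) << 0) + 0 = 18
                          *extra_bits = (2 << 24) | ((8 - 8) >> 0)

That is, symbol 18 is emitted with 2 extra bits of value 0: the top byte of *extra_bits carries the extra-bit count, the low 24 bits carry the value. A hypothetical check of the same numbers:

#include <assert.h>

static void CheckPrefixExample(void) {
  uint16_t code;
  uint32_t extra_bits;
  PrefixEncodeCopyDistance(20, 0, 0, &code, &extra_bits);
  assert(code == 18);
  assert((extra_bits >> 24) == 2);
  assert((extra_bits & 0xFFFFFF) == 0);
}
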
diff --git a/enc/ringbuffer.h b/enc/ringbuffer.h
index 5c4b569..e2a7599 100644
--- a/enc/ringbuffer.h
+++ b/enc/ringbuffer.h
@@ -9,12 +9,15 @@
#ifndef BROTLI_ENC_RINGBUFFER_H_
#define BROTLI_ENC_RINGBUFFER_H_
-#include <cstdlib> /* free, realloc */
+#include <string.h> /* memcpy */
#include "../common/types.h"
+#include "./memory.h"
#include "./port.h"
-namespace brotli {
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
/* A RingBuffer(window_bits, tail_bits) contains `1 << window_bits' bytes of
data in a circular manner: writing a byte writes it to:
@@ -25,121 +28,130 @@ namespace brotli {
and another copy of the last two bytes:
buffer_[-1] == buffer_[(1 << window_bits) - 1] and
buffer_[-2] == buffer_[(1 << window_bits) - 2]. */
-class RingBuffer {
- public:
- RingBuffer(int window_bits, int tail_bits)
- : size_(1u << window_bits),
- mask_((1u << window_bits) - 1),
- tail_size_(1u << tail_bits),
- total_size_(size_ + tail_size_),
- cur_size_(0),
- pos_(0),
- data_(0),
- buffer_(0) {}
-
- ~RingBuffer(void) {
- free(data_);
- }
+typedef struct RingBuffer {
+ /* Size of the ringbuffer is (1 << window_bits) + tail_size_. */
+ const uint32_t size_;
+ const uint32_t mask_;
+ const uint32_t tail_size_;
+ const uint32_t total_size_;
+
+ uint32_t cur_size_;
+ /* Position to write in the ring buffer. */
+ uint32_t pos_;
+ /* The actual ring buffer containing the copy of the last two bytes, the data,
+ and the copy of the beginning as a tail. */
+ uint8_t *data_;
+ /* The start of the ringbuffer. */
+ uint8_t *buffer_;
+} RingBuffer;
+
+static BROTLI_INLINE void RingBufferInit(RingBuffer* rb) {
+ rb->cur_size_ = 0;
+ rb->pos_ = 0;
+ rb->data_ = 0;
+ rb->buffer_ = 0;
+}
+
+static BROTLI_INLINE void RingBufferSetup(
+ int window_bits, int tail_bits, RingBuffer* rb) {
+ *(uint32_t*)&rb->size_ = 1u << window_bits;
+ *(uint32_t*)&rb->mask_ = (1u << window_bits) - 1;
+ *(uint32_t*)&rb->tail_size_ = 1u << tail_bits;
+ *(uint32_t*)&rb->total_size_ = rb->size_ + rb->tail_size_;
+}
+
+static BROTLI_INLINE void RingBufferFree(MemoryManager* m, RingBuffer* rb) {
+ BROTLI_FREE(m, rb->data_);
+}
/* Allocates or re-allocates data_ to the given length + plus some slack
region before and after. Fills the slack regions with zeros. */
- inline void InitBuffer(const uint32_t buflen) {
- static const size_t kSlackForEightByteHashingEverywhere = 7;
- cur_size_ = buflen;
- data_ = static_cast<uint8_t*>(realloc(
- data_, 2 + buflen + kSlackForEightByteHashingEverywhere));
- buffer_ = data_ + 2;
- buffer_[-2] = buffer_[-1] = 0;
- for (size_t i = 0; i < kSlackForEightByteHashingEverywhere; ++i) {
- buffer_[cur_size_ + i] = 0;
- }
+static BROTLI_INLINE void RingBufferInitBuffer(
+ MemoryManager* m, const uint32_t buflen, RingBuffer* rb) {
+ static const size_t kSlackForEightByteHashingEverywhere = 7;
+ uint8_t* new_data = BROTLI_ALLOC(
+ m, uint8_t, 2 + buflen + kSlackForEightByteHashingEverywhere);
+ size_t i;
+ if (BROTLI_IS_OOM(m)) return;
+ if (rb->data_) {
+ memcpy(new_data, rb->data_,
+ 2 + rb->cur_size_ + kSlackForEightByteHashingEverywhere);
+ BROTLI_FREE(m, rb->data_);
+ }
+ rb->data_ = new_data;
+ rb->cur_size_ = buflen;
+ rb->buffer_ = rb->data_ + 2;
+ rb->buffer_[-2] = rb->buffer_[-1] = 0;
+ for (i = 0; i < kSlackForEightByteHashingEverywhere; ++i) {
+ rb->buffer_[rb->cur_size_ + i] = 0;
}
+}
+
+static BROTLI_INLINE void RingBufferWriteTail(
+ const uint8_t *bytes, size_t n, RingBuffer* rb) {
+ const size_t masked_pos = rb->pos_ & rb->mask_;
+ if (PREDICT_FALSE(masked_pos < rb->tail_size_)) {
+ /* Just fill the tail buffer with the beginning data. */
+ const size_t p = rb->size_ + masked_pos;
+ memcpy(&rb->buffer_[p], bytes,
+ BROTLI_MIN(size_t, n, rb->tail_size_ - masked_pos));
+ }
+}
/* Push bytes into the ring buffer. */
- void Write(const uint8_t *bytes, size_t n) {
- if (pos_ == 0 && n < tail_size_) {
+static BROTLI_INLINE void RingBufferWrite(
+ MemoryManager* m, const uint8_t *bytes, size_t n, RingBuffer* rb) {
+ if (rb->pos_ == 0 && n < rb->tail_size_) {
/* Special case for the first write: to process the first block, we don't
need to allocate the whole ringbuffer and we don't need the tail
either. However, we do this memory usage optimization only if the
first write is less than the tail size, which is also the input block
size, otherwise it is likely that other blocks will follow and we
will need to reallocate to the full size anyway. */
- pos_ = static_cast<uint32_t>(n);
- InitBuffer(pos_);
- memcpy(buffer_, bytes, n);
- return;
- }
- if (cur_size_ < total_size_) {
+ rb->pos_ = (uint32_t)n;
+ RingBufferInitBuffer(m, rb->pos_, rb);
+ if (BROTLI_IS_OOM(m)) return;
+ memcpy(rb->buffer_, bytes, n);
+ return;
+ }
+ if (rb->cur_size_ < rb->total_size_) {
/* Lazily allocate the full buffer. */
- InitBuffer(total_size_);
+ RingBufferInitBuffer(m, rb->total_size_, rb);
+ if (BROTLI_IS_OOM(m)) return;
/* Initialize the last two bytes to zero, so that we don't have to worry
later when we copy the last two bytes to the first two positions. */
- buffer_[size_ - 2] = 0;
- buffer_[size_ - 1] = 0;
- }
- const size_t masked_pos = pos_ & mask_;
+ rb->buffer_[rb->size_ - 2] = 0;
+ rb->buffer_[rb->size_ - 1] = 0;
+ }
+ {
+ const size_t masked_pos = rb->pos_ & rb->mask_;
    /* The length of the writes is limited so that we do not need to worry
       about a write wrapping around the ring buffer more than once. */
- WriteTail(bytes, n);
- if (PREDICT_TRUE(masked_pos + n <= size_)) {
+ RingBufferWriteTail(bytes, n, rb);
+ if (PREDICT_TRUE(masked_pos + n <= rb->size_)) {
/* A single write fits. */
- memcpy(&buffer_[masked_pos], bytes, n);
+ memcpy(&rb->buffer_[masked_pos], bytes, n);
} else {
/* Split into two writes.
Copy into the end of the buffer, including the tail buffer. */
- memcpy(&buffer_[masked_pos], bytes,
- std::min(n, total_size_ - masked_pos));
+ memcpy(&rb->buffer_[masked_pos], bytes,
+ BROTLI_MIN(size_t, n, rb->total_size_ - masked_pos));
/* Copy into the beginning of the buffer */
- memcpy(&buffer_[0], bytes + (size_ - masked_pos),
- n - (size_ - masked_pos));
- }
- buffer_[-2] = buffer_[size_ - 2];
- buffer_[-1] = buffer_[size_ - 1];
- pos_ += static_cast<uint32_t>(n);
- if (pos_ > (1u << 30)) { /* Wrap, but preserve not-a-first-lap feature. */
- pos_ = (pos_ & ((1u << 30) - 1)) | (1u << 30);
+ memcpy(&rb->buffer_[0], bytes + (rb->size_ - masked_pos),
+ n - (rb->size_ - masked_pos));
}
}
-
- void Reset(void) {
- pos_ = 0;
+ rb->buffer_[-2] = rb->buffer_[rb->size_ - 2];
+ rb->buffer_[-1] = rb->buffer_[rb->size_ - 1];
+ rb->pos_ += (uint32_t)n;
+ if (rb->pos_ > (1u << 30)) {
+ /* Wrap, but preserve not-a-first-lap feature. */
+ rb->pos_ = (rb->pos_ & ((1u << 30) - 1)) | (1u << 30);
}
+}
- // Logical cursor position in the ring buffer.
- uint32_t position(void) const { return pos_; }
-
- // Bit mask for getting the physical position for a logical position.
- uint32_t mask(void) const { return mask_; }
-
- uint8_t *start(void) { return &buffer_[0]; }
- const uint8_t *start(void) const { return &buffer_[0]; }
-
- private:
- void WriteTail(const uint8_t *bytes, size_t n) {
- const size_t masked_pos = pos_ & mask_;
- if (PREDICT_FALSE(masked_pos < tail_size_)) {
- // Just fill the tail buffer with the beginning data.
- const size_t p = size_ + masked_pos;
- memcpy(&buffer_[p], bytes, std::min(n, tail_size_ - masked_pos));
- }
- }
-
- // Size of the ringbuffer is (1 << window_bits) + tail_size_.
- const uint32_t size_;
- const uint32_t mask_;
- const uint32_t tail_size_;
- const uint32_t total_size_;
-
- uint32_t cur_size_;
- // Position to write in the ring buffer.
- uint32_t pos_;
- // The actual ring buffer containing the copy of the last two bytes, the data,
- // and the copy of the beginning as a tail.
- uint8_t *data_;
- // The start of the ringbuffer.
- uint8_t *buffer_;
-};
-
-} // namespace brotli
+#if defined(__cplusplus) || defined(c_plusplus)
+} /* extern "C" */
+#endif
#endif /* BROTLI_ENC_RINGBUFFER_H_ */
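
Aside: the layout that makes the mirrored reads above legal is two mirror bytes, then the (1 << window_bits) window, then a tail copy of the window's start, so hashing can read a few bytes past either edge without bounds checks. A minimal standalone sketch of that invariant, with toy sizes and illustrative names (not part of this patch):

    #include <assert.h>
    #include <stdint.h>
    #include <string.h>

    enum { WINDOW_BITS = 4, TAIL = 4 };              /* toy sizes */
    enum { SIZE = 1 << WINDOW_BITS, MASK = SIZE - 1 };

    int main(void) {
      /* 2 mirror bytes + window + tail, as in RingBufferInitBuffer. */
      uint8_t data[2 + SIZE + TAIL] = { 0 };
      uint8_t* buffer = data + 2;
      uint32_t pos;
      for (pos = 0; pos < 40; ++pos) {               /* write byte by byte */
        uint32_t masked = pos & MASK;
        buffer[masked] = (uint8_t)pos;
        if (masked < TAIL) buffer[SIZE + masked] = (uint8_t)pos;  /* tail copy */
        buffer[-2] = buffer[SIZE - 2];               /* mirror last two bytes */
        buffer[-1] = buffer[SIZE - 1];
      }
      /* The equalities promised in the header comment above. */
      assert(buffer[-1] == buffer[SIZE - 1]);
      assert(buffer[-2] == buffer[SIZE - 2]);
      /* And the tail duplicates the start of the window. */
      assert(memcmp(&buffer[SIZE], &buffer[0], TAIL) == 0);
      return 0;
    }

The three asserts are exactly the invariants maintained by RingBufferWriteTail and the two mirror stores at the end of RingBufferWrite.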
diff --git a/enc/static_dict.c b/enc/static_dict.c
index 6a34b05..1b52dea 100644
--- a/enc/static_dict.c
+++ b/enc/static_dict.c
@@ -6,86 +6,102 @@
#include "./static_dict.h"
-#include <algorithm>
-
#include "../common/dictionary.h"
#include "./find_match_length.h"
+#include "./port.h"
#include "./static_dict_lut.h"
-#include "./transform.h"
-namespace brotli {
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+static const uint8_t kUppercaseFirst = 10;
+static const uint8_t kOmitLastNTransforms[10] = {
+ 0, 12, 27, 23, 42, 63, 56, 48, 59, 64,
+};
-inline uint32_t Hash(const uint8_t *data) {
+static BROTLI_INLINE uint32_t Hash(const uint8_t *data) {
uint32_t h = BROTLI_UNALIGNED_LOAD32(data) * kDictHashMul32;
/* The higher bits contain more mixture from the multiplication,
so we take our results from there. */
return h >> (32 - kDictNumBits);
}
-inline void AddMatch(size_t distance, size_t len, size_t len_code,
- uint32_t* matches) {
- uint32_t match = static_cast<uint32_t>((distance << 5) + len_code);
- matches[len] = std::min(matches[len], match);
+static BROTLI_INLINE void AddMatch(size_t distance, size_t len, size_t len_code,
+ uint32_t* matches) {
+ uint32_t match = (uint32_t)((distance << 5) + len_code);
+ matches[len] = BROTLI_MIN(uint32_t, matches[len], match);
}
-inline size_t DictMatchLength(const uint8_t* data,
- size_t id,
- size_t len,
- size_t maxlen) {
+static BROTLI_INLINE size_t DictMatchLength(const uint8_t* data,
+ size_t id,
+ size_t len,
+ size_t maxlen) {
const size_t offset = kBrotliDictionaryOffsetsByLength[len] + len * id;
return FindMatchLengthWithLimit(&kBrotliDictionary[offset], data,
- std::min(len, maxlen));
+ BROTLI_MIN(size_t, len, maxlen));
}
-inline bool IsMatch(DictWord w, const uint8_t* data, size_t max_length) {
- if (w.len > max_length) return false;
- const size_t offset = kBrotliDictionaryOffsetsByLength[w.len] + w.len * w.idx;
- const uint8_t* dict = &kBrotliDictionary[offset];
- if (w.transform == 0) {
+static BROTLI_INLINE int IsMatch(
+ DictWord w, const uint8_t* data, size_t max_length) {
+ if (w.len > max_length) {
+ return 0;
+ } else {
+ const size_t offset = kBrotliDictionaryOffsetsByLength[w.len] +
+ (size_t)w.len * (size_t)w.idx;
+ const uint8_t* dict = &kBrotliDictionary[offset];
+ if (w.transform == 0) {
/* Match against base dictionary word. */
- return FindMatchLengthWithLimit(dict, data, w.len) == w.len;
- } else if (w.transform == 10) {
+ return FindMatchLengthWithLimit(dict, data, w.len) == w.len;
+ } else if (w.transform == 10) {
/* Match against uppercase first transform.
Note that there are only ASCII uppercase words in the lookup table. */
- return (dict[0] >= 'a' && dict[0] <= 'z' &&
- (dict[0] ^ 32) == data[0] &&
- FindMatchLengthWithLimit(&dict[1], &data[1], w.len - 1u) ==
- w.len - 1u);
- } else {
+ return (dict[0] >= 'a' && dict[0] <= 'z' &&
+ (dict[0] ^ 32) == data[0] &&
+ FindMatchLengthWithLimit(&dict[1], &data[1], w.len - 1u) ==
+ w.len - 1u);
+ } else {
/* Match against uppercase all transform.
Note that there are only ASCII uppercase words in the lookup table. */
- for (size_t i = 0; i < w.len; ++i) {
- if (dict[i] >= 'a' && dict[i] <= 'z') {
- if ((dict[i] ^ 32) != data[i]) return false;
- } else {
- if (dict[i] != data[i]) return false;
+ size_t i;
+ for (i = 0; i < w.len; ++i) {
+ if (dict[i] >= 'a' && dict[i] <= 'z') {
+ if ((dict[i] ^ 32) != data[i]) return 0;
+ } else {
+ if (dict[i] != data[i]) return 0;
+ }
}
+ return 1;
}
- return true;
}
}
-bool FindAllStaticDictionaryMatches(const uint8_t* data,
- size_t min_length,
- size_t max_length,
- uint32_t* matches) {
- bool found_match = false;
- size_t key = Hash(data);
- size_t bucket = kStaticDictionaryBuckets[key];
- if (bucket != 0) {
- size_t num = bucket & 0xff;
- size_t offset = bucket >> 8;
- for (size_t i = 0; i < num; ++i) {
+int BrotliFindAllStaticDictionaryMatches(const uint8_t* data,
+ size_t min_length,
+ size_t max_length,
+ uint32_t* matches) {
+ int has_found_match = 0;
+ size_t key0 = Hash(data);
+ size_t bucket0 = kStaticDictionaryBuckets[key0];
+ if (bucket0 != 0) {
+ size_t num = bucket0 & 0xff;
+ size_t offset = bucket0 >> 8;
+ size_t i;
+ for (i = 0; i < num; ++i) {
const DictWord w = kStaticDictionaryWords[offset + i];
const size_t l = w.len;
const size_t n = 1u << kBrotliDictionarySizeBitsByLength[l];
const size_t id = w.idx;
if (w.transform == 0) {
const size_t matchlen = DictMatchLength(data, id, l, max_length);
+ const uint8_t* s;
+ size_t minlen;
+ size_t maxlen;
+ size_t len;
/* Transform "" + kIdentity + "" */
if (matchlen == l) {
AddMatch(id, l, l, matches);
- found_match = true;
+ has_found_match = 1;
}
/* Transforms "" + kOmitLast1 + "" and "" + kOmitLast1 + "ing " */
if (matchlen >= l - 1) {
@@ -95,20 +111,20 @@ bool FindAllStaticDictionaryMatches(const uint8_t* data,
data[l + 2] == ' ') {
AddMatch(id + 49 * n, l + 3, l, matches);
}
- found_match = true;
+ has_found_match = 1;
}
/* Transform "" + kOmitLastN + "" (N = 2 .. 9) */
- size_t minlen = min_length;
- if (l > 9) minlen = std::max(minlen, l - 9);
- size_t maxlen = std::min(matchlen, l - 2);
- for (size_t len = minlen; len <= maxlen; ++len) {
+ minlen = min_length;
+ if (l > 9) minlen = BROTLI_MAX(size_t, minlen, l - 9);
+ maxlen = BROTLI_MIN(size_t, matchlen, l - 2);
+ for (len = minlen; len <= maxlen; ++len) {
AddMatch(id + kOmitLastNTransforms[l - len] * n, len, l, matches);
- found_match = true;
+ has_found_match = 1;
}
if (matchlen < l || l + 6 >= max_length) {
continue;
}
- const uint8_t* s = &data[l];
+ s = &data[l];
/* Transforms "" + kIdentity + <suffix> */
if (s[0] == ' ') {
AddMatch(id + n, l + 1, l, matches);
@@ -258,44 +274,45 @@ bool FindAllStaticDictionaryMatches(const uint8_t* data,
} else {
/* Set is_all_caps=0 for kUppercaseFirst and
is_all_caps=1 otherwise (kUppercaseAll) transform. */
- const bool t = w.transform != kUppercaseFirst;
+ const int is_all_caps = (w.transform != kUppercaseFirst) ? 1 : 0;
+ const uint8_t* s;
if (!IsMatch(w, data, max_length)) {
continue;
}
/* Transform "" + kUppercase{First,All} + "" */
- AddMatch(id + (t ? 44 : 9) * n, l, l, matches);
- found_match = true;
+ AddMatch(id + (is_all_caps ? 44 : 9) * n, l, l, matches);
+ has_found_match = 1;
if (l + 1 >= max_length) {
continue;
}
/* Transforms "" + kUppercase{First,All} + <suffix> */
- const uint8_t* s = &data[l];
+ s = &data[l];
if (s[0] == ' ') {
- AddMatch(id + (t ? 68 : 4) * n, l + 1, l, matches);
+ AddMatch(id + (is_all_caps ? 68 : 4) * n, l + 1, l, matches);
} else if (s[0] == '"') {
- AddMatch(id + (t ? 87 : 66) * n, l + 1, l, matches);
+ AddMatch(id + (is_all_caps ? 87 : 66) * n, l + 1, l, matches);
if (s[1] == '>') {
- AddMatch(id + (t ? 97 : 69) * n, l + 2, l, matches);
+ AddMatch(id + (is_all_caps ? 97 : 69) * n, l + 2, l, matches);
}
} else if (s[0] == '.') {
- AddMatch(id + (t ? 101 : 79) * n, l + 1, l, matches);
+ AddMatch(id + (is_all_caps ? 101 : 79) * n, l + 1, l, matches);
if (s[1] == ' ') {
- AddMatch(id + (t ? 114 : 88) * n, l + 2, l, matches);
+ AddMatch(id + (is_all_caps ? 114 : 88) * n, l + 2, l, matches);
}
} else if (s[0] == ',') {
- AddMatch(id + (t ? 112 : 99) * n, l + 1, l, matches);
+ AddMatch(id + (is_all_caps ? 112 : 99) * n, l + 1, l, matches);
if (s[1] == ' ') {
- AddMatch(id + (t ? 107 : 58) * n, l + 2, l, matches);
+ AddMatch(id + (is_all_caps ? 107 : 58) * n, l + 2, l, matches);
}
} else if (s[0] == '\'') {
- AddMatch(id + (t ? 94 : 74) * n, l + 1, l, matches);
+ AddMatch(id + (is_all_caps ? 94 : 74) * n, l + 1, l, matches);
} else if (s[0] == '(') {
- AddMatch(id + (t ? 113 : 78) * n, l + 1, l, matches);
+ AddMatch(id + (is_all_caps ? 113 : 78) * n, l + 1, l, matches);
} else if (s[0] == '=') {
if (s[1] == '"') {
- AddMatch(id + (t ? 105 : 104) * n, l + 2, l, matches);
+ AddMatch(id + (is_all_caps ? 105 : 104) * n, l + 2, l, matches);
} else if (s[1] == '\'') {
- AddMatch(id + (t ? 116 : 108) * n, l + 2, l, matches);
+ AddMatch(id + (is_all_caps ? 116 : 108) * n, l + 2, l, matches);
}
}
}
@@ -303,29 +320,31 @@ bool FindAllStaticDictionaryMatches(const uint8_t* data,
}
/* Transforms with prefixes " " and "." */
if (max_length >= 5 && (data[0] == ' ' || data[0] == '.')) {
- bool is_space = (data[0] == ' ');
- key = Hash(&data[1]);
- bucket = kStaticDictionaryBuckets[key];
- size_t num = bucket & 0xff;
- size_t offset = bucket >> 8;
- for (size_t i = 0; i < num; ++i) {
+ int is_space = (data[0] == ' ') ? 1 : 0;
+ size_t key1 = Hash(&data[1]);
+ size_t bucket1 = kStaticDictionaryBuckets[key1];
+ size_t num = bucket1 & 0xff;
+ size_t offset = bucket1 >> 8;
+ size_t i;
+ for (i = 0; i < num; ++i) {
const DictWord w = kStaticDictionaryWords[offset + i];
const size_t l = w.len;
const size_t n = 1u << kBrotliDictionarySizeBitsByLength[l];
const size_t id = w.idx;
if (w.transform == 0) {
+ const uint8_t* s;
if (!IsMatch(w, &data[1], max_length - 1)) {
continue;
}
/* Transforms " " + kIdentity + "" and "." + kIdentity + "" */
AddMatch(id + (is_space ? 6 : 32) * n, l + 1, l, matches);
- found_match = true;
+ has_found_match = 1;
if (l + 2 >= max_length) {
continue;
}
/* Transforms " " + kIdentity + <suffix> and "." + kIdentity + <suffix>
*/
- const uint8_t* s = &data[l + 1];
+ s = &data[l + 1];
if (s[0] == ' ') {
AddMatch(id + (is_space ? 2 : 77) * n, l + 2, l, matches);
} else if (s[0] == '(') {
@@ -352,37 +371,38 @@ bool FindAllStaticDictionaryMatches(const uint8_t* data,
} else if (is_space) {
/* Set is_all_caps=0 for kUppercaseFirst and
is_all_caps=1 otherwise (kUppercaseAll) transform. */
- const bool t = w.transform != kUppercaseFirst;
+ const int is_all_caps = (w.transform != kUppercaseFirst) ? 1 : 0;
+ const uint8_t* s;
if (!IsMatch(w, &data[1], max_length - 1)) {
continue;
}
/* Transforms " " + kUppercase{First,All} + "" */
- AddMatch(id + (t ? 85 : 30) * n, l + 1, l, matches);
- found_match = true;
+ AddMatch(id + (is_all_caps ? 85 : 30) * n, l + 1, l, matches);
+ has_found_match = 1;
if (l + 2 >= max_length) {
continue;
}
/* Transforms " " + kUppercase{First,All} + <suffix> */
- const uint8_t* s = &data[l + 1];
+ s = &data[l + 1];
if (s[0] == ' ') {
- AddMatch(id + (t ? 83 : 15) * n, l + 2, l, matches);
+ AddMatch(id + (is_all_caps ? 83 : 15) * n, l + 2, l, matches);
} else if (s[0] == ',') {
- if (!t) {
+ if (!is_all_caps) {
AddMatch(id + 109 * n, l + 2, l, matches);
- }
+ }
if (s[1] == ' ') {
- AddMatch(id + (t ? 111 : 65) * n, l + 3, l, matches);
+ AddMatch(id + (is_all_caps ? 111 : 65) * n, l + 3, l, matches);
}
} else if (s[0] == '.') {
- AddMatch(id + (t ? 115 : 96) * n, l + 2, l, matches);
+ AddMatch(id + (is_all_caps ? 115 : 96) * n, l + 2, l, matches);
if (s[1] == ' ') {
- AddMatch(id + (t ? 117 : 91) * n, l + 3, l, matches);
+ AddMatch(id + (is_all_caps ? 117 : 91) * n, l + 3, l, matches);
}
} else if (s[0] == '=') {
if (s[1] == '"') {
- AddMatch(id + (t ? 110 : 118) * n, l + 3, l, matches);
+ AddMatch(id + (is_all_caps ? 110 : 118) * n, l + 3, l, matches);
} else if (s[1] == '\'') {
- AddMatch(id + (t ? 119 : 120) * n, l + 3, l, matches);
+ AddMatch(id + (is_all_caps ? 119 : 120) * n, l + 3, l, matches);
}
}
}
@@ -393,11 +413,12 @@ bool FindAllStaticDictionaryMatches(const uint8_t* data,
if ((data[1] == ' ' &&
(data[0] == 'e' || data[0] == 's' || data[0] == ',')) ||
(data[0] == 0xc2 && data[1] == 0xa0)) {
- key = Hash(&data[2]);
- bucket = kStaticDictionaryBuckets[key];
- size_t num = bucket & 0xff;
- size_t offset = bucket >> 8;
- for (size_t i = 0; i < num; ++i) {
+ size_t key2 = Hash(&data[2]);
+ size_t bucket2 = kStaticDictionaryBuckets[key2];
+ size_t num = bucket2 & 0xff;
+ size_t offset = bucket2 >> 8;
+ size_t i;
+ for (i = 0; i < num; ++i) {
const DictWord w = kStaticDictionaryWords[offset + i];
const size_t l = w.len;
const size_t n = 1u << kBrotliDictionarySizeBitsByLength[l];
@@ -405,11 +426,11 @@ bool FindAllStaticDictionaryMatches(const uint8_t* data,
if (w.transform == 0 && IsMatch(w, &data[2], max_length - 2)) {
if (data[0] == 0xc2) {
AddMatch(id + 102 * n, l + 2, l, matches);
- found_match = true;
+ has_found_match = 1;
} else if (l + 2 < max_length && data[l + 2] == ' ') {
size_t t = data[0] == 'e' ? 18 : (data[0] == 's' ? 7 : 13);
AddMatch(id + t * n, l + 3, l, matches);
- found_match = true;
+ has_found_match = 1;
}
}
}
@@ -421,18 +442,19 @@ bool FindAllStaticDictionaryMatches(const uint8_t* data,
data[3] == 'e' && data[4] == ' ') ||
(data[0] == '.' && data[1] == 'c' && data[2] == 'o' &&
data[3] == 'm' && data[4] == '/')) {
- key = Hash(&data[5]);
- bucket = kStaticDictionaryBuckets[key];
- size_t num = bucket & 0xff;
- size_t offset = bucket >> 8;
- for (size_t i = 0; i < num; ++i) {
+ size_t key5 = Hash(&data[5]);
+ size_t bucket5 = kStaticDictionaryBuckets[key5];
+ size_t num = bucket5 & 0xff;
+ size_t offset = bucket5 >> 8;
+ size_t i;
+ for (i = 0; i < num; ++i) {
const DictWord w = kStaticDictionaryWords[offset + i];
const size_t l = w.len;
const size_t n = 1u << kBrotliDictionarySizeBitsByLength[l];
const size_t id = w.idx;
if (w.transform == 0 && IsMatch(w, &data[5], max_length - 5)) {
AddMatch(id + (data[0] == ' ' ? 41 : 72) * n, l + 5, l, matches);
- found_match = true;
+ has_found_match = 1;
if (l + 5 < max_length) {
const uint8_t* s = &data[l + 5];
if (data[0] == ' ') {
@@ -450,7 +472,9 @@ bool FindAllStaticDictionaryMatches(const uint8_t* data,
}
}
}
- return found_match;
+ return has_found_match;
}
-} // namespace brotli
+#if defined(__cplusplus) || defined(c_plusplus)
+} /* extern "C" */
+#endif
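
Aside: AddMatch above packs each candidate as (distance << 5) + len_code and keeps the per-length minimum, so for a given output length the smallest transform distance wins ties. A hedged sketch of the packing (pack_match is an invented name):

    #include <assert.h>
    #include <stdint.h>

    /* Low 5 bits carry the length code of the base dictionary word,
       the remaining bits the transform distance. */
    static uint32_t pack_match(uint32_t distance, uint32_t len_code) {
      return (distance << 5) + len_code;
    }

    int main(void) {
      uint32_t m = pack_match(42, 7);
      assert((m >> 5) == 42);   /* recover the transform distance */
      assert((m & 31) == 7);    /* recover the length code */
      return 0;
    }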
diff --git a/enc/static_dict.h b/enc/static_dict.h
index 7891186..24ccf49 100644
--- a/enc/static_dict.h
+++ b/enc/static_dict.h
@@ -10,10 +10,13 @@
#define BROTLI_ENC_STATIC_DICT_H_
#include "../common/types.h"
+#include "./port.h"
-namespace brotli {
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
-static const size_t kMaxDictionaryMatchLen = 37;
+#define BROTLI_MAX_STATIC_DICTIONARY_MATCH_LEN 37
static const uint32_t kInvalidMatch = 0xfffffff;
/* Matches data against static dictionary words, and for each length l,
@@ -23,11 +26,13 @@ static const uint32_t kInvalidMatch = 0xfffffff;
Prerequisites:
matches array is at least BROTLI_MAX_STATIC_DICTIONARY_MATCH_LEN + 1 long
all elements are initialized to kInvalidMatch */
-bool FindAllStaticDictionaryMatches(const uint8_t* data,
- size_t min_length,
- size_t max_length,
- uint32_t* matches);
-
-} // namespace brotli
+BROTLI_INTERNAL int BrotliFindAllStaticDictionaryMatches(const uint8_t* data,
+ size_t min_length,
+ size_t max_length,
+ uint32_t* matches);
+
+#if defined(__cplusplus) || defined(c_plusplus)
+} /* extern "C" */
+#endif
#endif /* BROTLI_ENC_STATIC_DICT_H_ */
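
A hedged usage sketch of the prerequisite spelled out in the comment above: the matches array must be pre-filled with kInvalidMatch before the call. The wrapper name and the min_length of 4 are illustrative assumptions, not requirements of this header:

    #include <stddef.h>
    #include <stdint.h>
    #include "./static_dict.h"   /* constants and prototype */

    static int has_dict_match(const uint8_t* data, size_t max_length) {
      uint32_t matches[BROTLI_MAX_STATIC_DICTIONARY_MATCH_LEN + 1];
      size_t i;
      for (i = 0; i <= BROTLI_MAX_STATIC_DICTIONARY_MATCH_LEN; ++i) {
        matches[i] = kInvalidMatch;  /* required: all entries start invalid */
      }
      return BrotliFindAllStaticDictionaryMatches(data, 4, max_length, matches);
    }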
diff --git a/enc/static_dict_lut.h b/enc/static_dict_lut.h
index b00a5f7..615c9a3 100644
--- a/enc/static_dict_lut.h
+++ b/enc/static_dict_lut.h
@@ -6,21 +6,23 @@
/* Lookup table for static dictionary and transforms. */
-#ifndef BROTLI_ENC_DICTIONARY_LUT_H_
-#define BROTLI_ENC_DICTIONARY_LUT_H_
+#ifndef BROTLI_ENC_STATIC_DICT_LUT_H_
+#define BROTLI_ENC_STATIC_DICT_LUT_H_
#include "../common/types.h"
-namespace brotli {
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
static const int kDictNumBits = 15;
static const uint32_t kDictHashMul32 = 0x1e35a7bd;
-struct DictWord {
+typedef struct DictWord {
uint8_t len;
uint8_t transform;
uint16_t idx;
-};
+} DictWord;
static const uint32_t kStaticDictionaryBuckets[] = {
0x000002, 0x000000, 0x000000, 0x000000, 0x000000, 0x000000, 0x000000, 0x000000,
@@ -12050,6 +12052,8 @@ static const DictWord kStaticDictionaryWords[] = {
{ 12, 10, 542 }, { 14, 11, 410 }, { 9, 11, 660 }, { 10, 11, 347 },
};
-} // namespace brotli
+#if defined(__cplusplus) || defined(c_plusplus)
+} /* extern "C" */
+#endif
#endif /* BROTLI_ENC_STATIC_DICT_LUT_H_ */
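
Aside: the tables above are consumed by a multiplicative hash and a packed bucket entry, with the entry count in the low 8 bits and the offset into kStaticDictionaryWords in the rest, mirroring the lookup loops in static_dict.c. A standalone sketch (names illustrative):

    #include <stddef.h>
    #include <stdint.h>
    #include <string.h>

    static uint32_t dict_hash(const uint8_t* data) {
      uint32_t h;
      memcpy(&h, data, sizeof(h));   /* portable stand-in for the unaligned load */
      h = h * 0x1e35a7bdu;           /* kDictHashMul32 */
      return h >> (32 - 15);         /* keep the top kDictNumBits bits */
    }

    static void decode_bucket(uint32_t bucket, size_t* num, size_t* offset) {
      *num = bucket & 0xff;          /* DictWord entries in this bucket */
      *offset = bucket >> 8;         /* index of the first entry */
    }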
diff --git a/enc/streams.h b/enc/streams.h
index 7a595ea..2ae7733 100644
--- a/enc/streams.h
+++ b/enc/streams.h
@@ -13,7 +13,6 @@
#include <string>
#include "../common/types.h"
-#include "./port.h"
namespace brotli {
diff --git a/enc/transform.h b/enc/transform.h
deleted file mode 100644
index 9f3e41f..0000000
--- a/enc/transform.h
+++ /dev/null
@@ -1,248 +0,0 @@
-/* Copyright 2010 Google Inc. All Rights Reserved.
-
- Distributed under MIT license.
- See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
-*/
-
-// Transformations on dictionary words.
-
-#ifndef BROTLI_ENC_TRANSFORM_H_
-#define BROTLI_ENC_TRANSFORM_H_
-
-#include <string>
-
-#include "../common/dictionary.h"
-
-namespace brotli {
-
-enum WordTransformType {
- kIdentity = 0,
- kOmitLast1 = 1,
- kOmitLast2 = 2,
- kOmitLast3 = 3,
- kOmitLast4 = 4,
- kOmitLast5 = 5,
- kOmitLast6 = 6,
- kOmitLast7 = 7,
- kOmitLast8 = 8,
- kOmitLast9 = 9,
- kUppercaseFirst = 10,
- kUppercaseAll = 11,
- kOmitFirst1 = 12,
- kOmitFirst2 = 13,
- kOmitFirst3 = 14,
- kOmitFirst4 = 15,
- kOmitFirst5 = 16,
- kOmitFirst6 = 17,
- kOmitFirst7 = 18,
- kOmitFirst8 = 19,
- kOmitFirst9 = 20
-};
-
-struct Transform {
- const char* prefix;
- WordTransformType word_transform;
- const char* suffix;
-};
-
-static const Transform kTransforms[] = {
- { "", kIdentity, "" },
- { "", kIdentity, " " },
- { " ", kIdentity, " " },
- { "", kOmitFirst1, "" },
- { "", kUppercaseFirst, " " },
- { "", kIdentity, " the " },
- { " ", kIdentity, "" },
- { "s ", kIdentity, " " },
- { "", kIdentity, " of " },
- { "", kUppercaseFirst, "" },
- { "", kIdentity, " and " },
- { "", kOmitFirst2, "" },
- { "", kOmitLast1, "" },
- { ", ", kIdentity, " " },
- { "", kIdentity, ", " },
- { " ", kUppercaseFirst, " " },
- { "", kIdentity, " in " },
- { "", kIdentity, " to " },
- { "e ", kIdentity, " " },
- { "", kIdentity, "\"" },
- { "", kIdentity, "." },
- { "", kIdentity, "\">" },
- { "", kIdentity, "\n" },
- { "", kOmitLast3, "" },
- { "", kIdentity, "]" },
- { "", kIdentity, " for " },
- { "", kOmitFirst3, "" },
- { "", kOmitLast2, "" },
- { "", kIdentity, " a " },
- { "", kIdentity, " that " },
- { " ", kUppercaseFirst, "" },
- { "", kIdentity, ". " },
- { ".", kIdentity, "" },
- { " ", kIdentity, ", " },
- { "", kOmitFirst4, "" },
- { "", kIdentity, " with " },
- { "", kIdentity, "'" },
- { "", kIdentity, " from " },
- { "", kIdentity, " by " },
- { "", kOmitFirst5, "" },
- { "", kOmitFirst6, "" },
- { " the ", kIdentity, "" },
- { "", kOmitLast4, "" },
- { "", kIdentity, ". The " },
- { "", kUppercaseAll, "" },
- { "", kIdentity, " on " },
- { "", kIdentity, " as " },
- { "", kIdentity, " is " },
- { "", kOmitLast7, "" },
- { "", kOmitLast1, "ing " },
- { "", kIdentity, "\n\t" },
- { "", kIdentity, ":" },
- { " ", kIdentity, ". " },
- { "", kIdentity, "ed " },
- { "", kOmitFirst9, "" },
- { "", kOmitFirst7, "" },
- { "", kOmitLast6, "" },
- { "", kIdentity, "(" },
- { "", kUppercaseFirst, ", " },
- { "", kOmitLast8, "" },
- { "", kIdentity, " at " },
- { "", kIdentity, "ly " },
- { " the ", kIdentity, " of " },
- { "", kOmitLast5, "" },
- { "", kOmitLast9, "" },
- { " ", kUppercaseFirst, ", " },
- { "", kUppercaseFirst, "\"" },
- { ".", kIdentity, "(" },
- { "", kUppercaseAll, " " },
- { "", kUppercaseFirst, "\">" },
- { "", kIdentity, "=\"" },
- { " ", kIdentity, "." },
- { ".com/", kIdentity, "" },
- { " the ", kIdentity, " of the " },
- { "", kUppercaseFirst, "'" },
- { "", kIdentity, ". This " },
- { "", kIdentity, "," },
- { ".", kIdentity, " " },
- { "", kUppercaseFirst, "(" },
- { "", kUppercaseFirst, "." },
- { "", kIdentity, " not " },
- { " ", kIdentity, "=\"" },
- { "", kIdentity, "er " },
- { " ", kUppercaseAll, " " },
- { "", kIdentity, "al " },
- { " ", kUppercaseAll, "" },
- { "", kIdentity, "='" },
- { "", kUppercaseAll, "\"" },
- { "", kUppercaseFirst, ". " },
- { " ", kIdentity, "(" },
- { "", kIdentity, "ful " },
- { " ", kUppercaseFirst, ". " },
- { "", kIdentity, "ive " },
- { "", kIdentity, "less " },
- { "", kUppercaseAll, "'" },
- { "", kIdentity, "est " },
- { " ", kUppercaseFirst, "." },
- { "", kUppercaseAll, "\">" },
- { " ", kIdentity, "='" },
- { "", kUppercaseFirst, "," },
- { "", kIdentity, "ize " },
- { "", kUppercaseAll, "." },
- { "\xc2\xa0", kIdentity, "" },
- { " ", kIdentity, "," },
- { "", kUppercaseFirst, "=\"" },
- { "", kUppercaseAll, "=\"" },
- { "", kIdentity, "ous " },
- { "", kUppercaseAll, ", " },
- { "", kUppercaseFirst, "='" },
- { " ", kUppercaseFirst, "," },
- { " ", kUppercaseAll, "=\"" },
- { " ", kUppercaseAll, ", " },
- { "", kUppercaseAll, "," },
- { "", kUppercaseAll, "(" },
- { "", kUppercaseAll, ". " },
- { " ", kUppercaseAll, "." },
- { "", kUppercaseAll, "='" },
- { " ", kUppercaseAll, ". " },
- { " ", kUppercaseFirst, "=\"" },
- { " ", kUppercaseAll, "='" },
- { " ", kUppercaseFirst, "='" },
-};
-
-static const size_t kNumTransforms =
- sizeof(kTransforms) / sizeof(kTransforms[0]);
-
-static const size_t kOmitLastNTransforms[10] = {
- 0, 12, 27, 23, 42, 63, 56, 48, 59, 64,
-};
-
-static size_t ToUpperCase(uint8_t *p, size_t len) {
- if (len == 1 || p[0] < 0xc0) {
- if (p[0] >= 'a' && p[0] <= 'z') {
- p[0] ^= 32;
- }
- return 1;
- }
- if (p[0] < 0xe0) {
- p[1] ^= 32;
- return 2;
- }
- if (len == 2) {
- return 2;
- }
- p[2] ^= 5;
- return 3;
-}
-
-inline std::string TransformWord(
- WordTransformType transform_type, const uint8_t* word, size_t len) {
- if (transform_type <= kOmitLast9) {
- if (len <= static_cast<size_t>(transform_type)) {
- return std::string();
- }
- return std::string(word, word + len - transform_type);
- }
-
- if (transform_type >= kOmitFirst1) {
- const size_t skip = transform_type - (kOmitFirst1 - 1);
- if (len <= skip) {
- return std::string();
- }
- return std::string(word + skip, word + len);
- }
-
- std::string ret = std::string(word, word + len);
- uint8_t *uppercase = reinterpret_cast<uint8_t*>(&ret[0]);
- if (transform_type == kUppercaseFirst) {
- ToUpperCase(uppercase, len);
- } else if (transform_type == kUppercaseAll) {
- size_t position = 0;
- while (position < len) {
- size_t step = ToUpperCase(uppercase, len - position);
- uppercase += step;
- position += step;
- }
- }
- return ret;
-}
-
-inline std::string ApplyTransform(
- const Transform& t, const uint8_t* word, size_t len) {
- return std::string(t.prefix) +
- TransformWord(t.word_transform, word, len) + std::string(t.suffix);
-}
-
-inline std::string GetTransformedDictionaryWord(size_t len_code,
- size_t word_id) {
- size_t num_words = 1u << kBrotliDictionarySizeBitsByLength[len_code];
- size_t offset = kBrotliDictionaryOffsetsByLength[len_code];
- size_t t = word_id / num_words;
- size_t word_idx = word_id % num_words;
- offset += len_code * word_idx;
- const uint8_t* word = &kBrotliDictionary[offset];
- return ApplyTransform(kTransforms[t], word, len_code);
-}
-
-} // namespace brotli
-
-#endif // BROTLI_ENC_TRANSFORM_H_
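
Aside: the removed header expanded a dictionary word as prefix + transformed word + suffix, where the transform omits leading or trailing bytes or uppercases. Since the std::string version is gone, a plain-C sketch of just the kOmitLastN case (illustrative helper, not a replacement for the file):

    #include <stdio.h>
    #include <string.h>

    static size_t omit_last(const char* word, size_t len, size_t n, char* out) {
      size_t out_len = (len > n) ? len - n : 0;  /* drop the last n bytes */
      memcpy(out, word, out_len);
      out[out_len] = '\0';
      return out_len;
    }

    int main(void) {
      char buf[16];
      omit_last("working", 7, 3, buf);
      printf("%s\n", buf);   /* prints "work", i.e. kOmitLast3 */
      return 0;
    }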
diff --git a/enc/utf8_util.c b/enc/utf8_util.c
index a5b0d2c..521ce29 100644
--- a/enc/utf8_util.c
+++ b/enc/utf8_util.c
@@ -10,11 +10,12 @@
#include "../common/types.h"
-namespace brotli {
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
-namespace {
-
-size_t ParseAsUTF8(int* symbol, const uint8_t* input, size_t size) {
+static size_t BrotliParseAsUTF8(
+ int* symbol, const uint8_t* input, size_t size) {
/* ASCII */
if ((input[0] & 0x80) == 0) {
*symbol = input[0];
@@ -63,21 +64,21 @@ size_t ParseAsUTF8(int* symbol, const uint8_t* input, size_t size) {
return 1;
}
-} // namespace
-
 /* Returns 1 if at least min_fraction of the data is UTF8-encoded. */
-bool IsMostlyUTF8(const uint8_t* data, const size_t pos, const size_t mask,
- const size_t length, const double min_fraction) {
+int BrotliIsMostlyUTF8(const uint8_t* data, const size_t pos,
+ const size_t mask, const size_t length, const double min_fraction) {
size_t size_utf8 = 0;
size_t i = 0;
while (i < length) {
int symbol;
- size_t bytes_read = ParseAsUTF8(
- &symbol, &data[(pos + i) & mask], length - i);
+ size_t bytes_read =
+ BrotliParseAsUTF8(&symbol, &data[(pos + i) & mask], length - i);
i += bytes_read;
if (symbol < 0x110000) size_utf8 += bytes_read;
}
- return size_utf8 > min_fraction * static_cast<double>(length);
+ return (size_utf8 > min_fraction * (double)length) ? 1 : 0;
}
-} // namespace brotli
+#if defined(__cplusplus) || defined(c_plusplus)
+} /* extern "C" */
+#endif
diff --git a/enc/utf8_util.h b/enc/utf8_util.h
index e5ed876..a5fb7bb 100644
--- a/enc/utf8_util.h
+++ b/enc/utf8_util.h
@@ -10,17 +10,23 @@
#define BROTLI_ENC_UTF8_UTIL_H_
#include "../common/types.h"
+#include "./port.h"
-namespace brotli {
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
static const double kMinUTF8Ratio = 0.75;
/* Returns 1 if at least min_fraction of the bytes between pos and
pos + length in the (data, mask) ringbuffer is UTF8-encoded, otherwise
returns 0. */
-bool IsMostlyUTF8(const uint8_t* data, const size_t pos, const size_t mask,
- const size_t length, const double min_fraction);
+BROTLI_INTERNAL int BrotliIsMostlyUTF8(
+ const uint8_t* data, const size_t pos, const size_t mask,
+ const size_t length, const double min_fraction);
-} // namespace brotli
+#if defined(__cplusplus) || defined(c_plusplus)
+} /* extern "C" */
+#endif
#endif /* BROTLI_ENC_UTF8_UTIL_H_ */
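
A hedged usage sketch of the predicate above: for a flat buffer, pos can be 0 and the mask all ones, since masking only matters when the data lives in a ring buffer. The wrapper name is invented:

    #include <stddef.h>
    #include "../common/types.h"
    #include "./utf8_util.h"

    static int looks_like_text(const uint8_t* data, size_t length) {
      /* Flat buffer: pos 0 and an all-ones mask make (pos + i) & mask == i. */
      return BrotliIsMostlyUTF8(data, 0, (size_t)-1, length, kMinUTF8Ratio);
    }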
diff --git a/enc/write_bits.h b/enc/write_bits.h
index 1358da4..a3133af 100644
--- a/enc/write_bits.h
+++ b/enc/write_bits.h
@@ -15,7 +15,9 @@
#include "../common/types.h"
#include "./port.h"
-namespace brotli {
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
/*#define BIT_WRITER_DEBUG */
@@ -34,15 +36,10 @@ namespace brotli {
For n bits, we take the last 5 bits, OR that with high bits in BYTE-0,
and locate the rest in BYTE+1, BYTE+2, etc. */
-inline void WriteBits(size_t n_bits,
- uint64_t bits,
- size_t * __restrict pos,
- uint8_t * __restrict array) {
-#ifdef BIT_WRITER_DEBUG
- printf("WriteBits %2d 0x%016llx %10d\n", n_bits, bits, *pos);
-#endif
- assert((bits >> n_bits) == 0);
- assert(n_bits <= 56);
+static BROTLI_INLINE void BrotliWriteBits(size_t n_bits,
+ uint64_t bits,
+ size_t * BROTLI_RESTRICT pos,
+ uint8_t * BROTLI_RESTRICT array) {
#ifdef IS_LITTLE_ENDIAN
/* This branch of the code can write up to 56 bits at a time,
7 bits are lost by being perhaps already in *p and at least
@@ -51,6 +48,11 @@ inline void WriteBits(size_t n_bits,
access a byte that was never initialized). */
uint8_t *p = &array[*pos >> 3];
uint64_t v = *p;
+#ifdef BIT_WRITER_DEBUG
+ printf("WriteBits %2d 0x%016llx %10d\n", n_bits, bits, *pos);
+#endif
+ assert((bits >> n_bits) == 0);
+ assert(n_bits <= 56);
v |= bits << (*pos & 7);
BROTLI_UNALIGNED_STORE64(p, v); /* Set some bits. */
*pos += n_bits;
@@ -59,19 +61,20 @@ inline void WriteBits(size_t n_bits,
uint8_t *array_pos = &array[*pos >> 3];
const size_t bits_reserved_in_first_byte = (*pos & 7);
bits <<= bits_reserved_in_first_byte;
- *array_pos++ |= static_cast<uint8_t>(bits);
+ *array_pos++ |= (uint8_t)bits;
for (size_t bits_left_to_write = n_bits + bits_reserved_in_first_byte;
bits_left_to_write >= 9;
bits_left_to_write -= 8) {
bits >>= 8;
- *array_pos++ = static_cast<uint8_t>(bits);
+ *array_pos++ = (uint8_t)bits;
}
*array_pos = 0;
*pos += n_bits;
#endif
}
-inline void WriteBitsPrepareStorage(size_t pos, uint8_t *array) {
+static BROTLI_INLINE void BrotliWriteBitsPrepareStorage(
+ size_t pos, uint8_t *array) {
#ifdef BIT_WRITER_DEBUG
printf("WriteBitsPrepareStorage %10d\n", pos);
#endif
@@ -79,6 +82,8 @@ inline void WriteBitsPrepareStorage(size_t pos, uint8_t *array) {
array[pos >> 3] = 0;
}
-} // namespace brotli
+#if defined(__cplusplus) || defined(c_plusplus)
+} /* extern "C" */
+#endif
#endif /* BROTLI_ENC_WRITE_BITS_H_ */
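
Aside: the little-endian branch above works by loading 8 bytes at the write cursor, ORing in the new bits shifted by the bit offset, and storing back, which is why a single call is capped at 56 bits (up to 7 bits may already occupy the first byte). A standalone model of that path, assuming a little-endian host and zero-initialized storage (names invented):

    #include <assert.h>
    #include <stdint.h>
    #include <string.h>

    static void write_bits(size_t n_bits, uint64_t bits, size_t* pos,
                           uint8_t* array) {
      uint8_t* p = &array[*pos >> 3];
      uint64_t v;
      memcpy(&v, p, sizeof(v));       /* bytes past the cursor must be zero */
      v |= bits << (*pos & 7);        /* up to 7 bits already occupy *p */
      memcpy(p, &v, sizeof(v));       /* native-order store sets the bits */
      *pos += n_bits;
    }

    int main(void) {
      uint8_t buf[16] = { 0 };
      size_t pos = 0;
      write_bits(3, 0x5, &pos, buf);   /* 101, written LSB first */
      write_bits(5, 0x1f, &pos, buf);  /* 11111 */
      assert(pos == 8);
      assert(buf[0] == 0xfd);          /* 11111 then 101, packed from the LSB */
      return 0;
    }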
diff --git a/python/tests/roundtrip_test.py b/python/tests/roundtrip_test.py
index 293fb1e..4cb7462 100755
--- a/python/tests/roundtrip_test.py
+++ b/python/tests/roundtrip_test.py
@@ -12,7 +12,7 @@ testdata/alice29.txt
testdata/asyoulik.txt
testdata/lcet10.txt
testdata/plrabn12.txt
-../enc/encode.cc
+../enc/encode.c
../common/dictionary.h
../dec/decode.c
%s
diff --git a/tests/roundtrip_test.sh b/tests/roundtrip_test.sh
index 20867bf..6bda135 100755
--- a/tests/roundtrip_test.sh
+++ b/tests/roundtrip_test.sh
@@ -10,7 +10,7 @@ testdata/alice29.txt
testdata/asyoulik.txt
testdata/lcet10.txt
testdata/plrabn12.txt
-../enc/encode.cc
+../enc/encode.c
../common/dictionary.h
../dec/decode.c
$BRO
diff --git a/tools/bro.cc b/tools/bro.cc
index b254f0f..8b3122a 100644
--- a/tools/bro.cc
+++ b/tools/bro.cc
@@ -15,9 +15,10 @@
#include <cstring>
#include <ctime>
#include <string>
+#include <vector>
#include "../dec/decode.h"
-#include "../enc/compressor.h"
+#include "../enc/encode.h"
#if !defined(_WIN32)
#include <unistd.h>
@@ -52,7 +53,6 @@ static inline int ms_open(const char *filename, int oflag, int pmode) {
}
#endif /* WIN32 */
-
static bool ParseQuality(const char* s, int* quality) {
if (s[0] >= '0' && s[0] <= '9') {
*quality = s[0] - '0';
@@ -68,6 +68,7 @@ static bool ParseQuality(const char* s, int* quality) {
static void ParseArgv(int argc, char **argv,
char **input_path,
char **output_path,
+ char **dictionary_path,
int *force,
int *quality,
int *decompress,
@@ -125,6 +126,13 @@ static void ParseArgv(int argc, char **argv,
*output_path = argv[k + 1];
++k;
continue;
+ } else if (!strcmp("--custom-dictionary", argv[k])) {
+ if (*dictionary_path != 0) {
+ goto error;
+ }
+ *dictionary_path = argv[k + 1];
+ ++k;
+ continue;
} else if (!strcmp("--quality", argv[k]) ||
!strcmp("-q", argv[k])) {
if (!ParseQuality(argv[k + 1], quality)) {
@@ -158,7 +166,7 @@ error:
fprintf(stderr,
"Usage: %s [--force] [--quality n] [--decompress]"
" [--input filename] [--output filename] [--repeat iters]"
- " [--verbose] [--window n]\n",
+ " [--verbose] [--window n] [--custom-dictionary filename]\n",
argv[0]);
exit(1);
}
@@ -196,7 +204,7 @@ static FILE *OpenOutputFile(const char *output_path, const int force) {
return fdopen(fd, "wb");
}
-static int64_t FileSize(char *path) {
+static int64_t FileSize(const char *path) {
FILE *f = fopen(path, "rb");
if (f == NULL) {
return -1;
@@ -212,13 +220,50 @@ static int64_t FileSize(char *path) {
return retval;
}
+static std::vector<uint8_t> ReadDictionary(const char* path) {
+ FILE *f = fopen(path, "rb");
+ if (f == NULL) {
+ perror("fopen");
+ exit(1);
+ }
+
+ int64_t file_size = FileSize(path);
+ if (file_size == -1) {
+    fprintf(stderr, "could not get size of dictionary file\n");
+ exit(1);
+ }
+
+ static const int kMaxDictionarySize = (1 << 24) - 16;
+ if (file_size > kMaxDictionarySize) {
+ fprintf(stderr, "dictionary is larger than maximum allowed: %d\n",
+ kMaxDictionarySize);
+ exit(1);
+ }
+
+ std::vector<uint8_t> buffer;
+ buffer.resize(static_cast<size_t>(file_size));
+ size_t bytes_read = fread(buffer.data(), sizeof(uint8_t), buffer.size(), f);
+ if (bytes_read != buffer.size()) {
+ fprintf(stderr, "could not read dictionary\n");
+ exit(1);
+ }
+ fclose(f);
+ return buffer;
+}
+
static const size_t kFileBufferSize = 65536;
-static void Decompresss(FILE* fin, FILE* fout) {
+static int Decompress(FILE* fin, FILE* fout, const char* dictionary_path) {
+  /* The dictionary must be kept alive during the first rounds of decompression, since the decoder reads it in place. */
+ std::vector<uint8_t> dictionary;
BrotliState* s = BrotliCreateState(NULL, NULL, NULL);
if (!s) {
fprintf(stderr, "out of memory\n");
- exit(1);
+ return 0;
+ }
+ if (dictionary_path != NULL) {
+ dictionary = ReadDictionary(dictionary_path);
+ BrotliSetCustomDictionary(dictionary.size(), dictionary.data(), s);
}
uint8_t* input = new uint8_t[kFileBufferSize];
uint8_t* output = new uint8_t[kFileBufferSize];
@@ -259,47 +304,109 @@ static void Decompresss(FILE* fin, FILE* fout) {
BrotliDestroyState(s);
if ((result == BROTLI_RESULT_NEEDS_MORE_OUTPUT) || ferror(fout)) {
fprintf(stderr, "failed to write output\n");
- exit(1);
+ return 0;
} else if (result != BROTLI_RESULT_SUCCESS) { /* Error or needs more input. */
fprintf(stderr, "corrupt input\n");
- exit(1);
+ return 0;
}
+ return 1;
+}
+
+static int Compress(int quality, int lgwin, FILE* fin, FILE* fout,
+ const char *dictionary_path) {
+ BrotliEncoderState* s = BrotliEncoderCreateInstance(0, 0, 0);
+ uint8_t* buffer = reinterpret_cast<uint8_t*>(malloc(kFileBufferSize << 1));
+ uint8_t* input = buffer;
+ uint8_t* output = buffer + kFileBufferSize;
+ size_t available_in = 0;
+ const uint8_t* next_in = NULL;
+ size_t available_out = kFileBufferSize;
+ uint8_t* next_out = output;
+ int is_eof = 0;
+ int is_ok = 1;
+
+ if (!s || !buffer) {
+ is_ok = 0;
+ goto finish;
+ }
+
+ BrotliEncoderSetParameter(s, BROTLI_PARAM_QUALITY, (uint32_t)quality);
+ BrotliEncoderSetParameter(s, BROTLI_PARAM_LGWIN, (uint32_t)lgwin);
+ if (dictionary_path != NULL) {
+ std::vector<uint8_t> dictionary = ReadDictionary(dictionary_path);
+ BrotliEncoderSetCustomDictionary(s, dictionary.size(),
+ reinterpret_cast<const uint8_t*>(dictionary.data()));
+ }
+
+ while (1) {
+ if (available_in == 0 && !is_eof) {
+ available_in = fread(input, 1, kFileBufferSize, fin);
+ next_in = input;
+ if (ferror(fin)) break;
+ is_eof = feof(fin);
+ }
+
+ if (!BrotliEncoderCompressStream(s,
+ is_eof ? BROTLI_OPERATION_FINISH : BROTLI_OPERATION_PROCESS,
+ &available_in, &next_in, &available_out, &next_out, NULL)) {
+ is_ok = 0;
+ break;
+ }
+
+ if (available_out != kFileBufferSize) {
+ size_t out_size = kFileBufferSize - available_out;
+ fwrite(output, 1, out_size, fout);
+ if (ferror(fout)) break;
+ available_out = kFileBufferSize;
+ next_out = output;
+ }
+
+ if (BrotliEncoderIsFinished(s)) break;
+ }
+
+finish:
+ free(buffer);
+ BrotliEncoderDestroyInstance(s);
+
+ if (!is_ok) {
+ /* Should detect OOM? */
+ fprintf(stderr, "failed to compress data\n");
+ return 0;
+ } else if (ferror(fout)) {
+ fprintf(stderr, "failed to write output\n");
+ return 0;
+ } else if (ferror(fin)) {
+ fprintf(stderr, "failed to read input\n");
+ return 0;
+ }
+ return 1;
}
int main(int argc, char** argv) {
char *input_path = 0;
char *output_path = 0;
+ char *dictionary_path = 0;
int force = 0;
int quality = 11;
int decompress = 0;
int repeat = 1;
int verbose = 0;
int lgwin = 0;
- ParseArgv(argc, argv, &input_path, &output_path, &force,
+ ParseArgv(argc, argv, &input_path, &output_path, &dictionary_path, &force,
&quality, &decompress, &repeat, &verbose, &lgwin);
const clock_t clock_start = clock();
for (int i = 0; i < repeat; ++i) {
FILE* fin = OpenInputFile(input_path);
FILE* fout = OpenOutputFile(output_path, force);
+ int is_ok = false;
if (decompress) {
- Decompresss(fin, fout);
+ is_ok = Decompress(fin, fout, dictionary_path);
} else {
- brotli::BrotliParams params;
- params.lgwin = lgwin;
- params.quality = quality;
- try {
- brotli::BrotliFileIn in(fin, 1 << 16);
- brotli::BrotliFileOut out(fout);
- if (!BrotliCompress(params, &in, &out)) {
- fprintf(stderr, "compression failed\n");
- unlink(output_path);
- exit(1);
- }
- } catch (std::bad_alloc&) {
- fprintf(stderr, "not enough memory\n");
- unlink(output_path);
- exit(1);
- }
+ is_ok = Compress(quality, lgwin, fin, fout, dictionary_path);
+ }
+ if (!is_ok) {
+ unlink(output_path);
+ exit(1);
}
if (fclose(fin) != 0) {
perror("fclose");