From 58cecf1783c1a814f282f859609721fefebf74aa Mon Sep 17 00:00:00 2001 From: Ivan Nikulin Date: Thu, 15 Sep 2016 10:44:19 +0200 Subject: Add distance encoding research tools. --- research/draw_histogram.cc | 201 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 201 insertions(+) create mode 100644 research/draw_histogram.cc (limited to 'research/draw_histogram.cc') diff --git a/research/draw_histogram.cc b/research/draw_histogram.cc new file mode 100644 index 0000000..6560f66 --- /dev/null +++ b/research/draw_histogram.cc @@ -0,0 +1,201 @@ +/* Copyright 2016 Google Inc. All Rights Reserved. + Author: vanickulin@google.com (Ivan Nikulin) + + Distributed under MIT license. + See file LICENSE for detail or copy at https://opensource.org/licenses/MIT +*/ + +/* Backward reference visualization tool. Accepts file with backward references + as an input and produces PGM image with histogram of those references. */ + +#include /* min */ +#include +#include /* memset */ +#include /* log, round */ +#include /* fscanf, fprintf */ +#include + +#include "./read_dist.h" + +const int FLAGS_height = 1000; // Height of the resulting histogam. +const int FLAGS_width = 1000; // Width of the resulting histogam. +int FLAGS_size; // Size of the compressed file. +const int FLAGS_brotli_window = 0; // Size of brotli window in bits. +const uint64_t FLAGS_min_distance = 0; // Minimum distance. +uint64_t FLAGS_max_distance = 0; // Maximum distance. +const bool FLAGS_with_copies = false; // True if input contains copy length. +const bool FLAGS_simple = false; // True if using only black and white pixels. +const bool FLAGS_linear = true; // True if using linear distance mapping. +const uint64_t FLAGS_skip = 0; // Number of bytes to skip. + +inline double DistanceTransform(double x) { + static bool linear = FLAGS_linear; + if (linear) { + return x; + } else { + /* Using log^2 scale because log scale produces big white gap at the bottom + of image. */ + return log(x) * log(x); + } +} + +/* Mapping pixel density on arc function to increase contrast. */ +inline double DensityTransform(double x) { + double z = 255 - x; + return sqrt(255 * 255 - z * z); +} + +inline int GetMaxDistance() { + return FLAGS_max_distance; +} + +void AdjustPosition(int* pos) { + static uint32_t offset = 0; + static int last = 0; + static uint32_t window_size = (1 << FLAGS_brotli_window); + assert(*pos >= 0 && *pos < window_size); + if (*pos < last) { + offset += window_size; + } + last = *pos; + *pos += offset; +} + +void CalculateStuff(FILE* fin, int** stuff) { + int height = FLAGS_height; + int width = FLAGS_width; + int skip = FLAGS_skip; + size_t min_distance = FLAGS_min_distance; + + printf("height = %d, width = %d\n", height, width); + + for (int i = 0; i < height; i++) { + for (int j = 0; j < width; j++) { + stuff[i][j] = 0; + } + } + + int max_pos = FLAGS_size - skip; + double min_dist = min_distance > 0 ? DistanceTransform(min_distance) : 0; + double max_dist = DistanceTransform(GetMaxDistance()) - min_dist; + int copy, pos, distance, x, y; + double dist; + while (ReadBackwardReference(fin, ©, &pos, &distance)) { + if (pos == -1) continue; // In case when only insert is present. + if (distance < min_distance || distance >= GetMaxDistance()) continue; + if (FLAGS_brotli_window != 0) { + AdjustPosition(&pos); + } + if (pos >= skip && distance <= pos) { + pos -= skip; + if (pos >= max_pos) break; + dist = DistanceTransform(static_cast(distance)) - min_dist; + + x = std::min(static_cast(round(dist / max_dist * height)), + height - 1); + y = 1ul * pos * width / max_pos; + if (!(y >= 0 && y < width)) { + printf("pos = %d, max_pos = %d, y = %d\n", pos, max_pos, y); + assert(y >= 0 && y < width); + } + + if (FLAGS_with_copies) { + int right = 1ul * (pos + copy - 1) * width / max_pos; + if (right < 0) { + printf("pos = %d, distance = %d, copy = %d, y = %d, right = %d\n", + pos, distance, copy, y, right); + assert(right >= 0); + } + if (y == right) { + stuff[x][y] += copy; + } else { + int pos2 = static_cast(ceil(1.0 * (y + 1) * max_pos / width)); + stuff[x][y] += pos2 - pos; + for (int i = y + 1; i < right && i < width; ++i) { + stuff[x][i] += max_pos / width; // Sometimes 1 more, but who cares. + } + // Make sure the match doesn't go beyond the image. + if (right < width) { + pos2 = static_cast(ceil(1.0 * right * max_pos / width)); + stuff[x][right] += pos + copy - 1 - pos2 + 1; + } + } + } else { + stuff[x][y]++; + } + } + } +} + +void ConvertToPixels(int** stuff, uint8_t** pixel) { + int height = FLAGS_height; + int width = FLAGS_width; + + int maxs = 0; + for (int i = 0; i < height; i++) { + for (int j = 0; j < width; j++) { + if (maxs < stuff[i][j]) maxs = stuff[i][j]; + } + } + + bool simple = FLAGS_simple; + double max_stuff = static_cast(maxs); + for (int i = 0; i < height; i++) { + for (int j = 0; j < width; j++) { + if (simple) { + pixel[i][j] = stuff[i][j] > 0 ? 0 : 255; + } else { + pixel[i][j] = static_cast( + 255 - DensityTransform(stuff[i][j] / max_stuff * 255)); + } + } + } +} + +void DrawPixels(uint8_t** pixel, FILE* fout) { + int height = FLAGS_height; + int width = FLAGS_width; + + fprintf(fout, "P5\n%d %d\n255\n", width, height); + for (int i = height - 1; i >= 0; i--) { + fwrite(pixel[i], 1, width, fout); + } +} + +int main(int argc, char* argv[]) { + if (argc != 4) { + printf("usage: draw_histogram.cc dist_file input_size output_file\n"); + return 1; + } + + int height = FLAGS_height; + int width = FLAGS_width; + + FILE* fin = fopen(argv[1], "r"); + FILE* fout = fopen(argv[3], "wb"); + + FLAGS_size = atoi(argv[2]); + + if (FLAGS_max_distance == 0) FLAGS_max_distance = FLAGS_size; + + /* The bio of stuff: + 1. Copy length of backward reference. + 2. Number of backward references per pixel/bucket. + */ + uint8_t** pixel = new uint8_t*[height]; + int** stuff = new int*[height]; + for (int i = 0; i < height; i++) { + pixel[i] = new uint8_t[width]; + stuff[i] = new int[width]; + } + + CalculateStuff(fin, stuff); + fclose(fin); + + ConvertToPixels(stuff, pixel); + + DrawPixels(pixel, fout); + fclose(fout); + + return 0; +} -- cgit v1.1