// Copyright (C) 2020-2025 Free Software Foundation, Inc.

// This file is part of GCC.

// GCC is free software; you can redistribute it and/or modify it under
// the terms of the GNU General Public License as published by the Free
// Software Foundation; either version 3, or (at your option) any later
// version.

// GCC is distributed in the hope that it will be useful, but WITHOUT ANY
// WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
// for more details.

// You should have received a copy of the GNU General Public License
// along with GCC; see the file COPYING3.  If not see
// <http://www.gnu.org/licenses/>.

// This file provides functions for punycode conversion
// See https://datatracker.ietf.org/doc/html/rfc3492

#include "rust-system.h"
#include "rust-unicode.h"
#include "optional.h"
#include "selftest.h"

namespace Rust {

// Bootstring parameters, see https://tools.ietf.org/html/rfc3492#section-4.
// The concrete values below are the ones Punycode assigns to them in
// https://tools.ietf.org/html/rfc3492#section-5.

// Number of distinct digit values (encoded as 'a'-'z' and '0'-'9').
constexpr uint32_t BASE = 36;
// Lower bound of the per-digit threshold used by the variable-length
// integer encoding.
constexpr uint32_t TMIN = 1;
// Upper bound of the per-digit threshold.
constexpr uint32_t TMAX = 26;
// Skew term used when computing a new bias in adapt_bias.
constexpr uint32_t SKEW = 38;
// Divisor applied to delta on the very first bias adaptation.
constexpr uint32_t DAMP = 700;
// Bias in effect before any delta has been encoded.
constexpr uint32_t INITIAL_BIAS = 72;
// Starting value of n for the encoder.
constexpr uint32_t INITIAL_N = 128;
// Separates the copied basic code points from the encoded deltas.
constexpr char DELIMITER = '-';

std::string
extract_basic_string (const std::vector<Codepoint> &src)
{
  std::string basic_string;
  for (auto c : src)
    {
      if (c.is_ascii ())
	basic_string += c.as_string ();
    }
  return basic_string;
}

// Bias adaptation function, see
// https://datatracker.ietf.org/doc/html/rfc3492#section-6.1
//
// DELTA is the delta that has just been encoded, N_POINTS is the divisor
// used to scale it (the caller passes the number of code points handled so
// far, including the current one) and IS_FIRST selects the DAMP divisor
// instead of 2 for the initial scaling.  Returns the new bias.
uint32_t
adapt_bias (uint32_t delta, const uint32_t n_points, const bool is_first)
{
  if (is_first)
    delta /= DAMP;
  else
    delta /= 2;
  delta += delta / n_points;

  const uint32_t span = BASE - TMIN;
  const uint32_t limit = span * TMAX / 2;

  // Each round divides delta by SPAN and accounts for it with one more
  // BASE in the result.
  uint32_t k = 0;
  for (; delta > limit; k += BASE)
    delta /= span;

  return k + (span + 1) * delta / (delta + SKEW);
}

// Return LHS - RHS clamped to the range [MIN, MAX].
//
// The result is MIN when the mathematical difference is at most MIN
// (including when it would be negative), MAX when it is at least MAX, and
// the difference itself otherwise.  The lower bound is checked first, so
// MIN wins if both bounds apply.
//
// The comparison is done on the difference rather than on MIN + RHS and
// MAX + RHS so that no intermediate value can wrap around in uint32_t.
uint32_t
clamped_sub (const uint32_t min, const uint32_t lhs, const uint32_t rhs,
	     const uint32_t max)
{
  // A difference that is zero or negative can never exceed MIN, and
  // computing it in unsigned arithmetic would wrap.
  if (lhs <= rhs)
    return min;

  const uint32_t diff = lhs - rhs;
  if (diff <= min)
    return min;
  if (diff >= max)
    return max;
  return diff;
}

uint32_t
min_gt_or_eq (const std::vector<Codepoint> &l, const uint32_t threshold)
{
  uint32_t min = UINT32_MAX;
  for (auto c : l)
    if (c.value >= threshold && c.value < min)
      min = c.value;
  return min;
}

// Map the digit value D to its character: values below 26 are offset by
// 97 (so 0..25 become 'a'..'z' in ASCII) and all other values by 22 (so
// 26..35 become '0'..'9' in ASCII).
char
encode_digit (const uint32_t d)
{
  if (d < 26)
    return static_cast<char> (d + 97);
  return static_cast<char> (d + 22);
}

// Encode INPUT as punycode, following the encoding procedure of
// https://datatracker.ietf.org/doc/html/rfc3492#section-6.3
//
// The ASCII codepoints of INPUT are copied to the front of the result,
// followed by DELIMITER if there is at least one of them; every remaining
// codepoint is then emitted as a variable-length integer.
//
// Returns tl::nullopt when scaling delta for the next codepoint would not
// fit in 32 bits, and the encoded string otherwise.
tl::optional<std::string>
encode_punycode (const Utf8String &input)
{
  std::vector<Codepoint> codepoints = input.get_chars ();

  std::string encoded = extract_basic_string (codepoints);
  const uint32_t basic_count = encoded.size ();
  if (basic_count > 0)
    encoded += DELIMITER;

  uint32_t current = INITIAL_N;
  uint32_t bias = INITIAL_BIAS;
  uint32_t delta = 0;
  // Number of codepoints that have been written to ENCODED so far.
  uint32_t handled = basic_count;

  while (handled < codepoints.size ())
    {
      // Jump straight to the next codepoint value present in the input.
      const uint32_t next = min_gt_or_eq (codepoints, current);
      const uint32_t gap = next - current;

      // Refuse inputs for which delta + gap * (handled + 1) overflows.
      if (gap > ((UINT32_MAX - delta) / (handled + 1)))
	return tl::nullopt;

      delta += gap * (handled + 1);
      current = next;

      for (const auto c : codepoints)
	{
	  if (c.value < current)
	    {
	      delta++;
	      continue;
	    }
	  if (c.value != current)
	    continue;

	  // Encode delta as a variable length integer.  KB walks through
	  // BASE, 2 * BASE, 3 * BASE, ... one step per emitted digit.
	  uint32_t q = delta;
	  uint32_t kb = BASE;
	  while (true)
	    {
	      const uint32_t t = clamped_sub (TMIN, kb, bias, TMAX);
	      if (q < t)
		break;

	      const uint32_t rest = q - t;
	      const uint32_t radix = BASE - t;
	      encoded += encode_digit (t + rest % radix);
	      q = rest / radix;
	      kb += BASE;
	    }
	  encoded += encode_digit (q);

	  bias = adapt_bias (delta, handled + 1, handled == basic_count);
	  delta = 0;
	  handled++;
	}

      delta++;
      current++;
    }

  return {encoded};
}

} // namespace Rust

#if CHECKING_P

namespace selftest {

// Check that punycode-encoding INPUT produces EXPECTED.  INPUT has to be
// convertible by make_utf8_string and encodable by encode_punycode, since
// both results are unwrapped with value ().
void
encode_assert (const std::string &input, const std::string &expected)
{
  const auto parsed = Rust::Utf8String::make_utf8_string (input);
  const auto encoded = Rust::encode_punycode (parsed.value ());
  std::string actual = encoded.value ();
  ASSERT_EQ (actual, expected);
}

// Run encode_assert over a fixed table of inputs and their expected
// punycode encodings, in order.
void
rust_punycode_encode_test ()
{
  struct test_case
  {
    const char *input;
    const char *expected;
  };

  static const test_case cases[] = {
    {"abc", "abc-"},
    {"12345", "12345-"},
    {"香港", "j6w193g"},

    // Examples from https://datatracker.ietf.org/doc/html/rfc3492#section-7.1
    {"ليهمابتكلموشعربي؟", "egbpdaj6bu4bxfgehfvwxn"},
    {"他们为什么不说中文", "ihqwcrb4cv8a8dqg056pqjye"},
    {"他們爲什麽不說中文", "ihqwctvzc91f659drss3x8bo0yb"},
    {"Pročprostěnemluvíčesky", "Proprostnemluvesky-uyb24dma41a"},
  };

  for (const test_case &tc : cases)
    encode_assert (tc.input, tc.expected);
}

} // namespace selftest

#endif // CHECKING_P