1 files changed, 227 insertions, 0 deletions
diff --git a/math/s_compoundn_template.c b/math/s_compoundn_template.c
new file mode 100644
index 0000000..ddbcc14
--- /dev/null
+++ b/math/s_compoundn_template.c
@@ -0,0 +1,227 @@
+/* Return (1+X)^Y for integer Y.
+   Copyright (C) 2025 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include <errno.h>
+#include <fenv_private.h>
+#include <limits.h>
+#include <math.h>
+#include <math-barriers.h>
+#include <math-narrow-eval.h>
+#include <math_private.h>
+#include <stdlib.h>
+
+
+/* Calculate X + Y exactly and store the result in *HI + *LO.  It is
+   given that |X| >= |Y| and the values are small enough that no
+   overflow occurs.  */
+
+static inline void
+add_split (FLOAT *hi, FLOAT *lo, FLOAT x, FLOAT y)
+{
+  /* Apply Dekker's algorithm.  */
+  *hi = math_narrow_eval (x + y);
+  *lo = (x - *hi) + y;
+}
+
+
+/* Store floating-point values that add up to A * (B + C + D) in
+   OUT[0] through OUT[5].  It is given that no overflow or underflow
+   can occur.  */
+
+static inline void
+mul3_split (FLOAT *out, FLOAT a, FLOAT b, FLOAT c, FLOAT d)
+{
+  out[0] = a * b;
+  out[1] = M_SUF (fma) (a, b, -out[0]);
+  out[2] = a * c;
+  out[3] = M_SUF (fma) (a, c, -out[2]);
+  out[4] = a * d;
+  out[5] = M_SUF (fma) (a, d, -out[4]);
+}
+
+
+/* Compare absolute values of floating-point values pointed to by P
+   and Q for qsort.  */
+
+static int
+compare (const void *p, const void *q)
+{
+  FLOAT pd = fabs (*(const FLOAT *) p);
+  FLOAT qd = fabs (*(const FLOAT *) q);
+  if (pd < qd)
+    return -1;
+  else if (pd == qd)
+    return 0;
+  else
+    return 1;
+}
+
+FLOAT
+M_DECL_FUNC (__compoundn) (FLOAT x, long long int y)
+{
+  FLOAT ret;
+  if (issignaling (x))
+    return x + x;
+  if (isless (x, -M_LIT (1.0)))
+    {
+      __set_errno (EDOM);
+      return (x - x) / (x - x);
+    }
+  if (y == 0)
+    return M_LIT (1.0);
+  if (isnan (x))
+    return x;
+  if (x == -M_LIT (1.0))
+    {
+      if (y > 0)
+	return M_LIT (0.0);
+      else
+	{
+	  __set_errno (ERANGE);
+	  return M_LIT (1.0) / M_LIT (0.0);
+	}
+    }
+  if (isinf (x))
+    return y > 0 ? x : M_LIT (0.0);
+  if (y == 1)
+    {
+      /* Ensure overflow in FE_UPWARD mode when X is the largest
+	 positive finite value.  */
+      ret = math_narrow_eval (M_LIT (1.0) + x);
+      if (isinf (ret))
+	__set_errno (ERANGE);
+      return ret;
+    }
+  /* Now we know X is finite and greater than -1, and Y is not 0 or
+     1.  */
+  {
+    M_SET_RESTORE_ROUND (FE_TONEAREST);
+    x = math_opt_barrier (x);
+    /* Split 1 + X into high and low parts, where the sign of the low
+       part is the same as the sign of X to avoid possible spurious
+       intermediate overflow or underflow later.  */
+    FLOAT xhi, xlo;
+    if (x >= M_LIT (1.0))
+      add_split (&xhi, &xlo, x, M_LIT (1.0));
+    else
+      add_split (&xhi, &xlo, M_LIT (1.0), x);
+    if (xlo != M_LIT (0.0) && !!signbit (xlo) != !!signbit (x))
+      {
+	FLOAT xhi_n = (signbit (x)
+		       ? M_SUF (__nextup) (xhi)
+		       : M_SUF (__nextdown) (xhi));
+	xlo += xhi - xhi_n;
+	xhi = xhi_n;
+      }
+    ret = math_narrow_eval (M_SUF (__pown) (xhi, y));
+    /* The result is RET * (1 + XLO/XHI)^Y.  Evaluate XLO/XHI with
+       extra precision.  If XLO/XHI is sufficiently small, the extra
+       factor is not needed and internal underflow should be avoided.
+       If the calculation of RET has overflowed or underflowed, avoid
+       calculations of the extra factor that might end up as
+       Inf*0.  */
+    _Static_assert (-M_MANT_DIG - 65 > M_MIN_EXP,
+		    "no underflow from dividing EPSILON by long long");
+    if (!isinf (ret)
+	&& ret != M_LIT (0.0)
+	&& (xhi >= M_LIT (1.0)
+	    ? M_FABS (xlo) >= xhi * (M_EPSILON * M_LIT (0x1p-65))
+	    : xhi <= M_FABS (xlo) / (M_EPSILON * M_LIT (0x1p-65))))
+      {
+	FLOAT qhi, qlo, nqhi2;
+	qhi = xlo / xhi;
+	FLOAT xlo_rem = M_SUF (fma) (-qhi, xhi, xlo);
+	if (xhi >= M_LIT (1.0)
+	    ? M_FABS (xlo_rem) >= xhi * (M_EPSILON * M_LIT (0x1p-65))
+	    : xhi <= M_FABS (xlo_rem) / (M_EPSILON * M_LIT (0x1p-65)))
+	  qlo = xlo_rem / xhi;
+	else
+	  qlo = M_LIT (0.0);
+	if (M_FABS (qhi) >= M_EPSILON * M_LIT (0x1p-33))
+	  nqhi2 = qhi * qhi * -M_LIT (0.5);
+	else
+	  nqhi2 = M_LIT (0.0);
+	/* To sufficient precision, log1p(XLO/XHI) is QHI + QLO + NQHI2.  */
+#define NUM_PARTS ((LLONG_WIDTH + M_MANT_DIG - 1) / M_MANT_DIG)
+	_Static_assert (NUM_PARTS <= 3,
+			"long long fits in at most three FLOATs");
+	FLOAT parts[NUM_PARTS * 6];
+	FLOAT ypart;
+	ypart = y;
+	mul3_split (parts, ypart, qhi, qlo, nqhi2);
+#if NUM_PARTS >= 2
+	y -= ypart;
+	ypart = y;
+	mul3_split (parts + 6, ypart, qhi, qlo, nqhi2);
+#endif
+#if NUM_PARTS >= 3
+	y -= ypart;
+	ypart = y;
+	mul3_split (parts + 12, ypart, qhi, qlo, nqhi2);
+#endif
+	qsort (parts, NUM_PARTS * 6, sizeof (FLOAT), compare);
+	/* Add up the values so that each element of PARTS has
+	   absolute value at most equal to the last set bit of the
+	   next nonzero element.  */
+	for (size_t i = 0; i <= NUM_PARTS * 6 - 2; i++)
+	  {
+	    add_split (&parts[i + 1], &parts[i], parts[i + 1], parts[i]);
+	    qsort (parts + i + 1, NUM_PARTS * 6 - 1 - i, sizeof (FLOAT),
+		   compare);
+	  }
+	/* Add up the values in the other direction, so that each
+	   element of PARTS has absolute value less than NUM_PARTS * 3
+	   ulp of the next value.  */
+	size_t dstpos = NUM_PARTS * 6 - 1;
+	for (size_t i = 1; i <= NUM_PARTS * 6 - 1; i++)
+	  {
+	    if (parts[dstpos] == M_LIT (0.0))
+	      {
+		parts[dstpos] = parts[NUM_PARTS * 6 - 1 - i];
+		parts[NUM_PARTS * 6 - 1 - i] = M_LIT (0.0);
+	      }
+	    else
+	      {
+		add_split (&parts[dstpos], &parts[NUM_PARTS * 6 - 1 - i],
+			   parts[dstpos], parts[NUM_PARTS * 6 - 1 - i]);
+		if (parts[NUM_PARTS * 6 - 1 - i] != M_LIT (0.0))
+		  {
+		    if (NUM_PARTS * 6 - 1 - i < dstpos - 1)
+		      {
+			parts[dstpos - 1] = parts[NUM_PARTS * 6 - 1 - i];
+			parts[NUM_PARTS * 6 - 1 - i] = M_LIT (0.0);
+		      }
+		    dstpos--;
+		  }
+	      }
+	  }
+	ret *= (M_SUF (__exp) (parts[NUM_PARTS * 6 - 1])
+		* M_SUF (__exp) (parts[NUM_PARTS * 6 - 2]));
+      }
+    ret = math_narrow_eval (ret);
+    math_force_eval (ret);
+  }
+  if (isinf (ret))
+    ret = math_narrow_eval (M_MAX * M_MAX);
+  else if (ret == M_LIT (0.0))
+    ret = math_narrow_eval (M_MIN * M_MIN);
+  if (isinf (ret) || ret == M_LIT (0.0))
+    __set_errno (ERANGE);
+  return ret;
+}
+declare_mgen_alias (__compoundn, compoundn);