Diffstat (limited to 'math/s_compoundn_template.c')
-rw-r--r--  math/s_compoundn_template.c  227
1 file changed, 227 insertions, 0 deletions
diff --git a/math/s_compoundn_template.c b/math/s_compoundn_template.c
new file mode 100644
index 0000000..ddbcc14
--- /dev/null
+++ b/math/s_compoundn_template.c
@@ -0,0 +1,227 @@
+/* Return (1+X)^Y for integer Y.
+   Copyright (C) 2025 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>. */
+
+#include <errno.h>
+#include <fenv_private.h>
+#include <limits.h>
+#include <math.h>
+#include <math-barriers.h>
+#include <math-narrow-eval.h>
+#include <math_private.h>
+#include <stdlib.h>
+
+
+/* Calculate X + Y exactly and store the result in *HI + *LO. It is
+   given that |X| >= |Y| and the values are small enough that no
+   overflow occurs. */
+
+static inline void
+add_split (FLOAT *hi, FLOAT *lo, FLOAT x, FLOAT y)
+{
+  /* Apply Dekker's algorithm. */
+  *hi = math_narrow_eval (x + y);
+  *lo = (x - *hi) + y;
+}
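+
+/* Worked example (illustrative): in binary64, add_split applied to
+   x = 1.0 and y = 0x1p-60 stores *HI == 1.0 and *LO == 0x1p-60, so
+   *HI + *LO recovers the part of the sum that a single rounded
+   addition would have discarded. */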
+
+
+/* Store floating-point values that add up to A * (B + C + D) in
+   OUT[0] through OUT[5]. It is given that no overflow or underflow
+   can occur. */
+
+static inline void
+mul3_split (FLOAT *out, FLOAT a, FLOAT b, FLOAT c, FLOAT d)
+{
+  out[0] = a * b;
+  out[1] = M_SUF (fma) (a, b, -out[0]);
+  out[2] = a * c;
+  out[3] = M_SUF (fma) (a, c, -out[2]);
+  out[4] = a * d;
+  out[5] = M_SUF (fma) (a, d, -out[4]);
+}
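+
+/* Each pair OUT[2*I], OUT[2*I + 1] is an exact product split: the fma
+   recovers the rounding error of the preceding multiplication, so
+   OUT[0] + OUT[1] == A * B exactly, and likewise for the other two
+   pairs. */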
+
+
+/* Compare absolute values of floating-point values pointed to by P
+   and Q for qsort. */
+
+static int
+compare (const void *p, const void *q)
+{
+  FLOAT pd = M_FABS (*(const FLOAT *) p);
+  FLOAT qd = M_FABS (*(const FLOAT *) q);
+  if (pd < qd)
+    return -1;
+  else if (pd == qd)
+    return 0;
+  else
+    return 1;
+}
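+
+/* Sorting by magnitude lets the summation loops below pass the
+   larger-magnitude operand to add_split first, as its exactness
+   requires. */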
+
+FLOAT
+M_DECL_FUNC (__compoundn) (FLOAT x, long long int y)
+{
+  FLOAT ret;
+  if (issignaling (x))
+    return x + x;
+  if (isless (x, -M_LIT (1.0)))
+    {
+      __set_errno (EDOM);
+      return (x - x) / (x - x);
+    }
+  if (y == 0)
+    return M_LIT (1.0);
+  if (isnan (x))
+    return x;
+  if (x == -M_LIT (1.0))
+    {
+      if (y > 0)
+        return M_LIT (0.0);
+      else
+        {
+          __set_errno (ERANGE);
+          return M_LIT (1.0) / M_LIT (0.0);
+        }
+    }
+  if (isinf (x))
+    return y > 0 ? x : M_LIT (0.0);
+  if (y == 1)
+    {
+      /* Ensure overflow in FE_UPWARD mode when X is the largest
+         positive finite value. */
+      ret = math_narrow_eval (M_LIT (1.0) + x);
+      if (isinf (ret))
+        __set_errno (ERANGE);
+      return ret;
+    }
+  /* Now we know X is finite and greater than -1, and Y is not 0 or
+     1. */
+  {
+    M_SET_RESTORE_ROUND (FE_TONEAREST);
+    x = math_opt_barrier (x);
+    /* Split 1 + X into high and low parts, where the sign of the low
+       part is the same as the sign of X to avoid possible spurious
+       intermediate overflow or underflow later. */
+    FLOAT xhi, xlo;
+    if (x >= M_LIT (1.0))
+      add_split (&xhi, &xlo, x, M_LIT (1.0));
+    else
+      add_split (&xhi, &xlo, M_LIT (1.0), x);
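+    /* XHI + XLO == 1 + X exactly here: add_split received its
+       larger-magnitude operand first and FE_TONEAREST is in
+       effect. */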
+    if (xlo != M_LIT (0.0) && !!signbit (xlo) != !!signbit (x))
+      {
+        FLOAT xhi_n = (signbit (x)
+                       ? M_SUF (__nextup) (xhi)
+                       : M_SUF (__nextdown) (xhi));
+        xlo += xhi - xhi_n;
+        xhi = xhi_n;
+      }
+    ret = math_narrow_eval (M_SUF (__pown) (xhi, y));
+    /* The result is RET * (1 + XLO/XHI)^Y. Evaluate XLO/XHI with
+       extra precision. If XLO/XHI is sufficiently small, the extra
+       factor is not needed and internal underflow should be avoided.
+       If the calculation of RET has overflowed or underflowed, avoid
+       calculations of the extra factor that might end up as
+       Inf*0. */
+    _Static_assert (-M_MANT_DIG - 65 > M_MIN_EXP,
+                    "no underflow from dividing EPSILON by long long");
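+    /* If |XLO / XHI| < EPSILON * 0x1p-65, then |Y * XLO / XHI|
+       < 0x1p63 * EPSILON * 0x1p-65 = EPSILON / 4, so skipping the
+       extra factor perturbs the result by well under one ulp. */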
+    if (!isinf (ret)
+        && ret != M_LIT (0.0)
+        && (xhi >= M_LIT (1.0)
+            ? M_FABS (xlo) >= xhi * (M_EPSILON * M_LIT (0x1p-65))
+            : xhi <= M_FABS (xlo) / (M_EPSILON * M_LIT (0x1p-65))))
+      {
+        FLOAT qhi, qlo, nqhi2;
+        qhi = xlo / xhi;
+        FLOAT xlo_rem = M_SUF (fma) (-qhi, xhi, xlo);
+        if (xhi >= M_LIT (1.0)
+            ? M_FABS (xlo_rem) >= xhi * (M_EPSILON * M_LIT (0x1p-65))
+            : xhi <= M_FABS (xlo_rem) / (M_EPSILON * M_LIT (0x1p-65)))
+          qlo = xlo_rem / xhi;
+        else
+          qlo = M_LIT (0.0);
+        if (M_FABS (qhi) >= M_EPSILON * M_LIT (0x1p-33))
+          nqhi2 = qhi * qhi * -M_LIT (0.5);
+        else
+          nqhi2 = M_LIT (0.0);
+        /* To sufficient precision, log1p(XLO/XHI) is QHI + QLO + NQHI2. */
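+        /* This uses log1p (q) = q - q*q/2 + q*q*q/3 - ... with
+           q = QHI + QLO; only QHI matters for the quadratic term, and
+           the cubic and later terms are below the precision needed
+           here. */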
+#define NUM_PARTS ((LLONG_WIDTH + M_MANT_DIG - 1) / M_MANT_DIG)
+        _Static_assert (NUM_PARTS <= 3,
+                        "long long fits in at most three FLOATs");
+        FLOAT parts[NUM_PARTS * 6];
+        FLOAT ypart;
+        ypart = y;
+        mul3_split (parts, ypart, qhi, qlo, nqhi2);
+#if NUM_PARTS >= 2
+        y -= ypart;
+        ypart = y;
+        mul3_split (parts + 6, ypart, qhi, qlo, nqhi2);
+#endif
+#if NUM_PARTS >= 3
+        y -= ypart;
+        ypart = y;
+        mul3_split (parts + 12, ypart, qhi, qlo, nqhi2);
+#endif
+        qsort (parts, NUM_PARTS * 6, sizeof (FLOAT), compare);
+        /* Add up the values so that each element of PARTS has
+           absolute value at most equal to the last set bit of the
+           next nonzero element. */
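+        /* Each add_split call below receives its larger-magnitude
+           operand first (the array is kept sorted), so every pairwise
+           sum is exact and the total of the elements is preserved. */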
+        for (size_t i = 0; i <= NUM_PARTS * 6 - 2; i++)
+          {
+            add_split (&parts[i + 1], &parts[i], parts[i + 1], parts[i]);
+            qsort (parts + i + 1, NUM_PARTS * 6 - 1 - i, sizeof (FLOAT),
+                   compare);
+          }
+        /* Add up the values in the other direction, so that each
+           element of PARTS has absolute value less than NUM_PARTS * 3
+           ulp of the next value. */
+        size_t dstpos = NUM_PARTS * 6 - 1;
+        for (size_t i = 1; i <= NUM_PARTS * 6 - 1; i++)
+          {
+            if (parts[dstpos] == M_LIT (0.0))
+              {
+                parts[dstpos] = parts[NUM_PARTS * 6 - 1 - i];
+                parts[NUM_PARTS * 6 - 1 - i] = M_LIT (0.0);
+              }
+            else
+              {
+                add_split (&parts[dstpos], &parts[NUM_PARTS * 6 - 1 - i],
+                           parts[dstpos], parts[NUM_PARTS * 6 - 1 - i]);
+                if (parts[NUM_PARTS * 6 - 1 - i] != M_LIT (0.0))
+                  {
+                    if (NUM_PARTS * 6 - 1 - i < dstpos - 1)
+                      {
+                        parts[dstpos - 1] = parts[NUM_PARTS * 6 - 1 - i];
+                        parts[NUM_PARTS * 6 - 1 - i] = M_LIT (0.0);
+                      }
+                    dstpos--;
+                  }
+              }
+          }
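+        /* Only the two largest parts still matter; the rest are
+           negligible by the bound above. Since
+           exp (A + B) == exp (A) * exp (B), the correction factor
+           (1 + XLO/XHI)^Y is applied as a product of two
+           exponentials. */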
+        ret *= (M_SUF (__exp) (parts[NUM_PARTS * 6 - 1])
+                * M_SUF (__exp) (parts[NUM_PARTS * 6 - 2]));
+      }
+    ret = math_narrow_eval (ret);
+    math_force_eval (ret);
+  }
+  if (isinf (ret))
+    ret = math_narrow_eval (M_MAX * M_MAX);
+  else if (ret == M_LIT (0.0))
+    ret = math_narrow_eval (M_MIN * M_MIN);
+  if (isinf (ret) || ret == M_LIT (0.0))
+    __set_errno (ERANGE);
+  return ret;
+}
+declare_mgen_alias (__compoundn, compoundn);